I have an application that queries our BQ datasets and stores the results in BQ tables.
My code:
BigQuery bigquery = bigQuery();
TableId destinationTable = TableId.of(datasetName, TableName);
QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder(query)
.setDestinationTable(destinationTable).setWriteDisposition(JobInfo.WriteDisposition.WRITE_APPEND)
.build();
TableResult results = bigquery.query(queryConfig);
While writing the result to the BQ dataset, I want to append a column to every row, something like this:
queryConfig.addNewColumnToEveryRow("ID", "123");
How can I do that?
This should be possible by adding it to your query string.
String query = "SELECT yourOtherFields, 123 AS ID FROM yourSource";
The efficient solution is to change the query itself, as shown in @Brent's answer. The other option, mentioned by @Mikhail, is to post-process the result returned by the query execution. Please refer to the code snippet below for a programmatic way to post-process the result (add a new column) and load the data into BigQuery.
The flow of the program is as follows:
1. Execute the query and obtain the result.
2. Iterate over the result and construct a JSON array.
3. Write the JSON array to a local file in NDJSON format.
4. Load the local file into a BigQuery table by creating a batch load job (implemented below). You can also use the streaming API to load the data.
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.channels.Channels;
import java.nio.file.FileSystems;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.UUID;
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.FormatOptions;
import com.google.cloud.bigquery.Job;
import com.google.cloud.bigquery.JobId;
import com.google.cloud.bigquery.QueryJobConfiguration;
import com.google.cloud.bigquery.TableDataWriteChannel;
import com.google.cloud.bigquery.TableId;
import com.google.cloud.bigquery.TableResult;
import com.google.cloud.bigquery.WriteChannelConfiguration;
import com.google.common.io.Files;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
public class AddNewColumn {
public static void main(String[] args) throws IOException {
runSimpleQuery();
}
public static void runSimpleQuery() throws IOException {
String query = "SELECT corpus, SUM(word_count) as word_count FROM `bigquery-public-data.samples.shakespeare` GROUP BY corpus ORDER BY word_count LIMIT 5;";
simpleQuery(query);
}
public static void simpleQuery(String query) throws IOException {
try {
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests.
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
// Create the query job.
QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder(query).build();
// Execute the query.
TableResult result = bigquery.query(queryConfig);
System.out.println("\nQuery ran successfully");
// Construct JSON array from the individual rows
ArrayList<String> columnNames = new ArrayList<String>();
result.getSchema().getFields().forEach(field -> columnNames.add(field.getName())); // get column names
JsonArray jsonArray = new JsonArray();
result.iterateAll().forEach(rows -> {
JsonObject jsonObject = new JsonObject();
jsonObject.addProperty("ID", 123);
columnNames.forEach(
column -> {
jsonObject.addProperty(column, rows.get(column).getValue().toString());
}
);
jsonArray.add(jsonObject);
});
// Writing JSON array to a temporary file in NDJSON format
FileWriter file = new FileWriter("./tempfile.json");
jsonArray.forEach(jsonElement -> {
try {
file.write(jsonElement.toString());
file.write("\n");
} catch (IOException e) {
e.printStackTrace();
}
});
file.close();
System.out.println("Data written to temporary file.");
// Create a load job to insert data
// TODO: Change the destination dataset and table information.
String datasetName = "MY_DATASET_NAME";
String tableName = "MY_TABLE_NAME";
Path jsonPath = FileSystems.getDefault().getPath(".", "tempfile.json");
insertDataIntoDestinationTable(datasetName, tableName, jsonPath, FormatOptions.json());
} catch (BigQueryException | InterruptedException e) {
System.out.println("Query did not run \n" + e.toString());
}
}
private static void insertDataIntoDestinationTable(String datasetName, String tableName, Path jsonPath, FormatOptions formatOptions) throws InterruptedException, IOException {
try {
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests.
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
TableId tableId = TableId.of(datasetName, tableName);
WriteChannelConfiguration writeChannelConfiguration =
WriteChannelConfiguration.newBuilder(tableId).setFormatOptions(formatOptions).build();
// The location and JobName must be specified; other fields can be auto-detected.
String jobName = "jobId_" + UUID.randomUUID().toString();
JobId jobId = JobId.newBuilder().setLocation("us").setJob(jobName).build();
// Imports a local file into a table.
try (TableDataWriteChannel writer = bigquery.writer(jobId, writeChannelConfiguration);
OutputStream stream = Channels.newOutputStream(writer)) {
Files.copy(jsonPath.toFile(), stream);
}
// Get the Job created by the TableDataWriteChannel and wait for it to complete.
Job job = bigquery.getJob(jobId);
Job completedJob = job.waitFor();
if (completedJob == null) {
System.out.println("Job not executed since it no longer exists.");
return;
} else if (completedJob.getStatus().getError() != null) {
System.out.println(
"BigQuery was unable to load local file to the table due to an error: \n"
+ job.getStatus().getError());
return;
}
} catch (BigQueryException e) {
System.out.println("Local file not loaded. \n" + e.toString());
}
}
}
Output:
The query results have been successfully inserted into the destination table.
Related
I am trying to use AWS EMR to make a word counter.
What I currently have is WordCount.java, which takes my input text and runs a MapReduce job on AWS EMR. I want to know if it is possible for the word count to output only specific words from a text file I stored in S3.
For example, I only want the words "the", "she", and "he". I want to output only the total counts of these three words instead of the counts of every word in my input text file.
WordCount.java
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class WordCount {
public static class Map
extends Mapper<LongWritable, Text, Text, IntWritable>{
private final static IntWritable one = new IntWritable(1); // type of output value
private Text word = new Text(); // type of output key
public void map(LongWritable key, Text value, Context context
) throws IOException, InterruptedException {
StringTokenizer itr = new StringTokenizer(value.toString()); // line to string token
while (itr.hasMoreTokens()) {
word.set(itr.nextToken()); // set word as each input keyword
context.write(word, one); // create a pair <keyword, 1>
}
}
}
public static class Reduce
extends Reducer<Text,IntWritable,Text,IntWritable> {
private IntWritable result = new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values,
Context context
) throws IOException, InterruptedException {
int sum = 0; // initialize the sum for each keyword
for (IntWritable val : values) {
sum += val.get();
}
result.set(sum);
context.write(key, result); // create a pair <keyword, number of occurences>
}
}
// Driver program
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); // get all args
if (otherArgs.length != 2) {
System.err.println("Usage: WordCount <in> <out>");
System.exit(2);
}
// create a job with name "wordcount"
Job job = new Job(conf, "wordcount");
job.setJarByClass(WordCount.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
// use the Reducer class as the Combiner as well
job.setCombinerClass(Reduce.class);
// set output key type
job.setOutputKeyClass(Text.class);
// set output value type
job.setOutputValueClass(IntWritable.class);
//set the HDFS path of the input data
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
// set the HDFS path for the output
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
//Wait till job completion
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
This is what I intend to use to read my S3 text file containing the words I want to output. I have no idea how to continue from here. How can I output only the desired words?
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.GetObjectRequest;
import software.amazon.awssdk.services.s3.model.GetObjectResponse;
import software.amazon.awssdk.core.ResponseInputStream;
import java.io.BufferedReader;
import java.io.InputStreamReader;
try {
Region region = Region.US_EAST_1;
S3Client s3 = S3Client.builder()
.region(region)
.build();
GetObjectRequest request = GetObjectRequest.builder()
.bucket(dictPath)
.key(DictFile)
.build();
ResponseInputStream<GetObjectResponse> s3objectResponse =
s3.getObject(request);
BufferedReader reader = new BufferedReader(new
InputStreamReader(s3objectResponse));
String line;
while ((line = reader.readLine()) != null) {
// System.out.println(line);
dict.add(line.toLowerCase());
}
reader.close();
s3.close();
} catch (Exception e) {
System.err.println(e.getMessage());
System.exit(1);
}
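One way to continue (a sketch, not an official answer): keep the desired words in a Set and have the Mapper emit a token only when it is in that set. Here the set is hard-coded with "the", "she", "he" for illustration; in practice it could be filled from the dict list built by the S3 code above, for example in the Mapper's setup() method, with the bucket and key passed through the job Configuration.

public static class FilteringMap
        extends Mapper<LongWritable, Text, Text, IntWritable> {
    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();
    // Hypothetical hard-coded word list; in practice populate this in setup()
    // from the S3 object read in the snippet above.
    private final java.util.Set<String> targetWords =
            new java.util.HashSet<>(java.util.Arrays.asList("the", "she", "he"));

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            String token = itr.nextToken().toLowerCase();
            if (targetWords.contains(token)) { // emit only the desired words
                word.set(token);
                context.write(word, one);
            }
        }
    }
}

The Reducer and driver stay the same; only the Mapper filters, so the output contains counts for just those words.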
I'm following the AWS documentation about how to connect to Redshift: [generating user credentials][1].
But the get-cluster-credentials API requires a cluster identifier parameter, which I don't have for a serverless endpoint. What ID should I use?
EDIT:
[![enter image description here][2]][2]
This is the screen of a serverless endpoint dashboard. There is no cluster ID.
[1]: https://docs.aws.amazon.com/redshift/latest/mgmt/generating-user-credentials.html
[2]: https://i.stack.imgur.com/VzvIs.png
Look at this newer guide, which covers connecting to Amazon Redshift Serverless: https://docs.aws.amazon.com/redshift/latest/mgmt/serverless-connecting.html
This part answers your question:
Connecting to the serverless endpoint with the Data API
You can also use the Amazon Redshift Data API to connect to the serverless endpoint. Leave off the cluster-identifier parameter in your AWS CLI calls to route your query to the serverless endpoint.
UPDATE
I wanted to test this to make sure that a successful connection can be made. I followed this doc to set up a Serverless instance.
Get started with Amazon Redshift Serverless
I loaded the sample data.
Then I attempted to connect to it using software.amazon.awssdk.services.redshiftdata.RedshiftDataClient.
The Java V2 code:
try {
ExecuteStatementRequest statementRequest = ExecuteStatementRequest.builder()
.database(database)
.sql(sqlStatement)
.build();
ExecuteStatementResponse response = redshiftDataClient.executeStatement(statementRequest);
return response.id();
} catch (RedshiftDataException e) {
System.err.println(e.getMessage());
System.exit(1);
}
return "";
}
Notice there is no cluster ID or user, only a database name (sample_data_dev). The call worked perfectly.
Here is the full code example that successfully queries data from a serverless instance using the AWS SDK for Java V2.
package com.example.redshiftdata;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.redshiftdata.model.*;
import software.amazon.awssdk.services.redshiftdata.RedshiftDataClient;
import software.amazon.awssdk.services.redshiftdata.model.DescribeStatementRequest;
import java.util.List;
/**
* To run this Java V2 code example, ensure that you have setup your development environment, including your credentials.
*
* For information, see this documentation topic:
*
* https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/get-started.html
*/
public class RetrieveDataServerless {
public static void main(String[] args) {
final String USAGE = "\n" +
"Usage:\n" +
" RetrieveData <database> <sqlStatement> \n\n" +
"Where:\n" +
" database - the name of the database (for example, sample_data_dev). \n" +
" sqlStatement - the sql statement to use. \n" ;
String database = "sample_data_dev" ;
String sqlStatement = "Select * from tickit.sales" ;
Region region = Region.US_WEST_2;
RedshiftDataClient redshiftDataClient = RedshiftDataClient.builder()
.region(region)
.build();
String id = performSQLStatement(redshiftDataClient, database, sqlStatement);
System.out.println("The identifier of the statement is "+id);
checkStatement(redshiftDataClient,id );
getResults(redshiftDataClient, id);
redshiftDataClient.close();
}
public static void checkStatement(RedshiftDataClient redshiftDataClient,String sqlId ) {
try {
DescribeStatementRequest statementRequest = DescribeStatementRequest.builder()
.id(sqlId)
.build() ;
// Wait until the sql statement processing is finished.
boolean finished = false;
String status = "";
while (!finished) {
DescribeStatementResponse response = redshiftDataClient.describeStatement(statementRequest);
status = response.statusAsString();
System.out.println("..."+status);
if (status.compareTo("FINISHED") == 0) {
break;
}
Thread.sleep(1000);
}
System.out.println("The statement is finished!");
} catch (RedshiftDataException | InterruptedException e) {
System.err.println(e.getMessage());
System.exit(1);
}
}
public static String performSQLStatement(RedshiftDataClient redshiftDataClient,
String database,
String sqlStatement) {
try {
ExecuteStatementRequest statementRequest = ExecuteStatementRequest.builder()
.database(database)
.sql(sqlStatement)
.build();
ExecuteStatementResponse response = redshiftDataClient.executeStatement(statementRequest);
return response.id();
} catch (RedshiftDataException e) {
System.err.println(e.getMessage());
System.exit(1);
}
return "";
}
public static void getResults(RedshiftDataClient redshiftDataClient, String statementId) {
try {
GetStatementResultRequest resultRequest = GetStatementResultRequest.builder()
.id(statementId)
.build();
GetStatementResultResponse response = redshiftDataClient.getStatementResult(resultRequest);
// Iterate through the List element where each element is a List object.
List<List<Field>> dataList = response.records();
// Print out the records.
for (List list: dataList) {
for (Object myField:list) {
Field field = (Field) myField;
String value = field.stringValue();
if (value != null)
System.out.println("The value of the field is " + value);
}
}
} catch (RedshiftDataException e) {
System.err.println(e.getMessage());
System.exit(1);
}
}
}
I have a custom training job that I run on a fixed schedule using Cloud Scheduler. When I create the scheduler job using either the Python client or the GCP console, the job runs fine. However, when I create the Cloud Scheduler job using the Java SDK, the job gets created but it fails. The SUMMARY of the error message I get in Cloud Logging is:
{"#type":"type.googleapis.com/google.cloud.scheduler.logging.AttemptFinished", "jobName":"projects/{my_project_id}/locations/us-central1/jobs/java_job", "status":"INVALID_ARGUMENT", "targetType":"HTTP", "url":"https://us-central1-aiplatform.googleapis.com/v1/projects/{my_project_id}/locations/us-central1/customJobs"}
I looked at the jobs in GCP; all fields of the three jobs (the one created using the Python client, the one created using the Java SDK, and the one created directly in GCP) are the same. I cannot figure out why the job created using the Java SDK keeps failing.
Java SDK code:
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import com.google.cloud.scheduler.v1.Job;
import com.google.cloud.scheduler.v1.LocationName;
import com.google.cloud.scheduler.v1.OAuthToken;
import com.google.protobuf.ByteString;
import com.google.cloud.scheduler.v1.CloudSchedulerClient;
import com.google.cloud.scheduler.v1.HttpMethod;
import com.google.cloud.scheduler.v1.HttpTarget;
public class Temp
{
static String projectId = "...";
static String location = "...";
static String serviceAccountEmail = "...-compute@developer.gserviceaccount.com";
static String outputUriPrefix = "gs://.../.../";
static String imageUri = String.format("%s-docker.pkg.dev/%s/.../...", location, projectId);
static String trainingJobName = "custom_training_job";
static String schedulerJobName = String.format("projects/%s/locations/%s/jobs/java_job", projectId, location);
static String scope = "https://www.googleapis.com/auth/cloud-platform";
static String httpTargetUri = String.format("https://%s-aiplatform.googleapis.com/v1/projects/%s/locations/%s/customJobs",
location, projectId, location);
static String machineType = "n1-standard-4";
static long replicaCount = 1;
static String getJobBody() throws JSONException {
JSONObject jobBody = new JSONObject();
jobBody.put("display_name", trainingJobName);
JSONObject base_output_directory = new JSONObject();
base_output_directory.put("output_uri_prefix", outputUriPrefix);
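// NOTE (see the EDIT below): the next line is the bug. base_output_directory is added as a
// top-level field of jobBody, but it should be nested inside job_spec instead.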
jobBody.put("base_output_directory", base_output_directory);
JSONObject jobSpec = new JSONObject();
JSONArray worker_pool_specs = new JSONArray();
JSONObject spec = new JSONObject();
spec.put("replica_count", replicaCount);
JSONObject machine_spec = new JSONObject();
machine_spec.put("machine_type", machineType);
spec.put("machine_spec", machine_spec);
JSONObject container_spec = new JSONObject();
container_spec.put( "image_uri", imageUri);
JSONArray args = new JSONArray();
args.put("--msg=hello!");
container_spec.put( "args", args);
spec.put("container_spec", container_spec);
worker_pool_specs.put(spec);
jobSpec.put("worker_pool_specs", worker_pool_specs);
jobBody.put("job_spec", jobSpec);
return jobBody.toString();
}
public static void main( String[] args ) throws IOException, JSONException
{
System.out.println(String.format("=======STARTING APPLICATION, version %s =======", "v5"));
CloudSchedulerClient client = CloudSchedulerClient.create();
String parent = LocationName.of(projectId, location).toString();
Map<String, String> headers = new HashMap<String, String>();
headers.put("User-Agent", "Google-Cloud-Scheduler");
headers.put("Content-Type", "application/json; charset=utf-8");
OAuthToken token = OAuthToken.newBuilder()
.setServiceAccountEmail(serviceAccountEmail)
.setScope(scope)
.build();
HttpTarget httpTarget = HttpTarget.newBuilder()
.setUri(httpTargetUri)
.setHttpMethod(HttpMethod.POST)
.putAllHeaders(headers)
.setBody(ByteString.copyFromUtf8(getJobBody()))
.setOauthToken(token)
.build();
Job job = Job.newBuilder()
.setName(schedulerJobName)
.setDescription("test java job")
.setSchedule("* * * * *")
.setTimeZone("Africa/Abidjan")
.setHttpTarget(httpTarget)
.build();
client.createJob(parent, job);
client.close();
}
}
Python Client code:
from google.cloud import scheduler
import json
project_id = "..."
location = "..."
service_account_email = "...-compute@developer.gserviceaccount.com"
output_uri_prefix="gs://.../.../"
image_uri=f'{location}-docker.pkg.dev/{project_id}/.../...'
traning_job__name ="custom_training_job"
scheduler_job_name = f'projects/{project_id}/locations/{location}/jobs/python_job'
scope = "https://www.googleapis.com/auth/cloud-platform"
http_target_uri = f'https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/customJobs'
machine_type = "n1-standard-4"
replica_count = 1
job_spec = {
"display_name": traning_job__name,
"job_spec": {
"worker_pool_specs": [
{
"machine_spec": {
"machine_type": machine_type,
},
"replica_count": replica_count,
"container_spec": {
"image_uri": image_uri,
"args": [
"--msg=hello!"
]
}
}
],
"base_output_directory": {
"output_uri_prefix": output_uri_prefix
}
}
}
job = {
"name": scheduler_job_name,
"description": "Created from Python client",
"http_target": {
"uri": http_target_uri,
"http_method": "POST",
"headers": {
"User-Agent": "Google-Cloud-Scheduler",
"Content-Type": "application/json; charset=utf-8"
},
"body": json.dumps(job_spec).encode('utf-8'),
"oauth_token": {
"service_account_email": service_account_email,
"scope": scope
}
},
"schedule": "* * * * *",
"time_zone": "Africa/Abidjan"
}
client = scheduler.CloudSchedulerClient()
parent = f'projects/{project_id}/locations/{location}'
response = client.create_job(parent = parent, job = job)
EDIT
The problem was that in the getJobBody function I was setting base_output_directory as a top-level field, whereas it should be nested inside job_spec. The problem is solved, but is there a better way to do this? I know there is a CustomJobSpec class, but I could not find a way to convert it into a JSON-style string.
As mentioned in the edit, the problem was that in the getJobBody function, base_output_directory was being set as a top-level field, whereas it should be nested inside job_spec. As far as I know, the way to avoid this mistake is simply to build the jobBody carefully; I don't know of a more structured way to do this.
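For reference, a corrected getJobBody (a sketch based on the edit above, mirroring the structure of the working Python job_spec) nests base_output_directory inside job_spec:

static String getJobBody() throws JSONException {
    JSONObject jobBody = new JSONObject();
    jobBody.put("display_name", trainingJobName);

    JSONObject jobSpec = new JSONObject();

    // base_output_directory belongs inside job_spec, not at the top level
    JSONObject baseOutputDirectory = new JSONObject();
    baseOutputDirectory.put("output_uri_prefix", outputUriPrefix);
    jobSpec.put("base_output_directory", baseOutputDirectory);

    JSONArray workerPoolSpecs = new JSONArray();
    JSONObject spec = new JSONObject();
    spec.put("replica_count", replicaCount);

    JSONObject machineSpec = new JSONObject();
    machineSpec.put("machine_type", machineType);
    spec.put("machine_spec", machineSpec);

    JSONObject containerSpec = new JSONObject();
    containerSpec.put("image_uri", imageUri);
    JSONArray args = new JSONArray();
    args.put("--msg=hello!");
    containerSpec.put("args", args);
    spec.put("container_spec", containerSpec);

    workerPoolSpecs.put(spec);
    jobSpec.put("worker_pool_specs", workerPoolSpecs);
    jobBody.put("job_spec", jobSpec);
    return jobBody.toString();
}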
In AWS DynamoDB, there are two options available for performing CRUD operations on a table.
DynamoDBMapper (com.amazonaws.services.dynamodbv2.datamodeling.DynamoDBMapper):
AmazonDynamoDB dbClient = AmazonDynamoDBAsyncClientBuilder.standard().withCredentials(creds)
.withRegion("us-east-1").build();
// creds is AWSCredentialsProvider
DynamoDBMapper mapper = new DynamoDBMapper(dbClient);
mapper.save(item);
Table (com.amazonaws.services.dynamodbv2.document.Table):
static DynamoDB dynamoDB =new DynamoDB(dbClient);
Table table = dynamoDB.getTable("TABLE_NAME");
Item item = new Item().withPrimaryKey("", "")
    .withString("", "");
table.putItem(item);
Both seem to do the same operations.
Is DynamoDBMapper a layer over Table? If so, what are the differences between using each of these?
If you want to map Java classes to DynamoDB tables (which is a useful feature), consider moving away from the old V1 API (com.amazonaws.services.dynamodbv2 is V1). V2 packages are software.amazon.awssdk.services.dynamodb.*.
Replace this old API with the DynamoDB V2 Enhanced Client. You can learn about this here:
Map items in DynamoDB tables
You can find code examples for using the Enhanced Client here.
Here is a Java V2 code example that shows how to use the Enhanced Client to put data into a Customer table. As you can see, you map a Java class to attributes in a DynamoDB table and then create a Customer object when adding data to the table.
package com.example.dynamodb;
import software.amazon.awssdk.enhanced.dynamodb.DynamoDbEnhancedClient;
import software.amazon.awssdk.enhanced.dynamodb.DynamoDbTable;
import software.amazon.awssdk.enhanced.dynamodb.TableSchema;
import software.amazon.awssdk.enhanced.dynamodb.mapper.annotations.DynamoDbSortKey;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
import software.amazon.awssdk.services.dynamodb.model.DynamoDbException;
import software.amazon.awssdk.enhanced.dynamodb.mapper.annotations.DynamoDbBean;
import software.amazon.awssdk.enhanced.dynamodb.mapper.annotations.DynamoDbPartitionKey;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
/*
* Prior to running this code example, create an Amazon DynamoDB table named Customer with a key named id and populate it with data.
* Also, ensure that you have setup your development environment, including your credentials.
*
* For information, see this documentation topic:
*
* https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/get-started.html
*/
public class EnhancedPutItem {
public static void main(String[] args) {
Region region = Region.US_EAST_1;
DynamoDbClient ddb = DynamoDbClient.builder()
.region(region)
.build();
DynamoDbEnhancedClient enhancedClient = DynamoDbEnhancedClient.builder()
.dynamoDbClient(ddb)
.build();
putRecord(enhancedClient) ;
ddb.close();
}
// Puts an item into a DynamoDB table
public static void putRecord(DynamoDbEnhancedClient enhancedClient) {
try {
DynamoDbTable<Customer> custTable = enhancedClient.table("Customer", TableSchema.fromBean(Customer.class));
// Create an Instant
LocalDate localDate = LocalDate.parse("2020-04-07");
LocalDateTime localDateTime = localDate.atStartOfDay();
Instant instant = localDateTime.toInstant(ZoneOffset.UTC);
// Populate the Table
Customer custRecord = new Customer();
custRecord.setCustName("Susan Blue");
custRecord.setId("id103");
custRecord.setEmail("sblue@noserver.com");
custRecord.setRegistrationDate(instant) ;
// Put the customer data into a DynamoDB table
custTable.putItem(custRecord);
} catch (DynamoDbException e) {
System.err.println(e.getMessage());
System.exit(1);
}
System.out.println("done");
}
@DynamoDbBean
public static class Customer {
private String id;
private String name;
private String email;
private Instant regDate;
@DynamoDbPartitionKey
public String getId() {
return this.id;
};
public void setId(String id) {
this.id = id;
}
@DynamoDbSortKey
public String getCustName() {
return this.name;
}
public void setCustName(String name) {
this.name = name;
}
public String getEmail() {
return this.email;
}
public void setEmail(String email) {
this.email = email;
}
public Instant getRegistrationDate() {
return regDate;
}
public void setRegistrationDate(Instant registrationDate) {
this.regDate = registrationDate;
}
}
}
I want to fetch all the failed executions and re-trigger them dynamically.
PS: The Step Functions definition already has a proper retry mechanism; now I want to rerun the failed executions dynamically.
I need to implement this in Java. Please help me with the approach.
Thanks in advance.
You can use the AWS Step Functions API to get a list of executions:
https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/services/sfn/SfnClient.html#listExecutions-
Then you can get a list of ExecutionListItem objects by calling the executions() method on the ListExecutionsResponse object (returned by the listExecutions method):
https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/services/sfn/model/ExecutionListItem.html
Using this object, you can do two things:
1 - check status - https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/services/sfn/model/ExecutionStatus.html
2 - get state machine ARN value - https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/services/sfn/model/ExecutionListItem.html#stateMachineArn--
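For example, here is a minimal sketch (the state machine ARN and region are placeholders) that lists only the FAILED executions of a state machine by using the statusFilter parameter:

import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.sfn.SfnClient;
import software.amazon.awssdk.services.sfn.model.ExecutionListItem;
import software.amazon.awssdk.services.sfn.model.ExecutionStatus;
import software.amazon.awssdk.services.sfn.model.ListExecutionsRequest;
import software.amazon.awssdk.services.sfn.model.ListExecutionsResponse;

public class ListFailedExecutions {
    public static void main(String[] args) {
        String stateMachineArn = "arn:aws:states:us-east-1:123456789012:stateMachine:MyStateMachine"; // placeholder
        SfnClient sfnClient = SfnClient.builder()
                .region(Region.US_EAST_1)
                .build();

        // Ask the service for FAILED executions only.
        ListExecutionsRequest request = ListExecutionsRequest.builder()
                .stateMachineArn(stateMachineArn)
                .statusFilter(ExecutionStatus.FAILED)
                .build();

        ListExecutionsResponse response = sfnClient.listExecutions(request);
        for (ExecutionListItem item : response.executions()) {
            // Each item carries the execution ARN, its status, and the state machine ARN,
            // which can be passed to startExecution (see the code below) to re-run it.
            System.out.println(item.executionArn() + " : " + item.statusAsString());
        }
        sfnClient.close();
    }
}

Each returned item also exposes stateMachineArn(), so you can feed that value into the StartExecution example that follows to re-trigger the failed executions.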
Using the state machine ARN value, you can execute a state machine with the AWS Step Functions Java API V2:
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.sfn.SfnClient;
import software.amazon.awssdk.services.sfn.model.*;
import java.io.FileReader;
import java.io.IOException;
import java.util.UUID;
// snippet-end:[stepfunctions.java2.start_execute.import]
public class StartExecution {
public static void main(String[] args) {
final String USAGE = "\n" +
"Usage:\n" +
" StartExecution <stateMachineArn> <jsonFile>\n\n" +
"Where:\n" +
" stateMachineArn - the ARN of the state machine.\n\n" +
" jsonFile - A JSON file that contains the values to pass to the worflow.\n" ;
if (args.length != 2) {
System.out.println(USAGE);
System.exit(1);
}
String stateMachineArn = args[0];
String jsonFile = args[1];
Region region = Region.US_EAST_1;
SfnClient sfnClient = SfnClient.builder()
.region(region)
.build();
String exeArn = startWorkflow(sfnClient,stateMachineArn, jsonFile);
System.out.println("The execution ARN is" +exeArn);
sfnClient.close();
}
// snippet-start:[stepfunctions.java2.start_execute.main]
public static String startWorkflow(SfnClient sfnClient, String stateMachineArn, String jsonFile) {
String json = getJSONString(jsonFile);
// Specify the name of the execution by using a GUID value.
UUID uuid = UUID.randomUUID();
String uuidValue = uuid.toString();
try {
StartExecutionRequest executionRequest = StartExecutionRequest.builder()
.input(json)
.stateMachineArn(stateMachineArn)
.name(uuidValue)
.build();
StartExecutionResponse response = sfnClient.startExecution(executionRequest);
return response.executionArn();
} catch (SfnException e) {
System.err.println(e.awsErrorDetails().errorMessage());
System.exit(1);
}
return "";
}
private static String getJSONString(String path) {
try {
JSONParser parser = new JSONParser();
JSONObject data = (JSONObject) parser.parse(new FileReader(path));//path to the JSON file.
String json = data.toJSONString();
return json;
} catch (IOException | org.json.simple.parser.ParseException e) {
e.printStackTrace();
}
return "";
}
// snippet-end:[stepfunctions.java2.start_execute.main]
}