Multiple cloud-sql table export as csv - google-cloud-platform

Is there a way to export multiple SQL tables as CSV by issuing specific queries from Cloud SQL?
Below is the code I currently have. When I call exportTables for multiple tables back to back, I see a 409 error. It's probably because the Cloud SQL instance is still busy with one export and is not allowing the subsequent export request.
How can I get this to work? What would be the ideal solution here?
private void exportTables(String table_name, String query)
    throws IOException, InterruptedException {
  HttpClient httpclient = new HttpClient();
  PostMethod httppost =
      new PostMethod(
          "https://www.googleapis.com/sql/v1beta4/projects/"
              + "abc"
              + "/instances/"
              + "zxy"
              + "/export");
  String destination_bucket =
      String.join(
          "/",
          "gs://" + "test",
          table_name,
          DateTimeUtil.getCurrentDate() + ".csv");
  GoogleCredentials credentials =
      GoogleCredentials.getApplicationDefault().createScoped(SQLAdminScopes.all());
  AccessToken access_token = credentials.refreshAccessToken();
  access_token.getTokenValue();
  httppost.addRequestHeader("Content-Type", "application/json");
  httppost.addRequestHeader("Authorization", "Bearer " + access_token.getTokenValue());
  String request =
      "{"
          + " \"exportContext\": {"
          + " \"fileType\": \"CSV\","
          + " \"uri\":\""
          + destination_bucket
          + "\","
          + " \"databases\": [\""
          + "xyz"
          + "\"],"
          + " \"csvExportOptions\": {"
          + " \"selectQuery\": \""
          + query
          + "\""
          + " }\n"
          + " }"
          + "}";
  httppost.setRequestEntity(new StringRequestEntity(request, "application/json", "UTF-8"));
  httpclient.executeMethod(httppost);
  if (httppost.getStatusCode() > 200) {
    String response = new String(httppost.getResponseBody(), StandardCharsets.UTF_8);
    if (httppost.getStatusCode() != 409) {
      throw new RuntimeException(
          "Exception occurred while exporting the table: " + table_name + " Error " + response);
    } else {
      throw new IOException("SQL instance seems to be busy at the moment. Please retry");
    }
  }
  httppost.releaseConnection();
  logger.info("Finished exporting table {} to {}", table_name, destination_bucket);
}

I don't have a suggestion to fix the issue on Cloud SQL directly, but here is a way to run the exports in sequence thanks to a newer tool: Workflows.
Define the data format that you want, in JSON, to describe ONE export.
Then provide an array of these configurations to your workflow.
In this workflow:
Loop over the configuration array.
Perform an API call to Cloud SQL to start the export for each configuration.
Read the answer of the API call; it contains the jobId.
Sleep a while.
Check whether the export is over (using the jobId).
If not, sleep and check again.
If yes, loop (and thus start the next export).
It's serverless and the free tier makes this use case free. The same export-then-poll pattern is sketched in Java below.
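If you'd rather keep everything in Java instead of introducing Workflows, a minimal sketch of that pattern could look like the following. The startExport and getOperationStatus helpers are hypothetical placeholders: startExport would wrap your existing export call and return the operation name from the response JSON, and getOperationStatus would GET https://www.googleapis.com/sql/v1beta4/projects/{project}/operations/{operationId} and return its status field.
// Run the exports strictly one after another: start an export, poll the returned
// operation until it is DONE, then start the next one. startExport and
// getOperationStatus are hypothetical helpers as described above.
private void exportTablesSequentially(Map<String, String> queriesByTable)
    throws IOException, InterruptedException {
  for (Map.Entry<String, String> entry : queriesByTable.entrySet()) {
    String operationId = startExport(entry.getKey(), entry.getValue());
    waitForOperation(operationId);
  }
}

private void waitForOperation(String operationId)
    throws IOException, InterruptedException {
  while (true) {
    String status = getOperationStatus(operationId); // PENDING, RUNNING or DONE
    if ("DONE".equals(status)) {
      return;
    }
    Thread.sleep(10_000); // back off before polling again
  }
}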

Related

AWS cognito `connection pool shutdown`

In my Spring Boot project, I first create a user pool in Cognito and then add a user to that user pool. When adding the user to the user pool I need to create a userPoolClient.
I am creating the user pool client with all the required params with
CreateUserPoolClientResponse response = cognitoClient.createUserPoolClient(clientRequest);
but it's behaving strangely. When I run the code it throws "connection pool shut down", but when I run it in debug mode with a breakpoint, it is able to create the user pool client successfully. I am not sure if creation of the connection pool is an async process and that's why, with the debugger, it gets some time before creating it?
Has anyone faced this issue? I would appreciate any help. Thanks.
Try running AWS SDK Java V2 for this use case. Here is the code for V2.
package com.example.cognito;

//snippet-start:[cognito.java2.user_pool.create_user_pool_client.import]
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.cognitoidentityprovider.CognitoIdentityProviderClient;
import software.amazon.awssdk.services.cognitoidentityprovider.model.CognitoIdentityProviderException;
import software.amazon.awssdk.services.cognitoidentityprovider.model.CreateUserPoolClientRequest;
import software.amazon.awssdk.services.cognitoidentityprovider.model.CreateUserPoolClientResponse;
//snippet-end:[cognito.java2.user_pool.create_user_pool_client.import]

/**
 * To run this Java V2 code example, ensure that you have setup your development environment, including your credentials.
 *
 * For information, see this documentation topic:
 *
 * https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/get-started.html
 */
public class CreateUserPoolClient {

    public static void main(String[] args) {
        final String USAGE = "\n" +
            "Usage:\n" +
            "    <clientName> <userPoolId> \n\n" +
            "Where:\n" +
            "    clientName - the name for the user pool client to create.\n\n" +
            "    userPoolId - the ID for the user pool.\n\n" ;

        if (args.length != 2) {
            System.out.println(USAGE);
            System.exit(1);
        }

        String clientName = args[0];
        String userPoolId = args[1];

        CognitoIdentityProviderClient cognitoClient = CognitoIdentityProviderClient.builder()
            .region(Region.US_EAST_1)
            .build();

        createPoolClient(cognitoClient, clientName, userPoolId);
        cognitoClient.close();
    }

    //snippet-start:[cognito.java2.user_pool.create_user_pool_client.main]
    public static void createPoolClient(CognitoIdentityProviderClient cognitoClient,
                                        String clientName,
                                        String userPoolId) {
        try {
            CreateUserPoolClientResponse response = cognitoClient.createUserPoolClient(
                CreateUserPoolClientRequest.builder()
                    .clientName(clientName)
                    .userPoolId(userPoolId)
                    .build()
            );
            System.out.println("User pool " + response.userPoolClient().clientName() + " created. ID: " + response.userPoolClient().clientId());
        } catch (CognitoIdentityProviderException e) {
            System.err.println(e.awsErrorDetails().errorMessage());
            System.exit(1);
        }
    }
    //snippet-end:[cognito.java2.user_pool.create_user_pool_client.main]
}
I just ran this in a unit test and it worked fine.
You can find this one and other code examples for this service here:
https://github.com/awsdocs/aws-doc-sdk-examples/tree/main/javav2/example_code/cognito

AmazonClientException: Data read has a different length than the expected

When I try to upload content to an Amazon S3 bucket, I get an AmazonClientException: Data read has a different length than the expected.
Here is my code.
public Object uploadFile(MultipartFile file) {
    String fileName = System.currentTimeMillis() + "_" + file.getOriginalFilename();
    log.info("uploadFile-> starting file upload " + fileName);
    Path path = Paths.get(file.getOriginalFilename());
    File fileObj = new File(file.getOriginalFilename());
    try (FileOutputStream os = new FileOutputStream(fileObj)) {
        os.write(file.getBytes());
        os.close();
        String uploadFilePath = bucketName + "/" + uploadPath;
        s3Client.putObject(new PutObjectRequest(uploadFilePath, fileName, fileObj));
        Files.delete(path);
    } catch (IOException ex) {
        log.error("error [" + ex.getMessage() + "] occurred while uploading [" + fileName + "] ");
    }
    log.info("uploadFile-> file uploaded process completed at: " + LocalDateTime.now() + " for - " + fileName);
    return "File uploaded : " + fileName;
}
Amazon recommends using the Amazon S3 Java V2 API over use of V1.
The AWS SDK for Java 2.x is a major rewrite of the version 1.x code base. It’s built on top of Java 8+ and adds several frequently requested features. These include support for non-blocking I/O and the ability to plug in a different HTTP implementation at run time.
To upload content to an Amazon S3 bucket, use this V2 code.
public static String putS3Object(S3Client s3,
                                 String bucketName,
                                 String objectKey,
                                 String objectPath) {
    try {
        Map<String, String> metadata = new HashMap<>();
        metadata.put("myVal", "test");

        PutObjectRequest putOb = PutObjectRequest.builder()
            .bucket(bucketName)
            .key(objectKey)
            .metadata(metadata)
            .build();

        PutObjectResponse response = s3.putObject(putOb,
            RequestBody.fromBytes(getObjectFile(objectPath)));
        return response.eTag();
    } catch (S3Exception e) {
        System.err.println(e.getMessage());
        System.exit(1);
    }
    return "";
}
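The snippet references a getObjectFile helper that is only defined in the full example linked below. A minimal sketch, assuming it simply reads the file at objectPath into a byte array (with SDK v2 you could also skip the helper and pass RequestBody.fromFile(Paths.get(objectPath)) instead):
// Hypothetical helper: read the whole file into memory so it can be passed to
// RequestBody.fromBytes above.
private static byte[] getObjectFile(String objectPath) {
    try {
        return java.nio.file.Files.readAllBytes(java.nio.file.Paths.get(objectPath));
    } catch (java.io.IOException e) {
        throw new java.io.UncheckedIOException(e);
    }
}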
Full example here.
If you are not familiar with V2, please refer to this doc topic:
https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/get-started.html
AWS Java SDK 1.x
s3.putObject(bucketName, key, new File(filePath));
AWS Java SDK 2.x
PutObjectRequest putObjectRequest = PutObjectRequest
    .builder()
    .bucket(bucketName)
    .key(key)
    .build();
s3Client.putObject(putObjectRequest, Paths.get(filePath));
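The question's code is synchronous, but since the V2 SDK's non-blocking I/O was mentioned above, here is a minimal sketch of the same upload through the asynchronous client (bucket, key and file path are placeholders):
import java.nio.file.Paths;
import java.util.concurrent.CompletableFuture;
import software.amazon.awssdk.core.async.AsyncRequestBody;
import software.amazon.awssdk.services.s3.S3AsyncClient;
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
import software.amazon.awssdk.services.s3.model.PutObjectResponse;

public class AsyncUploadSketch {
    public static void main(String[] args) {
        S3AsyncClient s3 = S3AsyncClient.create();

        PutObjectRequest request = PutObjectRequest.builder()
            .bucket("my-bucket")   // placeholder bucket
            .key("my-key")         // placeholder key
            .build();

        // Returns immediately; the upload completes on the SDK's event loop.
        CompletableFuture<PutObjectResponse> future =
            s3.putObject(request, AsyncRequestBody.fromFile(Paths.get("/tmp/file.bin")));

        future.whenComplete((resp, err) -> {
            if (err != null) {
                err.printStackTrace();
            } else {
                System.out.println("Uploaded, ETag: " + resp.eTag());
            }
        }).join();

        s3.close();
    }
}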

Get serviceInstanceName and serviceKeyName for Cloud Foundry user provided services by using cloudFoundryOperations

I'm trying to get the credentials of a UPS (user provided service) in Cloud Foundry using:
Mono<ServiceKey> serviceKey = (Mono<ServiceKey>) cloudFoundryOperations
    .services()
    .getServiceKey(
        GetServiceKeyRequest.builder()
            .serviceKeyName("digital_cassandra")
            .serviceInstanceName("2a5aa377-e992-4f88-9f85-d9cec5c3bea9")
            .build())
    .subscribe();
serviceKey.map(serviceKey1 -> {
    System.out.println(serviceKey1.getCredentials().toString());
    return serviceKey1.getCredentials().get(0);
});
but nothing is printed.
How do I get the serviceKeyName and serviceInstanceName via cloudFoundryOperations?
I need to print all the serviceKeyNames and serviceInstanceNames in my space.
.serviceInstanceName("2a5aa377-e992-4f88-9f85-d9cec5c3bea9")
It should be the actual name, not the guid. Like "my-key" or whatever you called your key.
but nothing is printed. How do I get the serviceKeyName and serviceInstanceName via cloudFoundryOperations?
If you just want to print to the console, try something like this:
cloudFoundryOperations
    .services()
    .getServiceKey(GetServiceKeyRequest.builder()
        .serviceInstanceName("reservation-db")
        .serviceKeyName("cf-mysql")
        .build())
    .doOnNext(key -> {
        System.out.println("Key:");
        System.out.println(" " + key.getName() + " (" + key.getId() + ")");
        key.getCredentials().forEach((k, v) -> {
            System.out.println(" " + k + " => " + v);
        });
    })
    .block();
The GetServiceKeyRequest determines which service key is looked up. The doOnNext call lets you inspect but not consume the key, which works fine for printing it out. The example then calls .block() to wait for the results, which is fine because this is just an example; you wouldn't want to do that in your actual code, though. You'd probably want one of the subscribe() variants instead (you could swap subscribe() in for doOnNext() too; it just depends on what your code is doing).
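If you want the non-blocking variant of the same lookup, a sketch that consumes the key in subscribe() instead of doOnNext() + block() (same example instance and key names as above):
cloudFoundryOperations
    .services()
    .getServiceKey(GetServiceKeyRequest.builder()
        .serviceInstanceName("reservation-db")
        .serviceKeyName("cf-mysql")
        .build())
    .subscribe(key -> {
        // Runs asynchronously once the key arrives; nothing blocks the caller.
        System.out.println("Key: " + key.getName() + " (" + key.getId() + ")");
        key.getCredentials().forEach((k, v) -> System.out.println("  " + k + " => " + v));
    });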
I need to print all the serviceKeyNames and serviceInstanceNames in my space.
To get all the keys for all the service instances:
cloudFoundryOperations
    .services()
    .listInstances()
    .doOnNext(si -> {
        System.out.println(" " + si.getName() + " (" + si.getId() + ")");
    })
    .flatMap((ServiceInstanceSummary si) -> {
        return cloudFoundryOperations
            .services()
            .listServiceKeys(ListServiceKeysRequest.builder()
                .serviceInstanceName(si.getName())
                .build())
            .doOnNext(key -> {
                System.out.println("Key:");
                System.out.println(" " + key.getName() + " (" + key.getId() + ")");
                key.getCredentials().forEach((k, v) -> {
                    System.out.println(" " + k + " => " + v);
                });
            });
    })
    .blockLast();
This one is enumerating all the service instances, printing the name/id, then using flatMap to go out and get the service keys for each service instance. It then merges them all into one Flux<ServiceKey>. The doOnNext() is just for printing. You don't necessarily have to do that. You could consume the result in a number of ways, like collect it into a list or subscribe to it, this just works nicely for an example. Use what works best for your code.
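For example, a sketch that collects every key into a list instead of printing inside the pipeline (blocking only for illustration, as above):
List<ServiceKey> allKeys = cloudFoundryOperations
    .services()
    .listInstances()
    .flatMap(si -> cloudFoundryOperations
        .services()
        .listServiceKeys(ListServiceKeysRequest.builder()
            .serviceInstanceName(si.getName())
            .build()))
    .collectList()
    .block();
allKeys.forEach(key -> System.out.println(key.getName() + ": " + key.getCredentials()));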

Why do profile pic URLs returned from graph.facebook result in a 404

The backend of my application makes a request to:
https://graph.facebook.com/v2.8/me?access_token=<firebase-access-token>&fields=id,name,first_name,birthday,email,picture.type(large){url}&format=json&method=get&pretty=0&suppress_http_code=1
I get a successful (200) response with the JSON data I expect and picture field as such:
"picture": {
"data": {
"url": "https://platform-lookaside.fbsbx.com/platform/profilepic/?asid=<asid>&height=200&width=200&ext=<ext>&hash=<hash>"
}
}
(where in place of <asid> and <ext>, there are numbers and <hash> is some alphanumeric string).
However, when I make a GET request to the platform-lookaside URL above, I get a 404 error.
It's been happening every time since my very first graph.facebook request for the same user. The very first one returned a platform-lookaside URL which pointed to a proper image (not sure if this is simply coincidence).
Is there something I'm doing wrong or is this likely a bug with the Facebook API?
FB currently seems to have issues with some CDNs, and therefore your issue might be only temporary. You should also see missing/broken images in some places on facebook.com. Worst time to debug your issue :)
Try this code; it worked for me:
GraphRequest request = GraphRequest.newMeRequest(
        AccessToken.getCurrentAccessToken(), new GraphRequest.GraphJSONObjectCallback() {
            @Override
            public void onCompleted(JSONObject object, GraphResponse response) {
                // Insert your code here
                try {
                    String name = object.getString("name");
                    String email = object.getString("email");
                    String last_name = object.getString("last_name");
                    String first_name = object.getString("first_name");
                    String middle_name = object.getString("middle_name");
                    String link = object.getString("link");
                    String picture = object.getJSONObject("picture").getJSONObject("data").getString("url");

                    Log.e("Email = ", " " + email);
                    Log.e("facebookLink = ", " " + link);
                    Log.e("name = ", " " + name);
                    Log.e("last_name = ", " " + last_name);
                    Log.e("first_name = ", " " + first_name);
                    Log.e("middle_name = ", " " + middle_name);
                    Log.e("pictureLink = ", " " + picture);
                } catch (JSONException e) {
                    e.printStackTrace();
                    Log.e("Sttaaaaaaaaaaaaaaaaa", e.getMessage());
                }
            }
        });

Bundle parameters = new Bundle();
parameters.putString("fields", "id,name,email,link,last_name,first_name,middle_name,picture");
request.setParameters(parameters);
request.executeAsync();

How to do multiple parallel readers for data export using Google Spanner?

External Backups/Snapshots for Google Cloud Spanner recommends using queries with timestamp bounds to create snapshots for export. At the bottom of the Timestamp Bounds documentation it states:
Cloud Spanner continuously garbage collects deleted and overwritten data in the background to reclaim storage space. This process is known as version GC. By default, version GC reclaims versions after they are one hour old. Because of this, Cloud Spanner cannot perform reads at a read timestamp more than one hour in the past.
So any export would need to complete within an hour. A single reader (i.e. select * from table; using timestamp X) would not be able to export the entire table within an hour.
How can multiple parallel readers be implemented in Spanner?
Note: It is mentioned in one of the comments that support for Apache Beam is coming, but it looks like that uses a single reader:
/** A simplest read function implementation. Parallelism support is coming. */
https://github.com/apache/beam/blob/master/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/NaiveSpannerReadFn.java#L26
Is there a way to implement the parallel reader that Beam requires today using existing APIs? Or will Beam need to use something that isn't released yet on Google Spanner?
It is possible to read data in parallel from Cloud Spanner with the BatchClient class. Follow read_data_in_parallel for more information.
If you are looking to export data from Cloud Spanner, I'd recommend you use Cloud Dataflow (see the integration details here), as it provides higher-level abstractions and takes care of data processing details like scaling and failure handling.
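For reference, a minimal sketch of the BatchClient approach mentioned above (project, instance, database and query are placeholders; in a real export each partition would be handed to its own worker thread or machine):
import java.util.List;
import com.google.cloud.spanner.BatchClient;
import com.google.cloud.spanner.BatchReadOnlyTransaction;
import com.google.cloud.spanner.DatabaseId;
import com.google.cloud.spanner.Partition;
import com.google.cloud.spanner.PartitionOptions;
import com.google.cloud.spanner.ResultSet;
import com.google.cloud.spanner.Spanner;
import com.google.cloud.spanner.SpannerOptions;
import com.google.cloud.spanner.Statement;
import com.google.cloud.spanner.TimestampBound;

public class ParallelReadSketch {
    public static void main(String[] args) {
        Spanner spanner = SpannerOptions.newBuilder().build().getService();
        BatchClient batchClient = spanner.getBatchClient(
                DatabaseId.of("my-project", "my-instance", "my-database")); // placeholders

        // One read-only transaction at a single timestamp shared by all partitions,
        // so every reader sees the same consistent snapshot.
        try (BatchReadOnlyTransaction txn =
                batchClient.batchReadOnlyTransaction(TimestampBound.strong())) {
            List<Partition> partitions = txn.partitionQuery(
                    PartitionOptions.newBuilder().build(),
                    Statement.of("SELECT * FROM my_table")); // placeholder query

            // Shown sequentially for brevity; each partition can run on its own thread.
            for (Partition partition : partitions) {
                try (ResultSet rs = txn.execute(partition)) {
                    while (rs.next()) {
                        // write the row to CSV, GCS, etc.
                    }
                }
            }
        }
        spanner.close();
    }
}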
Edit 2018-03-30 - The example project has been updated to use the BatchClient offered by Google Cloud Spanner
After the release of the BatchClient for reading/downloading large amounts of data, the example project below has been updated to use the new batch client instead of the standard database client. The basic idea behind the project is still the same: Copy data to/from Cloud Spanner and any other database using standard jdbc functionality. The following code snippet sets the jdbc connection in batch read mode:
if (source.isWrapperFor(ICloudSpannerConnection.class))
{
    ICloudSpannerConnection con = source.unwrap(ICloudSpannerConnection.class);
    // Make sure no transaction is running
    if (!con.isBatchReadOnly())
    {
        if (con.getAutoCommit())
        {
            con.setAutoCommit(false);
        }
        else
        {
            con.commit();
        }
        con.setBatchReadOnly(true);
    }
}
When the connection is in 'batch read only mode', the connection will use the BatchClient of Google Cloud Spanner instead of the standard database client. When one of the Statement#execute(String) or PreparedStatement#execute() methods is called (as these allow multiple result sets to be returned), the jdbc driver will create a partitioned query instead of a normal query. The results of this partitioned query will be a number of result sets (one per partition) that can be fetched by the Statement#getResultSet() and Statement#getMoreResults(int) methods.
Statement statement = source.createStatement();
boolean hasResults = statement.execute(select);
int workerNumber = 0;
while (hasResults)
{
    ResultSet rs = statement.getResultSet();
    PartitionWorker worker = new PartitionWorker("PartionWorker-" + workerNumber, config, rs, tableSpec, table, insertCols);
    workers.add(worker);
    hasResults = statement.getMoreResults(Statement.KEEP_CURRENT_RESULT);
    workerNumber++;
}
The result sets that are returned by the Statement#execute(String) are not executed directly, but only after the first call to ResultSet#next(). Passing these result sets to separate worker threads ensures parallel download and copying of the data.
Original answer:
This project was initially created for conversion in the other direction (from a local database to Cloud Spanner), but as it uses JDBC for both source and destination it can also be used the other way around: Converting a Cloud Spanner database to a local PostgreSQL database. Large tables are converted in parallel using a thread pool.
The project uses this open source JDBC driver instead of the JDBC driver supplied by Google. The source Cloud Spanner JDBC connection is set to read-only mode and autocommit=false. This ensures that the connection automatically creates a read-only transaction using the current time as timestamp the first time you execute a query. All subsequent queries within the same (read-only) transaction will use the same timestamp giving you a consistent snapshot of your Google Cloud Spanner database.
It works as follows:
Set the source database to read-only transactional mode.
The convert(String catalog, String schema) method iterates over all tables in the source database (Cloud Spanner).
For each table the number of records is determined, and depending on the size of the table, the table is copied using either the main thread of the application or by a worker pool.
The class UploadWorker is responsible for the parallel copying. Each worker is assigned a range of records from the table (for example rows 1 to 2,400). The range is selected by a select statement in this format: 'SELECT * FROM $TABLE ORDER BY $PK_COLUMNS LIMIT $BATCH_SIZE OFFSET $CURRENT_OFFSET'
Commit the read-only transaction on the source database after ALL tables have been converted.
Below is a code snippet of the most important parts.
public void convert(String catalog, String schema) throws SQLException
{
    int batchSize = config.getBatchSize();
    destination.setAutoCommit(false);
    // Set the source connection to transaction mode (no autocommit) and read-only
    source.setAutoCommit(false);
    source.setReadOnly(true);
    try (ResultSet tables = destination.getMetaData().getTables(catalog, schema, null, new String[] { "TABLE" }))
    {
        while (tables.next())
        {
            String tableSchema = tables.getString("TABLE_SCHEM");
            if (!config.getDestinationDatabaseType().isSystemSchema(tableSchema))
            {
                String table = tables.getString("TABLE_NAME");
                // Check whether the destination table is empty.
                int destinationRecordCount = getDestinationRecordCount(table);
                if (destinationRecordCount == 0 || config.getDataConvertMode() == ConvertMode.DropAndRecreate)
                {
                    if (destinationRecordCount > 0)
                    {
                        deleteAll(table);
                    }
                    int sourceRecordCount = getSourceRecordCount(getTableSpec(catalog, tableSchema, table));
                    if (sourceRecordCount > batchSize)
                    {
                        convertTableWithWorkers(catalog, tableSchema, table);
                    }
                    else
                    {
                        convertTable(catalog, tableSchema, table);
                    }
                }
                else
                {
                    if (config.getDataConvertMode() == ConvertMode.ThrowExceptionIfExists)
                        throw new IllegalStateException("Table " + table + " is not empty");
                    else if (config.getDataConvertMode() == ConvertMode.SkipExisting)
                        log.info("Skipping data copy for table " + table);
                }
            }
        }
    }
    source.commit();
}
private void convertTableWithWorkers(String catalog, String schema, String table) throws SQLException
{
    String tableSpec = getTableSpec(catalog, schema, table);
    Columns insertCols = getColumns(catalog, schema, table, false);
    Columns selectCols = getColumns(catalog, schema, table, true);
    if (insertCols.primaryKeyCols.isEmpty())
    {
        log.warning("Table " + tableSpec + " does not have a primary key. No data will be copied.");
        return;
    }
    log.info("About to copy data from table " + tableSpec);
    int batchSize = config.getBatchSize();
    int totalRecordCount = getSourceRecordCount(tableSpec);
    int numberOfWorkers = calculateNumberOfWorkers(totalRecordCount);
    int numberOfRecordsPerWorker = totalRecordCount / numberOfWorkers;
    if (totalRecordCount % numberOfWorkers > 0)
        numberOfRecordsPerWorker++;
    int currentOffset = 0;
    ExecutorService service = Executors.newFixedThreadPool(numberOfWorkers);
    for (int workerNumber = 0; workerNumber < numberOfWorkers; workerNumber++)
    {
        int workerRecordCount = Math.min(numberOfRecordsPerWorker, totalRecordCount - currentOffset);
        UploadWorker worker = new UploadWorker("UploadWorker-" + workerNumber, selectFormat, tableSpec, table,
                insertCols, selectCols, currentOffset, workerRecordCount, batchSize, source,
                config.getUrlDestination(), config.isUseJdbcBatching());
        service.submit(worker);
        currentOffset = currentOffset + numberOfRecordsPerWorker;
    }
    service.shutdown();
    try
    {
        service.awaitTermination(config.getUploadWorkerMaxWaitInMinutes(), TimeUnit.MINUTES);
    }
    catch (InterruptedException e)
    {
        log.severe("Error while waiting for workers to finish: " + e.getMessage());
        throw new RuntimeException(e);
    }
}
public class UploadWorker implements Runnable
{
    private static final Logger log = Logger.getLogger(UploadWorker.class.getName());

    private final String name;
    private String selectFormat;
    private String sourceTable;
    private String destinationTable;
    private Columns insertCols;
    private Columns selectCols;
    private int beginOffset;
    private int numberOfRecordsToCopy;
    private int batchSize;
    private Connection source;
    private String urlDestination;
    private boolean useJdbcBatching;

    UploadWorker(String name, String selectFormat, String sourceTable, String destinationTable, Columns insertCols,
            Columns selectCols, int beginOffset, int numberOfRecordsToCopy, int batchSize, Connection source,
            String urlDestination, boolean useJdbcBatching)
    {
        this.name = name;
        this.selectFormat = selectFormat;
        this.sourceTable = sourceTable;
        this.destinationTable = destinationTable;
        this.insertCols = insertCols;
        this.selectCols = selectCols;
        this.beginOffset = beginOffset;
        this.numberOfRecordsToCopy = numberOfRecordsToCopy;
        this.batchSize = batchSize;
        this.source = source;
        this.urlDestination = urlDestination;
        this.useJdbcBatching = useJdbcBatching;
    }

    @Override
    public void run()
    {
        // Connection source = DriverManager.getConnection(urlSource);
        try (Connection destination = DriverManager.getConnection(urlDestination))
        {
            log.info(name + ": " + sourceTable + ": Starting copying " + numberOfRecordsToCopy + " records");
            destination.setAutoCommit(false);
            String sql = "INSERT INTO " + destinationTable + " (" + insertCols.getColumnNames() + ") VALUES \n";
            sql = sql + "(" + insertCols.getColumnParameters() + ")";
            PreparedStatement statement = destination.prepareStatement(sql);

            int lastRecord = beginOffset + numberOfRecordsToCopy;
            int recordCount = 0;
            int currentOffset = beginOffset;
            while (true)
            {
                int limit = Math.min(batchSize, lastRecord - currentOffset);
                String select = selectFormat.replace("$COLUMNS", selectCols.getColumnNames());
                select = select.replace("$TABLE", sourceTable);
                select = select.replace("$PRIMARY_KEY", selectCols.getPrimaryKeyColumns());
                select = select.replace("$BATCH_SIZE", String.valueOf(limit));
                select = select.replace("$OFFSET", String.valueOf(currentOffset));
                try (ResultSet rs = source.createStatement().executeQuery(select))
                {
                    while (rs.next())
                    {
                        int index = 1;
                        for (Integer type : insertCols.columnTypes)
                        {
                            Object object = rs.getObject(index);
                            statement.setObject(index, object, type);
                            index++;
                        }
                        if (useJdbcBatching)
                            statement.addBatch();
                        else
                            statement.executeUpdate();
                        recordCount++;
                    }
                    if (useJdbcBatching)
                        statement.executeBatch();
                }
                destination.commit();
                log.info(name + ": " + sourceTable + ": Records copied so far: " + recordCount + " of "
                        + numberOfRecordsToCopy);
                currentOffset = currentOffset + batchSize;
                if (recordCount >= numberOfRecordsToCopy)
                    break;
            }
        }
        catch (SQLException e)
        {
            log.severe("Error during data copy: " + e.getMessage());
            throw new RuntimeException(e);
        }
        log.info(name + ": Finished copying");
    }
}