Dropzone.js + AWS S3 stalling queue

I'm trying to implement a Dropzone.js uploader to Amazon S3 using aws-sdk.js for the browser, but when I exceed the 'parallelUploads' maximum in the settings, the queue never completes. I'm using the approach in the following link:
amazon upload
Relevant parts of my code:
var dz = new Dropzone("#DZContainer", {
    acceptedFiles: "image/*,.jpg,.jpeg,.png,.gif",
    autoQueue: true,
    autoProcessQueue: true,
    parallelUploads: 10,
    clickable: [".uploadButton"],
    accept: function(file, done) {
        let params = {
            "Bucket": "upload-bucket",
            "Key": getFullKey(file.name),
            Body: file,
            Region: "us-east-1",
            ContentType: file.type
        };
        file.s3upload = new AWS.S3.ManagedUpload({ params: params });
        if (typeof(done) === 'function') done();
    },
    canceled: function(file) {
        if (file.s3upload) file.s3upload.abort();
    },
    init: function() {
        this.on('removedfile', function(file) {
            if (file.s3upload) file.s3upload.abort();
        });
    }
});
dz.uploadFiles = function(files) {
    for (var j = 0; j < files.length; j++) {
        var file = files[j];
        dz.SendFile(file);
    }
};

dz.SendFile = function(file) {
    file.s3upload.send(function(err, data) {
        if (err) {
            console.error(err);
            dz.emit("error", file, err.message);
        } else {
            dz.emit("complete", file);
        }
    });
};
If I drag in (or use the clickable) more than 10 files, the first 10 complete, but it never processes the rest of the queue. What am I missing? All help is appreciated.
EDIT: With a little more digging into Dropzone, it looks as though the file status is never getting set to complete. I see a function called _finished() in the Dropzone code, but I'm having a hard time figuring out what specifically is supposed to trigger it. I have tried dz.emit("complete", file) as listed below, as well as adding dz.emit("success", file), but my breakpoint at the first line of _finished() never triggers, so file.status never gets set to completed.
Does anyone know when/what/how _finished() is supposed to be run?

As mentioned in the edit, I was able to track down where the .status was not properly getting set. This happens in a private Dropzone function called _finished().
With further examination, I noticed that _finished() also calls emit("complete", file) after setting file.status to Dropzone.SUCCESS and emitting "success". It then checks whether autoProcessQueue is set and, if it is, returns the result of a processQueue() call.
I had a hard time figuring out what triggered this function, as it was attached to an onload event that I eventually realized was tied to the XMLHttpRequest object used by the internal uploader (which is being overridden by the S3 uploader).
So I modified the function to emulate what Dropzone's _finished() was doing, and now it's behaving as expected:
dz.SendFile = function(file) {
    file.s3upload.send(function(err, data) {
        if (err) {
            console.error(err);
            dz.emit("error", file, err.message);
        } else {
            file.status = Dropzone.SUCCESS;
            dz.emit("success", file, data, err);
            dz.emit("complete", file);
            if (dz.options.autoProcessQueue)
                dz.processQueue();
        }
    });
};
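One caveat with the snippet above: the queue only advances on success. If an upload errors out, the file never reaches a terminal status and the queue can stall again. A minimal sketch of how the error branch could be handled the same way (Dropzone.ERROR is Dropzone's built-in error status; this part is an illustrative extension of the fix, not something taken from it):

dz.SendFile = function(file) {
    file.s3upload.send(function(err, data) {
        if (err) {
            console.error(err);
            // Mirror Dropzone's internal error path: mark the file as errored
            // and emit the events so the UI updates.
            file.status = Dropzone.ERROR;
            dz.emit("error", file, err.message);
            dz.emit("complete", file);
        } else {
            file.status = Dropzone.SUCCESS;
            dz.emit("success", file, data, err);
            dz.emit("complete", file);
        }
        // Keep the queue moving whether the upload succeeded or failed.
        if (dz.options.autoProcessQueue)
            dz.processQueue();
    });
};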

Related

Amazon S3 multipart upload part size via lambda

I have a few Lambda functions that allow me to make a multipart upload to an Amazon S3 bucket. One is responsible for creating the multipart upload, another one for each part upload, and the last one for completing the upload.
The first two seem to work fine (they respond with statusCode 200), but the last one fails. On CloudWatch, I can see an error saying 'Your proposed upload is smaller than the minimum allowed size'.
This shouldn't be happening, since I'm uploading files bigger than the 5 MB minimum size specified in the docs. However, I think the issue is actually in every single part upload.
Why? Because each part only has 2 MB of data. The docs say that every part but the last needs to be at least 5 MB. However, when I try to upload parts bigger than 2 MB, I get a CORS error, most probably because I have exceeded the 6 MB Lambda payload limit.
Can anyone help me with this? Below I leave my client-side code, just in case you can spot any error in it.
setLoading(true);
const file = files[0];
const size = 2000000;
const extension = file.name.substring(file.name.lastIndexOf('.'));
try {
    const multiStartResponse = await startMultiPartUpload({ fileType: extension });
    console.log(multiStartResponse);
    let part = 1;
    let parts = [];
    /* eslint-disable no-await-in-loop */
    for (let start = 0; start < file.size; start += size) {
        const chunk = file.slice(start, start + size + 1);
        const textChunk = await chunk.text();
        const partResponse = await uploadPart({
            file: textChunk,
            fileKey: multiStartResponse.data.Key,
            partNumber: part,
            uploadId: multiStartResponse.data.UploadId,
        });
        console.log(partResponse);
        parts.push({ ETag: partResponse.data.ETag, PartNumber: part });
        part++;
    }
    /* eslint-enable no-await-in-loop */
    const completeResponse = await completeMultiPartUpload({
        fileKey: multiStartResponse.data.Key,
        uploadId: multiStartResponse.data.UploadId,
        parts,
    });
    console.log(completeResponse);
} catch (e) {
    console.log(e);
} finally {
    setLoading(false);
}
It seems that uploading parts via Lambda is simply not possible, so we need to use a different approach.
Now, our startMultiPartUpload Lambda returns not only an upload ID but also a bunch of signed URLs, generated with the S3 aws-sdk class using the getSignedUrlPromise method and 'uploadPart' as the operation, as shown below:
const getSignedPartURL = (bucket, fileKey, uploadId, partNumber) =>
    s3.getSignedUrlPromise('uploadPart', { Bucket: bucket, Key: fileKey, UploadId: uploadId, PartNumber: partNumber });
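For context, this is roughly what the backend side of startMultiPartUpload could look like. The handler name, bucket environment variable, and key scheme below are assumptions on my part; createMultipartUpload and getSignedUrlPromise are the actual aws-sdk methods, and the response shape matches the destructuring used further down:

const AWS = require('aws-sdk');
const s3 = new AWS.S3();

// Hypothetical Lambda handler: starts the multipart upload and pre-signs
// one 'uploadPart' URL per expected part.
exports.startMultiPartUpload = async (event) => {
    const { fileType, chunksQuan } = JSON.parse(event.body || '{}');
    const bucket = process.env.UPLOAD_BUCKET;   // assumed env var
    const key = `${Date.now()}${fileType}`;     // assumed key scheme

    const startUploadResponse = await s3
        .createMultipartUpload({ Bucket: bucket, Key: key })
        .promise();

    const signedURLs = await Promise.all(
        Array.from({ length: chunksQuan }, (_, i) =>
            s3.getSignedUrlPromise('uploadPart', {
                Bucket: bucket,
                Key: key,
                UploadId: startUploadResponse.UploadId,
                PartNumber: i + 1,   // part numbers are 1-based
            })
        )
    );

    return {
        statusCode: 200,
        body: JSON.stringify({ signedURLs, startUploadResponse }),
    };
};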
Also, since uploading a part this way does not return an ETag (or maybe it does, but I just couldn't get to it), we need to call the listParts method on the S3 class after uploading the parts in order to get those ETags. I'll leave my React code below:
const uploadPart = async (url, data) => {
    try {
        // return await uploadPartToS3(url, data);
        return fetch(url, {
            method: 'PUT',
            body: data,
        }).then((e) => e.body);
    } catch (e) {
        console.error(e);
        throw new Error('Unknown error');
    }
};
// If file is bigger than 50Mb then perform a multi part upload
const uploadMultiPart = async ({ name, size, originFileObj }, updateUploadingMedia) => {
    // chunk size determines each part size. This needs to be > 5Mb
    const chunkSize = 60000000;
    let chunkStart = 0;
    const extension = name.substring(name.lastIndexOf('.'));
    const partsQuan = Math.ceil(size / chunkSize);
    // Start multi part upload. This returns both uploadId and signed urls for each part.
    const startResponse = await startMultiPartUpload({
        fileType: extension,
        chunksQuan: partsQuan,
    });
    console.log('start response: ', startResponse);
    const {
        signedURLs,
        startUploadResponse: { Key, UploadId },
    } = startResponse.data;
    try {
        let promises = [];
        /* eslint-disable no-await-in-loop */
        for (let i = 0; i < partsQuan; i++) {
            // Split file into parts and upload each one to its signed url
            const chunk = await originFileObj.slice(chunkStart, chunkStart + chunkSize).arrayBuffer();
            chunkStart += chunkSize;
            promises.push(uploadPart(signedURLs[i], chunk));
            if (promises.length === 5) {
                await Promise.all(promises);
                promises = [];
            }
        }
        /* eslint-enable no-await-in-loop */
        // wait until every part is uploaded
        await allProgress({ promises, name }, (media) => {
            updateUploadingMedia(media);
        });
        // Get parts list to build complete request (each upload does not retrieve ETag)
        const partsList = await listParts({
            fileKey: Key,
            uploadId: UploadId,
        });
        // build parts object for complete upload
        const completeParts = partsList.data.Parts.map(({ PartNumber, ETag }) => ({
            ETag,
            PartNumber,
        }));
        // Complete multi part upload
        completeMultiPartUpload({
            fileKey: Key,
            uploadId: UploadId,
            parts: completeParts,
        });
        return Key;
    } catch (e) {
        console.error('ERROR', e);
        const abortResponse = await abortUpload({
            fileKey: Key,
            uploadId: UploadId,
        });
        console.error(abortResponse);
    }
};
Sorry for the indentation, I corrected it line by line as best as I could :).
Some considerations:
- We use 60 MB chunks because our backend took too long generating all those signed URLs for big files.
- Also, this solution is meant to upload really big files; that's why we await every 5 parts.
However, we are still facing issues uploading huge files (about 35 GB), since after uploading 100/120 parts the fetch requests suddenly start to fail and no more parts are uploaded. If someone knows what's going on, that would be amazing. I'm publishing this as an answer because I think most people will find it very useful.
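One more note, in case it saves someone the listParts round trip: S3 does return each part's ETag in the ETag response header of the presigned PUT, but the browser can only read it if the bucket's CORS configuration exposes it (ExposeHeaders: ETag). With that CORS change in place (the only assumption here), the uploadPart helper could collect the ETags directly; a minimal sketch:

// Assumes the bucket CORS rule contains: <ExposeHeaders>ETag</ExposeHeaders>
const uploadPartWithETag = async (url, data, partNumber) => {
    const response = await fetch(url, { method: 'PUT', body: data });
    if (!response.ok) {
        throw new Error(`Part ${partNumber} failed with status ${response.status}`);
    }
    // The ETag header comes back quoted; CompleteMultipartUpload accepts it as-is.
    return { ETag: response.headers.get('ETag'), PartNumber: partNumber };
};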

Lambda trigger is not working as intended with bulk data

I'm using Lambda triggers to detect an insertion into a DynamoDB table (Tweets). Once triggered, I want to take the message in the event and get its sentiment using Comprehend. I then want to update a second DynamoDB table (SentimentAnalysis), where I ADD + 1 to a value depending on the sentiment.
This works fine if I manually insert a single item, but I want to be able to use the Twitter API to insert bulk data into my DynamoDB table and have every tweet analysed for its sentiment. The Lambda function works fine if the count specified in the Twitter params is <= 5, but anything above that causes an issue with the update in the SentimentAnalysis table; instead, the trigger keeps repeating itself with no sign of progress or stopping.
This is my lambda code:
let AWS = require("aws-sdk");
let comprehend = new AWS.Comprehend();
let documentClient = new AWS.DynamoDB.DocumentClient();

exports.handler = (event, context) => {
    event.Records.forEach(record => {
        if (record.eventName == "INSERT") {
            //console.log(JSON.stringify(record.dynamodb.NewImage.tweet.S));
            let params = {
                LanguageCode: "en",
                Text: JSON.stringify(record.dynamodb.NewImage.tweet.S)
            };
            comprehend.detectSentiment(params, (err, data) => {
                if (err) {
                    console.log("\nError with call to Comprehend:\n " + JSON.stringify(err));
                } else {
                    console.log("\nSuccessful call to Comprehend:\n " + data.Sentiment);
                    //when comprehend is successful, update the sentiment analysis data
                    //we can use the ADD expression to increment the value of a number
                    let sentimentParams = {
                        TableName: "SentimentAnalysis",
                        Key: {
                            city: record.dynamodb.NewImage.city.S,
                        },
                        UpdateExpression: "ADD " + data.Sentiment.toLowerCase() + " :pr",
                        ExpressionAttributeValues: {
                            ":pr": 1
                        }
                    };
                    documentClient.update(sentimentParams, (err, data) => {
                        if (err) {
                            console.error("Unable to read item " + JSON.stringify(sentimentParams.TableName));
                        } else {
                            console.log("Successful Update: " + JSON.stringify(data));
                        }
                    });
                }
            });
        }
    });
};
This is the image of a successful call; it works with the first few tweets.
This is the unsuccessful call right after the first image. The request always times out.
The timeout is why it's happening repeatedly. If the Lambda times out or otherwise errors, the batch will be reprocessed; you need to handle this because the delivery is "at least once". You also need to figure out the cause of the timeout. It might be as simple as using smaller batches, or it may need a more complex solution using Step Functions; you might also just be able to increase the timeout on the Lambda.
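One concrete thing to check in the handler above: it starts the Comprehend and DynamoDB calls with callbacks inside forEach and returns right away, so with bigger batches the function can be frozen or time out before the work finishes (and the batch then gets retried). A sketch of an async/await variant that waits for every record before returning, reusing the question's table and attribute names with the promisified aws-sdk calls:

const AWS = require("aws-sdk");
const comprehend = new AWS.Comprehend();
const documentClient = new AWS.DynamoDB.DocumentClient();

exports.handler = async (event) => {
    // Process all INSERT records and wait for every call to finish
    // before the handler returns, so work isn't cut off mid-batch.
    await Promise.all(event.Records
        .filter(record => record.eventName === "INSERT")
        .map(async record => {
            const data = await comprehend.detectSentiment({
                LanguageCode: "en",
                Text: record.dynamodb.NewImage.tweet.S
            }).promise();

            await documentClient.update({
                TableName: "SentimentAnalysis",
                Key: { city: record.dynamodb.NewImage.city.S },
                UpdateExpression: "ADD " + data.Sentiment.toLowerCase() + " :pr",
                ExpressionAttributeValues: { ":pr": 1 }
            }).promise();
        }));
};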

How to get result of AWS lambda function running with step function

I am using AWS Step Functions to invoke a Lambda function like this:
return stepfunctions.startExecution(params).promise().then((result) => {
    console.log(result);
    console.log(result.output);
    return result;
})
And the result is:
{ executionArn: 'arn:aws:states:eu-west-2:695510026694:...........:7c197be6-9dca-4bef-966a-ae9ad327bf23',
startDate: 2018-07-09T07:35:14.930Z }
But I want the result to be the output of the final Lambda function.
I am going through https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/StepFunctions.html#sendTaskSuccess-property
There are multiple functions there and I am confused about which one could be used to get back the result of the final Lambda function.
The same question is on Stack Overflow: Api gateway get output results from step function? I don't want to call any function periodically to keep checking the status. Even if I use the DescribeExecution function periodically, I will only get the status of the execution, not the result I want. Is there any way, or any function, that returns a promise and is resolved once all the Lambdas have executed, giving back the result?
You can't get back a result from a Step Functions execution in a synchronous way.
Instead of polling the result of the step function on completion, send the result to an SNS topic or SQS queue for further processing in the final Lambda function, or model the whole process in the Step Functions state machine.
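For illustration, here is a minimal sketch of the "publish from the final Lambda" option. The topic ARN environment variable and the shape of the result are assumptions; sns.publish is the real aws-sdk call:

const AWS = require('aws-sdk');
const sns = new AWS.SNS();

// Hypothetical final state of the state machine: it receives the
// accumulated result as its event and pushes it to an SNS topic,
// so downstream consumers get it without polling DescribeExecution.
exports.handler = async (event) => {
    await sns.publish({
        TopicArn: process.env.RESULT_TOPIC_ARN,   // assumed env var
        Message: JSON.stringify(event),
    }).promise();
    return event;
};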
After doing some study and looking at various tutorials, I realized that this Stack Overflow answer, Api gateway get output results from step function?, gives an easier approach to solve the problem and get the final result from the step function. I am not sure about other approaches or how to implement them; any new answer is always appreciated.
This is my code implementing the same approach; it might help someone.
// in function first start step function execution using startExecution()
var params = {
    stateMachineArn: 'some correct ARN',
    input: JSON.stringify(body)
};
return stepfunctions.startExecution(params).promise().then((result) => {
    var paramsStatus = {
        executionArn: result.executionArn
    };
    var finalResponse = new Promise(function(resolve, reject) {
        var checkStatusOfStepFunction = setInterval(function() {
            // on regular interval keep checking status of step function
            stepfunctions.describeExecution(paramsStatus, function(err, data) {
                console.log('called describeExecution:', data.status);
                if (err) {
                    clearInterval(checkStatusOfStepFunction);
                    reject(err);
                } else {
                    if (data.status !== 'RUNNING') {
                        // once status is not running the step function execution is
                        // finished and we get the result as data.output
                        clearInterval(checkStatusOfStepFunction);
                        resolve(data.output);
                    }
                }
            });
        }, 200);
    });
    return finalResponse;
});
To be able to get the result of a step function (for example, combined API Gateway and Step Functions), you need to:
1. call startExecution,
2. wait for your state machine to finish the execution (to be safe, make the wait equal to the timeout of your state machine, i.e. wait = the TimeoutSeconds of your state machine),
3. call describeExecution with the executionArn received from startExecution.
Note that startExecution is an async call and does not wait for the result.
In my case, I'm using a Lambda named init to execute the 3 steps discussed:
Code of the init Lambda:
const AWS = require('aws-sdk')

exports.handler = async (event) => {
    const stepFunctions = new AWS.StepFunctions();
    const reqBody = event.body || {};
    const params = {
        stateMachineArn: process.env.stateMachineArn,
        input: JSON.stringify(reqBody)
    }
    return stepFunctions.startExecution(params).promise()
        .then(async data => {
            console.log('==> data: ', data)
            await new Promise(r => setTimeout(r, 6000));
            return stepFunctions.describeExecution({ executionArn: data.executionArn }).promise();
        })
        .then(result => {
            return {
                statusCode: 200,
                message: JSON.stringify(result)
            }
        })
        .catch(err => {
            console.error('err: ', err)
            return {
                statusCode: 500,
                message: JSON.stringify({ message: 'facing error' })
            }
        })
}
Code of the state machine
Make sure that in your state machine you're returning "ResultPath".
{
    "Comment": "Annoucement validation",
    "StartAt": "contact-validation",
    "Version": "1.0",
    "TimeoutSeconds": 5,
    "States": {
        "contact-validation": {
            "Type": "Task",
            "Resource": "arn:aws:xxxxxxx:function:scam-detection-dev-contact",
            "ResultPath": "$.res",
            "Next": "WaitSeconds"
        },
        "WaitSeconds": {
            "Type": "Wait",
            "Seconds": 1,
            "Next": "Result"
        },
        "Result": {
            "Type": "Pass",
            "ResultPath": "$.res",
            "End": true
        }
    }
}

Loopback strange behaviour

I am talking about the LoopBack push component. I am trying to intercept the "create" method of the "Installation" model. My code looks like this:
server/boot/installationex.js
module.exports = function (app) {
    var Installation = app.models.Installation;
    var create = Installation.create;
    Installation.create = function (data, cb) {
        //reinitializing old implementation
        this.create = create;
        console.log("Received data: " + JSON.stringify(data));
        if (!data || !data.imei) {
            console.log("No data or imei was provided, creating new");
            this.create(data, cb);
            return;
        }
        //saving 'this' reference
        var that = this;
        //search by imei filter
        var filter = {where: {imei: data.imei}};
        this.findOne(filter, function (err, result) {
            if (err) {
                console.log("Error occurred while looking for installation by IMEI");
                cb(err);
                return;
            }
            if (!result) {
                console.log("No installation found by IMEI, will create a new installation");
                that.create(data, cb);
                return;
            }
            console.log("Found existing installation with id: " + JSON.stringify(result));
            result.deviceToken = result.gpsLocation = result.osVersion = result.vendor = result.phoneNumbers = null;
            if (data.deviceToken) {
                result.deviceToken = data.deviceToken;
            }
            if (data.gpsLocation) {
                result.gpsLocation = data.gpsLocation;
            }
            if (data.osVersion) {
                result.osVersion = data.osVersion;
            }
            if (data.vendor) {
                //result.vendor=data.vendor;
                result.vendor = 'jahid';
            }
            if (data.phoneNumbers) {
                result.phoneNumbers = data.phoneNumbers;
            }
            that.upsert(result, cb);
        });
    };
};
Unfortunately this code is invoked only once, the first time; after that it is never invoked. I made sure of this by looking at the log: it only prints the log the first time, and after that it does not print anything.
Any idea why this glue code is only invoked once? My intention is to intercept all create method invocations for the Installation model and check if there is already an entry for the supplied "IMEI"; if so, reuse it, otherwise create a new one.
Thanks in advance.
Best regards,
Jahid
What I would start with here is:
- instead of implementing your own intercepting mechanism, use Model Hooks
- check out the findOrCreate() method (see the sketch below)
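For example, a minimal sketch of how findOrCreate() could replace the manual findOne/create/upsert logic from the question (dedupeInstallation is a hypothetical helper name; findOrCreate(filter, data, cb) is the actual LoopBack method):

module.exports = function (app) {
    var Installation = app.models.Installation;

    // Hypothetical helper: returns the existing installation for this IMEI,
    // or creates one from `data` if nothing matches the filter.
    app.dedupeInstallation = function (data, cb) {
        Installation.findOrCreate(
            { where: { imei: data.imei } },   // lookup filter
            data,                             // used only when nothing is found
            function (err, installation, created) {
                if (err) return cb(err);
                console.log(created ? "Created new installation" : "Reusing installation " + installation.id);
                cb(null, installation);
            }
        );
    };
};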
Boot scripts are only run once during application startup. If you want a function that triggers every time a method is called, use a remote hook or model hook, probably something along the lines of:
...
Installation.beforeRemote('create', ...
...
see http://docs.strongloop.com/display/LB/Adding+logic+to+models for more info
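To make that snippet a bit more concrete, here is an illustrative sketch of the same IMEI check inside a 'create' remote hook. The beforeRemote signature is standard LoopBack; the deduplication logic is just the question's logic moved into the hook, and short-circuiting via ctx.res is one possible way to skip the default create:

module.exports = function (app) {
    var Installation = app.models.Installation;

    // Unlike the boot-time monkey patch, this hook runs on every
    // remote call to Installation.create.
    Installation.beforeRemote('create', function (ctx, unused, next) {
        var data = ctx.args.data;
        if (!data || !data.imei) return next();

        Installation.findOne({ where: { imei: data.imei } }, function (err, existing) {
            if (err) return next(err);
            if (!existing) return next();   // no duplicate, let create proceed

            // Duplicate IMEI: update the existing record and answer with it
            // instead of letting the default create add a second one.
            existing.updateAttributes(data, function (err, updated) {
                if (err) return next(err);
                ctx.res.send(updated);      // respond directly, skipping create
            });
        });
    });
};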

AWS S3 Bucket Upload using CollectionFS and cfs-s3 meteor package

I am using Meteor.js with an Amazon S3 bucket for uploading and storing photos. I am using the Meteorite packages CollectionFS and cfs-s3. I have set up my aws-s3 connection correctly and the images collection is working fine.
Client side event handler:
'click .submit': function(evt, templ) {
    var user = Meteor.user();
    var photoFile = $('#photoInput').get(0).files[0];
    if (photoFile) {
        var readPhoto = new FileReader();
        readPhoto.onload = function(event) {
            photodata = event.target.result;
            console.log("calling method");
            Meteor.call('uploadPhoto', photodata, user);
        };
        readPhoto.readAsDataURL(photoFile);
    }
}
And my server side method:
'uploadPhoto': function uploadPhoto(photodata, user) {
    var tag = Random.id([10] + "jpg");
    var photoObj = new FS.File({name: tag});
    photoObj.attachData(photodata);
    console.log("s3 method called");
    Images.insert(photoObj, function (err, fileObj) {
        if (err) {
            console.log(err, err.stack);
        } else {
            console.log(fileObj._id);
        }
    });
}
The file that is selected is a .jpg image file, but upon upload I get this error in the server method:
Exception while invoking method 'uploadPhoto' Error: DataMan constructor received data that it doesn't support
And no matter whether I directly pass the image file, attach it as data, or use the FileReader to read it as text/binary/string, I still get that error. Please advise.
Ok, maybe some thoughts. I did some things with CollectionFS a few months ago, so check against the docs, because my examples may not be 100% correct.
Credentials should be set via environment variables, so your key and secret are available on the server only. Check this link for further reading.
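For instance, a cfs-s3 store is typically configured along these lines (a sketch: the region and bucket name are placeholders, and 'profileImagesStore' matches the store referenced in the collection definition further down):

// server-side: an S3 store for cfs-s3, reading the credentials
// from environment variables only, never from the client bundle.
profileImagesStore = new FS.Store.S3("profileImagesStore", {
    region: "us-east-1",                                // placeholder region
    accessKeyId: process.env.AWS_ACCESS_KEY_ID,         // set outside the code
    secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY, // set outside the code
    bucket: "my-photo-bucket"                           // placeholder bucket name
});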
Ok, first, here is some example code which is working for me. Check yours for differences.
Template helper:
'dropped #dropzone': function(event, template) {
    addImagePreview(event);
}
The addImagePreview function:
function addImagePreview(event) {
    //Go through each file
    FS.Utility.eachFile(event, function(file) {
        //Some validation checks
        var reader = new FileReader();
        reader.onload = (function(theFile) {
            return function(e) {
                var fsFile = new FS.File(image.src);
                //set metadata, which is validated in the collection
                //only the owning user can update/remove the fsFile
                fsFile.metadata = {owner: Meteor.userId()};
                PostImages.insert(fsFile, function (err, fileObj) {
                    if (err) {
                        console.log(err);
                    }
                });
            };
        })(file);
        // Read in the image file as a data URL.
        reader.readAsDataURL(file);
    });
}
Ok, your next point is the validation. The validation can be done with allow/deny rules and with a filter on the FS.Collection. This way you can do all your validation AND insert via client.
Example:
PostImages = new FS.Collection('profileImages', {
    stores: [profileImagesStore],
    filter: {
        maxSize: 3145728,
        allow: {
            contentTypes: ['image/*'],
            extensions: ['png', 'PNG', 'jpg', 'JPG', 'jpeg', 'JPEG']
        }
    },
    onInvalid: function(message) {
        console.log(message);
    }
});

PostImages.allow({
    insert: function(userId, doc) {
        return (userId && doc.metadata.owner === userId);
    },
    update: function(userId, doc, fieldNames, modifier) {
        return (userId === doc.metadata.owner);
    },
    remove: function(userId, doc) {
        return false;
    },
    download: function(userId) {
        return true;
    },
    fetch: []
});
Here you will find another example: click
Another possible point of error is your AWS configuration. Have you done everything as it is written here?
Based on this post (click) it seems that this error occurs when FS.File() is not constructed correctly, so maybe this should be your first place to look.
A lot to read, so I hope this helps you :)