AWS JavaScript SDK request.js send request function execution time gradually increases

I am using the aws-sdk to push data to a Kinesis stream.
I am using PutRecord to push data in real time.
I observe the same delay with PutRecords in the case of batch writes.
I have tried this out with 4 records, where I am not crossing any shard limit.
Below is my Node.js HTTP agent configuration. The default maxSockets value is set to Infinity.
Agent {
  domain: null,
  _events: { free: [Function] },
  _eventsCount: 1,
  _maxListeners: undefined,
  defaultPort: 80,
  protocol: 'http:',
  options: { path: null },
  requests: {},
  sockets: {},
  freeSockets: {},
  keepAliveMsecs: 1000,
  keepAlive: false,
  maxSockets: Infinity,
  maxFreeSockets: 256 }
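Note that with keepAlive: false every request opens a fresh connection. A minimal sketch of enabling keep-alive through the SDK's httpOptions (whether this removes the growing delay here is an assumption, but it avoids a new TCP/TLS handshake per request; the region is a placeholder):

// Sketch: reuse sockets across requests (aws-sdk v2).
var AWS = require('aws-sdk');
var https = require('https');

var agent = new https.Agent({
  keepAlive: true,
  maxSockets: 50
});

var kinesis = new AWS.Kinesis({
  region: 'us-east-1',          // placeholder
  httpOptions: { agent: agent }
});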
Below is my code.
I am using the following code to trigger the putRecord call:
event.Records.forEach(function(record) {
  var payload = new Buffer(record.kinesis.data, 'base64').toString('ascii');
  // put record request
  evt = transformEvent(payload);
  promises.push(writeRecordToKinesis(kinesis, streamName, evt));
});
The event structure is:
evt = {
  Data: new Buffer(JSON.stringify(payload)),
  PartitionKey: payload.PartitionKey,
  StreamName: streamName,
  SequenceNumberForOrdering: dateInMillis.toString()
};
This event is used in the put request.
function writeRecordToKinesis(kinesis, streamName, evt) {
  console.time('WRITE_TO_KINESIS_EXECUTION_TIME');
  var deferred = Q.defer();
  try {
    kinesis.putRecord(evt, function(err, data) {
      if (err) {
        console.warn('Kinesis putRecord %j', err);
        deferred.reject(err);
      } else {
        console.log(data);
        deferred.resolve(data);
      }
      console.timeEnd('WRITE_TO_KINESIS_EXECUTION_TIME');
    });
  } catch (e) {
    console.error('Error occurred while writing data to Kinesis: ' + e);
    deferred.reject(e);
  }
  return deferred.promise;
}
Below is the output for 3 messages.
WRITE_TO_KINESIS_EXECUTION_TIME: 2026ms
WRITE_TO_KINESIS_EXECUTION_TIME: 2971ms
WRITE_TO_KINESIS_EXECUTION_TIME: 3458ms
Here we can see a gradual increase in response time and function execution time.
I added timers in the aws-sdk request.js class and can see the same pattern there as well.
Below is the code snippet from the aws-sdk request.js class which executes the put request.
send: function send(callback) {
  console.time('SEND_REQUEST_TO_KINESIS_EXECUTION_TIME');
  if (callback) {
    this.on('complete', function (resp) {
      console.timeEnd('SEND_REQUEST_TO_KINESIS_EXECUTION_TIME');
      callback.call(resp, resp.error, resp.data);
    });
  }
  this.runTo();
  return this.response;
},
Output for send request:
SEND_REQUEST_TO_KINESIS_EXECUTION_TIME: 1751ms
SEND_REQUEST_TO_KINESIS_EXECUTION_TIME: 1816ms
SEND_REQUEST_TO_KINESIS_EXECUTION_TIME: 2761ms
SEND_REQUEST_TO_KINESIS_EXECUTION_TIME: 3248ms
Here you can see it increasing gradually.
Can anyone please suggest how I can reduce this delay?
Three seconds to push a single record to Kinesis is not at all acceptable.

Related

Kinesis Data Stream GetRecords giving Junk Data

While doing GetRecords from a Kinesis Data Stream, it intermittently gives values like ����Y�ݽ#��5���I$ݔѵ�����ս��aQ���̽�9������ռ��. I have a large amount of data, and the junk data comes in between the correct data.
My code is Node.js in Lambda and is triggered by SQS (which is a DLQ of an upstream Lambda that processes the Kinesis stream). This Lambda is expected to do the reprocessing logic for the failed messages of the upstream Lambda.
Any suggestion would be very helpful. Thanks.
Sample Data:
{ "Attribute1" : "Value with over 400 charcters available ����Y�ݽ#��5���I$ݔѵ�����ս��aQ���̽�9������ռ�� . to make it work" , "Attribute2" : "value2" , "Attribute3" : "value3"}
var getKinesisRecords = async function (shardIterator) {
  var getParams = {
    ShardIterator: shardIterator,
    Limit: 1
  };
  await kinesis.getRecords(getParams, function(err, result) {
    if (err) { console.log(err, err.stack); return null; } // an error occurred
    else {
      try {
        for (var record in result.Records) {
          var data = result.Records[record].Data;
          console.log("Kinesis Record data" + data);
        }
      } catch (err) {
        console.log("Error while getting Record from Kinesis");
        console.log(err);
        return null;
      }
      if (result.NextShardIterator) getKinesisRecords(result.NextShardIterator);
    } // successful response
  }).promise();
  // Process the record
};
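One possibility worth ruling out (an assumption on my part, since the producer code is not shown): if any producer or consumer decodes the payload as 'ascii', multi-byte UTF-8 characters get mangled into exactly this kind of junk. A minimal sketch of decoding the record payload as UTF-8 instead:

// Sketch: Record.Data is a Buffer in the Node.js aws-sdk v2.
// 'utf8' preserves multi-byte characters; 'ascii' corrupts any byte above 0x7F.
var utf8Payload = Buffer.from(record.Data).toString('utf8');   // safe
var asciiPayload = Buffer.from(record.Data).toString('ascii'); // lossy for non-ASCII
console.log(utf8Payload);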

Does AWS SQS Long poll return early if a message is received?

If I use SQS long polling and set WaitTimeSeconds to, say, 10 seconds and MaxNumberOfMessages to 1, and a single message is delivered to the queue after, say, 0.1 seconds, will the call to sqs.receiveMessage() return immediately at that point, or will it not return until the 10 seconds of WaitTimeSeconds have elapsed?
In my testing, the call to sqs.receiveMessage() seems not to return until the full duration of WaitTimeSeconds has elapsed.
Here is the code:
// Load the AWS SDK for Node.js
var AWS = require("aws-sdk");

const fmReqQ = "https://sqs.ap-southeast-2.amazonaws.com/myactid/fmReqQ";
const fmRspQ = "https://sqs.ap-southeast-2.amazonaws.com/myactid/fmRspQ";
const SydneyRegion = "ap-southeast-2";

var credentials = new AWS.SharedIniFileCredentials({ profile: "myprofile" });
AWS.config.credentials = credentials;

// Set the region
AWS.config.update({ region: SydneyRegion });

// Create an SQS service object
var sqs = new AWS.SQS({ apiVersion: "2012-11-05" });

async function sendRequest() {
  var sendParams = {
    MessageBody: "Information of 12/11/2016.",
    QueueUrl: fmReqQ,
  };
  try {
    const data = await sqs.sendMessage(sendParams).promise();
    console.log("Success, request MessageId: ", data.MessageId);
  } catch (err) {
    console.log("Error", err);
  }
}

async function doModelling() {
  console.time("modelling");
  await sendRequest();
  await receiveResponse();
  console.timeEnd("modelling");
}

async function receiveResponse() {
  var receiveParams = {
    AttributeNames: ["SentTimestamp"],
    MaxNumberOfMessages: 1,
    MessageAttributeNames: ["All"],
    QueueUrl: fmRspQ,
    WaitTimeSeconds: 1,
  };
  let data = null;
  try {
    data = await sqs.receiveMessage(receiveParams).promise();
    console.log("Success, response MessageId: ", data);
  } catch (err) {
    console.log("Error", err);
  }
}

doModelling();
doModelling();
When I set "WaitTimeSeconds: 3" I get output:
Success, request MessageId: e5079c2a-050f-4681-aa8c-77b05ac7da7f
Success, response MessageId: {
ResponseMetadata: { RequestId: '1b4d6a6b-eaa2-59ea-a2c3-3d9b6fadbb3f' }
}
modelling: 3.268s
When I set "WaitTimeSeconds: 10" I get output:
Success, request MessageId: bbf0a429-b2f7-46f2-b9dd-38833b0c462a
Success, response MessageId: {
ResponseMetadata: { RequestId: '64bded2d-5398-5ca2-86f8-baddd6d4300a' }
}
modelling: 10.324s
Notice how the elapsed durations match WaitTimeSeconds.
From reading about AWS SQS long polling, it says long polling will "return messages as soon as they become available."
I don't seem to be seeing the messages "as soon as they become available"; the sqs.receiveMessage() call always takes the full duration set in WaitTimeSeconds.
As you can see in the sample code, I have set MaxNumberOfMessages to 1.
Using ReceiveMessage() with Long Polling will return as soon as there is at least one message in the queue.
I'm not a Node person, but here's how I tested it:
Created an Amazon SQS queue
In one window, I ran:
aws sqs receive-message --queue-url https://sqs.ap-southeast-2.amazonaws.com/123/foo --visibility-timeout 1 --wait-time-seconds 10
Then, in another window, I ran:
aws sqs send-message --queue-url https://sqs.ap-southeast-2.amazonaws.com/123/foo --message-body bar
The receive-message command returned very quickly after I used the send-message command.
It is possible that your tests are impacted by messages being 'received' but marked as 'invisible', and remaining invisible for later tests since your code does not call DeleteMessage(). I avoided this by specifically stating --visibility-timeout 1, which made the message immediately reappear on the queue for the next test.
The number of messages being requested (--max-number-of-messages) does not impact this result. It returns as soon as there is at least one message available.
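For reference, a rough Node.js equivalent of that test (my own sketch, with a placeholder queue URL, not the answerer's code): start a long poll on an empty queue, send a message one second in, and observe that receiveMessage() returns shortly after the send rather than after the full wait.

// Sketch: long-poll early-return test (aws-sdk v2; queue URL is a placeholder).
var AWS = require("aws-sdk");
var sqs = new AWS.SQS({ region: "ap-southeast-2" });
const queueUrl = "https://sqs.ap-southeast-2.amazonaws.com/123/foo";

async function test() {
  console.time("receive");
  // Start a 10-second long poll on an empty queue...
  const receiving = sqs.receiveMessage({
    QueueUrl: queueUrl,
    MaxNumberOfMessages: 1,
    WaitTimeSeconds: 10,
  }).promise();

  // ...and send a message one second in; the receive should resolve
  // shortly after the send, not after the full 10 seconds.
  setTimeout(() => {
    sqs.sendMessage({ QueueUrl: queueUrl, MessageBody: "bar" }).promise();
  }, 1000);

  const data = await receiving;
  console.timeEnd("receive"); // expected: roughly 1s, not 10s
  console.log((data.Messages || []).length, "message(s) received");
}

test();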
I set the "WaitTimeSeconds" to 0, I seem to get the behaviour that I am after now:
Success, request MessageId: 9f286e22-1a08-4532-88ba-06c88be3dbc3
Success, response MessageId: {
ResponseMetadata: { RequestId: '5c264e27-0788-5772-a990-19d78c8b2565' }
}
modelling: 307.884ms
The value I was specifying for "WaitTimeSeconds" determined the duration of the sqs.receiveMessage() call because there were no messages on my queue; with an empty queue, sqs.receiveMessage() waits for the full duration specified by "WaitTimeSeconds".

Self invoking lambda invocation timing out

We're trying to develop a self-invoking lambda to process S3 files in chunks. The lambda role has the policies needed for the invocation attached.
Here's the code for the self-invoking lambda:
export const processFileHandler: Handler = async (
  event: S3CreateEvent,
  context: Context,
  callback: Callback,
) => {
  let bucket = loGet(event, 'Records[0].s3.bucket.name');
  let key = loGet(event, 'Records[0].s3.object.key');
  let totalFileSize = loGet(event, 'Records[0].s3.object.size');
  const lastPosition = loGet(event, 'position', 0);
  const nextRange = getNextSizeRange(lastPosition, totalFileSize);
  context.callbackWaitsForEmptyEventLoop = false;
  let data = await loadDataFromS3ByRange(bucket, key, nextRange);
  await database.connect();
  log.debug(`Successfully connected to the database`);
  const docs = await getParsedDocs(data, lastPosition);
  log.debug(`upserting ${docs.length} records to database`);
  if (docs.length) {
    try {
      // upserting logic
      log.debug(`total documents added: ${docs.length}`);
    } catch (err) {
      await recurse(nextRange.end, event, context);
      log.debug(`error inserting docs: ${JSON.stringify(err)}`);
    }
  }
  if (nextRange.end < totalFileSize) {
    log.debug(`Last ${context.getRemainingTimeInMillis()} milliseconds left`);
    if (context.getRemainingTimeInMillis() < 10 * 10 * 10 * 6) { // i.e. 6000 ms
      log.debug(`Less than 6000 milliseconds left`);
      log.debug(`Invoking next iteration`);
      await recurse(nextRange.end, event, context);
      callback(null, {
        message: `Lambda timed out processing file, please continue from LAST_POSITION: ${nextRange.start}`,
      });
    }
  } else {
    callback(null, { message: `Successfully completed the chunk processing task` });
  }
};
Where recurse is an invocation call to the same Lambda. Everything else works as expected; it just times out whenever the call stack reaches this invocation request:
// assumes an AWS.Lambda client created elsewhere, e.g. const lambda = new AWS.Lambda();
const recurse = async (position: number, event: S3CreateEvent, context: Context) => {
  let newEvent = Object.assign(event, { position });
  let request = {
    FunctionName: context.invokedFunctionArn,
    InvocationType: 'Event',
    Payload: JSON.stringify(newEvent),
  };
  let resp = await lambda.invoke(request).promise();
  console.log('Invocation complete', resp);
  return resp;
};
This is the stack trace logged to CloudWatch:
{
  "errorMessage": "connect ETIMEDOUT 63.32.72.196:443",
  "errorType": "NetworkingError",
  "stackTrace": [
    "Object._errnoException (util.js:1022:11)",
    "_exceptionWithHostPort (util.js:1044:20)",
    "TCPConnectWrap.afterConnect [as oncomplete] (net.js:1198:14)"
  ]
}
It is not a good idea to create a self-invoking Lambda function. In case of an error (which could also be a bad handler call on the AWS side), a Lambda function might re-run several times, and that is very hard to monitor and debug.
I would suggest using Step Functions. I believe this tutorial can help: Iterating a Loop Using Lambda.
Off the top of my head, if you prefer not dealing with Step Functions, you could create a Lambda trigger for an SQS queue. Then you pass a message to the queue if you want to run the Lambda function another time; see the sketch below.
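A minimal sketch of that SQS-based continuation, under the assumption that the same Lambda is configured with this queue as its trigger (the queue URL is a placeholder):

// Sketch: continuation via SQS instead of direct self-invocation (aws-sdk v2).
const AWS = require('aws-sdk');
const sqs = new AWS.SQS();
const QUEUE_URL = 'https://sqs.us-east-1.amazonaws.com/123456789012/file-chunks'; // placeholder

// Instead of invoking the Lambda directly, enqueue the continuation event;
// the same Lambda, triggered by this queue, picks it up as a fresh invocation.
const recurse = async (position, event) => {
  const newEvent = Object.assign({}, event, { position });
  return sqs.sendMessage({
    QueueUrl: QUEUE_URL,
    MessageBody: JSON.stringify(newEvent),
  }).promise();
};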

How to get result of AWS lambda function running with step function

I am using AWS Step Functions to invoke a Lambda function like this:
return stepfunctions.startExecution(params).promise().then((result) => {
  console.log(result);
  console.log(result.output);
  return result;
})
And the result is:
{ executionArn: 'arn:aws:states:eu-west-2:695510026694:...........:7c197be6-9dca-4bef-966a-ae9ad327bf23',
  startDate: 2018-07-09T07:35:14.930Z }
But I want the result to be the output of the final Lambda function.
I am going through https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/StepFunctions.html#sendTaskSuccess-property
There are multiple functions there, and I am confused about which one could be used to get back the result of the final Lambda function.
The same question is on Stack Overflow: Api gateway get output results from step function? I don't want to call any function periodically and keep checking the status. Even if I use the DescribeExecution function periodically, I will only get the status of the execution, not the result I wanted. Is there any way, or any function returning a promise, that resolves once all the Lambdas have executed and gives back the result?
You can't get back a result from a Step Functions execution in a synchronous way.
Instead of polling the result of the step function on completion, send the result to an SNS topic or SQS queue for further processing in the final Lambda function, or model the whole process in the Step Functions state machine.
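For illustration, a minimal sketch of that suggestion (an assumed shape, not the asker's code; the topic ARN is a placeholder): the final Lambda in the state machine publishes its result to SNS instead of the caller polling for it.

// Sketch: final-step Lambda publishing its result to SNS (aws-sdk v2).
const AWS = require('aws-sdk');
const sns = new AWS.SNS();
const TOPIC_ARN = 'arn:aws:sns:eu-west-2:123456789012:step-results'; // placeholder

exports.handler = async (event) => {
  const result = { status: 'DONE', payload: event }; // whatever the final step computed
  await sns.publish({
    TopicArn: TOPIC_ARN,
    Message: JSON.stringify(result),
  }).promise();
  return result;
};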
After doing some study and looking at various tutorials, I realized that this Stack Overflow answer, Api gateway get output results from step function?, gives an easier approach to solve the problem and get the final result from the step function. I am not sure about other approaches, and any new answer is always appreciated.
This is my code implementing the same approach; it might help someone.
// in the function, first start the step function execution using startExecution()
var params = {
  stateMachineArn: 'some correct ARN',
  input: JSON.stringify(body)
};
return stepfunctions.startExecution(params).promise().then((result) => {
  var paramsStatus = {
    executionArn: result.executionArn
  };
  var finalResponse = new Promise(function(resolve, reject) {
    var checkStatusOfStepFunction = setInterval(function() {
      // on a regular interval, keep checking the status of the step function
      stepfunctions.describeExecution(paramsStatus, function(err, data) {
        if (err) {
          clearInterval(checkStatusOfStepFunction);
          reject(err);
        } else {
          console.log('called describeExecution:', data.status);
          if (data.status !== 'RUNNING') {
            // once the status is no longer RUNNING, the execution has
            // finished and the result is available as data.output
            clearInterval(checkStatusOfStepFunction);
            resolve(data.output);
          }
        }
      });
    }, 200);
  });
  return finalResponse;
})
To be able to get the result of a step function (for example, combined API Gateway & Step Functions), you need to:
1. Call startExecution.
2. Wait for your state machine to finish the execution (to be safe, make the wait equal to the timeout of your state machine, i.e. wait = TimeoutSeconds of your state machine).
3. Call describeExecution with the executionArn received from startExecution.
Note that startExecution is an async function and does not wait for the result.
In my case, I'm using a Lambda named init to execute the 3 steps discussed above.
Code for Lambda init:
const AWS = require('aws-sdk')

exports.handler = async (event) => {
  const stepFunctions = new AWS.StepFunctions();
  const reqBody = event.body || {};
  const params = {
    stateMachineArn: process.env.stateMachineArn,
    input: JSON.stringify(reqBody)
  }
  return stepFunctions.startExecution(params).promise()
    .then(async data => {
      console.log('==> data: ', data)
      await new Promise(r => setTimeout(r, 6000));
      return stepFunctions.describeExecution({ executionArn: data.executionArn }).promise();
    })
    .then(result => {
      return {
        statusCode: 200,
        message: JSON.stringify(result)
      }
    })
    .catch(err => {
      console.error('err: ', err)
      return {
        statusCode: 500,
        message: JSON.stringify({ message: 'facing error' })
      }
    })
}
Code for the state machine
Make sure that in your state machine you are returning "ResultPath".
{
  "Comment": "Announcement validation",
  "StartAt": "contact-validation",
  "Version": "1.0",
  "TimeoutSeconds": 5,
  "States": {
    "contact-validation": {
      "Type": "Task",
      "Resource": "arn:aws:xxxxxxx:function:scam-detection-dev-contact",
      "ResultPath": "$.res",
      "Next": "WaitSeconds"
    },
    "WaitSeconds": {
      "Type": "Wait",
      "Seconds": 1,
      "Next": "Result"
    },
    "Result": {
      "Type": "Pass",
      "ResultPath": "$.res",
      "End": true
    }
  }
}

Why AWS Lambda function invoked multiple times for a single event?

I am trying to create an AWS Lambda function that does the following process.
Receive an S3 "Put" event
Get fileA from S3
Get fileB from the S3 bucket that invoked the Lambda
Launch just one EC2 instance
Create tags for the new EC2 instance
Problem: Multiple (5) instances are launched unexpectedly.
An instance is successfully created, but 4 other instances are also launched; 5 instances in total.
Logs
In the Log Streams for this function, I found 4 streams for this invocation. No stream shows any errors or exceptions, but it seems that the function is executed repeatedly.
Trial
I guessed that the function had timed out and been re-run.
Then I changed the Timeout from 5s to 60s and put a file on S3.
It had some effect. Only 2 Log Streams appeared; the first shows the function executed just once, the second shows it executed twice. The number of launched instances was 3.
However, I have no idea why multiple (3) instances are launched.
Any comments are welcome!
Thank you in advance :-)
My Lambda function
My Lambda function is below. (It's simplified to hide credential information, but it keeps its basic structure.)
var AWS = require('aws-sdk');

function composeParams(data, config) {
  var block_device_name = "/dev/xvdb";
  var security_groups = [
    "MyGroupName"
  ];
  var key_name = 'mykey';
  var security_group_ids = [
    "sg-xxxxxxx"
  ];
  var subnet_id = "subnet-xxxxxxx";
  // Configuration for a new EC2 instance
  var params = {
    ImageId: 'ami-22d27b22', /* required */
    MaxCount: 1, /* required */
    MinCount: 1, /* required */
    KeyName: key_name,
    SecurityGroupIds: security_group_ids,
    InstanceType: data.instance_type,
    BlockDeviceMappings: [
      {
        DeviceName: block_device_name,
        Ebs: {
          DeleteOnTermination: true,
          Encrypted: true,
          VolumeSize: data.volume_size,
          VolumeType: 'gp2'
        }
      }
    ],
    Monitoring: {
      Enabled: false /* required */
    },
    SubnetId: subnet_id,
    UserData: new Buffer(config).toString('base64'),
    DisableApiTermination: false,
    InstanceInitiatedShutdownBehavior: 'stop',
    DryRun: data.dry_run,
    EbsOptimized: false
  };
  return params;
}

exports.handler = function(event, context) {
  // Get the object from the event
  var s3 = new AWS.S3({ apiVersion: '2006-03-01' });
  var bucket = event.Records[0].s3.bucket.name;
  var key = event.Records[0].s3.object.key;
  // Get fileA
  var paramsA = {
    Bucket: bucket,
    Key: key
  };
  s3.getObject(paramsA, function(err, data) {
    if (err) {
      console.log(err);
    } else {
      var dataA = JSON.parse(String(data.Body));
      // Get fileB
      var paramsB = {
        Bucket: bucket,
        Key: 'config/config.yml'
      };
      s3.getObject(paramsB, function(err, data) {
        if (err) {
          console.log(err, err.stack);
        } else {
          var config = data.Body;
          /* Some process */
          // Launch EC2 instance
          var ec2 = new AWS.EC2({ region: REGION, apiVersion: '2015-04-15' });
          var params = composeParams(dataA, config);
          ec2.runInstances(params, function(err, data) {
            if (err) {
              console.log(err, err.stack);
            } else {
              console.log(data);
              // Create tags for each new instance
              for (var i = 0; i < data.Instances.length; i++) {
                var instance = data.Instances[i];
                var params = {
                  Resources: [ /* required */
                    instance.InstanceId
                  ],
                  Tags: [ /* required */
                    {
                      Key: 'Name',
                      Value: instance.InstanceId
                    },
                    {
                      Key: 'userID',
                      Value: dataA.user_id
                    }
                  ],
                  DryRun: dataA.dry_run
                };
                ec2.createTags(params, function(err, data) {
                  if (err) {
                    console.log(err, err.stack);
                  } else {
                    console.log("Tags created.");
                    console.log(data);
                  }
                });
              }
            }
          });
        }
      });
    }
  });
};
Solved.
Adding context.succeed(message); to the last part of the nested callback prevents the repeated execution of the function:
ec2.createTags(params, function(err, data) {
  if (err) {
    console.log(err, err.stack);
    context.fail('Failed');
  } else {
    console.log("Tags created.");
    console.log(data);
    context.succeed('Completed');
  }
});
Check the context.aws_request_id value for each invocation in the CloudWatch events. If it is:
the same: it is a retry because the function raised an error. Make your Lambda idempotent.
different: it is because of a connection timeout in your AWS Lambda client. Check the client configuration's request timeout and connect timeout values; a sketch follows below.
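A minimal sketch of that client-side configuration, assuming the caller uses the Node.js aws-sdk v2 (the timeout values are placeholders):

// Sketch: client timeout configuration (aws-sdk v2).
var AWS = require('aws-sdk');

var lambda = new AWS.Lambda({
  httpOptions: {
    connectTimeout: 5000, // ms allowed to establish the TCP connection
    timeout: 120000       // ms allowed for the response; keep it above the
                          // function's own timeout so the client does not
                          // retry (and re-invoke) a still-running function
  },
  maxRetries: 0           // disable SDK-level retries for this caller
});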
I was having the same problem with the newer runtime (Node.js v4.3). Call
context.callbackWaitsForEmptyEventLoop = false;
before calling
callback(...)
Maximum Event Age
When a function returns an error before execution, Lambda returns the event to the queue and attempts to run the function again for up to 6 hours by default. With Maximum Event Age, you can configure the lifetime of an event in the queue from 60 seconds to 6 hours. This allows you to remove any unwanted events based on the event age.
Maximum Retry Attempts
When a function returns an error after execution, Lambda attempts to run it two more times by default. With Maximum Retry Attempts, you can customize the maximum number of retries from 0 to 2. This gives you the option to continue processing new events with fewer or no retries.
Under Configuration > Asynchronous invocation > Retry attempts,
you can set it to 0-2.
Source:
https://aws.amazon.com/about-aws/whats-new/2019/11/aws-lambda-supports-max-retry-attempts-event-age-asynchronous-invocations/
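For example, the equivalent setting via the SDK (a sketch; the function name is a placeholder):

// Sketch: set async-invoke retries to 0 (aws-sdk v2).
var AWS = require('aws-sdk');
var lambda = new AWS.Lambda();

lambda.putFunctionEventInvokeConfig({
  FunctionName: 'my-function', // placeholder
  MaximumRetryAttempts: 0      // 0-2; default is 2
}).promise().then(console.log);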