Kinesis Firehose to ES using a lambda transformation - amazon-web-services

I want to get Logs from a subscription filter and then put the logs in a s3 bucket and sent them to ES.
Similar like in the diagram here:
https://aws.amazon.com/solutions/implementations/centralized-logging/
When I am using this function:
/*
For processing data sent to Firehose by Cloudwatch Logs subscription filters.
Cloudwatch Logs sends to Firehose records that look like this:
{
"messageType": "DATA_MESSAGE",
"owner": "123456789012",
"logGroup": "log_group_name",
"logStream": "log_stream_name",
"subscriptionFilters": [
"subscription_filter_name"
],
"logEvents": [
{
"id": "01234567890123456789012345678901234567890123456789012345",
"timestamp": 1510109208016,
"message": "log message 1"
},
{
"id": "01234567890123456789012345678901234567890123456789012345",
"timestamp": 1510109208017,
"message": "log message 2"
}
...
]
}
The data is additionally compressed with GZIP.
The code below will:
1) Gunzip the data
2) Parse the json
3) Set the result to ProcessingFailed for any record whose messageType is not DATA_MESSAGE, thus redirecting them to the
processing error output. Such records do not contain any log events. You can modify the code to set the result to
Dropped instead to get rid of these records completely.
4) For records whose messageType is DATA_MESSAGE, extract the individual log events from the logEvents field, and pass
each one to the transformLogEvent method. You can modify the transformLogEvent method to perform custom
transformations on the log events.
5) Concatenate the result from (4) together and set the result as the data of the record returned to Firehose. Note that
this step will not add any delimiters. Delimiters should be appended by the logic within the transformLogEvent
method.
6) Any additional records which exceed 6MB will be re-ingested back into Firehose.
*/
const zlib = require('zlib');
const AWS = require('aws-sdk');
/**
* logEvent has this format:
*
* {
* "id": "01234567890123456789012345678901234567890123456789012345",
* "timestamp": 1510109208016,
* "message": "log message 1"
* }
*
* The default implementation below just extracts the message and appends a newline to it.
*
* The result must be returned in a Promise.
*/
function transformLogEvent(logEvent: any) {
return Promise.resolve(`${logEvent.message}\n`);
}
function putRecordsToFirehoseStream(streamName: any, records: any, client: any, resolve: any, reject: any, attemptsMade: any, maxAttempts: any) {
client.putRecordBatch({
DeliveryStreamName: streamName,
Records: records,
}, (err: any, data: any) => {
const codes = [];
let failed = [];
let errMsg = err;
if (err) {
failed = records;
} else {
for (let i = 0; i < data.RequestResponses.length; i++) {
const code = data.RequestResponses[i].ErrorCode;
if (code) {
codes.push(code);
failed.push(records[i]);
}
}
errMsg = `Individual error codes: ${codes}`;
}
if (failed.length > 0) {
if (attemptsMade + 1 < maxAttempts) {
console.log('Some records failed while calling PutRecordBatch, retrying. %s', errMsg);
putRecordsToFirehoseStream(streamName, failed, client, resolve, reject, attemptsMade + 1, maxAttempts);
} else {
reject(`Could not put records after ${maxAttempts} attempts. ${errMsg}`);
}
} else {
resolve('');
}
});
}
function putRecordsToKinesisStream(streamName: any, records: any, client: any, resolve: any, reject: any, attemptsMade: any, maxAttempts: any) {
client.putRecords({
StreamName: streamName,
Records: records,
}, (err: any, data: any) => {
const codes = [];
let failed = [];
let errMsg = err;
if (err) {
failed = records;
} else {
for (let i = 0; i < data.Records.length; i++) {
const code = data.Records[i].ErrorCode;
if (code) {
codes.push(code);
failed.push(records[i]);
}
}
errMsg = `Individual error codes: ${codes}`;
}
if (failed.length > 0) {
if (attemptsMade + 1 < maxAttempts) {
console.log('Some records failed while calling PutRecords, retrying. %s', errMsg);
putRecordsToKinesisStream(streamName, failed, client, resolve, reject, attemptsMade + 1, maxAttempts);
} else {
reject(`Could not put records after ${maxAttempts} attempts. ${errMsg}`);
}
} else {
resolve('');
}
});
}
function createReingestionRecord(isSas: any, originalRecord: any) {
if (isSas) {
return {
Data: Buffer.from(originalRecord.data, 'base64'),
PartitionKey: originalRecord.kinesisRecordMetadata.partitionKey,
};
} else {
return {
Data: Buffer.from(originalRecord.data, 'base64'),
};
}
}
function getReingestionRecord(isSas: any, reIngestionRecord: any) {
if (isSas) {
return {
Data: reIngestionRecord.Data,
PartitionKey: reIngestionRecord.PartitionKey,
};
} else {
return {
Data: reIngestionRecord.Data,
};
}
}
exports.handler = (event: any, context: any, callback: any) => {
Promise.all(event.records.map(function (r: any) {
const buffer = Buffer.from(r.data, 'base64');
let decompressed;
try {
decompressed = zlib.unzipSync(buffer);
} catch (e) {
return Promise.resolve({
recordId: r.recordId,
result: 'ProcessingFailed',
});
}
const data = JSON.parse(decompressed);
// CONTROL_MESSAGE are sent by CWL to check if the subscription is reachable.
// They do not contain actual data.
if (data.messageType === 'CONTROL_MESSAGE') {
return Promise.resolve({
recordId: r.recordId,
result: 'Dropped',
});
} else if (data.messageType === 'DATA_MESSAGE') {
const promises = data.logEvents.map(transformLogEvent);
return Promise.all(promises)
.then(transformed => {
const payload: any = transformed.reduce(function (a: any, v: any) {
return a + v;
});
const encoded = Buffer.from(payload).toString();
return {
recordId: r.recordId,
result: 'Ok',
data: encoded,
};
});
} else {
return Promise.resolve({
recordId: r.recordId,
result: 'ProcessingFailed',
});
}
})).then(recs => {
const isSas = Object.prototype.hasOwnProperty.call(event, 'sourceKinesisStreamArn');
const streamARN = isSas ? event.sourceKinesisStreamArn : event.deliveryStreamArn;
const region = streamARN.split(':')[3];
const streamName = streamARN.split('/')[1];
const result: any = { records: recs };
let recordsToReingest = [];
const putRecordBatches: any = [];
let totalRecordsToBeReingested = 0;
const inputDataByRecId: any = {};
event.records.forEach(function (r: any) { inputDataByRecId[r.recordId] = createReingestionRecord(isSas, r) });
let projectedSize = recs.filter(function (rec: any) { return rec.result === 'Ok' })
.map(function (r: any) { return r.recordId.length + r.data.length })
.reduce((a, b) => a + b, 0);
// 6000000 instead of 6291456 to leave ample headroom for the stuff we didn't account for
for (let idx = 0; idx < event.records.length && projectedSize > 6000000; idx++) {
const rec: any = result.records[idx];
if (rec.result === 'Ok') {
totalRecordsToBeReingested++;
recordsToReingest.push(getReingestionRecord(isSas, inputDataByRecId[rec.recordId]));
projectedSize -= rec.data.length;
delete rec.data;
result.records[idx].result = 'Dropped';
// split out the record batches into multiple groups, 500 records at max per group
if (recordsToReingest.length === 500) {
putRecordBatches.push(recordsToReingest);
recordsToReingest = [];
}
}
}
if (recordsToReingest.length > 0) {
// add the last batch
putRecordBatches.push(recordsToReingest);
}
if (putRecordBatches.length > 0) {
new Promise((resolve, reject) => {
let recordsReingestedSoFar = 0;
for (let idx = 0; idx < putRecordBatches.length; idx++) {
const recordBatch = putRecordBatches[idx];
if (isSas) {
const client = new AWS.Kinesis({ region: region });
putRecordsToKinesisStream(streamName, recordBatch, client, resolve, reject, 0, 20);
} else {
const client = new AWS.Firehose({ region: region });
putRecordsToFirehoseStream(streamName, recordBatch, client, resolve, reject, 0, 20);
}
recordsReingestedSoFar += recordBatch.length;
console.log('Reingested %s/%s records out of %s in to %s stream', recordsReingestedSoFar, totalRecordsToBeReingested, event.records.length, streamName);
}}).then(
() => {
console.log('Reingested all %s records out of %s in to %s stream', totalRecordsToBeReingested, event.records.length, streamName);
callback(null, result);
},
failed => {
console.log('Failed to reingest records. %s', failed);
callback(failed, null);
});
} else {
console.log('No records needed to be reingested.');
callback(null, result);
}
}).catch(ex => {
console.log('Error: ', ex);
callback(ex, null);
});
};
But I am getting a Lambda.FunctionError:
Check your function and make sure the output is in required format. In addition to that, make sure the processed records contain valid result status of Dropped, Ok, or ProcessingFailed
Does anybody know, which function is suitable, to receive logs from the Cloudwatch subscription filter, sending them to S3 and ES?
My code for the FirehoseDeliveryStream looks like:
const firehoseDeliveryStream = new CfnDeliveryStream(this, "FirehoseDeliveryStream", {
deliveryStreamType: "DirectPut",
elasticsearchDestinationConfiguration: {
domainArn: elasticsearchDomain.domainArn,
roleArn: firehoseDeliveryRole.roleArn,
indexName: "test",
s3Configuration: {
bucketArn: this.logsBucket.bucketArn,
roleArn: firehoseDeliveryRole.roleArn,
cloudWatchLoggingOptions: {
enabled: true,
logGroupName: firehoseloggroup.logGroupName,
logStreamName: logstream.logStreamName
},
},
s3BackupMode: "AllDocuments",
cloudWatchLoggingOptions: {
enabled: true,
logGroupName: firehoseloggroup.logGroupName,
logStreamName: logstream.logStreamName
},
processingConfiguration: {
enabled: true,
processors: [{
type: "Lambda",
parameters: [{
parameterName: "LambdaArn",
parameterValue: handler.functionArn,
}],
}],
},
},
});

I have a CloudWatch log-group-1, kinesis firehose, lambda, S3.
log-group-1 sends logs to kinesis firehose (using subscription filter). Kinesis firehose triggers lambda to process the logs. Lambda returns the logs back to kinesis firehose and kinesis firehose saves transformed logs to S3.
Lambda gets the following input:
{
"invocationId": "000ac99...",
"deliveryStreamArn": "arn:aws:firehose:eu-central-1:123456789123:deliverystream/delivery-09",
"region": "eu-central-1",
"records": [
{
"recordId": "496199814216613477...",
"approximateArrivalTimestamp": 1625854080200,
"data": "H4sIAAAAAAAAADWOwQrCM......"
},
{
"recordId": "4961998142166134...",
"approximateArrivalTimestamp": 1625854100311,
"data": "H4sIAAAAAAAAADVPy07DMB......"
}
]
}
To return the transformed message you must change the records list. See example:
"records": [
{
"recordId": "you better take it from the input",
"result": "can be Ok, Dropped, ProcessingFailed",
"data": "must be an encoded base-64 string"
}
]
I attached a code written in Javascipt. It is enough just to copy-paste it to lambda.
const node_gzip_1 = require("node-gzip");
async function handler(event) {
console.log('event: ' + JSON.stringify(event, undefined, 3));
let result = [];
// Iterate through records list
const records = event.records;
for (let ii = 0; ii < records.length; ii++) {
const record = records[ii];
const recordId = record.recordId;
// Transform record data to a human readable string
const data = record.data;
const decodedData = Buffer.from(data, 'base64');
const ungziped = await node_gzip_1.ungzip(decodedData);
console.log('ungziped: ' + ungziped);
// Parse record data to JSON
const dataJson = JSON.parse(ungziped.toString());
// Get a list of log events and iterate through each element
const logEventsList = dataJson.logEvents;
logEventsList.forEach((logEventValue) => {
// Get the message which was saved in CloudWatch
const messageString = logEventValue.message;
// Create the transformed result
const transformedResultJson = {
someRandomNumber: Math.random(), // Some random variable I decided to put in the result
message: messageString + '-my-custom-change' // Edit the message
};
// Final data must be encoded to base 64
const messageBase64 = Buffer.from(JSON.stringify(transformedResultJson) + '\n').toString('base64'); // Adding a new line to transformed result is optional. It just make reading the S3 easier
console.log('messageBase64: ' + messageBase64);
// Save transformed result
result.push({
recordId: recordId,
result: 'Ok',
data: messageBase64
});
});
}
// Replace initial records list with the transformed list
event.records = result;
console.log('new event: ' + JSON.stringify(event, undefined, 2));
// Returned value will go back to kinesis firehose, then S3
return event;
}
exports.handler = handler;
Lambda return value is:
{
"invocationId": "000ac99...",
"deliveryStreamArn": "arn:aws:firehose:eu-central-1:123456789123:deliverystream/delivery-09",
"region": "eu-central-1",
"records": [
{
"recordId": "496199814216613477...",
"result": "Ok",
"data": "eyJzb21lUmF..."
},
{
"recordId": "4961998142166134...",
"result": "Ok",
"data": "eyJzb21lUmFuZG9..."
}
]
}
You can also use a lambda blueprint kinesis-firehose-syslog-to-json.
Also see:
https://docs.amazonaws.cn/en_us/firehose/latest/dev/data-transformation.html
Kinesis Firehose putting JSON objects in S3 without seperator comma

Related

AWS Lambda targerting CloudWatch Logs

I tried to follow Using DynamoDB with Custom Alexa Skills - Dabble Lab #226 - YouTube video, had some issues that mostly I was able to fix. I guess I am just to new to AWS and Lambda. But I wonder if someone is able to explain me why I don't have CloudWatch Logs as target, as shown in the video and how I can fix that. When I try to save a movietitle alexa sais 'we cannot save your movie right now. Try again!'. Would be amazing If someone here could help me : )
My lambda code:
/* eslint-disable func-names */
/* eslint-disable no-console */
const Alexa = require('ask-sdk');
const dbHelper = require('./helpers/dbHelper');
const GENERAL_REPROMPT = "What would you like to do?";
const dynamoDBTableName = "dynamodb-starter";
const LaunchRequestHandler = {
canHandle(handlerInput) {
return handlerInput.requestEnvelope.request.type === 'LaunchRequest';
},
handle(handlerInput) {
const speechText = 'Hello there. What is your favourite movie? You can say add moviename to add your favourite movie or say list my movies to get your favourite movies.';
const repromptText = 'What would you like to do? You can say HELP to get available options';
return handlerInput.responseBuilder
.speak(speechText)
.reprompt(repromptText)
.getResponse();
},
};
const InProgressAddMovieIntentHandler = {
canHandle(handlerInput) {
const request = handlerInput.requestEnvelope.request;
return request.type === 'IntentRequest' &&
request.intent.name === 'AddMovieIntent' &&
request.dialogState !== 'COMPLETED';
},
handle(handlerInput) {
const currentIntent = handlerInput.requestEnvelope.request.intent;
return handlerInput.responseBuilder
.addDelegateDirective(currentIntent)
.getResponse();
}
}
const AddMovieIntentHandler = {
canHandle(handlerInput) {
return handlerInput.requestEnvelope.request.type === 'IntentRequest'
&& handlerInput.requestEnvelope.request.intent.name === 'AddMovieIntent';
},
async handle(handlerInput) {
const {responseBuilder } = handlerInput;
const userID = handlerInput.requestEnvelope.context.System.user.userId;
const slots = handlerInput.requestEnvelope.request.intent.slots;
const movieName = slots.MovieName.value;
return dbHelper.addMovie(movieName, userID)
.then((data) => {
const speechText = `You have added movie ${movieName}. You can say add to add another one or remove to remove movie`;
return responseBuilder
.speak(speechText)
.reprompt(GENERAL_REPROMPT)
.getResponse();
})
.catch((err) => {
console.log("Error occured while saving movie", err);
const speechText = "we cannot save your movie right now. Try again!"
return responseBuilder
.speak(speechText)
.getResponse();
})
},
};
const GetMoviesIntentHandler = {
canHandle(handlerInput) {
return handlerInput.requestEnvelope.request.type === 'IntentRequest'
&& handlerInput.requestEnvelope.request.intent.name === 'GetMoviesIntent';
},
async handle(handlerInput) {
const {responseBuilder } = handlerInput;
const userID = handlerInput.requestEnvelope.context.System.user.userId;
return dbHelper.getMovies(userID)
.then((data) => {
var speechText = "Your movies are "
if (data.length == 0) {
speechText = "You do not have any favourite movie yet, add movie by saving add moviename "
} else {
speechText += data.map(e => e.movieTitle).join(", ")
}
return responseBuilder
.speak(speechText)
.reprompt(GENERAL_REPROMPT)
.getResponse();
})
.catch((err) => {
const speechText = "we cannot get your movie right now. Try again!"
return responseBuilder
.speak(speechText)
.getResponse();
})
}
}
const InProgressRemoveMovieIntentHandler = {
canHandle(handlerInput) {
const request = handlerInput.requestEnvelope.request;
return request.type === 'IntentRequest' &&
request.intent.name === 'RemoveMovieIntent' &&
request.dialogState !== 'COMPLETED';
},
handle(handlerInput) {
const currentIntent = handlerInput.requestEnvelope.request.intent;
return handlerInput.responseBuilder
.addDelegateDirective(currentIntent)
.getResponse();
}
}
const RemoveMovieIntentHandler = {
canHandle(handlerInput) {
return handlerInput.requestEnvelope.request.type === 'IntentRequest'
&& handlerInput.requestEnvelope.request.intent.name === 'RemoveMovieIntent';
},
handle(handlerInput) {
const {responseBuilder } = handlerInput;
const userID = handlerInput.requestEnvelope.context.System.user.userId;
const slots = handlerInput.requestEnvelope.request.intent.slots;
const movieName = slots.MovieName.value;
return dbHelper.removeMovie(movieName, userID)
.then((data) => {
const speechText = `You have removed movie with name ${movieName}, you can add another one by saying add`
return responseBuilder
.speak(speechText)
.reprompt(GENERAL_REPROMPT)
.getResponse();
})
.catch((err) => {
const speechText = `You do not have movie with name ${movieName}, you can add it by saying add`
return responseBuilder
.speak(speechText)
.reprompt(GENERAL_REPROMPT)
.getResponse();
})
}
}
const HelpIntentHandler = {
canHandle(handlerInput) {
return handlerInput.requestEnvelope.request.type === 'IntentRequest'
&& handlerInput.requestEnvelope.request.intent.name === 'AMAZON.HelpIntent';
},
handle(handlerInput) {
const speechText = 'You can introduce yourself by telling me your name';
return handlerInput.responseBuilder
.speak(speechText)
.reprompt(speechText)
.getResponse();
},
};
const CancelAndStopIntentHandler = {
canHandle(handlerInput) {
return handlerInput.requestEnvelope.request.type === 'IntentRequest'
&& (handlerInput.requestEnvelope.request.intent.name === 'AMAZON.CancelIntent'
|| handlerInput.requestEnvelope.request.intent.name === 'AMAZON.StopIntent');
},
handle(handlerInput) {
const speechText = 'Goodbye!';
return handlerInput.responseBuilder
.speak(speechText)
.getResponse();
},
};
const SessionEndedRequestHandler = {
canHandle(handlerInput) {
return handlerInput.requestEnvelope.request.type === 'SessionEndedRequest';
},
handle(handlerInput) {
console.log(`Session ended with reason: ${handlerInput.requestEnvelope.request.reason}`);
return handlerInput.responseBuilder.getResponse();
},
};
const ErrorHandler = {
canHandle() {
return true;
},
handle(handlerInput, error) {
console.log(`Error handled: ${error.message}`);
return handlerInput.responseBuilder
.speak('Sorry, I can\'t understand the command. Please say again.')
.reprompt('Sorry, I can\'t understand the command. Please say again.')
.getResponse();
},
};
const skillBuilder = Alexa.SkillBuilders.standard();
exports.handler = skillBuilder
.addRequestHandlers(
LaunchRequestHandler,
InProgressAddMovieIntentHandler,
AddMovieIntentHandler,
GetMoviesIntentHandler,
InProgressRemoveMovieIntentHandler,
RemoveMovieIntentHandler,
HelpIntentHandler,
CancelAndStopIntentHandler,
SessionEndedRequestHandler
)
.addErrorHandlers(ErrorHandler)
.withTableName(dynamoDBTableName)
.withAutoCreateTable(true)
.lambda();
The code when deployed:
lucasfalkowsky#Lucass-MacBook-Pro kneipe-temp % ask deploy -p ______
Deploy configuration loaded from ask-resources.json
Deploy project for profile [____]
==================== Deploy Skill Metadata ====================
[Warn]: The hash of current skill package folder does not change compared to the last deploy hash result, CLI will skip the deploy of skill package.
Skill ID: ___________
==================== Build Skill Code ====================
npm WARN dynamodb-starter#1.0.0 No repository field.
audited 18 packages in 1.094s
found 0 vulnerabilities
Skill code built successfully.
Code for region default built to /Users/___/Desktop/___/___/___/___/___/.ask/lambda/custom/build.zip successfully with build flow NodeJsNpmBuildFlow.
==================== Deploy Skill Infrastructure ====================
✔ Deploy Alexa skill infrastructure for region "default"
Skill infrastructures deployed successfully through #ask-cli/lambda-deployer.
==================== Enable Skill ====================
Skill is already enabled, skip the enable process.
The Original Project on Git Hub
As far as I understand you are missing the lambda logs in cloudwatch. So for them to enable you need to have an IAM Role with managed polciy AWSLambdaBasicExecutionRole.
That gives following permissions to lambda to log to cloudwatch:
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"logs:CreateLogGroup",
"logs:CreateLogStream",
"logs:PutLogEvents"
],
"Resource": "*"
}
]
}

How to check after sending an email

I am using SES service of AWS for send email.
I use Python and boto3.
(https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ses.html#SES.Client.send_email)
I know how to check the overall statistics.
(https://aws.amazon.com/ko/premiumsupport/knowledge-center/ses-email-opens-clicks/)
But I don't know how to do individual inquiry.
After sending an email, message_id and request_id are returned.
I think we're going to use message_id, request_id to search.
I don't know what to do.
Configure SES to send notifications (send, deliver, bounce, open) to SNS topics.
Write a Lambda function to listen to the SNS topics. When a notification is received, write the event to your table (I use DynamoDB)
Now you have all your email events in a table
Here is the Lambda function I use for writing SES events to DynamoDB
const tableName = "DEV_EmailAuditLogs";
const AWS = require("aws-sdk");
AWS.config.update({ region: "us-east-1" });
const dynamodb = new AWS.DynamoDB({ apiVersion: "2012-10-08" });
exports.handler = (event, context, callback) => {
console.log(JSON.stringify(event, null, 2));
console.log("From SNS:", event.Records[0].Sns.Message);
console.log("Arn Topic:", event.Records[0].Sns.TopicArn);
const json = event.Records[0].Sns.Message;
const parsed = JSON.parse(json);
const type = parsed.eventType;
const sent = parsed.mail.timestamp.slice(0, -1);
let to, subject, from;
const headers = parsed.mail.headers;
for (const h of headers) {
switch (h.name) {
case "To":
to = h.value;
break;
case "From":
from = h.value;
break;
case "Subject":
subject = h.value;
break;
}
}
const eventData = parsed[type.toLowerCase()];
let created;
if (eventData && eventData.timestamp) {
created = eventData.timestamp.slice(0, -1);
} else {
created = sent;
}
var params = {
TableName: tableName,
Item: {
to: { S: to },
from: { S: from },
created: { S: created },
sent: { S: sent },
type: { S: type },
version: { N: "1" }
}
};
if (subject){
params.Item.subject = { S: subject };
}
dynamodb.putItem(params, function(err, data) {
if (err) {
console.log(err);
} else {
console.log(data);
}
});
};

Session leak on spanner insert from cloud function

I'm trying to insert data into spanner through cloud function, using post request. I thing that I'm doing everything as described in the documentation, and i just can't understand what causes the next error:
"Error: 1 session leak(s) detected.
at _requests.onIdle.then (/srv/node_modules/#google-cloud/spanner/build/src/session-pool.js:193:25)
at <anonymous>"
And there is my cloud function
const {Spanner} = require('#google-cloud/spanner');
module.exports.http = (req, res) => {
const projectId = 'project-id';
const instanceId = 'instance-id';
const databaseId = 'database-id';
const spanner = new Spanner({
projectId: projectId,
});
const instance = spanner.instance(instanceId);
const database = instance.database(databaseId);
let sqlResponse = "";
database.runTransaction(async (err, transaction) => {
if (err) {
res.status(500).send(JSON.stringify({message: err, requestBody: req.body}));
return;
}
try {
const data = req.body;
const [rowCount] = await transaction.runUpdate({
sql:
'INSERT Feedbacks (age, comment, gender, rating) VALUES (#age, #comment, #gender, #rating)',
params: {
age: data.age.toString(),
comment: data.comment,
gender: data.gender,
rating: data.rating.toString(),
},
});
sqlResponse = 'Successfully inserted ' + rowCount + ' record into the Feedbacks table.';
await transaction.commit();
res.status(200).send(JSON.stringify({message: sqlResponse, requestBody: req.body}));
} catch (err) {
res.status(500).send(JSON.stringify({message: err, requestBody: req.body}));
} finally {
database.close();
}
});
};
Your code appears to be correct. As noted by #Mayeru in the comments for your question, the first thing to confirm is that you're inserting a new record with a unique value specified for the column that is your table's primary key column.
Another possibility that could be causing the issue you are encountering is that you are trying to test the function using the "Testing" UI of the Cloud Console's Cloud Functions > "Function details" section. If so then you may be either using an empty request body or a malformed request body when you click the "Test the function" button. In the "Triggering event" textarea that appears above the "Test the function" button, make sure you have entered a valid JSON request body which includes the elements and values that your INSERT statement expects.
For example a "Triggering event" JSON request body like the following should work:
{"singerId":"1001","firstName":"Test","lastName":"Singer"}
Using the following "nodeInsert" function that's similar to the code you've shared:
const {Spanner} = require('#google-cloud/spanner');
module.exports.nodeInsert = (req, res) => {
const projectId = 'my-project';
const instanceId = 'my-instance';
const databaseId = 'my-database';
const spanner = new Spanner({
projectId: projectId,
});
const instance = spanner.instance(instanceId);
const database = instance.database(databaseId);
let sqlResponse = "";
database.runTransaction(async (err, transaction) => {
if (err) {
res
.status(500)
.send(JSON.stringify({message: err, requestBody: req.body}));
transaction.end();
console.error('Transaction terminated.');
return;
}
try {
const data = req.body;
const parsedSingerId = parseInt(data.singerId, 10);
const [rowCount] = await transaction.runUpdate({
sql:
'INSERT Singers (SingerId, FirstName, LastName) VALUES (#singerId, #firstName, #lastName)',
params: {
singerId: parsedSingerId,
firstName: data.firstName,
lastName: data.lastName,
},
});
sqlResponse = 'Successfully inserted ' + rowCount + ' record into the Singers table.';
await transaction.commit();
res
.status(200)
.send(JSON.stringify({message: sqlResponse, requestBody: req.body}));
} catch (err) {
res
.status(500)
.send(JSON.stringify({message: err, requestBody: req.body}));
transaction.end();
console.error('Transaction terminated.');
} finally {
database.close();
}
});
};

AWS Firehose newline Character

I've read a lot of similar questions around adding newline characters to firehose, but they're all around adding the newline character to the source. The problem is that I don't have access to the source, and a third party is piping data to our Kinesis instance and I cannot add the \n to the source.
I've tried doing a Firehose data transformation using the following code:
'use strict';
console.log('Loading function');
exports.handler = (event, context, callback) => {
/* Process the list of records and transform them */
const output = [];
event.records.forEach((record) => {
const results = {
/* This transformation is the "identity" transformation, the data is left intact */
recordId: record.recordId,
result: record.data.event_type === 'alert' ? 'Dropped' : 'Ok',
data: record.data + '\n'
};
output.push(results);
});
console.log(`Processing completed. Successful records ${output.length}.`);
callback(null, { records: output });
};
but the newline is still lost. I've also tried JSON.stringify(record.data) + '\n' but then I get an Invalid output structure error.
Try decoding the record.data
add a new line
then encode it again as base64.
This is python but the idea is the same
for record in event['records']:
payload = base64.b64decode(record['data'])
# Do custom processing on the payload here
payload = payload + '\n'
output_record = {
'recordId': record['recordId'],
'result': 'Ok',
'data': base64.b64encode(json.dumps(payload))
}
output.append(output_record)
return {'records': output}
From the comment of #Matt Westlake:
For those looking for the Node.js answer, it's
const data =
JSON.parse(new Buffer.from(record.data,'base64').toString('utf8'));
and
new Buffer.from(JSON.stringify(data) + '\n').toString('base64')
The kinesis-firehose-cloudwatch-logs-processor blueprint lambda does this (with some additional handling for cloudwatch logs).
Here's the lambda code from the blueprint as of today:
/*
For processing data sent to Firehose by Cloudwatch Logs subscription filters.
Cloudwatch Logs sends to Firehose records that look like this:
{
"messageType": "DATA_MESSAGE",
"owner": "123456789012",
"logGroup": "log_group_name",
"logStream": "log_stream_name",
"subscriptionFilters": [
"subscription_filter_name"
],
"logEvents": [
{
"id": "01234567890123456789012345678901234567890123456789012345",
"timestamp": 1510109208016,
"message": "log message 1"
},
{
"id": "01234567890123456789012345678901234567890123456789012345",
"timestamp": 1510109208017,
"message": "log message 2"
}
...
]
}
The data is additionally compressed with GZIP.
The code below will:
1) Gunzip the data
2) Parse the json
3) Set the result to ProcessingFailed for any record whose messageType is not DATA_MESSAGE, thus redirecting them to the
processing error output. Such records do not contain any log events. You can modify the code to set the result to
Dropped instead to get rid of these records completely.
4) For records whose messageType is DATA_MESSAGE, extract the individual log events from the logEvents field, and pass
each one to the transformLogEvent method. You can modify the transformLogEvent method to perform custom
transformations on the log events.
5) Concatenate the result from (4) together and set the result as the data of the record returned to Firehose. Note that
this step will not add any delimiters. Delimiters should be appended by the logic within the transformLogEvent
method.
6) Any additional records which exceed 6MB will be re-ingested back into Firehose.
*/
const zlib = require('zlib');
const AWS = require('aws-sdk');
/**
* logEvent has this format:
*
* {
* "id": "01234567890123456789012345678901234567890123456789012345",
* "timestamp": 1510109208016,
* "message": "log message 1"
* }
*
* The default implementation below just extracts the message and appends a newline to it.
*
* The result must be returned in a Promise.
*/
function transformLogEvent(logEvent) {
return Promise.resolve(`${logEvent.message}\n`);
}
function putRecordsToFirehoseStream(streamName, records, client, resolve, reject, attemptsMade, maxAttempts) {
client.putRecordBatch({
DeliveryStreamName: streamName,
Records: records,
}, (err, data) => {
const codes = [];
let failed = [];
let errMsg = err;
if (err) {
failed = records;
} else {
for (let i = 0; i < data.RequestResponses.length; i++) {
const code = data.RequestResponses[i].ErrorCode;
if (code) {
codes.push(code);
failed.push(records[i]);
}
}
errMsg = `Individual error codes: ${codes}`;
}
if (failed.length > 0) {
if (attemptsMade + 1 < maxAttempts) {
console.log('Some records failed while calling PutRecordBatch, retrying. %s', errMsg);
putRecordsToFirehoseStream(streamName, failed, client, resolve, reject, attemptsMade + 1, maxAttempts);
} else {
reject(`Could not put records after ${maxAttempts} attempts. ${errMsg}`);
}
} else {
resolve('');
}
});
}
function putRecordsToKinesisStream(streamName, records, client, resolve, reject, attemptsMade, maxAttempts) {
client.putRecords({
StreamName: streamName,
Records: records,
}, (err, data) => {
const codes = [];
let failed = [];
let errMsg = err;
if (err) {
failed = records;
} else {
for (let i = 0; i < data.Records.length; i++) {
const code = data.Records[i].ErrorCode;
if (code) {
codes.push(code);
failed.push(records[i]);
}
}
errMsg = `Individual error codes: ${codes}`;
}
if (failed.length > 0) {
if (attemptsMade + 1 < maxAttempts) {
console.log('Some records failed while calling PutRecords, retrying. %s', errMsg);
putRecordsToKinesisStream(streamName, failed, client, resolve, reject, attemptsMade + 1, maxAttempts);
} else {
reject(`Could not put records after ${maxAttempts} attempts. ${errMsg}`);
}
} else {
resolve('');
}
});
}
function createReingestionRecord(isSas, originalRecord) {
if (isSas) {
return {
Data: new Buffer(originalRecord.data, 'base64'),
PartitionKey: originalRecord.kinesisRecordMetadata.partitionKey,
};
} else {
return {
Data: new Buffer(originalRecord.data, 'base64'),
};
}
}
function getReingestionRecord(isSas, reIngestionRecord) {
if (isSas) {
return {
Data: reIngestionRecord.Data,
PartitionKey: reIngestionRecord.PartitionKey,
};
} else {
return {
Data: reIngestionRecord.Data,
};
}
}
exports.handler = (event, context, callback) => {
Promise.all(event.records.map(r => {
const buffer = new Buffer(r.data, 'base64');
const decompressed = zlib.gunzipSync(buffer);
const data = JSON.parse(decompressed);
// CONTROL_MESSAGE are sent by CWL to check if the subscription is reachable.
// They do not contain actual data.
if (data.messageType === 'CONTROL_MESSAGE') {
return Promise.resolve({
recordId: r.recordId,
result: 'Dropped',
});
} else if (data.messageType === 'DATA_MESSAGE') {
const promises = data.logEvents.map(transformLogEvent);
return Promise.all(promises)
.then(transformed => {
const payload = transformed.reduce((a, v) => a + v, '');
const encoded = new Buffer(payload).toString('base64');
return {
recordId: r.recordId,
result: 'Ok',
data: encoded,
};
});
} else {
return Promise.resolve({
recordId: r.recordId,
result: 'ProcessingFailed',
});
}
})).then(recs => {
const isSas = Object.prototype.hasOwnProperty.call(event, 'sourceKinesisStreamArn');
const streamARN = isSas ? event.sourceKinesisStreamArn : event.deliveryStreamArn;
const region = streamARN.split(':')[3];
const streamName = streamARN.split('/')[1];
const result = { records: recs };
let recordsToReingest = [];
const putRecordBatches = [];
let totalRecordsToBeReingested = 0;
const inputDataByRecId = {};
event.records.forEach(r => inputDataByRecId[r.recordId] = createReingestionRecord(isSas, r));
let projectedSize = recs.filter(rec => rec.result === 'Ok')
.map(r => r.recordId.length + r.data.length)
.reduce((a, b) => a + b);
// 6000000 instead of 6291456 to leave ample headroom for the stuff we didn't account for
for (let idx = 0; idx < event.records.length && projectedSize > 6000000; idx++) {
const rec = result.records[idx];
if (rec.result === 'Ok') {
totalRecordsToBeReingested++;
recordsToReingest.push(getReingestionRecord(isSas, inputDataByRecId[rec.recordId]));
projectedSize -= rec.data.length;
delete rec.data;
result.records[idx].result = 'Dropped';
// split out the record batches into multiple groups, 500 records at max per group
if (recordsToReingest.length === 500) {
putRecordBatches.push(recordsToReingest);
recordsToReingest = [];
}
}
}
if (recordsToReingest.length > 0) {
// add the last batch
putRecordBatches.push(recordsToReingest);
}
if (putRecordBatches.length > 0) {
new Promise((resolve, reject) => {
let recordsReingestedSoFar = 0;
for (let idx = 0; idx < putRecordBatches.length; idx++) {
const recordBatch = putRecordBatches[idx];
if (isSas) {
const client = new AWS.Kinesis({ region: region });
putRecordsToKinesisStream(streamName, recordBatch, client, resolve, reject, 0, 20);
} else {
const client = new AWS.Firehose({ region: region });
putRecordsToFirehoseStream(streamName, recordBatch, client, resolve, reject, 0, 20);
}
recordsReingestedSoFar += recordBatch.length;
console.log('Reingested %s/%s records out of %s in to %s stream', recordsReingestedSoFar, totalRecordsToBeReingested, event.records.length, streamName);
}
}).then(
() => {
console.log('Reingested all %s records out of %s in to %s stream', totalRecordsToBeReingested, event.records.length, streamName);
callback(null, result);
},
failed => {
console.log('Failed to reingest records. %s', failed);
callback(failed, null);
});
} else {
console.log('No records needed to be reingested.');
callback(null, result);
}
}).catch(ex => {
console.log('Error: ', ex);
callback(ex, null);
});
};
Here is code that will solve the problem
__Author__ = "Soumil Nitin Shah"
import json
import boto3
import base64
class MyHasher(object):
def __init__(self, key):
self.key = key
def get(self):
keys = str(self.key).encode("UTF-8")
keys = base64.b64encode(keys)
keys = keys.decode("UTF-8")
return keys
def lambda_handler(event, context):
output = []
for record in event['records']:
payload = base64.b64decode(record['data'])
"""Get the payload from event bridge and just get data attr """""
serialize_payload = str(json.loads(payload)) + "\n"
hasherHelper = MyHasher(key=serialize_payload)
hash = hasherHelper.get()
output_record = {
'recordId': record['recordId'],
'result': 'Ok',
'data': hash
}
print("output_record", output_record)
output.append(output_record)
return {'records': output}

Timeout while launching EC2 instances from AWS Lambda

I have been trying to launch a new EC2 instance as well as add a piece of string data to my SQS through lambda in response to an object upload event in my s3 bucket.
I have been able to successfully to update my SQS but has been unable to initialise the new EC2 instance. Despite setting the time allocation to the lambda function to the maximum time of 5mins and increasing memory allocation, an operation timeout error continuously surface.
My code is as below. Can anyone point out what are the potential causes for such an error? While I have stuck my whole piece of code here for reference, the area concerning the launch is towards the end of the code.
Thank you so much!
console.log('Loading function');
var fs = require('fs');
var async = require('async');
var aws = require('aws-sdk');
var s3 = new aws.S3({ apiVersion: '2006-03-01' });
var sqs = new aws.SQS({apiVersion: '2012-11-05'});
var ecs = new aws.ECS({apiVersion: '2014-11-13'});
var ec2 = new aws.EC2({apiVersion: '2015-10-01'});
// Check if the given key suffix matches a suffix in the whitelist. Return true if it matches, false otherwise.
exports.checkS3SuffixWhitelist = function(key, whitelist) {
if(!whitelist){ return true; }
if(typeof whitelist == 'string'){ return key.match(whitelist + '$') }
if(Object.prototype.toString.call(whitelist) === '[object Array]') {
for(var i = 0; i < whitelist.length; i++) {
if(key.match(whitelist[i] + '$')) { return true; }
}
return false;
}
console.log(
'Unsupported whitelist type (' + Object.prototype.toString.call(whitelist) +
') for: ' + JSON.stringify(whitelist)
);
return false;
};
exports.handler = function(event, context) {
//console.log('Received event:', JSON.stringify(event, null, 2));
console.log('Received event:');
//Read in the configuration file
var config = JSON.parse(fs.readFileSync('config.json', 'utf8'));
if(!config.hasOwnProperty('s3_key_suffix_whitelist')) {
config.s3_key_suffix_whitelist = false;
}
console.log('Config: ' + JSON.stringify(config));
var name = event.Records[0].s3.object.key;
if(!exports.checkS3SuffixWhitelist(name, config.s3_key_suffix_whitelist)) {
context.fail('Suffix for key: ' + name + ' is not in the whitelist')
}
// Get the object from the event and show its key
var bucket = event.Records[0].s3.bucket.name;
var key = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, ' '));
var params = {
Bucket: bucket,
Key: key
};
s3.getObject(params, function(err, data) {
if (err) {
console.log(err);
var message = "Error getting object " + key + " from bucket " + bucket +
". Make sure they exist and your bucket is in the same region as this function.";
console.log(message);
context.fail(message);
} else {
console.log('CONTENT TYPE:', key);
context.succeed(key);
}
});
//Sending the image key as a message to SQS and starting a new instance on EC2
async.waterfall([
function(next){
var params = {
MessageBody: JSON.stringify(event),
QueueUrl: config.queue
};
console.log("IN QUEUE FUNCTION");
sqs.sendMessage(params, function (err, data) {
if (err) { console.warn('Error while sending message: ' + err); }
else { console.info('Message sent, ID: ' + data.MessageId); }
next(err);
});
},
function (next) {
console.log("INITIALIZING ECS");
var params = {
ImageId: 'ami-e559b485',
MinCount: 1,
MaxCount: 1,
DryRun: true,
InstanceType: 't2.micro',
KeyName: 'malpem2102',
SubnetId: 'subnet-e8607e8d'
}
ec2.runInstances(params, function(err,data){
if(err){
console.log(err, err.stack);
context.fail('Error', "Error getting file: " + err);
return;
} else{
var instanceId = data.Instances[0].InstanceId;
console.log("Created instance ", instanceId);
context.suceed("Created instance");
}
});
}
], function(err){
if (err) {
context.fail('An error has occurred: ' + err);
}
else {
context.succeed('Successfully processed Amazon S3 URL.');
}
}
);
};