I've read a lot of similar questions around adding newline characters to firehose, but they're all around adding the newline character to the source. The problem is that I don't have access to the source, and a third party is piping data to our Kinesis instance and I cannot add the \n to the source.
I've tried doing a Firehose data transformation using the following code:
'use strict';
console.log('Loading function');
exports.handler = (event, context, callback) => {
/* Process the list of records and transform them */
const output = [];
event.records.forEach((record) => {
const results = {
/* This transformation is the "identity" transformation, the data is left intact */
recordId: record.recordId,
result: record.data.event_type === 'alert' ? 'Dropped' : 'Ok',
data: record.data + '\n'
};
output.push(results);
});
console.log(`Processing completed. Successful records ${output.length}.`);
callback(null, { records: output });
};
but the newline is still lost. I've also tried JSON.stringify(record.data) + '\n' but then I get an Invalid output structure error.
Try decoding the record.data
add a new line
then encode it again as base64.
This is python but the idea is the same
def lambda_handler(event, context):
    """Append a newline to every Firehose record and return the transformed batch.

    Each ``record['data']`` is base64 text. It is decoded to bytes, a newline byte is
    appended, and the result is base64-encoded again. The payload is not passed through
    ``json.dumps``: that would wrap it in a JSON string literal and escape the newline.
    """
    import base64

    output = []
    for record in event['records']:
        payload = base64.b64decode(record['data'])
        # Do custom processing on the payload here
        payload = payload + b'\n'
        output_record = {
            'recordId': record['recordId'],
            'result': 'Ok',
            # b64encode returns bytes; Firehose needs a JSON-serialisable string.
            'data': base64.b64encode(payload).decode('utf-8')
        }
        output.append(output_record)
    return {'records': output}
From a comment by @Matt Westlake:
For those looking for the Node.js answer, it's
// Firehose delivers the payload base64-encoded: decode it to UTF-8 text, then parse the JSON.
// Buffer.from is a factory function, so it is called without `new`.
const data = JSON.parse(Buffer.from(record.data, 'base64').toString('utf8'));
and
// Re-serialise, append the newline delimiter, and base64-encode again for Firehose.
Buffer.from(JSON.stringify(data) + '\n').toString('base64')
The kinesis-firehose-cloudwatch-logs-processor blueprint lambda does this (with some additional handling for cloudwatch logs).
Here's the lambda code from the blueprint as of today:
/*
For processing data sent to Firehose by Cloudwatch Logs subscription filters.
Cloudwatch Logs sends to Firehose records that look like this:
{
"messageType": "DATA_MESSAGE",
"owner": "123456789012",
"logGroup": "log_group_name",
"logStream": "log_stream_name",
"subscriptionFilters": [
"subscription_filter_name"
],
"logEvents": [
{
"id": "01234567890123456789012345678901234567890123456789012345",
"timestamp": 1510109208016,
"message": "log message 1"
},
{
"id": "01234567890123456789012345678901234567890123456789012345",
"timestamp": 1510109208017,
"message": "log message 2"
}
...
]
}
The data is additionally compressed with GZIP.
The code below will:
1) Gunzip the data
2) Parse the json
3) Set the result to Dropped for CONTROL_MESSAGE records (CloudWatch Logs reachability checks that carry no log data) and to
ProcessingFailed for any record whose messageType is neither CONTROL_MESSAGE nor DATA_MESSAGE, thus redirecting the latter to the
processing error output. You can modify the code to set the result to Dropped instead to get rid of these records completely.
4) For records whose messageType is DATA_MESSAGE, extract the individual log events from the logEvents field, and pass
each one to the transformLogEvent method. You can modify the transformLogEvent method to perform custom
transformations on the log events.
5) Concatenate the result from (4) together and set the result as the data of the record returned to Firehose. Note that
this step will not add any delimiters. Delimiters should be appended by the logic within the transformLogEvent
method.
6) Any additional records which exceed 6MB will be re-ingested back into Firehose.
*/
const zlib = require('zlib');
const AWS = require('aws-sdk');
/**
* logEvent has this format:
*
* {
* "id": "01234567890123456789012345678901234567890123456789012345",
* "timestamp": 1510109208016,
* "message": "log message 1"
* }
*
* The default implementation below just extracts the message and appends a newline to it.
*
* The result must be returned in a Promise.
*/
function transformLogEvent(logEvent) {
    // Only the message text is kept; the trailing newline is the record delimiter.
    const { message } = logEvent;
    const line = `${message}\n`;
    return Promise.resolve(line);
}
/**
 * Sends `records` to a Firehose delivery stream with PutRecordBatch and retries whatever failed.
 *
 * Settles through the supplied `resolve`/`reject` callbacks: `resolve('')` once nothing is
 * left to retry, `reject(message)` once `maxAttempts` attempts have been used up.
 */
function putRecordsToFirehoseStream(streamName, records, client, resolve, reject, attemptsMade, maxAttempts) {
    const request = { DeliveryStreamName: streamName, Records: records };
    client.putRecordBatch(request, (err, data) => {
        let failed;
        let errMsg;
        if (err) {
            // The call itself failed, so every record of this attempt goes again.
            failed = records;
            errMsg = err;
        } else {
            // The call went through; pick out the entries that report an ErrorCode.
            const codes = [];
            failed = [];
            data.RequestResponses.forEach((response, position) => {
                const code = response.ErrorCode;
                if (code) {
                    codes.push(code);
                    failed.push(records[position]);
                }
            });
            errMsg = `Individual error codes: ${codes}`;
        }

        const hasFailures = failed.length > 0;
        if (!hasFailures) {
            resolve('');
            return;
        }
        if (attemptsMade + 1 < maxAttempts) {
            console.log('Some records failed while calling PutRecordBatch, retrying. %s', errMsg);
            putRecordsToFirehoseStream(streamName, failed, client, resolve, reject, attemptsMade + 1, maxAttempts);
            return;
        }
        reject(`Could not put records after ${maxAttempts} attempts. ${errMsg}`);
    });
}
/**
 * Sends `records` to a Kinesis data stream with PutRecords and retries whatever failed.
 *
 * Settles through the supplied `resolve`/`reject` callbacks: `resolve('')` once nothing is
 * left to retry, `reject(message)` once `maxAttempts` attempts have been used up.
 */
function putRecordsToKinesisStream(streamName, records, client, resolve, reject, attemptsMade, maxAttempts) {
    const onResponse = (err, data) => {
        let errMsg = err;
        let failed = records; // on a call-level error the whole attempt is retried
        if (!err) {
            const codes = [];
            failed = [];
            for (const [position, entry] of data.Records.entries()) {
                const code = entry.ErrorCode;
                if (code) {
                    codes.push(code);
                    failed.push(records[position]);
                }
            }
            errMsg = `Individual error codes: ${codes}`;
        }

        const nothingFailed = !(failed.length > 0);
        if (nothingFailed) {
            resolve('');
        } else if (attemptsMade + 1 < maxAttempts) {
            console.log('Some records failed while calling PutRecords, retrying. %s', errMsg);
            putRecordsToKinesisStream(streamName, failed, client, resolve, reject, attemptsMade + 1, maxAttempts);
        } else {
            reject(`Could not put records after ${maxAttempts} attempts. ${errMsg}`);
        }
    };

    client.putRecords({ StreamName: streamName, Records: records }, onResponse);
}
/**
 * Builds the record used to put an input record back onto the source stream.
 *
 * @param {boolean} isSas - True when the source is a Kinesis stream; the partition key is then carried over.
 * @param {{data: string, kinesisRecordMetadata?: {partitionKey: string}}} originalRecord - Firehose input record.
 * @returns {{Data: Buffer, PartitionKey?: string}} The decoded payload, plus the partition key when `isSas`.
 */
function createReingestionRecord(isSas, originalRecord) {
    // Buffer.from replaces the deprecated `new Buffer(string, encoding)` constructor.
    const Data = Buffer.from(originalRecord.data, 'base64');
    if (isSas) {
        return {
            Data,
            PartitionKey: originalRecord.kinesisRecordMetadata.partitionKey,
        };
    }
    return { Data };
}
/**
 * Copies the fields of a re-ingestion record that the put call should receive:
 * `Data` always, `PartitionKey` only when `isSas` is truthy.
 */
function getReingestionRecord(isSas, reIngestionRecord) {
    const slim = { Data: reIngestionRecord.Data };
    if (!isSas) {
        return slim;
    }
    slim.PartitionKey = reIngestionRecord.PartitionKey;
    return slim;
}
exports.handler = (event, context, callback) => {
Promise.all(event.records.map(r => {
const buffer = new Buffer(r.data, 'base64');
const decompressed = zlib.gunzipSync(buffer);
const data = JSON.parse(decompressed);
// CONTROL_MESSAGE are sent by CWL to check if the subscription is reachable.
// They do not contain actual data.
if (data.messageType === 'CONTROL_MESSAGE') {
return Promise.resolve({
recordId: r.recordId,
result: 'Dropped',
});
} else if (data.messageType === 'DATA_MESSAGE') {
const promises = data.logEvents.map(transformLogEvent);
return Promise.all(promises)
.then(transformed => {
const payload = transformed.reduce((a, v) => a + v, '');
const encoded = new Buffer(payload).toString('base64');
return {
recordId: r.recordId,
result: 'Ok',
data: encoded,
};
});
} else {
return Promise.resolve({
recordId: r.recordId,
result: 'ProcessingFailed',
});
}
})).then(recs => {
const isSas = Object.prototype.hasOwnProperty.call(event, 'sourceKinesisStreamArn');
const streamARN = isSas ? event.sourceKinesisStreamArn : event.deliveryStreamArn;
const region = streamARN.split(':')[3];
const streamName = streamARN.split('/')[1];
const result = { records: recs };
let recordsToReingest = [];
const putRecordBatches = [];
let totalRecordsToBeReingested = 0;
const inputDataByRecId = {};
event.records.forEach(r => inputDataByRecId[r.recordId] = createReingestionRecord(isSas, r));
let projectedSize = recs.filter(rec => rec.result === 'Ok')
.map(r => r.recordId.length + r.data.length)
.reduce((a, b) => a + b);
// 6000000 instead of 6291456 to leave ample headroom for the stuff we didn't account for
for (let idx = 0; idx < event.records.length && projectedSize > 6000000; idx++) {
const rec = result.records[idx];
if (rec.result === 'Ok') {
totalRecordsToBeReingested++;
recordsToReingest.push(getReingestionRecord(isSas, inputDataByRecId[rec.recordId]));
projectedSize -= rec.data.length;
delete rec.data;
result.records[idx].result = 'Dropped';
// split out the record batches into multiple groups, 500 records at max per group
if (recordsToReingest.length === 500) {
putRecordBatches.push(recordsToReingest);
recordsToReingest = [];
}
}
}
if (recordsToReingest.length > 0) {
// add the last batch
putRecordBatches.push(recordsToReingest);
}
if (putRecordBatches.length > 0) {
new Promise((resolve, reject) => {
let recordsReingestedSoFar = 0;
for (let idx = 0; idx < putRecordBatches.length; idx++) {
const recordBatch = putRecordBatches[idx];
if (isSas) {
const client = new AWS.Kinesis({ region: region });
putRecordsToKinesisStream(streamName, recordBatch, client, resolve, reject, 0, 20);
} else {
const client = new AWS.Firehose({ region: region });
putRecordsToFirehoseStream(streamName, recordBatch, client, resolve, reject, 0, 20);
}
recordsReingestedSoFar += recordBatch.length;
console.log('Reingested %s/%s records out of %s in to %s stream', recordsReingestedSoFar, totalRecordsToBeReingested, event.records.length, streamName);
}
}).then(
() => {
console.log('Reingested all %s records out of %s in to %s stream', totalRecordsToBeReingested, event.records.length, streamName);
callback(null, result);
},
failed => {
console.log('Failed to reingest records. %s', failed);
callback(failed, null);
});
} else {
console.log('No records needed to be reingested.');
callback(null, result);
}
}).catch(ex => {
console.log('Error: ', ex);
callback(ex, null);
});
};
Here is code that will solve the problem
__Author__ = "Soumil Nitin Shah"
import json
import boto3
import base64
class MyHasher(object):
    """Base64-encodes the string form of ``key`` (despite the name, nothing is hashed)."""

    def __init__(self, key):
        self.key = key

    def get(self):
        """Return ``str(self.key)`` as UTF-8 bytes, base64-encoded, decoded back to ``str``."""
        raw = str(self.key).encode("UTF-8")
        return base64.b64encode(raw).decode("UTF-8")


def lambda_handler(event, context):
    """Firehose transformation: re-emit each JSON record as one newline-terminated line.

    Every ``record['data']`` is base64-decoded, parsed as JSON, serialised again with a
    trailing newline and base64-encoded for the response.
    """
    output = []
    for record in event['records']:
        payload = base64.b64decode(record['data'])

        # json.dumps keeps the line valid JSON. str() on the parsed object would emit a
        # Python repr instead (single quotes, True/None), which JSON readers reject.
        serialize_payload = json.dumps(json.loads(payload)) + "\n"
        encoded_payload = MyHasher(key=serialize_payload).get()

        output_record = {
            'recordId': record['recordId'],
            'result': 'Ok',
            'data': encoded_payload
        }
        print("output_record", output_record)
        output.append(output_record)

    return {'records': output}
Related
I want to get Logs from a subscription filter and then put the logs in a s3 bucket and sent them to ES.
Similar like in the diagram here:
https://aws.amazon.com/solutions/implementations/centralized-logging/
When I am using this function:
/*
For processing data sent to Firehose by Cloudwatch Logs subscription filters.
Cloudwatch Logs sends to Firehose records that look like this:
{
"messageType": "DATA_MESSAGE",
"owner": "123456789012",
"logGroup": "log_group_name",
"logStream": "log_stream_name",
"subscriptionFilters": [
"subscription_filter_name"
],
"logEvents": [
{
"id": "01234567890123456789012345678901234567890123456789012345",
"timestamp": 1510109208016,
"message": "log message 1"
},
{
"id": "01234567890123456789012345678901234567890123456789012345",
"timestamp": 1510109208017,
"message": "log message 2"
}
...
]
}
The data is additionally compressed with GZIP.
The code below will:
1) Gunzip the data
2) Parse the json
3) Set the result to Dropped for CONTROL_MESSAGE records (CloudWatch Logs reachability checks that carry no log data) and to
ProcessingFailed for any record whose messageType is neither CONTROL_MESSAGE nor DATA_MESSAGE, thus redirecting the latter to the
processing error output. You can modify the code to set the result to Dropped instead to get rid of these records completely.
4) For records whose messageType is DATA_MESSAGE, extract the individual log events from the logEvents field, and pass
each one to the transformLogEvent method. You can modify the transformLogEvent method to perform custom
transformations on the log events.
5) Concatenate the result from (4) together and set the result as the data of the record returned to Firehose. Note that
this step will not add any delimiters. Delimiters should be appended by the logic within the transformLogEvent
method.
6) Any additional records which exceed 6MB will be re-ingested back into Firehose.
*/
const zlib = require('zlib');
const AWS = require('aws-sdk');
/**
* logEvent has this format:
*
* {
* "id": "01234567890123456789012345678901234567890123456789012345",
* "timestamp": 1510109208016,
* "message": "log message 1"
* }
*
* The default implementation below just extracts the message and appends a newline to it.
*
* The result must be returned in a Promise.
*/
function transformLogEvent(logEvent: any) {
    // Only the message text is kept; the trailing newline is the record delimiter.
    const { message } = logEvent;
    const line = `${message}\n`;
    return Promise.resolve(line);
}
/**
 * Sends `records` to a Firehose delivery stream with PutRecordBatch and retries whatever failed.
 *
 * Settles through the supplied `resolve`/`reject` callbacks: `resolve('')` once nothing is
 * left to retry, `reject(message)` once `maxAttempts` attempts have been used up.
 */
function putRecordsToFirehoseStream(streamName: any, records: any, client: any, resolve: any, reject: any, attemptsMade: any, maxAttempts: any) {
    const request = { DeliveryStreamName: streamName, Records: records };
    client.putRecordBatch(request, (err: any, data: any) => {
        let failed: any;
        let errMsg: any;
        if (err) {
            // The call itself failed, so every record of this attempt goes again.
            failed = records;
            errMsg = err;
        } else {
            // The call went through; pick out the entries that report an ErrorCode.
            const codes: any[] = [];
            failed = [];
            data.RequestResponses.forEach((response: any, position: number) => {
                const code = response.ErrorCode;
                if (code) {
                    codes.push(code);
                    failed.push(records[position]);
                }
            });
            errMsg = `Individual error codes: ${codes}`;
        }

        const hasFailures = failed.length > 0;
        if (!hasFailures) {
            resolve('');
            return;
        }
        if (attemptsMade + 1 < maxAttempts) {
            console.log('Some records failed while calling PutRecordBatch, retrying. %s', errMsg);
            putRecordsToFirehoseStream(streamName, failed, client, resolve, reject, attemptsMade + 1, maxAttempts);
            return;
        }
        reject(`Could not put records after ${maxAttempts} attempts. ${errMsg}`);
    });
}
/**
 * Sends `records` to a Kinesis data stream with PutRecords and retries whatever failed.
 *
 * Settles through the supplied `resolve`/`reject` callbacks: `resolve('')` once nothing is
 * left to retry, `reject(message)` once `maxAttempts` attempts have been used up.
 */
function putRecordsToKinesisStream(streamName: any, records: any, client: any, resolve: any, reject: any, attemptsMade: any, maxAttempts: any) {
    const onResponse = (err: any, data: any) => {
        let errMsg: any = err;
        let failed: any = records; // on a call-level error the whole attempt is retried
        if (!err) {
            const codes: any[] = [];
            failed = [];
            data.Records.forEach((entry: any, position: number) => {
                const code = entry.ErrorCode;
                if (code) {
                    codes.push(code);
                    failed.push(records[position]);
                }
            });
            errMsg = `Individual error codes: ${codes}`;
        }

        const nothingFailed = !(failed.length > 0);
        if (nothingFailed) {
            resolve('');
        } else if (attemptsMade + 1 < maxAttempts) {
            console.log('Some records failed while calling PutRecords, retrying. %s', errMsg);
            putRecordsToKinesisStream(streamName, failed, client, resolve, reject, attemptsMade + 1, maxAttempts);
        } else {
            reject(`Could not put records after ${maxAttempts} attempts. ${errMsg}`);
        }
    };

    client.putRecords({ StreamName: streamName, Records: records }, onResponse);
}
/**
 * Builds the record used to put an input record back onto the source stream: the
 * base64-decoded payload, plus the Kinesis partition key when `isSas` is truthy.
 */
function createReingestionRecord(isSas: any, originalRecord: any) {
    const Data = Buffer.from(originalRecord.data, 'base64');
    return isSas
        ? { Data, PartitionKey: originalRecord.kinesisRecordMetadata.partitionKey }
        : { Data };
}
/**
 * Copies the fields of a re-ingestion record that the put call should receive:
 * `Data` always, `PartitionKey` only when `isSas` is truthy.
 */
function getReingestionRecord(isSas: any, reIngestionRecord: any) {
    const Data = reIngestionRecord.Data;
    return isSas
        ? { Data, PartitionKey: reIngestionRecord.PartitionKey }
        : { Data };
}
/**
 * Firehose transformation entry point for CloudWatch Logs subscription data.
 *
 * Every input record is unzipped and parsed. CONTROL_MESSAGE records are dropped,
 * DATA_MESSAGE records have their log events run through transformLogEvent and concatenated,
 * and anything else (including records that cannot be decoded) is marked ProcessingFailed.
 * The `data` of an 'Ok' record is returned base64-encoded. If the response would exceed
 * the size budget, records are put back onto the source stream and marked Dropped until
 * the response fits.
 */
exports.handler = (event: any, context: any, callback: any) => {
    // 6000000 instead of 6291456 to leave ample headroom for the stuff we didn't account for
    const MAX_RESPONSE_SIZE = 6000000;
    // split out the record batches into multiple groups, 500 records at max per group
    const MAX_BATCH_SIZE = 500;
    const MAX_PUT_ATTEMPTS = 20;

    const transformRecord = (r: any): Promise<any> => {
        let data: any;
        try {
            const decompressed = zlib.unzipSync(Buffer.from(r.data, 'base64'));
            data = JSON.parse(decompressed.toString('utf8'));
        } catch (e) {
            // One undecodable record must not fail the whole batch.
            return Promise.resolve({ recordId: r.recordId, result: 'ProcessingFailed' });
        }

        // CONTROL_MESSAGE are sent by CWL to check if the subscription is reachable.
        // They do not contain actual data.
        if (data.messageType === 'CONTROL_MESSAGE') {
            return Promise.resolve({ recordId: r.recordId, result: 'Dropped' });
        }
        if (data.messageType !== 'DATA_MESSAGE') {
            return Promise.resolve({ recordId: r.recordId, result: 'ProcessingFailed' });
        }
        return Promise.all(data.logEvents.map(transformLogEvent)).then((transformed: any) => {
            // The '' seed keeps an empty logEvents array from throwing.
            const payload: any = transformed.reduce((a: any, v: any) => a + v, '');
            return {
                recordId: r.recordId,
                result: 'Ok',
                // The transformed data has to go back base64-encoded; a bare toString()
                // returns the plain text.
                data: Buffer.from(payload).toString('base64'),
            };
        });
    };

    // Starting inside a promise chain turns synchronous throws into rejections, so the
    // final catch always reports them through the callback.
    Promise.resolve()
        .then(() => Promise.all(event.records.map((r: any) => transformRecord(r))))
        .then((recs: any[]) => {
            const isSas = Object.prototype.hasOwnProperty.call(event, 'sourceKinesisStreamArn');
            const streamARN = isSas ? event.sourceKinesisStreamArn : event.deliveryStreamArn;
            const region = streamARN.split(':')[3];
            const streamName = streamARN.split('/')[1];
            const result: any = { records: recs };

            let projectedSize = recs
                .filter((rec: any) => rec.result === 'Ok')
                .reduce((total: number, rec: any) => total + rec.recordId.length + rec.data.length, 0);

            const putRecordBatches: any[] = [];
            let recordsToReingest: any[] = [];
            let totalRecordsToBeReingested = 0;
            for (let idx = 0; idx < recs.length && projectedSize > MAX_RESPONSE_SIZE; idx++) {
                const rec: any = recs[idx];
                if (rec.result !== 'Ok') {
                    continue;
                }
                totalRecordsToBeReingested++;
                // recs preserves input order, so event.records[idx] is the matching original record.
                recordsToReingest.push(getReingestionRecord(isSas, createReingestionRecord(isSas, event.records[idx])));
                projectedSize -= rec.data.length;
                delete rec.data;
                rec.result = 'Dropped';
                if (recordsToReingest.length === MAX_BATCH_SIZE) {
                    putRecordBatches.push(recordsToReingest);
                    recordsToReingest = [];
                }
            }
            if (recordsToReingest.length > 0) {
                // add the last batch
                putRecordBatches.push(recordsToReingest);
            }

            if (putRecordBatches.length === 0) {
                console.log('No records needed to be reingested.');
                callback(null, result);
                return undefined;
            }

            const client = isSas ? new AWS.Kinesis({ region: region }) : new AWS.Firehose({ region: region });
            const putRecords = isSas ? putRecordsToKinesisStream : putRecordsToFirehoseStream;
            let recordsReingestedSoFar = 0;
            // One promise per batch: the response goes back only after every batch has settled.
            const puts = putRecordBatches.map((recordBatch: any) => new Promise((resolve, reject) => {
                putRecords(streamName, recordBatch, client, resolve, reject, 0, MAX_PUT_ATTEMPTS);
            }).then(() => {
                recordsReingestedSoFar += recordBatch.length;
                console.log('Reingested %s/%s records out of %s in to %s stream', recordsReingestedSoFar, totalRecordsToBeReingested, event.records.length, streamName);
            }));

            return Promise.all(puts).then(
                () => {
                    console.log('Reingested all %s records out of %s in to %s stream', totalRecordsToBeReingested, event.records.length, streamName);
                    callback(null, result);
                },
                (failed: any) => {
                    console.log('Failed to reingest records. %s', failed);
                    callback(failed, null);
                });
        })
        .catch((ex: any) => {
            console.log('Error: ', ex);
            callback(ex, null);
        });
};
But I am getting a Lambda.FunctionError:
Check your function and make sure the output is in required format. In addition to that, make sure the processed records contain valid result status of Dropped, Ok, or ProcessingFailed
Does anybody know, which function is suitable, to receive logs from the Cloudwatch subscription filter, sending them to S3 and ES?
My code for the FirehoseDeliveryStream looks like:
// Declares the Firehose delivery stream with an Elasticsearch destination, an S3
// configuration, and a Lambda processor.
const firehoseDeliveryStream = new CfnDeliveryStream(this, "FirehoseDeliveryStream", {
// "DirectPut": presumably producers write to the stream directly rather than via a
// Kinesis data stream — confirm against the CloudFormation reference.
deliveryStreamType: "DirectPut",
elasticsearchDestinationConfiguration: {
domainArn: elasticsearchDomain.domainArn,
roleArn: firehoseDeliveryRole.roleArn,
indexName: "test",
// S3 bucket configuration nested inside the Elasticsearch destination; it reuses the same role.
s3Configuration: {
bucketArn: this.logsBucket.bucketArn,
roleArn: firehoseDeliveryRole.roleArn,
cloudWatchLoggingOptions: {
enabled: true,
logGroupName: firehoseloggroup.logGroupName,
logStreamName: logstream.logStreamName
},
},
// NOTE(review): "AllDocuments" presumably backs up every document to S3, not only failed ones — confirm.
s3BackupMode: "AllDocuments",
// Same log group and log stream as the S3 configuration above.
cloudWatchLoggingOptions: {
enabled: true,
logGroupName: firehoseloggroup.logGroupName,
logStreamName: logstream.logStreamName
},
// Record transformation: a single Lambda processor identified by handler.functionArn.
processingConfiguration: {
enabled: true,
processors: [{
type: "Lambda",
parameters: [{
parameterName: "LambdaArn",
parameterValue: handler.functionArn,
}],
}],
},
},
});
I have a CloudWatch log-group-1, kinesis firehose, lambda, S3.
log-group-1 sends logs to kinesis firehose (using subscription filter). Kinesis firehose triggers lambda to process the logs. Lambda returns the logs back to kinesis firehose and kinesis firehose saves transformed logs to S3.
Lambda gets the following input:
{
"invocationId": "000ac99...",
"deliveryStreamArn": "arn:aws:firehose:eu-central-1:123456789123:deliverystream/delivery-09",
"region": "eu-central-1",
"records": [
{
"recordId": "496199814216613477...",
"approximateArrivalTimestamp": 1625854080200,
"data": "H4sIAAAAAAAAADWOwQrCM......"
},
{
"recordId": "4961998142166134...",
"approximateArrivalTimestamp": 1625854100311,
"data": "H4sIAAAAAAAAADVPy07DMB......"
}
]
}
To return the transformed message you must change the records list. See example:
"records": [
{
"recordId": "you better take it from the input",
"result": "can be Ok, Dropped, ProcessingFailed",
"data": "must be an encoded base-64 string"
}
]
I have attached code written in JavaScript. It is enough to copy and paste it into the Lambda function.
const node_gzip_1 = require("node-gzip");
async function handler(event) {
console.log('event: ' + JSON.stringify(event, undefined, 3));
let result = [];
// Iterate through records list
const records = event.records;
for (let ii = 0; ii < records.length; ii++) {
const record = records[ii];
const recordId = record.recordId;
// Transform record data to a human readable string
const data = record.data;
const decodedData = Buffer.from(data, 'base64');
const ungziped = await node_gzip_1.ungzip(decodedData);
console.log('ungziped: ' + ungziped);
// Parse record data to JSON
const dataJson = JSON.parse(ungziped.toString());
// Get a list of log events and iterate through each element
const logEventsList = dataJson.logEvents;
logEventsList.forEach((logEventValue) => {
// Get the message which was saved in CloudWatch
const messageString = logEventValue.message;
// Create the transformed result
const transformedResultJson = {
someRandomNumber: Math.random(), // Some random variable I decided to put in the result
message: messageString + '-my-custom-change' // Edit the message
};
// Final data must be encoded to base 64
const messageBase64 = Buffer.from(JSON.stringify(transformedResultJson) + '\n').toString('base64'); // Adding a new line to transformed result is optional. It just make reading the S3 easier
console.log('messageBase64: ' + messageBase64);
// Save transformed result
result.push({
recordId: recordId,
result: 'Ok',
data: messageBase64
});
});
}
// Replace initial records list with the transformed list
event.records = result;
console.log('new event: ' + JSON.stringify(event, undefined, 2));
// Returned value will go back to kinesis firehose, then S3
return event;
}
exports.handler = handler;
Lambda return value is:
{
"invocationId": "000ac99...",
"deliveryStreamArn": "arn:aws:firehose:eu-central-1:123456789123:deliverystream/delivery-09",
"region": "eu-central-1",
"records": [
{
"recordId": "496199814216613477...",
"result": "Ok",
"data": "eyJzb21lUmF..."
},
{
"recordId": "4961998142166134...",
"result": "Ok",
"data": "eyJzb21lUmFuZG9..."
}
]
}
You can also use a lambda blueprint kinesis-firehose-syslog-to-json.
Also see:
https://docs.amazonaws.cn/en_us/firehose/latest/dev/data-transformation.html
Kinesis Firehose putting JSON objects in S3 without seperator comma
I am using SES service of AWS for send email.
I use Python and boto3.
(https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ses.html#SES.Client.send_email)
I know how to check the overall statistics.
(https://aws.amazon.com/ko/premiumsupport/knowledge-center/ses-email-opens-clicks/)
But I don't know how to do individual inquiry.
After sending an email, message_id and request_id are returned.
I think we're going to use message_id, request_id to search.
I don't know what to do.
Configure SES to send notifications (send, deliver, bounce, open) to SNS topics.
Write a Lambda function to listen to the SNS topics. When a notification is received, write the event to your table (I use DynamoDB)
Now you have all your email events in a table
Here is the Lambda function I use for writing SES events to DynamoDB
const tableName = "DEV_EmailAuditLogs";
const AWS = require("aws-sdk");
AWS.config.update({ region: "us-east-1" });
const dynamodb = new AWS.DynamoDB({ apiVersion: "2012-10-08" });
exports.handler = (event, context, callback) => {
console.log(JSON.stringify(event, null, 2));
console.log("From SNS:", event.Records[0].Sns.Message);
console.log("Arn Topic:", event.Records[0].Sns.TopicArn);
const json = event.Records[0].Sns.Message;
const parsed = JSON.parse(json);
const type = parsed.eventType;
const sent = parsed.mail.timestamp.slice(0, -1);
let to, subject, from;
const headers = parsed.mail.headers;
for (const h of headers) {
switch (h.name) {
case "To":
to = h.value;
break;
case "From":
from = h.value;
break;
case "Subject":
subject = h.value;
break;
}
}
const eventData = parsed[type.toLowerCase()];
let created;
if (eventData && eventData.timestamp) {
created = eventData.timestamp.slice(0, -1);
} else {
created = sent;
}
var params = {
TableName: tableName,
Item: {
to: { S: to },
from: { S: from },
created: { S: created },
sent: { S: sent },
type: { S: type },
version: { N: "1" }
}
};
if (subject){
params.Item.subject = { S: subject };
}
dynamodb.putItem(params, function(err, data) {
if (err) {
console.log(err);
} else {
console.log(data);
}
});
};
I am trying to write a script that will loop thru an array of items for a DynamoDB table and run a batch write command. My functionality is good, but I am having trouble with DynamoDB. Would be great if I could point my AWS.DynamoDB.DocumentClient() to my localhost running DynamoDB. Any tips?
Would also consider a way to just run the commands via the aws cli but I am not sure how to do that. I am running Node.js so it maybe possible?
Here is my code:
var AWS = require('aws-sdk');
AWS.config.update({ region: 'eu-central-1' });
// The endpoint override has to be given to the constructor; assigning `.endpoint` on an
// existing DocumentClient has no effect on where its requests are sent.
var DynamoDB = new AWS.DynamoDB.DocumentClient({ endpoint: 'http://localhost:8000' });
const allItems = require('./resource.json');
const tableName = 'some-table-name';
console.log({ tableName, allItems });

// The items are written in groups of at most 25 per batchWrite call.
var BATCH_SIZE = 25;
var batches = [];
for (let i = 0; i < allItems.length; i += BATCH_SIZE) {
    // slice() also yields the final, shorter batch, so no separate leftover handling is needed
    batches.push(allItems.slice(i, i + BATCH_SIZE));
}
var completedRequests = 0;
var errors = false;

/**
 * Creates the completion callback for one batchWrite request.
 *
 * The factory's own parameters are unused and kept only for signature compatibility; the
 * returned function receives the real `(err, data)` of the request.
 *
 * @returns {function(*, *): (object|*|undefined)} Callback that returns an API-style response
 *     object on success. On failure it returns the first recorded error once every batch
 *     has completed, otherwise undefined.
 */
function requestHandler(err, data) {
    console.log('In the request handler...');
    return function (err, data) {
        completedRequests++;

        if (!err) {
            // Judge this request on its own result; an earlier failure must not make a
            // successful request log as an error.
            var res = {
                statusCode: 200,
                headers: {
                    'Content-Type': 'application/json',
                    'Access-Control-Allow-Methods': 'GET,POST,OPTIONS',
                    'Access-Control-Allow-Origin': '*',
                    'Access-Control-Allow-Credentials': true,
                },
                body: JSON.stringify(data),
                isBase64Encoded: false,
            };
            console.log(`Success: returned ${JSON.stringify(data)}`);
            return res;
        }

        // Remember the first failure so it can be reported once all batches are done.
        errors = errors || err;
        console.error('Request caused a DB error.');
        console.error('ERROR: ' + err);
        console.error(JSON.stringify(err, null, 2));

        if (completedRequests === batches.length) {
            return errors;
        }
    };
}
//Make request
var params;
for (let j = 0; j < batches.length; j++) {
    //items go in the params.RequestItems[tableName] array
    //format for the items is {PutRequest : {Item: ITEM_OBJECT}}
    // Built as an object literal with a computed key: no JSON string to assemble and parse,
    // and table names containing quotes cannot break it.
    params = { RequestItems: { [tableName]: batches[j] } };
    console.log('before db.batchWriteItem: ', params);
    //send to db
    DynamoDB.batchWrite(params, requestHandler(params));
}
I figured it out and will leave this here for anyone that may need it.
// Creates a DocumentClient for a local DynamoDB by passing the endpoint to the constructor.
var DynamoDB = require('aws-sdk').DynamoDB;
var localOptions = {
    region: 'localhost',
    endpoint: 'http://localhost:8000',
};
var db = new DynamoDB.DocumentClient(localOptions);
Following this Streaming CloudWatch Logs Data to Amazon Elasticsearch Service, it's working fine to stream cloud watch log to ELK having one log group and one Lambda function.
But now I want to change target lambda function for my other logs group, but I am not able to do that as there is no option in AWS console.
Any Help will be appreciated.
Thanks
I was streaming to ELK using the AWS console option which is Start Streaming to Amazon Elasticsearch Service, But I failed to change or choose different lambda function as there is only lambda function can be selected for any log group using this option.
So, I create new lambda function and set stream target to AWS lambda function,
Here is all the code you need. The Node version for the Lambda function is 4.*, as there was some issue with the newer version, but the plus point is that it does not require any extra NPM packages.
// v1.1.2
var https = require('https');
var zlib = require('zlib');
var crypto = require('crypto');
var endpoint = 'search-my-test.us-west-2.es.amazonaws.com';
exports.handler = function(input, context) {
// decode input from base64
var zippedInput = new Buffer(input.awslogs.data, 'base64');
// decompress the input
zlib.gunzip(zippedInput, function(error, buffer) {
if (error) { context.fail(error); return; }
// parse the input from JSON
var awslogsData = JSON.parse(buffer.toString('utf8'));
// transform the input to Elasticsearch documents
var elasticsearchBulkData = transform(awslogsData);
// skip control messages
if (!elasticsearchBulkData) {
console.log('Received a control message');
context.succeed('Control message handled successfully');
return;
}
// post documents to the Amazon Elasticsearch Service
post(elasticsearchBulkData, function(error, success, statusCode, failedItems) {
console.log('Response: ' + JSON.stringify({
"statusCode": statusCode
}));
if (error) {
console.log('Error: ' + JSON.stringify(error, null, 2));
if (failedItems && failedItems.length > 0) {
console.log("Failed Items: " +
JSON.stringify(failedItems, null, 2));
}
context.fail(JSON.stringify(error));
} else {
console.log('Success: ' + JSON.stringify(success));
context.succeed('Success');
}
});
});
};
/**
 * Converts a decoded CloudWatch Logs payload into an Elasticsearch bulk request body.
 *
 * Returns null for CONTROL_MESSAGE payloads. Otherwise returns one action line and one
 * source line per log event, each terminated by a newline.
 */
function transform(payload) {
    if (payload.messageType === 'CONTROL_MESSAGE') {
        return null;
    }

    var lines = payload.logEvents.map(function(logEvent) {
        var when = new Date(1 * logEvent.timestamp);

        // index name format: prod-background-wo-YYYY.MM.DD (UTC, zero-padded month and day)
        var year = when.getUTCFullYear();
        var month = ('0' + (when.getUTCMonth() + 1)).slice(-2);
        var day = ('0' + when.getUTCDate()).slice(-2);
        var indexName = ['prod-background-wo-' + year, month, day].join('.');

        var source = buildSource(logEvent.message, logEvent.extractedFields);
        source['response_time'] = source["end"] - source["start"];
        source['#id'] = logEvent.id;
        source['#timestamp'] = new Date(1 * logEvent.timestamp).toISOString();
        source['#message'] = logEvent.message;
        source['#owner'] = payload.owner;
        source['#log_group'] = payload.logGroup;
        source['#log_stream'] = payload.logStream;

        var action = {
            "index": {
                _index: indexName,
                _type: payload.logGroup,
                _id: logEvent.id
            }
        };

        return JSON.stringify(action) + '\n' + JSON.stringify(source) + '\n';
    });

    return lines.join('');
}
/**
 * Builds the document stored for one log event.
 *
 * With `extractedFields`: copies every non-empty own field, converting numeric strings to
 * numbers. For a value that contains JSON, the parsed object is also stored under '$' + key.
 * Without `extractedFields`: returns the JSON object embedded in `message`, or {} if none.
 *
 * @param {string} message - Raw log message.
 * @param {Object<string, string>} [extractedFields] - Fields extracted by the subscription filter.
 * @returns {object} The source document.
 */
function buildSource(message, extractedFields) {
    // Declared locally: the undeclared assignment created an implicit global and throws in strict mode.
    var jsonSubString;

    if (extractedFields) {
        var source = {};

        for (var key in extractedFields) {
            // Borrow hasOwnProperty so a field literally named "hasOwnProperty" cannot shadow it.
            if (Object.prototype.hasOwnProperty.call(extractedFields, key) && extractedFields[key]) {
                var value = extractedFields[key];

                if (isNumeric(value)) {
                    source[key] = 1 * value;
                    continue;
                }

                jsonSubString = extractJson(value);
                if (jsonSubString !== null) {
                    source['$' + key] = JSON.parse(jsonSubString);
                }

                source[key] = value;
            }
        }
        return source;
    }

    jsonSubString = extractJson(message);
    if (jsonSubString !== null) {
        return JSON.parse(jsonSubString);
    }

    return {};
}
/**
 * Returns the part of `message` from its first '{' to the end when that part is valid
 * JSON, and null otherwise (including when there is no '{' at all).
 */
function extractJson(message) {
    var braceAt = message.indexOf('{');
    if (braceAt < 0) {
        return null;
    }
    var candidate = message.substring(braceAt);
    if (isValidJson(candidate)) {
        return candidate;
    }
    return null;
}
/**
 * Reports whether the given text parses as JSON.
 *
 * @param {string} message - candidate JSON text.
 * @returns {boolean} true when JSON.parse accepts it, false when it throws.
 */
function isValidJson(message) {
  let parses = true;
  try {
    JSON.parse(message);
  } catch (parseError) {
    parses = false;
  }
  return parses;
}
/**
 * Reports whether a value is a finite number or a string that is entirely
 * a finite number.
 *
 * @param {*} n - value to test.
 * @returns {boolean} true when n is numeric.
 */
function isNumeric(n) {
  const leadingNumber = parseFloat(n);
  if (Number.isNaN(leadingNumber)) {
    return false;
  }
  // The coercing global isFinite is intentional: it converts the whole
  // string, which rejects inputs like '12abc' that parseFloat accepts.
  return isFinite(n);
}
/**
 * Sends a bulk body to the Elasticsearch endpoint and reports the outcome.
 *
 * The response body is parsed defensively: a body that is not a JSON object
 * (for example an HTML error page from a proxy) is reported through the
 * callback as an error instead of throwing inside the 'end' handler, where
 * the exception could not be caught by the caller.
 *
 * @param {string} body - newline-delimited bulk request body.
 * @param {function} callback - invoked as
 *     callback(error, success, statusCode, failedItems). `error` is null on
 *     a clean 200 response, otherwise an object with statusCode and
 *     responseBody. On a transport failure it is invoked with the
 *     underlying error as its only argument.
 */
function post(body, callback) {
  const requestParams = buildRequest(endpoint, body);

  const request = https.request(requestParams, (response) => {
    let responseBody = '';

    response.on('data', (chunk) => {
      responseBody += chunk;
    });

    response.on('end', () => {
      // info stays null when the body is not a JSON object.
      let info = null;
      try {
        const parsed = JSON.parse(responseBody);
        if (parsed !== null && typeof parsed === 'object') {
          info = parsed;
        }
      } catch (parseError) {
        info = null;
      }

      const items = info !== null && Array.isArray(info.items) ? info.items : null;

      let failedItems;
      let success;
      if (items !== null && response.statusCode >= 200 && response.statusCode < 299) {
        failedItems = items.filter((item) => item.index.status >= 300);
        success = {
          "attemptedItems": items.length,
          "successfulItems": items.length - failedItems.length,
          "failedItems": failedItems.length
        };
      }

      const failed =
        response.statusCode !== 200 || info === null || info.errors === true;
      const error = failed ? {
        "statusCode": response.statusCode,
        "responseBody": responseBody
      } : null;

      callback(error, success, response.statusCode, failedItems);
    });
  }).on('error', (e) => {
    callback(e);
  });

  request.end(requestParams.body);
}
/**
 * Builds the options for a signed POST to the endpoint's /_bulk path.
 *
 * Region and service are taken from the endpoint host name; credentials come
 * from the AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN
 * environment variables. The Authorization header is an AWS4-HMAC-SHA256
 * signature over the method, path, headers and body hash.
 *
 * @param {string} endpoint - host name ending in '.amazonaws.com'.
 * @param {string} body - request body to send and sign.
 * @returns {object} { host, method, path, body, headers } including the
 *     Authorization header.
 */
function buildRequest(endpoint, body) {
  const [, , region, service] =
    endpoint.match(/^([^\.]+)\.?([^\.]*)\.?([^\.]*)\.amazonaws\.com$/);

  // Compact ISO timestamp, e.g. 20200102T030405Z; its first 8 chars are the date.
  const datetime = new Date().toISOString().replace(/[:\-]|\.\d{3}/g, '');
  const date = datetime.slice(0, 8);

  // Derive the signing key by chaining HMACs over the credential scope parts.
  const signingKey = [date, region, service, 'aws4_request'].reduce(
    (key, part) => hmac(key, part),
    'AWS4' + process.env.AWS_SECRET_ACCESS_KEY
  );

  const method = 'POST';
  const path = '/_bulk';
  const headers = {
    'Content-Type': 'application/json',
    'Host': endpoint,
    'Content-Length': Buffer.byteLength(body),
    'X-Amz-Security-Token': process.env.AWS_SESSION_TOKEN,
    'X-Amz-Date': datetime
  };

  const headerNames = Object.keys(headers);
  const byLowerCaseName = (a, b) => (a.toLowerCase() < b.toLowerCase() ? -1 : 1);

  const canonicalHeaders = headerNames
    .slice()
    .sort(byLowerCaseName)
    .map((name) => name.toLowerCase() + ':' + headers[name])
    .join('\n');

  const signedHeaders = headerNames
    .map((name) => name.toLowerCase())
    .sort()
    .join(';');

  const canonicalString = [
    method,
    path,
    '',
    canonicalHeaders,
    '',
    signedHeaders,
    hash(body, 'hex'),
  ].join('\n');

  const credentialString = date + '/' + region + '/' + service + '/aws4_request';

  const stringToSign = [
    'AWS4-HMAC-SHA256',
    datetime,
    credentialString,
    hash(canonicalString, 'hex'),
  ].join('\n');

  // Added after signing: Authorization itself is not part of the signed headers.
  headers.Authorization =
    'AWS4-HMAC-SHA256 Credential=' + process.env.AWS_ACCESS_KEY_ID + '/' + credentialString +
    ', ' + 'SignedHeaders=' + signedHeaders +
    ', ' + 'Signature=' + hmac(signingKey, stringToSign, 'hex');

  return {
    host: endpoint,
    method: method,
    path: path,
    body: body,
    headers: headers,
  };
}
/**
 * Computes the HMAC-SHA256 of a UTF-8 string.
 *
 * @param {string|Buffer} key - HMAC key.
 * @param {string} str - message to authenticate.
 * @param {string} [encoding] - digest encoding (e.g. 'hex'); when omitted
 *     the digest is returned as a Buffer.
 * @returns {string|Buffer} the digest.
 */
function hmac(key, str, encoding) {
  const mac = crypto.createHmac('sha256', key);
  mac.update(str, 'utf8');
  return mac.digest(encoding);
}
/**
 * Computes the SHA-256 digest of a UTF-8 string.
 *
 * @param {string} str - text to hash.
 * @param {string} [encoding] - digest encoding (e.g. 'hex'); when omitted
 *     the digest is returned as a Buffer.
 * @returns {string|Buffer} the digest.
 */
function hash(str, encoding) {
  const digester = crypto.createHash('sha256');
  digester.update(str, 'utf8');
  return digester.digest(encoding);
}
I'm trying to insert data into Spanner through a Cloud Function, using a POST request. I think that I'm doing everything as described in the documentation, and I just can't understand what causes the following error:
"Error: 1 session leak(s) detected.
at _requests.onIdle.then (/srv/node_modules/@google-cloud/spanner/build/src/session-pool.js:193:25)
at <anonymous>"
And here is my Cloud Function:
// The scoped package name starts with '@'; a '#' specifier does not resolve
// to the Spanner client library.
const {Spanner} = require('@google-cloud/spanner');

/**
 * HTTP Cloud Function that inserts one row into the Feedbacks table.
 *
 * Reads age, comment, gender and rating from the JSON request body, runs the
 * INSERT inside a read-write transaction and replies with a JSON string
 * containing a message and the echoed request body (200 on success, 500 on
 * any failure).
 *
 * @param {object} req - HTTP request; req.body carries the feedback fields.
 * @param {object} res - HTTP response.
 */
module.exports.http = (req, res) => {
  const projectId = 'project-id';
  const instanceId = 'instance-id';
  const databaseId = 'database-id';

  const spanner = new Spanner({
    projectId: projectId,
  });
  const instance = spanner.instance(instanceId);
  const database = instance.database(databaseId);

  database.runTransaction(async (err, transaction) => {
    if (err) {
      res.status(500).send(JSON.stringify({message: err, requestBody: req.body}));
      return;
    }
    try {
      const data = req.body;
      const [rowCount] = await transaction.runUpdate({
        // Spanner query parameters are referenced as @name.
        sql:
          'INSERT Feedbacks (age, comment, gender, rating) VALUES (@age, @comment, @gender, @rating)',
        params: {
          age: data.age.toString(),
          comment: data.comment,
          gender: data.gender,
          rating: data.rating.toString(),
        },
      });
      const sqlResponse = 'Successfully inserted ' + rowCount + ' record into the Feedbacks table.';
      await transaction.commit();
      res.status(200).send(JSON.stringify({message: sqlResponse, requestBody: req.body}));
    } catch (error) {
      // End the failed transaction so its session is released before the
      // database is closed below.
      transaction.end();
      res.status(500).send(JSON.stringify({message: error, requestBody: req.body}));
    } finally {
      // Surface close failures (such as a session-leak report) in the logs
      // instead of leaving them unhandled.
      database.close((closeError) => {
        if (closeError) {
          console.error(closeError);
        }
      });
    }
  });
};
Your code appears to be correct. As noted by @Mayeru in the comments on your question, the first thing to confirm is that you're inserting a new record with a unique value specified for your table's primary key column.
Another possibility that could be causing the issue you are encountering is that you are trying to test the function using the "Testing" UI of the Cloud Console's Cloud Functions > "Function details" section. If so then you may be either using an empty request body or a malformed request body when you click the "Test the function" button. In the "Triggering event" textarea that appears above the "Test the function" button, make sure you have entered a valid JSON request body which includes the elements and values that your INSERT statement expects.
For example a "Triggering event" JSON request body like the following should work:
{"singerId":"1001","firstName":"Test","lastName":"Singer"}
Using the following "nodeInsert" function that's similar to the code you've shared:
// The scoped package name starts with '@'; a '#' specifier does not resolve
// to the Spanner client library.
const {Spanner} = require('@google-cloud/spanner');

/**
 * HTTP Cloud Function that inserts one row into the Singers table.
 *
 * Reads singerId, firstName and lastName from the JSON request body, runs
 * the INSERT inside a read-write transaction and replies with a JSON string
 * containing a message and the echoed request body (200 on success, 500 on
 * any failure).
 *
 * @param {object} req - HTTP request; req.body carries the singer fields.
 * @param {object} res - HTTP response.
 */
module.exports.nodeInsert = (req, res) => {
  const projectId = 'my-project';
  const instanceId = 'my-instance';
  const databaseId = 'my-database';

  const spanner = new Spanner({
    projectId: projectId,
  });
  const instance = spanner.instance(instanceId);
  const database = instance.database(databaseId);

  database.runTransaction(async (err, transaction) => {
    if (err) {
      res
        .status(500)
        .send(JSON.stringify({message: err, requestBody: req.body}));
      // A transaction object may not exist when starting it failed, so only
      // end it when one was actually handed over.
      if (transaction) {
        transaction.end();
      }
      console.error('Transaction terminated.');
      return;
    }
    try {
      const data = req.body;
      const parsedSingerId = parseInt(data.singerId, 10);
      const [rowCount] = await transaction.runUpdate({
        // Spanner query parameters are referenced as @name.
        sql:
          'INSERT Singers (SingerId, FirstName, LastName) VALUES (@singerId, @firstName, @lastName)',
        params: {
          singerId: parsedSingerId,
          firstName: data.firstName,
          lastName: data.lastName,
        },
      });
      const sqlResponse = 'Successfully inserted ' + rowCount + ' record into the Singers table.';
      await transaction.commit();
      res
        .status(200)
        .send(JSON.stringify({message: sqlResponse, requestBody: req.body}));
    } catch (error) {
      res
        .status(500)
        .send(JSON.stringify({message: error, requestBody: req.body}));
      // Release the session held by the failed transaction.
      transaction.end();
      console.error('Transaction terminated.');
    } finally {
      // Surface close failures (such as a session-leak report) in the logs
      // instead of leaving them unhandled.
      database.close((closeError) => {
        if (closeError) {
          console.error(closeError);
        }
      });
    }
  });
};