I want to get logs from a subscription filter, put them in an S3 bucket, and send them to ES.
Similar to the diagram here:
https://aws.amazon.com/solutions/implementations/centralized-logging/
I am using this function:
/*
For processing data sent to Firehose by Cloudwatch Logs subscription filters.
Cloudwatch Logs sends to Firehose records that look like this:
{
"messageType": "DATA_MESSAGE",
"owner": "123456789012",
"logGroup": "log_group_name",
"logStream": "log_stream_name",
"subscriptionFilters": [
"subscription_filter_name"
],
"logEvents": [
{
"id": "01234567890123456789012345678901234567890123456789012345",
"timestamp": 1510109208016,
"message": "log message 1"
},
{
"id": "01234567890123456789012345678901234567890123456789012345",
"timestamp": 1510109208017,
"message": "log message 2"
}
...
]
}
The data is additionally compressed with GZIP.
The code below will:
1) Gunzip the data
2) Parse the json
3) Set the result to ProcessingFailed for any record whose messageType is not DATA_MESSAGE, thus redirecting them to the
processing error output. Such records do not contain any log events. You can modify the code to set the result to
Dropped instead to get rid of these records completely.
4) For records whose messageType is DATA_MESSAGE, extract the individual log events from the logEvents field, and pass
each one to the transformLogEvent method. You can modify the transformLogEvent method to perform custom
transformations on the log events.
5) Concatenate the result from (4) together and set the result as the data of the record returned to Firehose. Note that
this step will not add any delimiters. Delimiters should be appended by the logic within the transformLogEvent
method.
6) Any additional records which exceed 6MB will be re-ingested back into Firehose.
*/
const zlib = require('zlib');
const AWS = require('aws-sdk');
/**
* logEvent has this format:
*
* {
* "id": "01234567890123456789012345678901234567890123456789012345",
* "timestamp": 1510109208016,
* "message": "log message 1"
* }
*
* The default implementation below just extracts the message and appends a newline to it.
*
* The result must be returned in a Promise.
*/
function transformLogEvent(logEvent: any) {
return Promise.resolve(`${logEvent.message}\n`);
}
function putRecordsToFirehoseStream(streamName: any, records: any, client: any, resolve: any, reject: any, attemptsMade: any, maxAttempts: any) {
client.putRecordBatch({
DeliveryStreamName: streamName,
Records: records,
}, (err: any, data: any) => {
const codes = [];
let failed = [];
let errMsg = err;
if (err) {
failed = records;
} else {
for (let i = 0; i < data.RequestResponses.length; i++) {
const code = data.RequestResponses[i].ErrorCode;
if (code) {
codes.push(code);
failed.push(records[i]);
}
}
errMsg = `Individual error codes: ${codes}`;
}
if (failed.length > 0) {
if (attemptsMade + 1 < maxAttempts) {
console.log('Some records failed while calling PutRecordBatch, retrying. %s', errMsg);
putRecordsToFirehoseStream(streamName, failed, client, resolve, reject, attemptsMade + 1, maxAttempts);
} else {
reject(`Could not put records after ${maxAttempts} attempts. ${errMsg}`);
}
} else {
resolve('');
}
});
}
function putRecordsToKinesisStream(streamName: any, records: any, client: any, resolve: any, reject: any, attemptsMade: any, maxAttempts: any) {
client.putRecords({
StreamName: streamName,
Records: records,
}, (err: any, data: any) => {
const codes = [];
let failed = [];
let errMsg = err;
if (err) {
failed = records;
} else {
for (let i = 0; i < data.Records.length; i++) {
const code = data.Records[i].ErrorCode;
if (code) {
codes.push(code);
failed.push(records[i]);
}
}
errMsg = `Individual error codes: ${codes}`;
}
if (failed.length > 0) {
if (attemptsMade + 1 < maxAttempts) {
console.log('Some records failed while calling PutRecords, retrying. %s', errMsg);
putRecordsToKinesisStream(streamName, failed, client, resolve, reject, attemptsMade + 1, maxAttempts);
} else {
reject(`Could not put records after ${maxAttempts} attempts. ${errMsg}`);
}
} else {
resolve('');
}
});
}
function createReingestionRecord(isSas: any, originalRecord: any) {
if (isSas) {
return {
Data: Buffer.from(originalRecord.data, 'base64'),
PartitionKey: originalRecord.kinesisRecordMetadata.partitionKey,
};
} else {
return {
Data: Buffer.from(originalRecord.data, 'base64'),
};
}
}
function getReingestionRecord(isSas: any, reIngestionRecord: any) {
if (isSas) {
return {
Data: reIngestionRecord.Data,
PartitionKey: reIngestionRecord.PartitionKey,
};
} else {
return {
Data: reIngestionRecord.Data,
};
}
}
exports.handler = (event: any, context: any, callback: any) => {
Promise.all(event.records.map(function (r: any) {
const buffer = Buffer.from(r.data, 'base64');
let decompressed;
try {
decompressed = zlib.unzipSync(buffer);
} catch (e) {
return Promise.resolve({
recordId: r.recordId,
result: 'ProcessingFailed',
});
}
const data = JSON.parse(decompressed);
// CONTROL_MESSAGE are sent by CWL to check if the subscription is reachable.
// They do not contain actual data.
if (data.messageType === 'CONTROL_MESSAGE') {
return Promise.resolve({
recordId: r.recordId,
result: 'Dropped',
});
} else if (data.messageType === 'DATA_MESSAGE') {
const promises = data.logEvents.map(transformLogEvent);
return Promise.all(promises)
.then(transformed => {
const payload: any = transformed.reduce(function (a: any, v: any) {
return a + v;
});
const encoded = Buffer.from(payload).toString();
return {
recordId: r.recordId,
result: 'Ok',
data: encoded,
};
});
} else {
return Promise.resolve({
recordId: r.recordId,
result: 'ProcessingFailed',
});
}
})).then(recs => {
const isSas = Object.prototype.hasOwnProperty.call(event, 'sourceKinesisStreamArn');
const streamARN = isSas ? event.sourceKinesisStreamArn : event.deliveryStreamArn;
const region = streamARN.split(':')[3];
const streamName = streamARN.split('/')[1];
const result: any = { records: recs };
let recordsToReingest = [];
const putRecordBatches: any = [];
let totalRecordsToBeReingested = 0;
const inputDataByRecId: any = {};
event.records.forEach(function (r: any) { inputDataByRecId[r.recordId] = createReingestionRecord(isSas, r) });
let projectedSize = recs.filter(function (rec: any) { return rec.result === 'Ok' })
.map(function (r: any) { return r.recordId.length + r.data.length })
.reduce((a, b) => a + b, 0);
// 6000000 instead of 6291456 to leave ample headroom for the stuff we didn't account for
for (let idx = 0; idx < event.records.length && projectedSize > 6000000; idx++) {
const rec: any = result.records[idx];
if (rec.result === 'Ok') {
totalRecordsToBeReingested++;
recordsToReingest.push(getReingestionRecord(isSas, inputDataByRecId[rec.recordId]));
projectedSize -= rec.data.length;
delete rec.data;
result.records[idx].result = 'Dropped';
// split out the record batches into multiple groups, 500 records at max per group
if (recordsToReingest.length === 500) {
putRecordBatches.push(recordsToReingest);
recordsToReingest = [];
}
}
}
if (recordsToReingest.length > 0) {
// add the last batch
putRecordBatches.push(recordsToReingest);
}
if (putRecordBatches.length > 0) {
new Promise((resolve, reject) => {
let recordsReingestedSoFar = 0;
for (let idx = 0; idx < putRecordBatches.length; idx++) {
const recordBatch = putRecordBatches[idx];
if (isSas) {
const client = new AWS.Kinesis({ region: region });
putRecordsToKinesisStream(streamName, recordBatch, client, resolve, reject, 0, 20);
} else {
const client = new AWS.Firehose({ region: region });
putRecordsToFirehoseStream(streamName, recordBatch, client, resolve, reject, 0, 20);
}
recordsReingestedSoFar += recordBatch.length;
console.log('Reingested %s/%s records out of %s in to %s stream', recordsReingestedSoFar, totalRecordsToBeReingested, event.records.length, streamName);
}}).then(
() => {
console.log('Reingested all %s records out of %s in to %s stream', totalRecordsToBeReingested, event.records.length, streamName);
callback(null, result);
},
failed => {
console.log('Failed to reingest records. %s', failed);
callback(failed, null);
});
} else {
console.log('No records needed to be reingested.');
callback(null, result);
}
}).catch(ex => {
console.log('Error: ', ex);
callback(ex, null);
});
};
But I am getting a Lambda.FunctionError:
Check your function and make sure the output is in required format. In addition to that, make sure the processed records contain valid result status of Dropped, Ok, or ProcessingFailed
Does anybody know which function is suitable to receive logs from the CloudWatch subscription filter and send them to S3 and ES?
My code for the FirehoseDeliveryStream looks like:
const firehoseDeliveryStream = new CfnDeliveryStream(this, "FirehoseDeliveryStream", {
deliveryStreamType: "DirectPut",
elasticsearchDestinationConfiguration: {
domainArn: elasticsearchDomain.domainArn,
roleArn: firehoseDeliveryRole.roleArn,
indexName: "test",
s3Configuration: {
bucketArn: this.logsBucket.bucketArn,
roleArn: firehoseDeliveryRole.roleArn,
cloudWatchLoggingOptions: {
enabled: true,
logGroupName: firehoseloggroup.logGroupName,
logStreamName: logstream.logStreamName
},
},
s3BackupMode: "AllDocuments",
cloudWatchLoggingOptions: {
enabled: true,
logGroupName: firehoseloggroup.logGroupName,
logStreamName: logstream.logStreamName
},
processingConfiguration: {
enabled: true,
processors: [{
type: "Lambda",
parameters: [{
parameterName: "LambdaArn",
parameterValue: handler.functionArn,
}],
}],
},
},
});
I have a CloudWatch log group (log-group-1), a Kinesis Firehose delivery stream, a Lambda function, and an S3 bucket.
log-group-1 sends logs to Kinesis Firehose (using a subscription filter). Kinesis Firehose invokes the Lambda to process the logs. The Lambda returns the transformed logs to Kinesis Firehose, and Kinesis Firehose saves them to S3.
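For reference, the subscription-filter wiring between log-group-1 and the delivery stream can be sketched in CDK roughly like this (CfnSubscriptionFilter comes from the aws-logs CDK module; logGroup, deliveryStream and cloudWatchLogsRole are assumed names, and the role must allow CloudWatch Logs to call firehose:PutRecordBatch on the stream):
// Sketch only: forward all events from log-group-1 to the Firehose delivery stream.
new CfnSubscriptionFilter(this, "LogGroup1Subscription", {
    logGroupName: logGroup.logGroupName,      // assumed: the log-group-1 construct
    filterPattern: "",                        // empty pattern = forward every log event
    destinationArn: deliveryStream.attrArn,   // ARN of the CfnDeliveryStream
    roleArn: cloudWatchLogsRole.roleArn,      // assumed: role CloudWatch Logs assumes to write to Firehose
});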
Lambda gets the following input:
{
"invocationId": "000ac99...",
"deliveryStreamArn": "arn:aws:firehose:eu-central-1:123456789123:deliverystream/delivery-09",
"region": "eu-central-1",
"records": [
{
"recordId": "496199814216613477...",
"approximateArrivalTimestamp": 1625854080200,
"data": "H4sIAAAAAAAAADWOwQrCM......"
},
{
"recordId": "4961998142166134...",
"approximateArrivalTimestamp": 1625854100311,
"data": "H4sIAAAAAAAAADVPy07DMB......"
}
]
}
To return the transformed messages you must change the records list. See the example:
"records": [
{
"recordId": "you better take it from the input",
"result": "can be Ok, Dropped, ProcessingFailed",
"data": "must be an encoded base-64 string"
}
]
I attached the code below, written in JavaScript. It is enough to copy-paste it into the Lambda (note that it uses the node-gzip npm package, so that dependency needs to be bundled with the function).
const node_gzip_1 = require("node-gzip");
async function handler(event) {
console.log('event: ' + JSON.stringify(event, undefined, 3));
let result = [];
// Iterate through records list
const records = event.records;
for (let ii = 0; ii < records.length; ii++) {
const record = records[ii];
const recordId = record.recordId;
// Transform record data to a human readable string
const data = record.data;
const decodedData = Buffer.from(data, 'base64');
const ungziped = await node_gzip_1.ungzip(decodedData);
console.log('ungziped: ' + ungziped);
// Parse record data to JSON
const dataJson = JSON.parse(ungziped.toString());
// Get a list of log events and iterate through each element
const logEventsList = dataJson.logEvents;
        // Firehose expects exactly one output record (with the same recordId) per input
        // record, so collect all transformed log events into a single payload.
        let transformedPayload = '';
        logEventsList.forEach((logEventValue) => {
            // Get the message which was saved in CloudWatch
            const messageString = logEventValue.message;
            // Create the transformed result
            const transformedResultJson = {
                someRandomNumber: Math.random(), // Some random variable I decided to put in the result
                message: messageString + '-my-custom-change' // Edit the message
            };
            // Adding a new line per event is optional; it just makes reading the S3 output easier
            transformedPayload += JSON.stringify(transformedResultJson) + '\n';
        });
        // Final data must be encoded to base 64
        const messageBase64 = Buffer.from(transformedPayload).toString('base64');
        console.log('messageBase64: ' + messageBase64);
        // Save the transformed record
        result.push({
            recordId: recordId,
            result: 'Ok',
            data: messageBase64
        });
}
// Replace initial records list with the transformed list
event.records = result;
console.log('new event: ' + JSON.stringify(event, undefined, 2));
// Returned value will go back to kinesis firehose, then S3
return event;
}
exports.handler = handler;
Lambda return value is:
{
"invocationId": "000ac99...",
"deliveryStreamArn": "arn:aws:firehose:eu-central-1:123456789123:deliverystream/delivery-09",
"region": "eu-central-1",
"records": [
{
"recordId": "496199814216613477...",
"result": "Ok",
"data": "eyJzb21lUmF..."
},
{
"recordId": "4961998142166134...",
"result": "Ok",
"data": "eyJzb21lUmFuZG9..."
}
]
}
You can also use the Lambda blueprint kinesis-firehose-syslog-to-json.
Also see:
https://docs.amazonaws.cn/en_us/firehose/latest/dev/data-transformation.html
Kinesis Firehose putting JSON objects in S3 without separator comma
I'm trying to load test an API (GET method) using LoadImpact's k6, which requires an OAuth token for authorization to get a successful response. I already have a Postman collection file which does this by running a pre-request script: the pre-request script requests a token from the authorization server and then populates the token into an environment variable. I used the "Postman to LoadImpact converter" to generate the k6 script, but it isn't of any help; the script fails to get the access token.
The generated script from the converter is given below:
// Auto-generated by the Load Impact converter
import "./libs/shim/core.js";
export let options = { maxRedirects: 4 };
const Request = Symbol.for("request");
postman[Symbol.for("initial")]({
options,
collection: {
currentAccessToken: "",
"Client-Id": "",
"Client-Secret": "",
"Token-Scope": "",
"Grant-Type": "client_credentials",
"Access-Token-URL": "",
accessTokenExpiry: ""
}
});
export default function() {
postman[Request]({
name: "Collection Mock",
id: "",
method: "GET",
address:
"",
headers: {
Authorization: "Bearer {{currentAccessToken}}"
},
pre() {
const echoPostRequest = {
url: pm.environment.get("Access-Token-URL"),
method: "POST",
header: "Content-Type:x-www-form-urlencoded",
body: {
mode: "urlencoded",
urlencoded: [
{ key: "client_id", value: pm.environment.get("Client-Id") },
{
key: "client_secret",
value: pm.environment.get("Client-Secret")
},
{ key: "grant_type", value: pm.environment.get("Grant-Type") },
{ key: "scope", value: pm.environment.get("Token-Scope") }
]
}
};
var getToken = true;
if (
!pm.environment.get("accessTokenExpiry") ||
!pm.environment.get("currentAccessToken")
) {
console.log("Token or expiry date are missing");
} else if (
pm.environment.get("accessTokenExpiry") <= new Date().getTime()
) {
console.log("Token is expired");
} else {
getToken = false;
console.log("Token and expiry date are all good");
}
if (getToken === true) {
pm.sendRequest(echoPostRequest, function(err, res) {
console.log(err ? err : res.json());
if (err === null) {
console.log("Saving the token and expiry date");
var responseJson = res.json();
pm.environment.set("currentAccessToken", responseJson.access_token);
var expiryDate = new Date();
expiryDate.setSeconds(
expiryDate.getSeconds() + responseJson.expires_in
);
pm.environment.set("accessTokenExpiry", expiryDate.getTime());
}
});
}
}
});
}
The issue is with pm.sendRequest, which is not supported by the converter, and I'm not sure what the alternative is. So I'm looking for a way to dynamically request an access token from the authorization server and use that token to make requests to the API in the k6 load-test script.
As you have seen, sendRequest is not supported.
This is primarily because pm.sendRequest is asynchronous, and k6 at this point doesn't have an event loop, so there are no asynchronous HTTP calls :( (except with http.batch, but that isn't really applicable here).
I find it unlikely that you actually want this to be asynchronous, and you can't do it with k6 at this point either way, so you can just rewrite it to use k6's http.post.
As far as I can see, this should look like:
pre() {
    var getToken = true;
    if (
        !pm.environment.get("accessTokenExpiry") ||
        !pm.environment.get("currentAccessToken")
    ) {
        console.log("Token or expiry date are missing");
    } else if (
        pm.environment.get("accessTokenExpiry") <= new Date().getTime()
    ) {
        console.log("Token is expired");
    } else {
        getToken = false;
        console.log("Token and expiry date are all good");
    }
    if (getToken === true) {
        // http comes from `import http from "k6/http";` at the top of the script.
        // Passing an object as the body sends it as application/x-www-form-urlencoded.
        let res = http.post(pm.environment.get("Access-Token-URL"), {
            "client_id": pm.environment.get("Client-Id"),
            "client_secret": pm.environment.get("Client-Secret"),
            "grant_type": pm.environment.get("Grant-Type"),
            "scope": pm.environment.get("Token-Scope")
        });
        console.log(res.json());
        if (res.status === 200) {
            console.log("Saving the token and expiry date");
            var responseJson = res.json();
            pm.environment.set("currentAccessToken", responseJson.access_token);
            var expiryDate = new Date();
            expiryDate.setSeconds(
                expiryDate.getSeconds() + responseJson.expires_in
            );
            pm.environment.set("accessTokenExpiry", expiryDate.getTime());
        }
    }
}
Disclaimer: I have never used Postman, and the code above was written/copy-pasted by hand and not tested :)
I ended up using the code snippet below to make a successful call for my purpose:
// Auto-generated by the Load Impact converter
import "./libs/shim/core.js";
import http from "k6/http";
import { check, sleep } from "k6";
export let options = {
max_vus: 10,
vus: 10,
stages: [
{ duration: "1m", target: 10 }
]
}
const Request = Symbol.for("request");
pm.environment.set("currentAccessToken", "");
pm.environment.set("accessTokenExpiry", "");
pm.environment.set("clientId", "");
pm.environment.set("clientSecret", "");
pm.environment.set("tokenScope", "");
pm.environment.set("grantType", "");
pm.environment.set("accesstokenUrl", "");
pm.environment.set("apiUrl", "");
pm.environment.set("subscriptionKeys", "");
export default function() {
var getToken = true;
if (!pm.environment.get("accessTokenExpiry") || !pm.environment.get("currentAccessToken")) {
//console.log("Token or expiry date are missing");
} else if (pm.environment.get("accessTokenExpiry") <= new Date().getTime()) {
//console.log("Token is expired");
} else {
getToken = false;
//console.log("Token and expiry date are all good");
}
if (getToken === true) {
//get the access token first
let res = http.post(pm.environment.get("accesstokenUrl"), {
"client_id": pm.environment.get("clientId"),
"client_secret": pm.environment.get("clientSecret"),
"grant_type": pm.environment.get("grantType"),
"scope": pm.environment.get("tokenScope")
});
var checkRes = check(res, {
"Token Request status is 200": (r) => r.status === 200,
});
if (checkRes) {
var responseJson = res.json();
pm.environment.set("currentAccessToken", responseJson.access_token);
var expiryDate = new Date();
expiryDate.setSeconds(
expiryDate.getSeconds() + responseJson.expires_in
);
pm.environment.set("accessTokenExpiry", expiryDate.getTime());
}
sleep(1);
//make the api request using the access token and subscription keys (if required)
let apiRes = http.get(pm.environment.get("apiUrl"),
{
headers: { "Authorization": "Bearer " + pm.environment.get("currentAccessToken"),
"Subscription-Key" : pm.environment.get("subscriptionKeys")
}
});
check(apiRes, {
"API Request status is 200": (res) => res.status === 200
});
sleep(3);
}
}
Following Streaming CloudWatch Logs Data to Amazon Elasticsearch Service, streaming a CloudWatch log group to ELK works fine with one log group and one Lambda function.
But now I want to use a different target Lambda function for my other log group, and I am not able to do that, as there is no option for it in the AWS console.
Any help will be appreciated.
Thanks
I was streaming to ELK using the AWS console option Start Streaming to Amazon Elasticsearch Service, but I could not change or choose a different Lambda function, because only one Lambda function can be selected for a log group with that option.
So I created a new Lambda function and set the log group's stream target to that Lambda function directly.
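The same subscription can also be created outside the console, for example with the AWS SDK for JavaScript (a rough sketch; the log group, filter name and function ARN below are made up, and the target Lambda must already have a resource-based permission allowing the CloudWatch Logs service principal to invoke it):
const AWS = require('aws-sdk');
const cwl = new AWS.CloudWatchLogs({ region: 'us-west-2' });

// Sketch only: point a second log group at a different streaming Lambda.
cwl.putSubscriptionFilter({
    logGroupName: '/aws/my-other-log-group',                                      // made-up name
    filterName: 'stream-to-second-es-lambda',                                     // made-up name
    filterPattern: '',                                                            // empty pattern = all events
    destinationArn: 'arn:aws:lambda:us-west-2:123456789012:function:LogsToES-2',  // made-up ARN
}, (err, data) => {
    if (err) console.log('Failed to create subscription filter', err);
    else console.log('Subscription filter created', data);
});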
Here is the code; it's all you need. The Node.js runtime for the Lambda function is 4.x, as there was some issue with the newer version, but the plus point is that it does not require any extra npm packages.
// v1.1.2
var https = require('https');
var zlib = require('zlib');
var crypto = require('crypto');
var endpoint = 'search-my-test.us-west-2.es.amazonaws.com';
exports.handler = function(input, context) {
// decode input from base64
var zippedInput = new Buffer(input.awslogs.data, 'base64');
// decompress the input
zlib.gunzip(zippedInput, function(error, buffer) {
if (error) { context.fail(error); return; }
// parse the input from JSON
var awslogsData = JSON.parse(buffer.toString('utf8'));
// transform the input to Elasticsearch documents
var elasticsearchBulkData = transform(awslogsData);
// skip control messages
if (!elasticsearchBulkData) {
console.log('Received a control message');
context.succeed('Control message handled successfully');
return;
}
// post documents to the Amazon Elasticsearch Service
post(elasticsearchBulkData, function(error, success, statusCode, failedItems) {
console.log('Response: ' + JSON.stringify({
"statusCode": statusCode
}));
if (error) {
console.log('Error: ' + JSON.stringify(error, null, 2));
if (failedItems && failedItems.length > 0) {
console.log("Failed Items: " +
JSON.stringify(failedItems, null, 2));
}
context.fail(JSON.stringify(error));
} else {
console.log('Success: ' + JSON.stringify(success));
context.succeed('Success');
}
});
});
};
function transform(payload) {
if (payload.messageType === 'CONTROL_MESSAGE') {
return null;
}
var bulkRequestBody = '';
payload.logEvents.forEach(function(logEvent) {
var timestamp = new Date(1 * logEvent.timestamp);
// index name format: cwl-YYYY.MM.DD
var indexName = [
'prod-background-wo-' + timestamp.getUTCFullYear(), // year
('0' + (timestamp.getUTCMonth() + 1)).slice(-2), // month
('0' + timestamp.getUTCDate()).slice(-2) // day
].join('.');
var source = buildSource(logEvent.message, logEvent.extractedFields);
source['response_time'] = source["end"] - source["start"];
source['#id'] = logEvent.id;
source['#timestamp'] = new Date(1 * logEvent.timestamp).toISOString();
source['#message'] = logEvent.message;
source['#owner'] = payload.owner;
source['#log_group'] = payload.logGroup;
source['#log_stream'] = payload.logStream;
var action = { "index": {} };
action.index._index = indexName;
action.index._type = payload.logGroup;
action.index._id = logEvent.id;
bulkRequestBody += [
JSON.stringify(action),
JSON.stringify(source),
].join('\n') + '\n';
});
return bulkRequestBody;
}
function buildSource(message, extractedFields) {
if (extractedFields) {
var source = {};
for (var key in extractedFields) {
if (extractedFields.hasOwnProperty(key) && extractedFields[key]) {
var value = extractedFields[key];
if (isNumeric(value)) {
source[key] = 1 * value;
continue;
}
jsonSubString = extractJson(value);
if (jsonSubString !== null) {
source['$' + key] = JSON.parse(jsonSubString);
}
source[key] = value;
}
}
return source;
}
jsonSubString = extractJson(message);
if (jsonSubString !== null) {
return JSON.parse(jsonSubString);
}
return {};
}
function extractJson(message) {
var jsonStart = message.indexOf('{');
if (jsonStart < 0) return null;
var jsonSubString = message.substring(jsonStart);
return isValidJson(jsonSubString) ? jsonSubString : null;
}
function isValidJson(message) {
try {
JSON.parse(message);
} catch (e) { return false; }
return true;
}
function isNumeric(n) {
return !isNaN(parseFloat(n)) && isFinite(n);
}
function post(body, callback) {
var requestParams = buildRequest(endpoint, body);
var request = https.request(requestParams, function(response) {
var responseBody = '';
response.on('data', function(chunk) {
responseBody += chunk;
});
response.on('end', function() {
var info = JSON.parse(responseBody);
var failedItems;
var success;
if (response.statusCode >= 200 && response.statusCode < 299) {
failedItems = info.items.filter(function(x) {
return x.index.status >= 300;
});
success = {
"attemptedItems": info.items.length,
"successfulItems": info.items.length - failedItems.length,
"failedItems": failedItems.length
};
}
var error = response.statusCode !== 200 || info.errors === true ? {
"statusCode": response.statusCode,
"responseBody": responseBody
} : null;
callback(error, success, response.statusCode, failedItems);
});
}).on('error', function(e) {
callback(e);
});
request.end(requestParams.body);
}
function buildRequest(endpoint, body) {
var endpointParts = endpoint.match(/^([^\.]+)\.?([^\.]*)\.?([^\.]*)\.amazonaws\.com$/);
var region = endpointParts[2];
var service = endpointParts[3];
var datetime = (new Date()).toISOString().replace(/[:\-]|\.\d{3}/g, '');
var date = datetime.substr(0, 8);
var kDate = hmac('AWS4' + process.env.AWS_SECRET_ACCESS_KEY, date);
var kRegion = hmac(kDate, region);
var kService = hmac(kRegion, service);
var kSigning = hmac(kService, 'aws4_request');
var request = {
host: endpoint,
method: 'POST',
path: '/_bulk',
body: body,
headers: {
'Content-Type': 'application/json',
'Host': endpoint,
'Content-Length': Buffer.byteLength(body),
'X-Amz-Security-Token': process.env.AWS_SESSION_TOKEN,
'X-Amz-Date': datetime
}
};
var canonicalHeaders = Object.keys(request.headers)
.sort(function(a, b) { return a.toLowerCase() < b.toLowerCase() ? -1 : 1; })
.map(function(k) { return k.toLowerCase() + ':' + request.headers[k]; })
.join('\n');
var signedHeaders = Object.keys(request.headers)
.map(function(k) { return k.toLowerCase(); })
.sort()
.join(';');
var canonicalString = [
request.method,
request.path, '',
canonicalHeaders, '',
signedHeaders,
hash(request.body, 'hex'),
].join('\n');
var credentialString = [ date, region, service, 'aws4_request' ].join('/');
var stringToSign = [
'AWS4-HMAC-SHA256',
datetime,
credentialString,
hash(canonicalString, 'hex')
] .join('\n');
request.headers.Authorization = [
'AWS4-HMAC-SHA256 Credential=' + process.env.AWS_ACCESS_KEY_ID + '/' + credentialString,
'SignedHeaders=' + signedHeaders,
'Signature=' + hmac(kSigning, stringToSign, 'hex')
].join(', ');
return request;
}
function hmac(key, str, encoding) {
return crypto.createHmac('sha256', key).update(str, 'utf8').digest(encoding);
}
function hash(str, encoding) {
return crypto.createHash('sha256').update(str, 'utf8').digest(encoding);
}
I've read a lot of similar questions about adding newline characters to Firehose records, but they all involve adding the newline character at the source. The problem is that I don't have access to the source: a third party is piping data to our Kinesis instance, and I cannot add the \n at the source.
I've tried doing a Firehose data transformation using the following code:
'use strict';
console.log('Loading function');
exports.handler = (event, context, callback) => {
/* Process the list of records and transform them */
const output = [];
event.records.forEach((record) => {
const results = {
/* This transformation is the "identity" transformation, the data is left intact */
recordId: record.recordId,
result: record.data.event_type === 'alert' ? 'Dropped' : 'Ok',
data: record.data + '\n'
};
output.push(results);
});
console.log(`Processing completed. Successful records ${output.length}.`);
callback(null, { records: output });
};
but the newline is still lost. I've also tried JSON.stringify(record.data) + '\n' but then I get an Invalid output structure error.
Try decoding the record.data, add a new line, then encode it again as base64.
This is Python, but the idea is the same:
import base64

def lambda_handler(event, context):
    output = []
    for record in event['records']:
        # Decode the incoming base64 record data
        payload = base64.b64decode(record['data']).decode('utf-8')
        # Do custom processing on the payload here
        payload = payload + '\n'
        output_record = {
            'recordId': record['recordId'],
            'result': 'Ok',
            # Re-encode the payload as base64 for Firehose
            'data': base64.b64encode(payload.encode('utf-8')).decode('utf-8')
        }
        output.append(output_record)
    return {'records': output}
From the comment of @Matt Westlake:
For those looking for the Node.js answer, it's
const data = JSON.parse(Buffer.from(record.data, 'base64').toString('utf8'));
and
Buffer.from(JSON.stringify(data) + '\n').toString('base64')
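Putting those two lines together, a minimal Firehose transformation handler in Node.js could look roughly like this (a sketch, assuming every record's data is a single base64-encoded JSON document):
exports.handler = async (event) => {
    const records = event.records.map((record) => {
        // Decode the base64 record data and parse the JSON payload
        const data = JSON.parse(Buffer.from(record.data, 'base64').toString('utf8'));
        // Re-encode with a trailing newline so the JSON objects are delimited in S3
        return {
            recordId: record.recordId,
            result: 'Ok',
            data: Buffer.from(JSON.stringify(data) + '\n').toString('base64'),
        };
    });
    return { records };
};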
The kinesis-firehose-cloudwatch-logs-processor blueprint Lambda does this (with some additional handling for CloudWatch Logs).
Here's the lambda code from the blueprint as of today:
/*
For processing data sent to Firehose by Cloudwatch Logs subscription filters.
Cloudwatch Logs sends to Firehose records that look like this:
{
"messageType": "DATA_MESSAGE",
"owner": "123456789012",
"logGroup": "log_group_name",
"logStream": "log_stream_name",
"subscriptionFilters": [
"subscription_filter_name"
],
"logEvents": [
{
"id": "01234567890123456789012345678901234567890123456789012345",
"timestamp": 1510109208016,
"message": "log message 1"
},
{
"id": "01234567890123456789012345678901234567890123456789012345",
"timestamp": 1510109208017,
"message": "log message 2"
}
...
]
}
The data is additionally compressed with GZIP.
The code below will:
1) Gunzip the data
2) Parse the json
3) Set the result to ProcessingFailed for any record whose messageType is not DATA_MESSAGE, thus redirecting them to the
processing error output. Such records do not contain any log events. You can modify the code to set the result to
Dropped instead to get rid of these records completely.
4) For records whose messageType is DATA_MESSAGE, extract the individual log events from the logEvents field, and pass
each one to the transformLogEvent method. You can modify the transformLogEvent method to perform custom
transformations on the log events.
5) Concatenate the result from (4) together and set the result as the data of the record returned to Firehose. Note that
this step will not add any delimiters. Delimiters should be appended by the logic within the transformLogEvent
method.
6) Any additional records which exceed 6MB will be re-ingested back into Firehose.
*/
const zlib = require('zlib');
const AWS = require('aws-sdk');
/**
* logEvent has this format:
*
* {
* "id": "01234567890123456789012345678901234567890123456789012345",
* "timestamp": 1510109208016,
* "message": "log message 1"
* }
*
* The default implementation below just extracts the message and appends a newline to it.
*
* The result must be returned in a Promise.
*/
function transformLogEvent(logEvent) {
return Promise.resolve(`${logEvent.message}\n`);
}
function putRecordsToFirehoseStream(streamName, records, client, resolve, reject, attemptsMade, maxAttempts) {
client.putRecordBatch({
DeliveryStreamName: streamName,
Records: records,
}, (err, data) => {
const codes = [];
let failed = [];
let errMsg = err;
if (err) {
failed = records;
} else {
for (let i = 0; i < data.RequestResponses.length; i++) {
const code = data.RequestResponses[i].ErrorCode;
if (code) {
codes.push(code);
failed.push(records[i]);
}
}
errMsg = `Individual error codes: ${codes}`;
}
if (failed.length > 0) {
if (attemptsMade + 1 < maxAttempts) {
console.log('Some records failed while calling PutRecordBatch, retrying. %s', errMsg);
putRecordsToFirehoseStream(streamName, failed, client, resolve, reject, attemptsMade + 1, maxAttempts);
} else {
reject(`Could not put records after ${maxAttempts} attempts. ${errMsg}`);
}
} else {
resolve('');
}
});
}
function putRecordsToKinesisStream(streamName, records, client, resolve, reject, attemptsMade, maxAttempts) {
client.putRecords({
StreamName: streamName,
Records: records,
}, (err, data) => {
const codes = [];
let failed = [];
let errMsg = err;
if (err) {
failed = records;
} else {
for (let i = 0; i < data.Records.length; i++) {
const code = data.Records[i].ErrorCode;
if (code) {
codes.push(code);
failed.push(records[i]);
}
}
errMsg = `Individual error codes: ${codes}`;
}
if (failed.length > 0) {
if (attemptsMade + 1 < maxAttempts) {
console.log('Some records failed while calling PutRecords, retrying. %s', errMsg);
putRecordsToKinesisStream(streamName, failed, client, resolve, reject, attemptsMade + 1, maxAttempts);
} else {
reject(`Could not put records after ${maxAttempts} attempts. ${errMsg}`);
}
} else {
resolve('');
}
});
}
function createReingestionRecord(isSas, originalRecord) {
if (isSas) {
return {
Data: new Buffer(originalRecord.data, 'base64'),
PartitionKey: originalRecord.kinesisRecordMetadata.partitionKey,
};
} else {
return {
Data: new Buffer(originalRecord.data, 'base64'),
};
}
}
function getReingestionRecord(isSas, reIngestionRecord) {
if (isSas) {
return {
Data: reIngestionRecord.Data,
PartitionKey: reIngestionRecord.PartitionKey,
};
} else {
return {
Data: reIngestionRecord.Data,
};
}
}
exports.handler = (event, context, callback) => {
Promise.all(event.records.map(r => {
const buffer = new Buffer(r.data, 'base64');
const decompressed = zlib.gunzipSync(buffer);
const data = JSON.parse(decompressed);
// CONTROL_MESSAGE are sent by CWL to check if the subscription is reachable.
// They do not contain actual data.
if (data.messageType === 'CONTROL_MESSAGE') {
return Promise.resolve({
recordId: r.recordId,
result: 'Dropped',
});
} else if (data.messageType === 'DATA_MESSAGE') {
const promises = data.logEvents.map(transformLogEvent);
return Promise.all(promises)
.then(transformed => {
const payload = transformed.reduce((a, v) => a + v, '');
const encoded = new Buffer(payload).toString('base64');
return {
recordId: r.recordId,
result: 'Ok',
data: encoded,
};
});
} else {
return Promise.resolve({
recordId: r.recordId,
result: 'ProcessingFailed',
});
}
})).then(recs => {
const isSas = Object.prototype.hasOwnProperty.call(event, 'sourceKinesisStreamArn');
const streamARN = isSas ? event.sourceKinesisStreamArn : event.deliveryStreamArn;
const region = streamARN.split(':')[3];
const streamName = streamARN.split('/')[1];
const result = { records: recs };
let recordsToReingest = [];
const putRecordBatches = [];
let totalRecordsToBeReingested = 0;
const inputDataByRecId = {};
event.records.forEach(r => inputDataByRecId[r.recordId] = createReingestionRecord(isSas, r));
let projectedSize = recs.filter(rec => rec.result === 'Ok')
.map(r => r.recordId.length + r.data.length)
.reduce((a, b) => a + b);
// 6000000 instead of 6291456 to leave ample headroom for the stuff we didn't account for
for (let idx = 0; idx < event.records.length && projectedSize > 6000000; idx++) {
const rec = result.records[idx];
if (rec.result === 'Ok') {
totalRecordsToBeReingested++;
recordsToReingest.push(getReingestionRecord(isSas, inputDataByRecId[rec.recordId]));
projectedSize -= rec.data.length;
delete rec.data;
result.records[idx].result = 'Dropped';
// split out the record batches into multiple groups, 500 records at max per group
if (recordsToReingest.length === 500) {
putRecordBatches.push(recordsToReingest);
recordsToReingest = [];
}
}
}
if (recordsToReingest.length > 0) {
// add the last batch
putRecordBatches.push(recordsToReingest);
}
if (putRecordBatches.length > 0) {
new Promise((resolve, reject) => {
let recordsReingestedSoFar = 0;
for (let idx = 0; idx < putRecordBatches.length; idx++) {
const recordBatch = putRecordBatches[idx];
if (isSas) {
const client = new AWS.Kinesis({ region: region });
putRecordsToKinesisStream(streamName, recordBatch, client, resolve, reject, 0, 20);
} else {
const client = new AWS.Firehose({ region: region });
putRecordsToFirehoseStream(streamName, recordBatch, client, resolve, reject, 0, 20);
}
recordsReingestedSoFar += recordBatch.length;
console.log('Reingested %s/%s records out of %s in to %s stream', recordsReingestedSoFar, totalRecordsToBeReingested, event.records.length, streamName);
}
}).then(
() => {
console.log('Reingested all %s records out of %s in to %s stream', totalRecordsToBeReingested, event.records.length, streamName);
callback(null, result);
},
failed => {
console.log('Failed to reingest records. %s', failed);
callback(failed, null);
});
} else {
console.log('No records needed to be reingested.');
callback(null, result);
}
}).catch(ex => {
console.log('Error: ', ex);
callback(ex, null);
});
};
Here is code that will solve the problem:
__Author__ = "Soumil Nitin Shah"

import json
import boto3
import base64


class MyHasher(object):
    def __init__(self, key):
        self.key = key

    def get(self):
        keys = str(self.key).encode("UTF-8")
        keys = base64.b64encode(keys)
        keys = keys.decode("UTF-8")
        return keys


def lambda_handler(event, context):
    output = []

    for record in event['records']:
        payload = base64.b64decode(record['data'])

        # Get the payload and serialize it, appending a newline per record
        serialize_payload = str(json.loads(payload)) + "\n"

        hasherHelper = MyHasher(key=serialize_payload)
        hash = hasherHelper.get()

        output_record = {
            'recordId': record['recordId'],
            'result': 'Ok',
            'data': hash
        }
        print("output_record", output_record)

        output.append(output_record)

    return {'records': output}
I use this Lambda function to generate thumbnails on the fly, but I get the following error:
REPORT RequestId: 9369f148-2a85-11e7-a571-5f1e1818669e Duration: 188.18 ms Billed Duration: 200 ms Memory Size: 1536 MB Max Memory Used: 1536 MB
AND...
RequestId: 9369f148-2a85-11e7-a571-5f1e1818669e Process exited before completing request
So I think I am hitting the memory limit. Without the uploadRecentImage() function it works, but if I add a new size to imgVariants[] I also hit the memory limit.
I think the way the function handles the imgVariants (each loop) causes this, but I don't know how to do it better.
I will be grateful for any help.
Here is my function:
// dependencies
var async = require('async');
var AWS = require('aws-sdk');
var gm = require('gm').subClass({
imageMagick: true
}); // use ImageMagick
var util = require('util');
// configuration as code - add, modify, remove array elements as desired
var imgVariants = [
{
"SIZE": "Large1",
"POSTFIX": "-l",
"MAX_WIDTH": 6000,
"MAX_HEIGHT": 6000,
"SIZING_QUALITY": 75,
"INTERLACE": "Line"
},
{
"SIZE": "Large1",
"POSTFIX": "-l",
"MAX_WIDTH": 1280,
"MAX_HEIGHT": 1280,
"SIZING_QUALITY": 75,
"INTERLACE": "Line"
},
{
"SIZE": "Large1",
"POSTFIX": "-l",
"MAX_WIDTH": 500,
"MAX_HEIGHT": 500,
"SIZING_QUALITY": 75,
"INTERLACE": "Line"
},
{
"SIZE": "Large1",
"POSTFIX": "-l",
"MAX_WIDTH": 100,
"MAX_HEIGHT": 100,
"SIZING_QUALITY": 75,
"INTERLACE": "Line"
}
];
var DST_BUCKET_POSTFIX = "resized";
// get reference to S3 client
var s3 = new AWS.S3();
exports.handler = function (event, context) {
// Read options from the event.
console.log("Reading options from event:\n", util.inspect(event, {
depth: 5
}));
var srcBucket = event.Records[0].s3.bucket.name;
// Object key may have spaces or unicode non-ASCII characters.
var srcKey = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, " "));
// derive the file name and extension
var srcFile = srcKey.match(/(.+)\.([^.]+)/);
var srcName = srcFile[1];
var scrExt = srcFile[2];
// set the destination bucket
var dstBucket = srcBucket + DST_BUCKET_POSTFIX;
// make sure that source and destination are different buckets.
if (srcBucket === dstBucket) {
console.error("Destination bucket must be different from source bucket.");
return;
}
if (!scrExt) {
console.error('unable to derive file type extension from file key ' + srcKey);
return;
}
if (scrExt != "jpg" && scrExt != "png") {
console.log('skipping non-supported file type ' + srcKey + ' (must be jpg or png)');
return;
}
function processImage(data, options, callback) {
gm(data.Body).size(function (err, size) {
var scalingFactor = Math.min(
options.MAX_WIDTH / size.width,
options.MAX_HEIGHT / size.height
);
var width = scalingFactor * size.width;
var height = scalingFactor * size.height;
this.resize(width, height)
.quality(options.SIZING_QUALITY || 75)
.interlace(options.INTERLACE || 'None')
.toBuffer(scrExt, function (err, buffer) {
if (err) {
callback(err);
} else {
uploadImage(data.ContentType, buffer, options, callback);
uploadRecentImage(data.ContentType, buffer, options, callback);
}
});
});
}
function uploadImage(contentType, data, options, callback) {
// Upload the transformed image to the destination S3 bucket.
s3.putObject({
Bucket: dstBucket,
Key: options.MAX_WIDTH + '/' + srcName + '.' + scrExt,
Body: data,
ContentType: contentType
},
callback);
}
function uploadRecentImage(contentType, data, options, callback) {
if(options.MAX_WIDTH == 500){
s3.putObject({
Bucket: dstBucket,
Key: 'recent_optimized.' + scrExt,
Body: data,
ContentType: contentType
},
callback);
}
if(options.MAX_WIDTH == 100){
s3.putObject({
Bucket: dstBucket,
Key: 'recent_thumb.' + scrExt,
Body: data,
ContentType: contentType
},
callback);
}
}
// Download the image from S3 and process for each requested image variant.
async.waterfall(
[
function download(next) {
// Download the image from S3 into a buffer.
s3.getObject({
Bucket: srcBucket,
Key: srcKey
},
next);
},
function processImages(data, next) {
async.each(imgVariants, function (variant, next) {
processImage(data, variant, next);
}, next);
}
],
function (err) {
if (err) {
console.error(
'Unable to resize ' + srcBucket + '/' + srcKey +
' and upload to ' + dstBucket +
' due to an error: ' + err
);
} else {
console.log(
'Successfully resized ' + srcBucket + '/' + srcKey +
' and uploaded to ' + dstBucket
);
}
context.done();
}
);
};
You can limit the number of parallel processImages calls:
Replace async.each(imgVariants,
with async.eachLimit(imgVariants, 2,
to not process more than two images in parallel.
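For example, a sketch of the relevant step from the question's code with async.each swapped for async.eachLimit:
function processImages(data, next) {
    // Process at most two image variants at a time to keep memory usage bounded
    async.eachLimit(imgVariants, 2, function (variant, done) {
        processImage(data, variant, done);
    }, next);
}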
The script has a bug:
uploadImage(data.ContentType, buffer, options, callback);
uploadRecentImage(data.ContentType, buffer, options, callback);
This will call callback twice, which is not allowed. Only call the callback once!
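One way to fix that (a sketch using the async library the function already depends on) is to run the two uploads in sequence and let async invoke the outer callback exactly once; note that uploadRecentImage must then call its callback even when it skips the upload, or the sequence never finishes:
// Inside processImage, replace the two direct calls with a sequenced pair:
async.series([
    function (done) { uploadImage(data.ContentType, buffer, options, done); },
    function (done) { uploadRecentImage(data.ContentType, buffer, options, done); },
], callback);

// ...and make sure uploadRecentImage always completes:
function uploadRecentImage(contentType, data, options, callback) {
    if (options.MAX_WIDTH == 500) {
        s3.putObject({ Bucket: dstBucket, Key: 'recent_optimized.' + scrExt, Body: data, ContentType: contentType }, callback);
    } else if (options.MAX_WIDTH == 100) {
        s3.putObject({ Bucket: dstBucket, Key: 'recent_thumb.' + scrExt, Body: data, ContentType: contentType }, callback);
    } else {
        callback(); // nothing extra to upload for this variant
    }
}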
The script has another bug: it only reads event.Records[0], so it will only process the first image. If you upload multiple images at the same time, this will miss some of them.
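A structural sketch of how the handler could iterate over every record in the event instead of just the first; processObject here is a hypothetical helper wrapping the question's existing async.waterfall for one bucket/key pair:
exports.handler = function (event, context) {
    // Run the existing per-object pipeline once per uploaded object, not just for Records[0]
    async.each(event.Records, function (s3Record, done) {
        var srcBucket = s3Record.s3.bucket.name;
        var srcKey = decodeURIComponent(s3Record.s3.object.key.replace(/\+/g, " "));
        processObject(srcBucket, srcKey, done); // hypothetical: the question's download/resize/upload flow
    }, function (err) {
        if (err) console.error(err);
        context.done();
    });
};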