AWS Lambda function to scan/query DynamoDB table using array values as FilterExpression - amazon-web-services

here's my case: I'm trying to make a query on a table (table name HCI.LocCatApp) using a value sent by API as KeyConditionExpression, and I'm storing the results (which must be numbers not strings) in an array, and I want to use each value from this array as a FilterExpression to scan another table (table name HCI.Category) .. So what I need is to loop on the array values, take each of them as FilterExpression and perform the scan operation. I'm currently trying to use IN but I'm not sure if it's even supported or not.
And keep in mind that the array is being filled during the runtime. And the callback can be performed only once.
here's my code:
'use strict'
var AWS = require('aws-sdk');
var mydocumentClient = new AWS.DynamoDB.DocumentClient();
exports.handler = function (event, context, callback) {
var params = {
TableName: 'HCI.LocCatApp',
KeyConditionExpression : 'LocID = :lid',
ExpressionAttributeValues: {
":lid": event.LocID
},
ProjectionExpression: 'CatID'
};
var catIDs = [];
var catIDsObject = {};
var index = 0;
mydocumentClient.query(params, function (err, data){
if (err) {
callback(err, null);
}else{
data.Items.forEach(function(item){catIDs.push(item.CatID)});
//callback(null, catIDs);
}
})
catIDs.forEach(function(value){
index ++;
var catIDsKey = ":catID"+index;
catIDsObject[catIDsKey] = value;
})
var params2 = {
TableName: 'HCI.Category',
FilterExpression : "CatID IN (:cIDs)",
ExpressionAttributeValues : {
':cIDs' : catIDs
}
};
mydocumentClient.scan(params2, function (err, data){
if (err) {
callback(err, null);
}else{
callback(null, data);
}
})
}
For some reason, the current code runs successfully but it doesn't find any matches, even if I fill in the values manually in the array, there's still no results, the IN operation doesn't seem to work.
And many thanks in advance

In your code catIds is an array of IDs (strings probably).
When you pass it to FilterExpression, you are assuming that it will be converted to a) string b) to a string in correct format.
FilterExpression : "CatID IN (:cIDs)",
ExpressionAttributeValues : {
':cIDs' : catIDs
}
I cannot try this myself at the moment, but I'm assuming this is where the query fails. IN operator expects a comma separated list of values to compare to, in parenthesis. So, after the array is inserted to query, it should be like this
FilterExpression : "CatID IN (cat1, cat2, cat2)",
But most probably it contains extra set of [ and ], and maybe even the array to string conversion causes it to something like [Object object] etc.
One solution would be to use Array.join to concatenate all the elements from the array to single string before passing it to FilterExperession. Something like this
FilterExpression : "CatID IN (:cIDs)",
ExpressionAttributeValues : {
':cIDs' : catIDs.join()
}

Related

Function scan in DynamoDB doesn't bring some of the results

I got a function in AWS Lambda that lists every patient in a table from DynamoDB. I realized that some items from the table were not on the list. This is my function to list:
module.exports.listPatients = async (event) => {
try {
const queryString = {
limit: 5,
...event.queryStringParameters,
};
const { limit, next, name } = queryString;
const localParams = {
...patientsParams,
Limit: limit,
FilterExpression: "contains(full_name, :full_name)",
ExpressionAttributeValues: { ":full_name": name },
};
if (next) {
localParams.ExclusiveStartKey = {
id: next,
};
}
const data = await dynamoDb.scan(localParams).promise();
const nextToken = data.LastEvaluatedKey ? data.LastEvaluatedKey.id : "";
const result = {
items: data.Items,
next_token: nextToken,
};
return {
statusCode: 200,
body: JSON.stringify(result),
};
} catch (error) {
console.log("Error: ", error);
return {
statusCode: error.statusCode ? error.statusCode : 500,
body: JSON.stringify({
error: error.name ? error.name : "Exception",
message: error.message ? error.message : "Unknown error",
}),
};
}
};
Am I missing something?
I tried with and without a limit, removed the filters, and yet nothing.
I tested one of the ids with get() to test with the server can find one of those who are missing, and it worked.
I am using Serverless to deploy the code, and when I try offline, it's working.
Stackoverflow recommended this post when writing my question, but I am using DynamoDB.DocumentClient without specifying the full attribute type in the filter expression:
How to scan in DynamoDB without primary sort key with Nodejs
Looks like you are paginating using scan(). Using query() with some Global Secondary Indexes and ScanIndexForward would give you a much better performance. scan() doesn't scale well when your data grows.

how to define attribute types in Dynamodb?

I want to store data attributes are threadId , threadType (sms, livechat ,fb) , createat and updateat how I define the threadType I follow this procedure but output displays all of the types of threadType?
and how to fix the time? this is manual input is there any method to get system time?
var doClient = new AWS.DynamoDB.DocumentClient();
var DynamoDB = new AWS.DynamoDB({});
var table = "thread";
var threadId = "3";
var threadType = "livechat";
var createDate = "11:38";
var updateDate = "12:00";
var channelName = "three";
var params = {
TableName : table,
Item:
{
"threadId" : threadId,
"threadType" : { "SS": ["sms", "livechat" ,"fb"] },
"createDate" : { 'S' : createDate },
"updateDate" : { 'S' : updateDate },
"channelName" :{'S' : channelName }
}
};
console.log("Adding a new item...");
doClient.put(params, function(err, data) {
if (err) {
console.error("Unable to add item. Error JSON:", JSON.stringify(err, null, 2));
} else {
console.log("Added item:", JSON.stringify(data, null, 2));
}
});
I need that only livechat display in a result of threadtype because in this insertion I need livechat
It looks like you are modelling threadType as a StringSet, which means that it can contain multiple values. If you just want to store 'livechat' then just store this value as a String. DynamoDB doesn't have the concept of server side validation, or support for Enums, though some SDK's do - in node.js there is no Enum type. As for the system time, there is no way to ask for DynamoDB to insert the system time for you. Most people will insert epoch seconds or milliseconds as a Number type for sorting purposes, and based on the client's timestamp, which should use NTP and when hosted within EC2 will be super super close to the DDB fleet timestamp.

DynamoDB BatchGetItem dynamic TableName in Lambda function

I'm building a serverless backend for my current application using dynamoDb as my database. I use aws sam to upload my lambda functions to aws. In addition, I pass all my table names as global variables to lambda (nodejs8.10 runtime) to access them on the process.env object within my lambda function. The problem that I'm facing with this is the following: Whenever I run the batchGetItem method on dynamoDB I have to pass a string as my table name, I cannot dynamically change the table name depending on the global variable:
const AWS = require('aws-sdk');
const dynamodb = new AWS.DynamoDB({region: 'ap-south-1'}, {apiVersion:'2012-08-10'});
const params = {
RequestItems: {
//needs to be a string, cannot be a variable containing a string
'tableName': {
Keys: [] //array of keys
}
}
}
dynamodb.batchGetItem(params, (err, result) => {
// some logic
})
I need to pass the table name as a string, essentially hardcoding the table name into my function. Other DynamoDB operations, like for example the getItem method, accept a key value pair for the table name in the parameter object:
const tableName = process.env.TableName;
const getItemParams = {
Key: {
"Key": {
S: 'some key'
}
},
// table name can be changed according to the value past to lambda's environment variable
TableName: tableName
}
dynamodb.getItem(getItemParams, (err, result) => {
// some logic
}
Hence my question, is there any way to avoid hardcoding the table name in the batchGetItem method and, instead, allocate it dynamically like in the getItem method?
You can use the tableName from environment variables. Build your params in 2 steps :
const { tableName } = process.env;
const params = {
RequestItems: {},
};
// `tableName` is your environment variable, it may have any value
params.RequestItems[tableName] = {
Keys: [], //array of keys
};
dynamodb.batchGetItem(params, (err, result) => {
// some logic
})

"IN" statement in dynamodb

I have a "Users" table, here is a sample :
{
username:"haddox",
formattedPhoneNumber:"676767676",
verified: 0,
}
My wish is to retrieve all users whose formattedPhoneNumber is contained in an array of phone numbers (retrieved from my contacts). I created a secondary index, with verified as HASH and formattedPhoneNumber as RANGE. Here is my try :
var params = {
TableName: "Users",
IndexName: "FormattedPhoneSecondaryIndex",
KeyConditionExpression: "verified = :v AND formattedPhone IN :n",
ExpressionAttributeValues: {
":v":1,
":n": ["672053916", "642117296"]
},
ProjectionExpression: "username, formattedPhoneNumber"
};
dynamodb.query(params, function(err, data) {
if (err)
console.log(JSON.stringify(err, null, 2));
else
console.log(JSON.stringify(data, null, 2));
});
But I get the following error : Invalid KeyConditionExpression: Syntax error; token: \":n\", near: \"IN :n\"",
Is there something wrong with the IN keyword ?
Maybe there is another way to achieve this ?
KeyConditionExpression's cannot use the "IN" operator (see http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/QueryAndScan.html#FilteringResults). The idea with KeyConditions/KeyConditionExpression in a query operation is to more efficiently read pages of items from DynamoDB, since items with the same hash key but different range keys are stored contiguously and in sorted order. The IN operator would require extracting small portions of certain pages, which makes the Query operation less efficient, so it is not allowed in KeyConditions. You would want to add that as a FilterExpression instead, which is a convenience parameter to reduce the number of items returned from DynamoDB, but does not impact how the data is read from DynamoDB.
This is how we solved.
-(AWSDynamoDBScanExpression *) prepareScanExpressionWithName:(NSString*)name andValues:(NSArray *)vals {
AWSDynamoDBScanExpression *scanExpression = [AWSDynamoDBScanExpression new];
NSMutableString* filterExpression = [NSMutableString string];
NSMutableDictionary* expression = [NSMutableDictionary dictionary];
for(int i = 0; i < vals.count; i++)
NSString *val = vals[i];
NSString* key = [NSString stringWithFormat:#":val%i",i];
[filterExpression appendString:key];
[expression setObject:val forKey:key];
if (i < vals.count) {
[filterExpression appendString:#","];
}
}
scanExpression.filterExpression = [NSString stringWithFormat:#"#P IN (%#)", filterExpression];
scanExpression.expressionAttributeNames = #{#"#P": name};
scanExpression.expressionAttributeValues = expression;
return scanExpression;
}

How do you sort results of a _View_ by value in the in Couchbase?

So from what I understand in Couchbase is that one can sort keys* by using
descending=true
but in my case I want to sort by values instead. Consider the Twitter data in json format, my question is What it the most popular user mentioned?
Each tweet has the structure of:
{
"text": "",
"entities" : {
"hashtags" : [ ... ],
"user_mentions" : [ ...],
"urls" : [ ... ]
}
So having used MongoDB before I reused the Map function and modified it slightly to be usable in Couchbase as follows:
function (doc, meta) {
if (!doc.entities) { return; }
doc.entities.user_mentions.forEach(
function(mention) {
if (mention.screen_name !== undefined) {
emit(mention.screen_name, null);
}
}
)
}
And then I used the reduce function _count to count all the screen_name occurrences. Now my problem is How do I sort by the count values, rather than the key?
Thanks
The short answer is you cannot sort by value the result of you view. You can only sort by key.
Some work around will be to either:
analyze the data before inserting them into Couchbase and create a counter for the values you are interested by (mentions in your case)
use the view you have to sort on the application size if the size of the view is acceptable for a client side sort.
The following JS code calls a view, sorts the result, and prints the 10 hottest subjects (hashtags):
var http = require('http');
var options = {
host: '127.0.0.1',
port: 8092,
path: '/social/_design/dev_tags/_view/tags?full_set=true&connection_timeout=60000&group=true',
method: 'GET'
}
http.request(
options,
function(res) {
var buf = new Buffer(0);
res.on('data', function(data) {
buf += data;
});
res.on('end', function() {
var tweets = JSON.parse(buf);
var rows = tweets.rows;
rows.sort( function (a,b){ return b.value - a.value }
);
for ( var i = 0; i < 10; i++ ) {
console.log( rows[i] );
}
});
}
).end();
In the same time I am looking at other options to achieve this
I solved this by using a compound key.
function (doc, meta) {
emit([doc.constraint,doc.yoursortvalue]);
}
url elements:
&startkey=["jim",5]&endkey=["jim",10]&descending=true