I have populated a local DynamoDB instance with some values from this JSON
[
{
"Id": 1,
"Type": "fruit",
"Name": "bananas",
"Price": 0.25
},
{
"Id": 2,
"Type": "fruit",
"Name": "apples",
"Price": 0.50
},
{
"Id": 3,
"Type": "fruit",
"Name": "avocados",
"Price": 1.25
},
{
"Id": 4,
"Type": "vegetable",
"Name": "carrots",
"Price": 1.00
} ]
I want to read, say, all the items in this database. I believe querying the database can do this, but for my query input code I have
// main queries the "Products" table of a DynamoDB instance at
// http://localhost:8000 for the item whose key attribute "Id" equals 1 and
// prints the name and price of every item returned.
func main() {
	// Product is the Go shape each returned item is decoded into.
	type Product struct {
		Id    int     `json:"Id"`
		Type  string  `json:"Type"`
		Name  string  `json:"Name"`
		Price float64 `json:"Price"`
	}

	config := &aws.Config{
		Region:   aws.String("us-west-2"),
		Endpoint: aws.String("http://localhost:8000"),
	}
	sess := session.Must(session.NewSession(config))
	svc := dynamodb.New(sess)

	// Key condition: Id EQ 1.
	input := &dynamodb.QueryInput{
		TableName: aws.String("Products"),
		KeyConditions: map[string]*dynamodb.Condition{
			"Id": {
				ComparisonOperator: aws.String("EQ"),
				AttributeValueList: []*dynamodb.AttributeValue{
					{
						N: aws.String("1"),
					},
				},
			},
		},
	}

	result, err := svc.Query(input)
	if err != nil {
		fmt.Println(err.Error())
		return
	}

	var products []Product
	// The unmarshal error was previously assigned but never inspected, so a
	// decode failure would silently print nothing. Report it like the query
	// error above.
	if err = dynamodbattribute.UnmarshalListOfMaps(result.Items, &products); err != nil {
		fmt.Println(err.Error())
		return
	}

	// print the response data
	for _, m := range products {
		fmt.Printf("Product: '%s' (%f)\n", m.Name, m.Price)
	}
}
so far, and it only outputs bananas, since the query is trying to match Id = 1 (Id is my hash primary key).
I tried to modify the keycondition to something like this instead
// Attempted key condition: Id BETWEEN 0 AND 5 on the hash key "Id".
// The Query call rejects this with
// "ValidationException: Query key condition not supported".
"Id": {
ComparisonOperator: aws.String("BETWEEN"),
AttributeValueList: []*dynamodb.AttributeValue{
{
N: aws.String("0"),
},
{
N: aws.String("5"),
},
},
},
in an attempt to read every item, but it says "ValidationException: Query key condition not supported". Am I writing the query statement incorrectly? Or is there some better way of listing items in DynamoDB (for example, if I wanted to list everything with "Type" == "fruit", or output the first 3 items in the database)?
Related
I want to filter data using the DynamoDB QueryInput approach; the problem is filtering on an object inside an array.
I have data with the structure below:
[
{
"Pk": "mimzeslami#gmail.com",
"Sk": "social-shared-goal-2022-09-27 12:29:27",
"Gsi1Pk": "social-shared-goal",
"Gsi1Sk": "2022-09-27 12:29:27",
"Username": "test#gmail.com",
"Goals": [
{
"M": {
"Gsi2Sk": {
"S": "goal-end-2022-11-26 20:30:00"
},
"Gsi1Sk": {
"S": "goal-start-2022-09-27 12:28:47"
},
"Pk": {
"S": "mimzeslami#gmail.com"
},
"Gsi1Pk": {
"S": "mimzeslami#gmail.com"
},
"BaseCategoryId": {
"S": "85j85nachallll9idja"
},
"SubCategoryId": {
"S": "49023842874xhhiayx"
},
"Gsi2Pk": {
"S": "mimzeslami#gmail.com"
}
}
}
]
}
]
I filtered data with Username in this way:
keyCondition := map[string]*dynamodb.Condition{
Gsi1Pk: {
ComparisonOperator: aws.String(Eq),
AttributeValueList: []*dynamodb.AttributeValue{
{
S: aws.String(constants.SocialSharedGoal),
},
},
},
}
var queryFilter = map[string]*dynamodb.Condition{}
if maybeUserName != "" {
queryFilter["Username"] = &dynamodb.Condition{
ComparisonOperator: aws.String("CONTAINS"),
AttributeValueList: []*dynamodb.AttributeValue{
{
S: aws.String(maybeUserName),
},
},
}
}
params := &dynamodb.QueryInput{
KeyConditions: keyCondition,
TableName: aws.String(myTable),
IndexName: aws.String(Gsi1PkGsi1SkIndex),
Limit: &limit,
ScanIndexForward: &ascSort,
QueryFilter: queryFilter,
}
Now I want to filter data by BaseCategoryId and SubCategoryId, which are inside the Goals array, and I don't know how to do that.
I am looking for a way to filter the data along these lines,
for example:
// Sketch of the desired filter: when a base category id is supplied, add a
// CONTAINS condition for it. The attribute name (the map key) is left empty
// because it is the unknown part of the question.
if maybeBaseCategoryId != "" {
queryFilter[""] = &dynamodb.Condition{
ComparisonOperator: aws.String("CONTAINS"),
AttributeValueList: []*dynamodb.AttributeValue{
{S: aws.String(maybeBaseCategoryId),
},
},
}
}
Do we need to specify the hyperparameters both in algorithmHyperParameters and algorithmHyperParameterRanges? If yes, then should we specify a single value (string as per documentation) in algorithmHyperParameters, but a range (integer in case of integer-valued hyperparameter) in algorithmHyperParameterRanges?
For example: Similar-Items recipe has an integer-valued hyperparameter item_id_hidden_dimension. If I use the following solution_config, where item_id_hidden_dimension is specified only in algorithmHyperParameterRanges and not in algorithmHyperParameters, I get the error:
An error occurred (InvalidInputException) when calling the CreateSolution operation: Provide a hyperparameter that is used in the algorithm: arn:aws:personalize:::algorithm/aws-similar-items
"solution_config": {
"algorithmHyperParameters": {},
"hpoConfig": {
"algorithmHyperParameterRanges": {
"integerHyperParameterRanges": [
{
"name": "item_id_hidden_dimension",
"minValue": 30,
"maxValue": 200
}
],
"categoricalHyperParameterRanges": [],
"continuousHyperParameterRanges": []
},
"hpoResourceConfig": {
"maxNumberOfTrainingJobs": "4",
"maxParallelTrainingJobs": "2"
}
}
}
But if I use the following solution_config, where item_id_hidden_dimension is specified both in algorithmHyperParameterRanges and in algorithmHyperParameters, I still get the same error:
An error occurred (InvalidInputException) when calling the CreateSolution operation: Provide a hyperparameter that is used in the algorithm: arn:aws:personalize:::algorithm/aws-similar-items
"solution_config": {
"algorithmHyperParameters": {
"item_id_hidden_dimension": "100"
},
"hpoConfig": {
"algorithmHyperParameterRanges": {
"integerHyperParameterRanges": [
{
"name": "item_id_hidden_dimension",
"minValue": 30,
"maxValue": 200
}
],
"categoricalHyperParameterRanges": [],
"continuousHyperParameterRanges": []
},
"hpoResourceConfig": {
"maxNumberOfTrainingJobs": "4",
"maxParallelTrainingJobs": "2"
}
}
}
This is caused by an error in the documentation. The hyperparameter names should be item_id_hidden_dim and item_metadata_hidden_dim (note they are dim and not dimension as the documentation states).
This can be confirmed by calling the DescribeRecipe API to get the algorithmArn for the Similar-Items recipe and then calling the DescribeAlgorithm API to get details on the algorithm.
import boto3
import json
# Create a Personalize client and describe the Similar-Items recipe.
personalize = boto3.client('personalize')
response = personalize.describe_recipe(recipeArn = 'arn:aws:personalize:::recipe/aws-similar-items')
# default=str makes json.dumps fall back to str() for values it cannot serialize itself.
print(json.dumps(response['recipe'], indent=2, default=str))
{
"name": "aws-similar-items",
"recipeArn": "arn:aws:personalize:::recipe/aws-similar-items",
"algorithmArn": "arn:aws:personalize:::algorithm/aws-similar-items",
"featureTransformationArn": "arn:aws:personalize:::feature-transformation/similar-items",
"status": "ACTIVE",
"description": "Predicts items similar to a given item based on co-occurrence of items in the user-item interactions dataset and item metadata in the item dataset.",
"creationDateTime": "2019-06-10 00:00:00+00:00",
"recipeType": "RELATED_ITEMS",
"lastUpdatedDateTime": "2022-08-17 00:25:42.935000+00:00"
}
# Take the algorithm ARN from the recipe description and describe that algorithm.
algo_arn = response['recipe']['algorithmArn']
response = personalize.describe_algorithm(algorithmArn = algo_arn)
# default=str makes json.dumps fall back to str() for values it cannot serialize itself.
print(json.dumps(response['algorithm'], indent=2, default=str))
{
"name": "aws-similar-items",
"algorithmArn": "arn:aws:personalize:::algorithm/aws-similar-items",
"algorithmImage": {
"name": "Item Similarity"
},
"defaultHyperParameters": {
"item_id_hidden_dim": "100",
"item_metadata_hidden_dim": "100"
},
"defaultHyperParameterRanges": {
"integerHyperParameterRanges": [
{
"name": "item_id_hidden_dim",
"minValue": 30,
"maxValue": 200,
"isTunable": true
},
{
"name": "item_metadata_hidden_dim",
"minValue": 30,
"maxValue": 200,
"isTunable": true
}
],
"continuousHyperParameterRanges": [],
"categoricalHyperParameterRanges": []
},
"defaultResourceConfig": {
"maxNumberOfTrainingJobs": "20",
"maxParallelTrainingJobs": "5"
},
"trainingInputMode": "File",
"creationDateTime": "2019-06-10 00:00:00+00:00",
"lastUpdatedDateTime": "2022-08-17 00:24:41.307000+00:00"
}
Note the hyperparameter names in the last response above.
We will get this error fixed in the documentation ASAP.
My data is stored in a table "mysettings" in DynamoDB with the following schema : "pk" as hash key and "sk" as range key
Example data item:
{
"mSettings": {
"gflag": true,
"name": "green",
"type": "B"
},
"pk": "Group1",
"sk": "A1000",
"xSettings": {
"gflag": false,
"name": "blue",
"type": "A"
},
"ySettings": {
"gflag": false,
"name": "green",
"type": "B"
},
"zSettings": {
"gflag": true,
"name": "red",
"type": "A"
}
}
Now, here I am trying to fetch(filter) only those settings where my gflag field is true.
Like for above example item, it should return only "mSettings" & "zSettings" nodes.
Below is my tried sample code:
var AWS = require('aws-sdk');

let region = "us-east-1";
AWS.config.region = region;
var docClient = new AWS.DynamoDB.DocumentClient();

// Names of the nested settings objects stored on each item.
let settingsItem = ["mSettings", "xSettings", "ySettings", "zSettings"];

// Query the "mysettings" table for the single item identified by pk + sk.
let params = {
  TableName: "mysettings",
  KeyConditionExpression: "#pk = :pk AND #sk = :sk",
  ExpressionAttributeNames: {
    "#pk": "pk",
    "#sk": "sk"
  },
  // FilterExpression : "ySettings.gflag = :gflag",
  ExpressionAttributeValues: {
    ":pk": 'Group1',
    ":sk": 'A1000'
    // ":gflag" : true
    // ^ Only supply :gflag together with a FilterExpression that references it:
    //   DynamoDB rejects requests whose ExpressionAttributeValues contain a
    //   value that no expression uses.
  }
};

//for(let settings in settingsItem){
//  params['FilterExpression'] = settingsItem[settings] + ".gflag = :gflag";
//}

console.log('params = ', params)

docClient.query(params, function(err, data){
  if(err){
    console.log(err)
    // Stop here: there is no result to print when the query failed.
    return
  }
  console.log('data = ', data)
})
Kindly suggest to retrieve only those objects where flag value is true.
DynamoDB filters operate at an item level. They determine whether or not an item is returned to you. They can't be used to control which groups of attributes are returned to you. You can easily do that client-side with the query results.
Here's an example of post-processing the query results object to remove the sub-objects where gflag is not true:
const _ = require('lodash');
/**
 * Predicate for _.pickBy: keeps an entry only when its value is an object
 * with a truthy `gflag` property.
 *
 * @param {*} value - the attribute value being tested
 * @param {string} key - the attribute name (unused)
 * @returns {*} false for non-objects, otherwise the value's `gflag`
 */
function gflag_true(value, key) {
  if (!_.isObject(value)) {
    return false;
  }
  return value.gflag;
}
const item = {
"mSettings": { "gflag": true, "name": "green", "type": "B" },
"pk": "Group1",
"sk": "A1000",
"xSettings": { "gflag": false, "name": "blue", "type": "A" },
"ySettings": { "gflag": false, "name": "green", "type": "B" },
"zSettings": { "gflag": true, "name": "red", "type": "A" }
}
// transform item returned from DocumentClient query method
const rc = _.pickBy(item, gflag_true)
This results in:
{
mSettings: { gflag: true, name: 'green', type: 'B' },
zSettings: { gflag: true, name: 'red', type: 'A' }
}
I have a JSON file containing three objects. The 2nd and 3rd objects are missing some fields that I need, and I want to fill those missing fields with my own default values. My code is below.
This is what I have tried so far:
# Load the list of records from final.json.
with open("final.json") as data1:
a = json.load(data1)
final = []
# Build one flattened dict (AppName / AppId / Health / place1 / place2) per record.
for item in a:
d = {}
d["AppName"]= item["name"]
d["AppId"] = item["id"]
# NOTE(review): direct indexing raises KeyError for records that have no "health" key.
d["Health"] = item["health"]
# NOTE(review): "cities" is an object in the sample input, not a list, so the [0]
# lookup raises KeyError; records without "cities" raise KeyError as well.
d["place1"] = item["cities"][0]["place1"]
d["place2"] = item["cities"][0]["place2"]
# NOTE(review): d is never appended to final, so final is still [] at this point.
print(final)
Error: I am getting a KeyError.
My Input JSON file has similar data:
[{
"name": "python",
"id": 1234,
"health": "Active",
"cities": {
"place1": "us",
"place2": "newyork"
}
},
{
"name": "java",
"id": 2345,
"health": "Active"
}, {
"name": "python",
"id": 1234
}
]
I am expecting output:
[{
"name": "python",
"id": 1234,
"health": "Active",
"cities": {
"place1": "us",
"place2": "newyork"
}
},
{
"name": "java",
"id": 2345,
"health": "Null",
"cities": {
"place1": "0",
"place2": "0"
}
}, {
"name": "python",
"id": 1234,
"health": "Null",
"cities": {
"place1": "0",
"place2": "0"
}
}
]
I see two issues with the code that you have posted.
First, you are referring to the 'cities' field in your input JSON as if it is a list when it is, in fact, an object.
Second, to handle JSON containing objects which may be missing certain fields, you should use the Python dictionary get method. This method takes a key and an optional value to return if the key is not found (default is None).
# Flatten each record, substituting defaults for fields that are missing.
for item in a:
    d = {}
    d["AppName"] = item["name"]
    d["AppId"] = item["id"]
    # dict.get returns the given default instead of raising KeyError.
    d["Health"] = item.get("health", "Null")
    # "cities" is an object; fall back to an empty dict when it is absent so the
    # place lookups below yield their "0" defaults.
    cities = item.get("cities", {})
    d["place1"] = cities.get("place1", "0")
    d["place2"] = cities.get("place2", "0")
    # Collect the record; without this the `final` list stays empty.
    final.append(d)
In my project I store data in an ActiveRecord model and index HTML documents in Elasticsearch using the mapper-attachments plugin. My document mapping looks like this:
include Elasticsearch::Model
# Index settings (5 shards) and the field mappings for this model.
settings index: { number_of_shards: 5 } do
mappings do
indexes :alerted
# title and summary use the 'english' analyzer with index_options 'offsets'.
indexes :title, analyzer: 'english', index_options: 'offsets'
indexes :summary, analyzer: 'english', index_options: 'offsets'
# content is mapped as an 'attachment': the author and date sub-fields are not
# indexed; the content sub-field is a stored string with term vectors
# (positions and offsets).
indexes :content, type: 'attachment', fields: {
author: { index: "no"},
date: { index: "no"},
content: { store: "yes",
type: "string",
term_vector: "with_positions_offsets"
}
}
end
end
I run a query to double check my doc mapping and the result:
"mappings": {
"feed_entry": {
"properties": {
"content": {
"type": "attachment",
"path": "full",
"fields": {
"content": {
"type": "string",
"store": true,
"term_vector": "with_positions_offsets"
},
It works great (the type: 'attachment' above). I can do the search through html doc perfectly.
I have a performance problem with ActiveRecord (backed by MySQL), and I don't really need to store this data in the database, so I decided to migrate to storing it in Elasticsearch.
I am doing an experiment with elasticsearch-persistence gem.
I configure the mapping as below:
include Elasticsearch::Persistence::Model

# Persisted attributes and their per-field mapping options.
attribute :alert_id, Integer
attribute :title, String, mapping: { analyzer: 'english' }
attribute :url, String, mapping: { analyzer: 'english' }
attribute :summary, String, mapping: { analyzer: 'english' }
attribute :alerted, Boolean, default: false, mapping: { analyzer: 'english' }
attribute :fingerprint, String, mapping: { analyzer: 'english' }
attribute :feed_id, Integer
attribute :keywords
# content is declared as an 'attachment' with the same sub-field options as the
# Elasticsearch::Model mapping. The `mapping:` hash was previously left
# unclosed; the final brace below closes it.
attribute :content, nil, mapping: {
  type: 'attachment',
  fields: {
    author: { index: "no" },
    date: { index: "no" },
    content: {
      store: "yes",
      type: "string",
      term_vector: "with_positions_offsets"
    }
  }
}
But when I query the mapping, I get something like this:
"mappings": {
"entry": {
"properties": {
"content": {
"properties": {
"_content": {
"type": "string"
},
"_content_type": {
"type": "string"
},
"_detect_language": {
"type": "boolean"
},
which is wrong. Can anyone tell me how to define a mapping with the attachment type?
Really appreciate your help.
In the mean time, I have to hard-code it this way:
# Deletes the FeedEntry index (any error raised by the delete is ignored) and
# creates it again with an explicit mapping for the FeedEntry type, including
# the 'attachment' mapping for the content field.
def self.recreate_index!
  type_mapping = {
    properties: {
      alerted: { type: "boolean" },
      # not_analyzed, for exact match
      title: { index: "not_analyzed", type: "string" },
      url: { index: "not_analyzed", type: "string" },
      summary: { analyzer: "english", index_options: "offsets", type: "string" },
      content: {
        type: "attachment",
        fields: {
          author: { index: "no" },
          date: { index: "no" },
          content: {
            store: "yes",
            type: "string",
            term_vector: "with_positions_offsets"
          }
        }
      }
    }
  }
  mappings = { FeedEntry::ELASTIC_TYPE_NAME => type_mapping }
  options = { index: FeedEntry::ELASTIC_INDEX_NAME }

  # Best-effort delete: swallow whatever the delete raises.
  begin
    self.gateway.client.indices.delete(options)
  rescue StandardError
    nil
  end

  self.gateway.client.indices.create(options.merge(body: { mappings: mappings }))
end
And then override the to_hash method
# Returns the as_json representation of this record; unless the record is
# alerted, its "content" entry is first replaced by the attachment payload
# built in map_attachment.
def to_hash(options={})
  document = self.as_json
  map_attachment(document) unless self.alerted
  document
end
# Replaces hash["content"] with an attachment payload whose _content is the
# Base64-encoded content of this record, then returns the same hash.
def map_attachment(hash)
  encoded_content = Base64.encode64(self.content)
  hash["content"] = {
    _detect_language: false,
    _language: "en",
    _indexed_chars: -1,
    _content_type: "text/html",
    _content: encoded_content
  }
  hash
end
Then I have to call
FeedEntry.recreate_index!
beforehand to create the mapping for Elasticsearch. Be careful: when you update the document, you might end up with double Base64 encoding of the content field. In my scenario, I checked the alerted field.