CouchDB calculate sums and averages of group - mapreduce

I have a bunch of documents like this:
{
"schema": "property",
"schema_version": 1,
"name": "test foobar",
"account": "1969360",
"web_data": {
"conversion_rate": 1.49,
"average_order_value": 123,
"visitor_count": 25000,
"visits_count": 35000,
"revenue": 50000
}
}
There could be multiple documents with the same account.
Now I need to create a MapReduce view that calculates, for each group of documents sharing the same account, the average of web_data.conversion_rate and web_data.average_order_value and the sum of web_data.visitor_count, web_data.visits_count and web_data.revenue.
Currently my map function emits a key array like this:
['10449266', 'test foobar']
where the first item is the account and the second is the name.
The emitted value is just web_data.
I would like to have output like this:
GET /list_properties?group_level=1
{
rows: [
{
key: [
"10449266"
],
value: {
"property_count": 2,
"conversion_rate": 1.8625,
"average_order_value": 153.75,
"visitor_count": 31250,
"visits_count": 43750,
"revenue": 62500
}
},
{
key: [
"66294401"
],
value: { ... }
},
...
}
How could I achieve this?
Pretty complex pattern, huh?
I've tried so many reduce functions to do this, I'm totally exhausted now. This is one I tried to get started:
// Reduce attempt: folds `values` into one result by adding each element's
// web_data.revenue to an accumulator. `keys` and `rereduce` are not used.
function (keys, values, rereduce) {
// No initial value is passed to Array.prototype.reduce, so the first element of
// `values` itself becomes the starting accumulator `a`, and a one-element array
// is returned unchanged. If the elements are objects, the result is therefore
// not a plain numeric sum.
return values.reduce(function(a, b){
return a + b.web_data.revenue;
});
}
That doesn't work, as it returns an object instead of a number, and I have no idea why. It sums up numbers and yet returns an object?
EDIT:
I updated reduce function to this:
// Reduce attempt that returns the summed revenue wrapped in an object,
// with a separate branch for the rereduce pass. `keys` is not used.
function (keys, values, rereduce) {
if(rereduce){
// NOTE(review): on rereduce the incoming `values` array is returned as-is, with
// no summing. Presumably its elements are results of earlier reduce calls
// (the { revenue } objects built below) that still need combining; confirm.
return values;
}else{
// Sum web_data.revenue over all values, starting the accumulator at 0.
var revenue = values.reduce(function(a, b){
return a + b.web_data.revenue;
}, 0);
// Wrap the total in an object keyed by `revenue`.
return {
revenue : revenue
};
}
}
Now I get response like this:
{
"key": [
"2035864"
],
"value": [
[
{
"revenue": 0
},
{
"revenue": 66321.2
},
{
"revenue": 5319.35
}
],
[
{
"revenue": 0
}
],
[
{
"revenue": 45432.02
}
],
[
{
"revenue": 185732.78
}
]
]
}
Now if I change `return values` to `return values[0]` in the rereduce part, I get a response like:
{
"key": [
"2035864"
],
"value": {
"revenue": 0
}
}
NONSENSE AGAIN!
Everything I want is just:
{
"key": [
"2035864"
],
"value": {
"revenue": 302805.35
}
}
JavaScript makes this so easy to do in the browser, but I don't understand why CouchDB wants to do things as weirdly as this.

Related

Access an Array Item by index in AWS Dynamodb Query Results "Items" in Step Function

I have this dynamodb:Query in my step function:
{
"Type": "Task",
"Resource": "arn:aws:states:::aws-sdk:dynamodb:query",
"Next": "If nothing returned by query Or Study not yet Zipped",
"Parameters": {
"TableName": "TEST-StudyProcessingTable",
"ScanIndexForward": false,
"Limit": 1,
"KeyConditionExpression": "OrderID = :OrderID",
"FilterExpression": "StudyID = :StudyID",
"ExpressionAttributeValues": {
":OrderID": {
"S.$": "$.body.order_id"
},
":StudyID": {
"S.$": "$.body.study_id"
}
}
},
"ResultPath": "$.processed_files"
}
The results come in as an array called Items, which is nested under my ResultPath
processed_files.Items:
{
"body": {
"order_id": "1001",
"study_id": "1"
},
"processed_files": {
"Count": 1,
"Items": [
{
"Status": {
"S": "unzipped"
},
"StudyID": {
"S": "1"
},
"ZipFileS3Key": {
"S": "path/to/the/file"
},
"UploadSet": {
"S": "4"
},
"OrderID": {
"S": "1001"
},
"UploadSet#StudyID": {
"S": "4#1"
}
}
],
"LastEvaluatedKey": {
"OrderID": {
"S": "1001"
},
"UploadSet#StudyID": {
"S": "4#1"
}
},
"ScannedCount": 1
}
}
My question is: how do I access the items inside this array from a Choice state in a step function?
I need to query then decide something based on the results by checking the item in a condition in a choice state.
The problem is that since this is an array I can't access it using regular JsonPath (like with Items.item), and in my next step the choice condition does NOT accept an index like processed_files.Items['0'].Status
OK, so the answer was simple: all you need to do is use a number instead of a string for the array index, like this:
processed_files.Items[0].Status
I was originally misled by an error I received, which said that it expected a ' or '[' after the first '['. I mistakenly thought this meant it only accepts strings.
I was wrong, it works like any other array.
I hope this helps somebody one day.

Must query in Should query clause in Elastic Search Query

I have a scenario where I need a must clause inside the should clause of an Elasticsearch query.
E.g. I need to filter data so that it returns only orders that have dispatch area ID 10 and carrier IDs 1, 2 or 3, but it should also pull data for all orders having driver IDs 1, 2 or 3.
In my current scenario it is pulling data for all orders with carrier IDs 1, 2 or 3 or dispatch area ID 10, including orders whose dispatch area is not 10, i.e. if carrier ID 1 has dispatch area ID 9, that data is also returned.
How can I add a must clause inside a should query?
{
"from": 0,
"size": 10000,
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
[
{
"terms": {
"dispatchAreaId": [
10
]
}
},
{
"terms": {
"carrierId": [
1,2,3
]
}
},
{
"terms": {
"driverIds": [
1,2,3
]
}
}
]
]
}
}
]
}
}
}
Try the query below:
{
"from": 0,
"size": 10000,
"query": {
"bool": {
"should": [
{
"terms": {
"driverIds": [
1,
2,
3
]
}
},
{
"bool": {
"must": [
{
"terms": {
"dispatchAreaId": [
10
]
}
},
{
"terms": {
"carrierId": [
1,
2,
3
]
}
}
]
}
}
],
"minimum_should_match": 1
}
}
}
Hope this helps!!

How do I get only the element values that match in the list in the Elastic Search?

[Hi, there]
I want to create an ES query that only retrieves certain elements that match in the list.
Here is my ES index schema.
"test-es-2018":{
"aliases": {},
"mappings": {
"test-1": {
"properties": {
"categoryName": {
"type": "keyword",
"index": false
},
"genDate": {
"type": "date"
},
"docList": {
"properties": {
"rank": {
"type": "integer",
"index": false
},
"doc-info": {
"properties": {
"docId": {
"type": "keyword"
},
"docName": {
"type": "keyword",
"index": false
},
}
}
}
},
"categoryId": {
"type": "keyword"
},
}
}
}
}
There are documents listed in the category. Documents in the list have their own information.
*search query in Kibana.
source": {
"categoryName" : "food" ,
"genDate" : 1577981646638,
"docList" [
{
"rank": 2,
"doc-info": {...}
},
{
"rank": 1,
"doc-info": {...}
},
{
"rank": 5,
"doc-info": {...}
},
],
"categoryId": "201"
}
First, I want to get only the element value that match in the list.
I would like to see only documents with rank 1 in the list. However, if I query using match as below, the result is the same as *search query in kibana.
*match query in Kibana.
GET test-es-2018/_search
{
"query": {
"bool": {
"must": [
{ "match": { "docList.rank": 1 } },
]
}
}
}
In my opinion, it seems to print the entire list because it contains a document with rank one.
What I want is:
source": {
"categoryName" : "food" ,
"genDate" : 1577981646638,
"docList" [
{
"rank": 1,
"doc-info": {...}
},
],
"categoryId": "201"
}
Is this possible?
Second, I want to sort the docList by rank. I tried sorting by creating a query like the following, but it was not sorted.
*sort query in Kibana.
GET test-es-2018/_search?
{
"query" : {
"bool" : {...}
},
"sort" : [
{
"docList.rank" : {
"order" : "asc"
}
}
]
}
What I want is:
source": {
"categoryName" : "food" ,
"genDate" : 1577981646638,
"docList" [
{
"rank": 1,
"doc-info": {...}
},
{
"rank": 2,
"doc-info": {...}
},
{
"rank": 5,
"doc-info": {...}
},
],
"categoryId": "201"
}
I do not know how to access the list. Is there a good idea for both of these issues?
In general you could use source filtering to retrieve only part of the document, but this way it is not possible to exclude some fields based on their values.
As far as I know, Elasticsearch doesn't support changing the order of field values in the _source. The desired result can partly be achieved by using nested fields along with an inner_hits -> sort query expression. This way, sorted sub-hits will be returned in the inner_hits section of the response.
P.S. Typically, when working with Elasticsearch, you should consider an indexed document as the smallest indivisible search unit.

How can I exclude results from elasticsearch based on the contents of a field?

I'm using elasticsearch on AWS to store logs from Cloudfront. I have created a simple query that will give me all entries from the past 24h, sorted from new to old:
{
"from": 0,
"size": 1000,
"query": {
"bool": {
"must": [
{ "match": { "site_name": "some-site" } }
],
"filter": [
{
"range": {
"timestamp": {
"lt": "now",
"gte": "now-1d"
}
}
}
]
}
},
"sort": [
{ "timestamp": { "order": "desc" } }
]
}
Now, there are certain sources (based on the user agent) for which I would like to exclude results. So my question boils down to this:
How can I filter out entries from the results when a certain field contains a certain string? Or:
query.filter.where('cs_user_agent').does.not.contain('Some string')
(This is not real code, obviously.)
I have tried to make sense of the Elasticsearch documentation, but I couldn't find a good example of how to achieve this.
I hope this makes sense. Thanks in advance!
Okay, I figured it out. What I've done is use a Bool Query in combination with a wildcard:
{
"from": 0,
"size": 1000,
"query": {
"bool": {
"must": [
{ "match": { "site_name": "some-site" } }
],
"filter": [
{
"range": {
"timestamp": {
"lt": "now",
"gte": "now-1d"
}
}
}
],
"must_not": [
{ "wildcard": { "cs_user_agent": "some string*" } }
]
}
},
"sort": [
{ "timestamp": { "order": "desc" } }
]
}
This basically matches any user agent string starting with "some string" (use "*some string*" to match it anywhere in the value), and then filters it out (because of the "must_not").
I hope this helps others who run into this problem.
Node.js client version:
const { from, size, value, tagsIdExclude } = req.body;

// Ids to leave out of the results; a missing (falsy) list excludes nothing.
const excludedIds = tagsIdExclude || [];

// Wildcard clause: the tag name must contain `value` anywhere.
const nameContains = {
  wildcard: {
    name: {
      value: `*${value}*`,
      boost: 1.0,
      rewrite: 'constant_score',
    },
  },
};

// Filter clause: drop every tag whose id appears in the exclusion list.
const withoutExcluded = {
  bool: {
    must_not: [{ terms: { id: excludedIds } }],
  },
};

// Run the paginated search against the tags index.
const { body } = await elasticWrapper.client.search({
  index: ElasticIndexs.Tags,
  body: {
    from,
    size,
    query: {
      bool: {
        must: nameContains,
        filter: withoutExcluded,
      },
    },
  },
});

Pass a list to Elasticsearch query template

I am trying to pass a list of parameters to a search query (filter by terms) in Elasticsearch. It works when it's not in a template, just in a query:
"terms": {
"speaker": ["HAMLET", "KING HENRY IV"]
}
I've put it into the template like this:
"terms": {
"{{filter1}}": "{{filter1_val}}"}
}
And then call it like this:
GET shakespeare/_search/template
{
"id":"template",
"params": {
"filter1": "speaker",
"filter_value1": ["HAMLET", "KING HENRY IV"]
}
}
And I get the following error:
{
"error": {
"root_cause": [
{
"type": "parsing_exception",
"reason": "[terms] query does not support [speaker]",
"line": 1,
"col": 98
}
],
"type": "parsing_exception",
"reason": "[terms] query does not support [speaker]",
"line": 1,
"col": 98
},
"status": 400
}
I have tried adding brackets to the template itself like "{{filter1}}": [{{filter1_val}}], adding and removing quotes, and passing the parameter in the form of "[\"HAMLET\", \"KING HENRY IV\"]", but none of this worked.
What am I doing wrong? What is the right way to do this? Any suggestions are welcome.
Thank you!
Found the solution here:
https://www.elastic.co/guide/en/elasticsearch/reference/1.6/search-template.html#_passing_an_array_of_strings
Passing an array of strings
GET /_search/template
{
"template": {
"query": {
"terms": {
"status": [
"{{#status}}",
"{{.}}",
"{{/status}}"
]
}
}
},
"params": {
"status": [ "pending", "published" ]
}
}
which is rendered as:
{
"query": {
"terms": {
"status": [ "pending", "published" ]
}
}