I'm writing a view that returns this:
[
{
"field_id" : 1,
"stock" : [
{
"size" : "M",
"total" : 3
}
],
"reserved" : [
{
"size" : "M",
"total" : 1
}
]
},
{
"field_id" : 2,
"stock" : [
{
"size" : "M",
"total" : 2
},
{
"size" : "PP",
"total" : 2
}
],
"reserved" : [
{
"size" : "PP",
"total" : 1
},
{
"size" : "M",
"total" : 2
}
]
}
]
For this result, I used values and annotation(django orm):
reserved = Reserved.objects.all().values("size").annotate(total=Count("size")).order_by("total")
stock = Stock.objects.filter(amount=0).values('size').annotate(total=Count('size')).order_by('total')
It's OK for me, but I would like to put the reserved queryset inside stock, like this:
[
{
"field_id" : 1,
"stock" : [
{
"size" : "M",
"total" : 3,
"reserved": 1
}
]
},
{
"field_id" : 2,
"stock" : [
{
"size" : "M",
"total" : 2,
"reserved": 1
},
{
"size" : "PP",
"total" : 2,
"reserved": 0
}
]
}
]
Is it possible? Reserved and Stock don't have a relationship.
Related
I have the following data
{
"companyID" : "amz",
"companyType" : "ret",
"employeeID" : "ty-5a62fd78e8d20ad"
},
{
"companyID" : "amz",
"companyType" : "ret",
"employeeID" : "ay-5a62fd78e8d20ad"
},
{
"companyID" : "mic",
"companyType" : "cse",
"employeeID" : "by-5a62fd78e8d20ad"
},
{
"companyID" : "ggl",
"companyType" : "cse",
"employeeID" : "ply-5a62fd78e8d20ad"
},
{
"companyID" : "ggl",
"companyType" : "cse",
"employeeID" : "wfly-5a62ad"
}
I want the following result — basically a count per combination of values, like mic-cse, ggl-cse, amz-ret.
"agg_by_company_type" : {
"buckets" : [
{
"key" : "ret",
"doc_count" : 1
},
{
"key" : "cse",
"doc_count" : 2
}
]
}
How do I do it?
I have tried the following aggregations:
"agg_by_companyID_topHits": {
"terms": {
"field": "companyID.keyword",
"size": 100000,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": true,
"order": {
"_key": "asc"
}
},
"aggs": {
"agg_by_companyType" : {
"top_hits": {
"size": 1,
"_source": {
"includes": ["companyType"]
}
}
}
}
}
But this just gives me the first groupBy of companyID; now on top of that data I want the count of companyType.
this is the response I get
"agg_by_companyID_topHits" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "amz",
"doc_count" : 2,
"doc_count_error_upper_bound" : 0,
"agg_by_companytype" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 0.0,
"hits" : [
{
"_index" : "my-index",
"_type" : "_doc",
"_id" : "uytuygjuhg",
"_score" : 0.0,
"_source" : {
"companyType" : "ret"
}
}
]
}
}
},
{
"key" : "mic",
"doc_count" : 1,
"doc_count_error_upper_bound" : 0,
"agg_by_companytype" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.0,
"hits" : [
{
"_index" : "my-index",
"_type" : "_doc",
"_id" : "uytuygjuhg",
"_score" : 0.0,
"_source" : {
"companyType" : "cse"
}
}
]
}
}
},
{
"key" : "ggl",
"doc_count" : 2,
"doc_count_error_upper_bound" : 0,
"agg_by_companytype" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 0.0,
"hits" : [
{
"_index" : "my-index",
"_type" : "_doc",
"_id" : "uytuygjuhg",
"_score" : 0.0,
"_source" : {
"companyType" : "cse"
}
}
]
}
}
}
]
}
If it were spark, it would be simple to partition by companyID, group it and then group by companyType and count to get the desired result but not sure how to do it in ES.
Important Note: I am working with Opensearch.
Possible solution for this in elastic search multi-terms-aggregation
is not available in versions before v7.12.
So wondering how it was done before this feature in ES.
We came across this issue because AWS migrated from ES to Opensearch.
Use the multi_terms agg; docs here:
GET /products/_search
{
"aggs": {
"genres_and_products": {
"multi_terms": {
"terms": [{
"field": "companyID"
}, {
"field": "companyType"
}]
}
}
}
}
You can use a script in the terms agg, like this:
GET b1/_search
{
"aggs": {
"genres": {
"terms": {
"script": {
"source": "doc['companyID'].value+doc['companyType'].value",
"lang": "painless"
}
}
}
}
}
This is what my data looks like
"age" : "5-6",
"gender" : "male",
"id" : 3 ,
"userType" : "dormant",
"location" : "560101",
"status" : "completed",
"subject" : "hindi",
"score" : 100,
"date" : "2021-06-01"
}
I have multiple entries of such data for different user IDs. I want to calculate the average score per user for a particular day, week, or year.
This is what i have written till now :
POST /worksheetdata/_search
{
"aggs": {
"yearlydata": {
"date_histogram": {
"field": "date",
"calendar_interval":"year"
, "extended_bounds": {
"min": "2020",
"max": "2021"
}
}
,
"aggs": {
"userId": {
"terms": {
"field": "id"
}
,
"aggs": {
"avgScore": {
"avg": {
"field": "score"
}
}
}
}
}
}
}
}
I am able to obtain the average score per user in a bucket for a particular year:
"aggregations" : {
"yearlydata" : {
"buckets" : [
{
"key_as_string" : "2020-01-01T00:00:00.000Z",
"key" : 1577836800000,
"doc_count" : 0,
"userId" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [ ]
}
},
{
"key_as_string" : "2021-01-01T00:00:00.000Z",
"key" : 1609459200000,
"doc_count" : 28,
"userId" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 2,
"doc_count" : 14,
"avgScore" : {
"value" : 43.714285714285715
}
},
{
"key" : 1,
"doc_count" : 8,
"avgScore" : {
"value" : 54.0
}
},
{
"key" : 3,
"doc_count" : 6,
"avgScore" : {
"value" : 100.0
}
}
]
}
}
]
}
}
}
Now how can I find the average over the userId buckets and add it to the userId object?
POST /worksheetdata/_search
{
"aggs": {
"yearlydata": {
"date_histogram": {
"field": "date",
"calendar_interval":"year"
, "extended_bounds": {
"min": "2020",
"max": "2021"
}
}
,
"aggs": {
"userId": {
"terms": {
"field": "id"
}
,
"aggs": {
"avgScore": {
"avg": {
"field": "score"
}
}
}
},
"avgScore" :{
"avg_bucket": {
"buckets_path": "userId>avgScore"
}
}
}
}
}
}
In the below JSON file I want to filter exactly the decimal values in the child array of text. While executing the sample code, the output shows all the JSON values instead of the filtered value.
sample json file:
{
"_id" : ObjectId("01"),
"project_id" : "100",
"snapshot" : "Symbol",
"obs" : {
"land" : {
"b2" : {
"points" : [
"p1",
"p2"
]
},
"points" : {
"p1" : {
"position" : [
123.00000000,
123.00000000
]
},
"p2" : {
"position" : [
1235.896523
]
},
"text" : {
"t2" : {
"box" : [
[
123.0,
3361.0
],
[
117,
60
],
0.0
],
"value" : "813"
},
"t3" : {
"box" : [
[
1260.76745605469,
726.63720703125
],
[
51.4486427307129,
88.5970306396484
],
-36.2538375854492
],
"value" : "27.06"
}
}
sample code:
db.getCollection('obs').aggregate([{$match: {project_id: "100",snapshot : "Symbol"}},
{$addFields:{
"obs.text":{
$arrayToObject:{
$filter:{
input:{$objectToArray:"$obs.text"},
cond:{
$regexMatch:{
input:"$$this.k",
regex:/t/
}
}
}
}
}
}}
])
Expected output:
"_id" : ObjectId("01"),
"project_id" : "100",
"snapshot" : "Symbol",
"obs" : {
"text" : {
"t3" : {
"value" : "27.06"
}
}
}
Consider the following example:
db.article.aggregate(
{ $group : {
_id : "$author",
docsPerAuthor : { $sum : 1 },
viewsPerAuthor : { $sum : "$pageViews" }
}}
);
This groups by the author field and computes two fields.
I have values for $author = FirstName_LastName.
Now instead of grouping by $author, I want to group by all authors who share the same LastName.
I tried $regex to group by all matching strings after the '_'
$author.match(/_[a-zA-Z0-9]+$/)
db.article.aggregate(
{ $group : {
_id : "$author".match(/_[a-zA-Z0-9]+$/),
docsPerAuthor : { $sum : 1 },
viewsPerAuthor : { $sum : "$pageViews" }
}}
);
also tried the following:
db.article.aggregate(
{ $group : {
_id : {$author: {$regex: /_[a-zA-Z0-9]+$/}},
docsPerAuthor : { $sum : 1 },
viewsPerAuthor : { $sum : "$pageViews" }
}}
);
Actually there is no method which provides this kind of functionality, or I could not find the version which contains it. It will not work with $regex, I think: http://docs.mongodb.org/manual/reference/operator/regex/ — it is just for pattern matching.
There is an improvement request in the jira : https://jira.mongodb.org/browse/SERVER-6773
It is in open unresolved state.
BUT
in github i found this disscussion: https://github.com/mongodb/mongo/pull/336
And if you check this commit: https://github.com/nleite/mongo/commit/2dd175a5acda86aaad61f5eb9dab83ee19915709
it contains more or less exactly the method you would like to have. I do not really get the state of this improvement: in 2.2.3 it is not working.
Use mapReduce: it is the general form of aggregation. This is how to proceed in mongo shell:
Define the map function
// Map step: emit one record per article, keyed by the "_LastName"
// suffix of the author field ("FirstName_LastName").
var mapFunction = function() {
    // match() returns an array; [0] is the matched "_LastName" suffix.
    // NOTE(review): this throws if an author has no '_' suffix — assumed
    // every document follows the FirstName_LastName convention.
    var key = this.author.match(/_[a-zA-Z0-9]+$/)[0];
    // pageViews is a plain numeric field (the question aggregates it with
    // $sum: "$pageViews"), so emit it directly rather than via Array.sum.
    var value = {
        docsPerAuthor: 1,
        viewsPerAuthor: this.pageViews
    };
    emit( key, value );
};
and the reduce function:
// Reduce step: sum the per-author counters for one key.
// The reduced value must have exactly the same shape as the map-emitted
// values ({docsPerAuthor, viewsPerAuthor}) because MongoDB may re-invoke
// reduce on its own output; the original's extra `_id: key` field broke
// that contract.
var reduceFunction = function(key, values) {
    var reducedObject = {
        docsPerAuthor: 0,
        viewsPerAuthor: 0
    };
    values.forEach( function(value) {
        reducedObject.docsPerAuthor += value.docsPerAuthor;
        reducedObject.viewsPerAuthor += value.viewsPerAuthor;
    }
    );
    return reducedObject;
};
run mapReduce and save the result in map_reduce_result
>db.st.mapReduce(mapFunction, reduceFunction, {out:'map_reduce_result'})
query map_reduce_result to have the result
>db.map_reduce_result.find()
A possible workaround with the aggregation framework consists in using $project to compute the author name. However, it is dirty as you need to manually loop through the different first name sizes:
Here, we compute the field name as the substring after the '_' character, trying each of its possible position (this is why there is a chain of $cond), and fallbacking in returning the whole $author if the first name is too long:
http://mongotry.herokuapp.com/#?bookmarkId=52fb5f24a0378802003b4c68
[
{
"$project": {
"author": 1,
"pageViews": 1,
"name": {
"$cond": [
{
"$eq": [
{
"$substr": [
"$author",
0,
1
]
},
"_"
]
},
{
"$substr": [
"$author",
1,
999
]
},
{
"$cond": [
{
"$eq": [
{
"$substr": [
"$author",
1,
1
]
},
"_"
]
},
{
"$substr": [
"$author",
2,
999
]
},
{
"$cond": [
{
"$eq": [
{
"$substr": [
"$author",
2,
1
]
},
"_"
]
},
{
"$substr": [
"$author",
3,
999
]
},
{
"$cond": [
{
"$eq": [
{
"$substr": [
"$author",
3,
1
]
},
"_"
]
},
{
"$substr": [
"$author",
4,
999
]
},
{
"$cond": [
{
"$eq": [
{
"$substr": [
"$author",
4,
1
]
},
"_"
]
},
{
"$substr": [
"$author",
5,
999
]
},
"$author"
]
}
]
}
]
}
]
}
]
}
}
},
{
"$group": {
"_id": "$name",
"viewsPerAuthor": {
"$sum": "$pageViews"
}
}
}
]
$group combining $addFields and $arrayElemAt works for me (version ≥ 3.4).
Say we have following data in collection faculty, database school:
{ "_id" : ObjectId("5ed5a59b1febc4c796a88e80"), "name" : "Harry_Potter" }
{ "_id" : ObjectId("5ed5a60e1febc4c796a88e81"), "name" : "Edison_Potter" }
{ "_id" : ObjectId("5ed5a6231febc4c796a88e82"), "name" : "Jack_Potter" }
{ "_id" : ObjectId("5ed5a62f1febc4c796a88e83"), "name" : "Alice_Walker" }
{ "_id" : ObjectId("5ed5a65f1febc4c796a88e84"), "name" : "Bob_Walker" }
{ "_id" : ObjectId("5ed5a6731febc4c796a88e85"), "name" : "Will_Smith" }
Following can group each document by the last name:
db.faculty.aggregate([
{
$addFields: {
lastName: {
$arrayElemAt: [ { $split: ["$name", "_"] }, 1 ]
}
}
},
{
$group: {
_id: "$lastName",
count: {$sum: 1}
}
}
])
Running result is:
{ "_id" : "Potter", "count" : 3 }
{ "_id" : "Walker", "count" : 2 }
{ "_id" : "Smith", "count" : 1 }
The trick I used is to add a field named lastName. Based on what you have for the name field, it can be split into an array by _. Last name is at index 1 and first name at index 0.
Reference
$addFields (aggregation)
$arrayElemAt (aggregation)
I have collection of cities like this
{ "name": "something","population":2121}
there are thousands of documents like this in one collection
now, I have created index like this
$coll->ensureIndex(array("name" => 1, "population" => -1),
array("background" => true));
now I want to query like this
$cities = $coll->find(array("name" => array('$regex' => "^$name")))
->limit(30)
->sort(array("name" => 1, "population" => -1));
But this returns cities in ascending order of population. But I want result as descending order of population i.e. highest population first.
Any idea???
EDIT: I have created individual indexes on name and population. Following is output of db.city_info.getIndexes() and db.city_info.find({ "name": { "$regex": "^Ban" } }).sort({ "population": -1 }).explain(); respectively
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"ns" : "city_database.city_info",
"name" : "_id_"
},
{
"v" : 1,
"key" : {
"name" : 1
},
"ns" : "city_database.city_info",
"background" : 1,
"name" : "ascii_1"
},
{
"v" : 1,
"key" : {
"population" : 1
},
"ns" : "city_database.city_info",
"background" : 1,
"name" : "population_1"
}
]
and
{
"cursor" : "BtreeCursor ascii_1 multi",
"nscanned" : 70739,
"nscannedObjects" : 70738,
"n" : 70738,
"scanAndOrder" : true,
"millis" : 17461,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : false,
"indexOnly" : false,
"indexBounds" : {
"name" : [
[
"Ban",
"Bao"
],
[
/^Ban/,
/^Ban/
]
]
}
}
Just look at time taken by query :-(
If you want the results to be in descending order of population (greatest to least) then remove the sort on name within the query.
my is too short has the right idea
When you sort on name and then descending population, what you have now, it sorts by name, which is most likely unique-ish because we are talking about cities, and then by population.
Also, make sure you have an index on name:
db.cities.ensureIndex({name: 1})
Direction doesn't matter when the index is on one field.
Update (sample of similar index, query and explain):
> db.test.insert({name: "New York", population: 5000})
> db.test.insert({name: "Longdon", population: 7000})
> db.test.ensureIndex({name: 1})
> db.test.find({name: {"$regex": "^New"}}).sort({population: -1})
{ "_id" : ObjectId("4f0ff70072999b69b616d2b6"), "name" : "New York", "population" : 5000 }
> db.test.find({name: {"$regex": "^New"}}).sort({population: -1}).explain()
{
"cursor" : "BtreeCursor name_1 multi",
"nscanned" : 1,
"nscannedObjects" : 1,
"n" : 1,
"scanAndOrder" : true,
"millis" : 1,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : false,
"indexOnly" : false,
"indexBounds" : {
"name" : [
[
"New",
"Nex"
],
[
/^New/,
/^New/
]
]
}
}