performing priority query in mongo - regex

sample document :
{"name":"John", "age":35, "address":".....",.....}
Employees whose join_month=3 is priority 1
Employees whose address contains the string "Avenue" is priority 2
Employees whose address contains the string "Street" is priority 3
Employees whose address contains the string "Road" is priority 4
As of now, I'm at this stage:
db.collection.aggregate([
{ "$match": {
"$or": [
{ "join_month": 3 },
{ "address": /.*Avenue.*/i },
{ "address": /.*Street.*/i },
{ "address": /.*Road.*/i }
]
}},
{ "$project": {
"name": 1,
"age": 1,
"_id": 0,
"priority": { ?????????? }
}},
{ "$sort": { "priority": 1 } }
])
I'm stuck at priority field. What should I put there?

Using the aggregation framework you would "ideally" want to use the $regex filter within the $cond logical operator in the $project pipeline step but unfortunately MongoDB is yet to support this.
There is a JIRA ticket for this currently open $project filter using $regex
However, a workaround (though not the best solution performant-wise) would be to use map-reduce. Consider populating a test collection:
db.test.insert([
{ _id: 0, "join_month": 12, "address": "33 Point Avenue", "name": "John", "age":35 },
{ _id: 1, "join_month": 10, "address": "2A Broad Street, Surbub", "name": "Jane", "age":21 },
{ _id: 2, "join_month": 3, "address": "127 Umpstreeten Road, Surbub", "name": "Alan", "age":63 },
{ _id: 3, "join_month": 3, "address": "127 Umpstreeten Road, Surbub", "name": "Louise", "age":30 }
])
Define your map function as:
var mapper = function() {
var priority;
if (this.join_month==3){
priority = 1;
}
else if (this.address.match(/Avenue/i)){
priority = 2;
}
else if (this.address.match(/Street/i)){
priority = 3;
}
else if (this.address.match(/Road/i)){
priority = 4;
}
else {
priority = 99;
}
var value = {
"name": this.name,
"age": this.age,
"priority": priority
};
emit( this._id, value );
};
The reduce function follows:
var reducer = function() { };
Then run the mapduce operation on the test collection and store the result in the collection mr_result
db.test.mapReduce(mapper, reducer, {
"out": 'mr_result'
"query": {
"$or": [
{ "join_month": 3 },
{ "address": /.*Avenue.*/i },
{ "address": /.*Street.*/i },
{ "address": /.*Road.*/i }
]
}
})
Query the result collection:
db.mr_result.find().sort({ "priority": 1})
Sample Output
{ "_id" : 2, "value" : { "name" : "Alan", "age" : 63, "priority" : 1 } }
{ "_id" : 3, "value" : { "name" : "Louise", "age" : 30, "priority" : 1 } }
{ "_id" : 0, "value" : { "name" : "John", "age" : 35, "priority" : 2 } }
{ "_id" : 1, "value" : { "name" : "Jane", "age" : 21, "priority" : 3 } }

Related

Query with id, nested array and range in Elastic Search (Open Search AWS)

I have a ES document like below :
{
"_id" : "test#domain.com",
"age" : 12,
"hobbiles" : ["Singing", "Dancing"]
},
{
"_id" : "test1#domain.com",
"age" : 7,
"hobbiles" : ["Coding", "Chess"]
}
I am storing email as id, age and hobbiles, hobbies is nested type, age is long I want to query with id, age and hobbiles, something like below :
Select * FROM tbl where _id IN ('val1', 'val2') AND age > 5 AND hobbiles should match with Chess or Dancing
How can I do in Elastic Search ? I am using OpenSearch 1.3 (latest) : AWS
I will suspect that field hobbiles is keyword, then the query suggested:
PUT test
{
"mappings": {
"properties": {
"age": {
"type": "long"
},
"hobbiles": {
"type": "keyword"
}
}
}
}
POST test/_doc/test#domain.com
{
"age": 12,
"hobbiles": [
"Singing",
"Dancing"
]
}
POST test/_doc/test1#domain.com
{
"age": 7,
"hobbiles": [
"Coding",
"Chess"
]
}
GET test/_search
{
"query": {
"bool": {
"filter": [
{
"terms": {
"_id": [
"test1#domain.com",
"test#domain.com"
]
}
}
],
"must": [
{
"range": {
"age": {
"gt": 5
}
}
},
{
"terms": {
"hobbiles": [
"Coding",
"Chess"
]
}
}
]
}
}
}

MongoDB Aggregate - Match condition only on second collection but provide all documents from the first

Basically, I have 2 collections in my MongoDB database -> Books, Scores.
Books
{
"BOOK_ID" : "100",
"BOOK_NAME" : "Book 1",
"BOOK_DESC" : "abcd",
},
{
"BOOK_ID" : "101",
"BOOK_NAME" : "Book 2",
"BOOK_DESC" : "efgh",
},
{
"BOOK_ID" : "102",
"BOOK_NAME" : "Book 3",
"BOOK_DESC" : "ijkl",
}
Scores
{
"BOOK_ID" : "100",
"BOOK_CATEGORY" : "kids",
"BOOK_SCORE" : "6",
},
{
"BOOK_ID" : "100",
"BOOK_CATEGORY" : "Educational",
"BOOK_SCORE" : "8",
},
{
"BOOK_ID" : "101",
"BOOK_CATEGORY" : "Kids",
"BOOK_SCORE" : "6",
},
{
"BOOK_ID" : "101",
"BOOK_CATEGORY" : "Fantasy",
"BOOK_SCORE" : "7",
}
Expected output :
Searching for all books with BOOKS_CATEGORY="Kids" and `BOOKS_SCORE=6``
{
"BOOK_ID" : "100",
"BOOK_NAME" : "Book 1",
"BOOK_DESC" : "abcd",
"BOOK_CATEGORY" : "Kids",
"BOOK_SCORE" : 6
},
{
"BOOK_ID" : "101",
"BOOK_NAME" : "Book 2",
"BOOK_DESC" : "efgh",
"BOOK_CATEGORY" : "Kids",
"BOOK_SCORE" : 6
},
{
"BOOK_ID" : "102",
"BOOK_NAME" : "Book 3",
"BOOK_DESC" : "ijkl",
}
Notice that, for all the books to which scores are available, they are appended. If a Book does not have any score associated, it still comes in the result.
What I have tried?
I have tried using $lookup
pipeline = [
{
"$lookup": {
"from": "Scores",
"pipeline":[
{
"$match" : {
"BOOK_CATEGORY" : "Kids",
"BOOK_SCORE" : "6",
}
}
],
"localField": "BOOK_ID",
"foreignField": "BOOK_ID",
"as": "SCORES",
},
},
]
db.Books.aggregate(pipeline)
Also, by reading the $lookup subquery docs,(https://www.mongodb.com/docs/manual/reference/operator/aggregation/lookup/#join-conditions-and-subqueries-on-a-joined-collection)
I got the feeling that what I am expecting may not be possible.
Can anyone help me with executing such query? (I use PyMongo btw)
For the last two stages:
$replaceRoot - Replace the input document(s) with the new document(s) by merging the current document with the document which is the first document from the SCORES array.
$unset - Remove SCORES array.
db.Books.aggregate([
{
"$lookup": {
"from": "Scores",
"pipeline": [
{
"$match": {
"BOOK_CATEGORY": "Kids",
"BOOK_SCORE": "6",
}
}
],
"localField": "BOOK_ID",
"foreignField": "BOOK_ID",
"as": "SCORES"
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
"$$ROOT",
{
$first: "$$ROOT.SCORES"
}
]
}
}
},
{
$unset: "SCORES"
}
])
Sample Mongo Playground
You can achieve this by using a conditional $addFields, if the $lookup value exists then populate the values, else use $$REMOVE to remove the field, like so:
db.Books.aggregate([
{
"$lookup": {
"from": "Scores",
"pipeline": [
{
"$match": {
"BOOK_CATEGORY": "kids",
"BOOK_SCORE": "6"
}
}
],
"localField": "BOOK_ID",
"foreignField": "BOOK_ID",
"as": "SCORES"
}
},
{
$addFields: {
SCORES: "$$REMOVE",
"BOOK_SCORE": {
$cond: [
{
"$ifNull": [
{
"$arrayElemAt": [
"$SCORES",
0
]
},
false
]
},
{
$getField: {
field: "BOOK_SCORE",
input: {
"$arrayElemAt": [
"$SCORES",
0
]
}
}
},
"$$REMOVE"
]
},
"BOOK_CATEGORY": {
$cond: [
{
"$ifNull": [
{
"$arrayElemAt": [
"$SCORES",
0
]
},
false
]
},
{
$getField: {
field: "BOOK_CATEGORY",
input: {
"$arrayElemAt": [
"$SCORES",
0
]
}
}
},
"$$REMOVE"
]
},
}
}
])
Mongo Playground

Django pymongo search, sort, limit on inner array and count them

I am learning Django with MongoDb and have a collection to search into. Here is a sample document in the collection:
{
"_id": { "$oid": "62615907568ddfca4fef3a25" },
"word": 'entropy,
"count": 4,
"occurrence": [
{"book": "62615907568ddfca4fef3a23", "year": 1942, "sentence": 0 },
{"book": "62615907568ddfca4fef3a23", "year": 1942, "sentence": 5 },
{"book": "62615907568ddfca4fef3a75", "year": 1928, "sentence": 0 },
{"book": "62615907568ddfca4fef3a90", "year": 1959, "sentence": 8 }
]
}
I want to retrieve the array elements of 'occurrence' field of a specific document (word):
Sorted by year
Within an year range
Limited with offset
count the total results (without limit/offset)
What I have done so far is:
offset= 0
limit= 10
search= {'$and': [{"_id": word_id_obj, "occurrence": {"$elemMatch": {"year": {"$gte": 1940, "$lte": 1960}}}}]}
word_docs= wordcollec.aggregate([
{"$match": search},
{"$project":
{"occurrence":
{"$slice": ['$occurrence', offset, limit]}
}
},
{"$sort": {'occurrence.year': -1}}
])
# Count total results
recnt= wordcollec.aggregate([{"$match": search}, {'$group' : {"_id": "$_id", "sno" : {"$sum" : {"$size": "$occurrence"}}}}])
The year range, count are not working and I am unable to sort them. How can I rewrite my query for this?
Thanks in advance.
Use $unwind then $sort
db.collection.aggregate([
{
$match: {
_id: { "$oid": "62615907568ddfca4fef3a25" },
occurrence: { $elemMatch: { year: { $gte: 1940, $lte: 1960 } } }
}
},
{
$set: { totalWithoutLimitOrOffset: { $size: "$occurrence" } }
},
{
$unwind: "$occurrence"
},
{
$match: { "occurrence.year": { $gte: 1940, $lte: 1960 } }
},
{
$sort: { "occurrence.year": -1 }
},
{
$skip: 1
},
{
$limit: 2
},
{
$group: {
_id: "$_id",
word: { $first: "$word" },
count: { $first: "$count" },
totalWithoutLimitOrOffset: { $first: "$totalWithoutLimitOrOffset" },
occurrence: { $push: "$occurrence" }
}
}
])
mongoplayground

Find the average value from a result of the aggregation of a bucket in elasticsearch

This is what my data looks like
"age" : "5-6",
"gender" : "male",
"id" : 3 ,
"userType" : "dormant",
"location" : "560101",
"status" : "completed",
"subject" : "hindi",
"score" : 100,
"date" : "2021-06-01"
}
I have multiple entries of such data for different user Ids , I want to calculate the average score Per user for a particular day ,week ,year.
This is what i have written till now :
POST /worksheetdata/_search
{
"aggs": {
"hourlydata": {
"date_histogram": {
"field": "date",
"calendar_interval":"year"
, "extended_bounds": {
"min": "2020",
"max": "2021"
}
}
,
"aggs": {
"userId": {
"terms": {
"field": "id"
}
,
"aggs": {
"avgScore": {
"avg": {
"field": "score"
}
}
}
}
}
}
}
}
I am able to obtain average score per user in a bucket for a particular year ,
"aggregations" : {
"yearlydata" : {
"buckets" : [
{
"key_as_string" : "2020-01-01T00:00:00.000Z",
"key" : 1577836800000,
"doc_count" : 0,
"userId" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [ ]
}
},
{
"key_as_string" : "2021-01-01T00:00:00.000Z",
"key" : 1609459200000,
"doc_count" : 28,
"userId" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 2,
"doc_count" : 14,
"avgScore" : {
"value" : 43.714285714285715
}
},
{
"key" : 1,
"doc_count" : 8,
"avgScore" : {
"value" : 54.0
}
},
{
"key" : 3,
"doc_count" : 6,
"avgScore" : {
"value" : 100.0
}
}
]
}
}
]
}
}
}
Now how can i find the average of this userId.bucket and add it to the userId Object.
POST /worksheetdata/_search
{
"aggs": {
"yearlydata": {
"date_histogram": {
"field": "date",
"calendar_interval":"year"
, "extended_bounds": {
"min": "2020",
"max": "2021"
}
}
,
"aggs": {
"userId": {
"terms": {
"field": "id"
}
,
"aggs": {
"avgScore": {
"avg": {
"field": "score"
}
}
}
},
"avgScore" :{
"avg_bucket": {
"buckets_path": "userId>avgScore"
}
}
}
}
}
}

MongoDB Aggregate Regex Match or Full Text Search returns whole Document

Ex. Record
[
{
"_id": "5528cfd2e71144e020cb6494",
"__v": 11,
"Product": [
{
"_id": "5528cfd2e71144e020cb6495",
"isFav": true,
"quantity": 27,
"price": 148,
"description": "100g",
"brand": "JaldiLa",
"name": "Grapes",
"sku": "GRP"
},
{
"_id": "552963ed63d867b81e18d357",
"isFav": false,
"quantity": 13,
"price": 290,
"description": "100g",
"brand": "JaldiLa",
"name": "Apple",
"sku": "APL"
}
],
"brands": [
"Whole Foods",
"Costco",
"Bee's",
"Masons"
],
"sku": "FRT",
"name": "Fruits"
}
]
My Mongoose function to return query from AngularJS(http://localhost:8080/api/search?s=)
router.route('/search')
.get(function(req, res) {
Dept.aggregate(
{ $match: { $text: { $search: req.query.s } } },
{ $project : { name : 1, _id : 1, 'Product.name' : 1, 'Product._id' : 1} },
{ $unwind : "$Product" },
{ $group : {
_id : "$_id",
Category : { $addToSet : "$name"},
Product : { $push : "$Product"}
}}
)
});
RESULT: e.g. http://localhost:8080/api/search?s=Apple / Grape / Carrot, result is same for all.
[
{
"_id": "5528cfd2e71144e020cb6494",
"Category": ["Fruits"],
"Product": [
{
"_id": "5528cfd2e71144e020cb6495",
"name": "Grapes"
},
{
"_id": "552963ed63d867b81e18d357",
"name": "Apple"
},
{
"_id": "552e61920c530fb848c61510",
"name": "Carrots"
}
]
}
]
PROBLEM: On a query of "apple", it returns all objects within Product instead of just "grapes", i think maybe putting match after unwind would do the trick or $regex case
WHAT I WANT: e.g. for a searchString of "grape"
Also I want it to start sending results as soon as I send in the first two letters of my query.
[{
"_id": ["5528cfd2e71144e020cb6494"], //I want this in array as it messes my loop up
"Category": "Fruits", //Yes I do not want this in array like I'm getting in my resutls
"Product": [{
"_id": "5528cfd2e71144e020cb6495",
"name": "Grapes"
}]
}]
Thanks for being patient.
Use the following aggregation pipeline:
var search = "apple",
pipeline = [
{
"$match": {
"Product.name": { "$regex": search, "$options": "i" }
}
},
{
"$unwind": "$Product"
},
{
"$match": {
"Product.name": { "$regex": search, "$options": "i" }
}
},
{
"$project": {
"Category": "$name",
"Product._id": 1,
"Product.name": 1
}
}
];
db.collection.aggregate(pipeline);
With the above sample document and a regex (case-insensitive) search for "apple" on the name field of the Product array, the above aggregation pipeline produces the result:
Output:
/* 1 */
{
"result" : [
{
"_id" : "5528cfd2e71144e020cb6494",
"Product" : {
"_id" : "552963ed63d867b81e18d357",
"name" : "Apple"
},
"Category" : "Fruits"
}
],
"ok" : 1
}