using $regex in mongodb aggregation framework in $group - regex

Consider the following example:
db.article.aggregate(
{ $group : {
_id : "$author",
docsPerAuthor : { $sum : 1 },
viewsPerAuthor : { $sum : "$pageViews" }
}}
);
This groups by the author field and computes two fields.
I have values for $author = FirstName_LastName.
Now instead of grouping by $author, I want to group by all authors who share the same LastName.
I tried $regex to group by all matching strings after the '_'
$author.match(/_[a-zA-Z0-9]+$/)
db.article.aggregate(
{ $group : {
_id : "$author".match(/_[a-zA-Z0-9]+$/),
docsPerAuthor : { $sum : 1 },
viewsPerAuthor : { $sum : "$pageViews" }
}}
);
also tried the following:
db.article.aggregate(
{ $group : {
_id : {$author: {$regex: /_[a-zA-Z0-9]+$/}},
docsPerAuthor : { $sum : 1 },
viewsPerAuthor : { $sum : "$pageViews" }
}}
);

Actually there is no such method which provides this kind of functionality or i could not find the appropriate version which contains it. That will not work with $regexp i think : http://docs.mongodb.org/manual/reference/operator/regex/ it is just for pattern matching.
There is an improvement request in the jira : https://jira.mongodb.org/browse/SERVER-6773
It is in open unresolved state.
BUT
in github i found this disscussion: https://github.com/mongodb/mongo/pull/336
And if you check this commit: https://github.com/nleite/mongo/commit/2dd175a5acda86aaad61f5eb9dab83ee19915709
it contains more or less exactly the method you likely to have. I do not really get the point of the state of this improvement: in 2.2.3 it is not working .

Use mapReduce: it is the general form of aggregation. This is how to proceed in mongo shell:
Define the map function
var mapFunction = function() {
var key = this.author.match(/_[a-zA-Z0-9]+$/)[0];
var nb_match_bar2 = 0;
if( this.bar.match(/bar2/g) ){
nb_match_bar2 = 1;
}
var value = {
docsPerAuthor: 1,
viewsPerAuthor: Array.sum(this.pageViews)
};
emit( key, value );
};
and the reduce function
var reduceFunction = function(key, values) {
var reducedObject = {
_id: key,
docsPerAuthor: 0,
viewsPerAuthor: 0
};
values.forEach( function(value) {
reducedObject.docsPerAuthor += value.docsPerAuthor;
reducedObject.viewsPerAuthor += value.viewsPerAuthor;
}
);
return reducedObject;
};
run mapReduce and save the result in map_reduce_result
>db.st.mapReduce(mapFunction, reduceFunction, {out:'map_reduce_result'})
query map_reduce_result to have the result
>db.map_reduce_result.find()

A possible workaround with the aggregation framework consists in using $project to compute the author name. However, it is dirty as you need to manually loop through the different first name sizes:
Here, we compute the field name as the substring after the '_' character, trying each of its possible position (this is why there is a chain of $cond), and fallbacking in returning the whole $author if the first name is too long:
http://mongotry.herokuapp.com/#?bookmarkId=52fb5f24a0378802003b4c68
[
{
"$project": {
"author": 1,
"pageViews": 1,
"name": {
"$cond": [
{
"$eq": [
{
"$substr": [
"$author",
0,
1
]
},
"_"
]
},
{
"$substr": [
"$author",
1,
999
]
},
{
"$cond": [
{
"$eq": [
{
"$substr": [
"$author",
1,
1
]
},
"_"
]
},
{
"$substr": [
"$author",
2,
999
]
},
{
"$cond": [
{
"$eq": [
{
"$substr": [
"$author",
2,
1
]
},
"_"
]
},
{
"$substr": [
"$author",
3,
999
]
},
{
"$cond": [
{
"$eq": [
{
"$substr": [
"$author",
3,
1
]
},
"_"
]
},
{
"$substr": [
"$author",
4,
999
]
},
{
"$cond": [
{
"$eq": [
{
"$substr": [
"$author",
4,
1
]
},
"_"
]
},
{
"$substr": [
"$author",
5,
999
]
},
"$author"
]
}
]
}
]
}
]
}
]
}
}
},
{
"$group": {
"_id": "$name",
"viewsPerAuthor": {
"$sum": "$pageViews"
}
}
}
]

$group combining $addFields and $arrayElemAt works for me (version ≥ 3.4).
Say we have following data in collection faculty, database school:
{ "_id" : ObjectId("5ed5a59b1febc4c796a88e80"), "name" : "Harry_Potter" }
{ "_id" : ObjectId("5ed5a60e1febc4c796a88e81"), "name" : "Edison_Potter" }
{ "_id" : ObjectId("5ed5a6231febc4c796a88e82"), "name" : "Jack_Potter" }
{ "_id" : ObjectId("5ed5a62f1febc4c796a88e83"), "name" : "Alice_Walker" }
{ "_id" : ObjectId("5ed5a65f1febc4c796a88e84"), "name" : "Bob_Walker" }
{ "_id" : ObjectId("5ed5a6731febc4c796a88e85"), "name" : "Will_Smith" }
Following can group each document by the last name:
db.faculty.aggregate([
{
$addFields: {
lastName: {
$arrayElemAt: [ { $split: ["$name", "_"] }, 1 ]
}
}
},
{
$group: {
_id: "$lastName",
count: {$sum: 1}
}
}
])
Running result is:
{ "_id" : "Potter", "count" : 3 }
{ "_id" : "Walker", "count" : 2 }
{ "_id" : "Smith", "count" : 1 }
The trick I used is to add a field named lastName. Based on what you have for the name field, it can be split into an array by _. Last name is at index 1 and first name at index 0.
Reference
$addFields (aggregation)
$arrayElemAt (aggregation)

Related

Query with id, nested array and range in Elastic Search (Open Search AWS)

I have a ES document like below :
{
"_id" : "test#domain.com",
"age" : 12,
"hobbiles" : ["Singing", "Dancing"]
},
{
"_id" : "test1#domain.com",
"age" : 7,
"hobbiles" : ["Coding", "Chess"]
}
I am storing email as id, age and hobbiles, hobbies is nested type, age is long I want to query with id, age and hobbiles, something like below :
Select * FROM tbl where _id IN ('val1', 'val2') AND age > 5 AND hobbiles should match with Chess or Dancing
How can I do in Elastic Search ? I am using OpenSearch 1.3 (latest) : AWS
I will suspect that field hobbiles is keyword, then the query suggested:
PUT test
{
"mappings": {
"properties": {
"age": {
"type": "long"
},
"hobbiles": {
"type": "keyword"
}
}
}
}
POST test/_doc/test#domain.com
{
"age": 12,
"hobbiles": [
"Singing",
"Dancing"
]
}
POST test/_doc/test1#domain.com
{
"age": 7,
"hobbiles": [
"Coding",
"Chess"
]
}
GET test/_search
{
"query": {
"bool": {
"filter": [
{
"terms": {
"_id": [
"test1#domain.com",
"test#domain.com"
]
}
}
],
"must": [
{
"range": {
"age": {
"gt": 5
}
}
},
{
"terms": {
"hobbiles": [
"Coding",
"Chess"
]
}
}
]
}
}
}

want to search by integer value in mongoDB regex [duplicate]

I want to regex search an integer value in MongoDB. Is this possible?
I'm building a CRUD type interface that allows * for wildcards on the various fields. I'm trying to keep the UI consistent for a few fields that are integers.
Consider:
> db.seDemo.insert({ "example" : 1234 });
> db.seDemo.find({ "example" : 1234 });
{ "_id" : ObjectId("4bfc2bfea2004adae015220a"), "example" : 1234 }
> db.seDemo.find({ "example" : /^123.*/ });
>
As you can see, I insert an object and I'm able to find it by the value. If I try a simple regex, I can't actually find the object.
Thanks!
If you are wanting to do a pattern match on numbers, the way to do it in mongo is use the $where expression and pass in a pattern match.
> db.test.find({ $where: "/^123.*/.test(this.example)" })
{ "_id" : ObjectId("4bfc3187fec861325f34b132"), "example" : 1234 }
I am not a big fan of using the $where query operator because of the way it evaluates the query expression, it doesn't use indexes and the security risk if the query uses user input data.
Starting from MongoDB 4.2 you can use the $regexMatch|$regexFind|$regexFindAll available in MongoDB 4.1.9+ and the $expr to do this.
let regex = /123/;
$regexMatch and $regexFind
db.col.find({
"$expr": {
"$regexMatch": {
"input": {"$toString": "$name"},
"regex": /123/
}
}
})
$regexFinAll
db.col.find({
"$expr": {
"$gt": [
{
"$size": {
"$regexFindAll": {
"input": {"$toString": "$name"},
"regex": "123"
}
}
},
0
]
}
})
From MongoDB 4.0 you can use the $toString operator which is a wrapper around the $convert operator to stringify integers.
db.seDemo.aggregate([
{ "$redact": {
"$cond": [
{ "$gt": [
{ "$indexOfCP": [
{ "$toString": "$example" },
"123"
] },
-1
] },
"$$KEEP",
"$$PRUNE"
]
}}
])
If what you want is retrieve all the document which contain a particular substring, starting from release 3.4, you can use the $redact operator which allows a $conditional logic processing.$indexOfCP.
db.seDemo.aggregate([
{ "$redact": {
"$cond": [
{ "$gt": [
{ "$indexOfCP": [
{ "$toLower": "$example" },
"123"
] },
-1
] },
"$$KEEP",
"$$PRUNE"
]
}}
])
which produces:
{
"_id" : ObjectId("579c668c1c52188b56a235b7"),
"example" : 1234
}
{
"_id" : ObjectId("579c66971c52188b56a235b9"),
"example" : 12334
}
Prior to MongoDB 3.4, you need to $project your document and add another computed field which is the string value of your number.
The $toLower and his sibling $toUpper operators respectively convert a string to lowercase and uppercase but they have a little unknown feature which is that they can be used to convert an integer to string.
The $match operator returns all those documents that match your pattern using the $regex operator.
db.seDemo.aggregate(
[
{ "$project": {
"stringifyExample": { "$toLower": "$example" },
"example": 1
}},
{ "$match": { "stringifyExample": /^123.*/ } }
]
)
which yields:
{
"_id" : ObjectId("579c668c1c52188b56a235b7"),
"example" : 1234,
"stringifyExample" : "1234"
}
{
"_id" : ObjectId("579c66971c52188b56a235b9"),
"example" : 12334,
"stringifyExample" : "12334"
}
Now, if what you want is retrieve all the document which contain a particular substring, the easier and better way to do this is in the upcoming release of MongoDB (as of this writing) using the $redact operator which allows a $conditional logic processing.$indexOfCP.
db.seDemo.aggregate([
{ "$redact": {
"$cond": [
{ "$gt": [
{ "$indexOfCP": [
{ "$toLower": "$example" },
"123"
] },
-1
] },
"$$KEEP",
"$$PRUNE"
]
}}
])

is it possible to write regular expression in $cond in MongoDB

I need to use $cond to combine differenet column, and one $cond I need to write is as following:
create_widget: {
$sum:{
$cond:[{$and: [ {$eq: ['$Method', 'POST']},
{Url:{$regex: /.*\/widgets$/}} ]}, 1, 0]
}
}
and this code is not right, it seems, regular expression can not be put here.Is there any other way to do this? I want to match Url and regular expression and put the code under $cond.
A sample data looks as
{"BrandId":"a","SessionId":"a1","Method":"POST","Url":"/sample/widgets"}
{"BrandId":"a","SessionId":"a2","Method":"POST","Url":"/sample/blog"}
{"BrandId":"b","SessionId":"b1","Method":"PUT","Url":"/sample/widgets"}
The whole code I wrote is as following:
db.tmpAll.aggregate([
{$group: {
_id: {BrandId:'$BrandId'},
SessionId: {$addToSet: '$SessionId'},
create_widget: {
$sum:{
$cond:[{$and: [ {$eq: ['$Method', 'POST']},
{} ]}, 1, 0]
}
}
}},
{$group: {
_id: '$_id.BrandId',
distinct_session: {$sum: {$size: '$SessionId'}},
create_widget: {$sum: '$create_widget'}
}}
]);
The expected result of sample code is
{ "_id" : "a", "distinct_session" : 2, "create_widget" : 1 }
{ "_id" : "b", "distinct_session" : 1, "create_widget" : 0 }
For MongoDB 4.2 and newer production releases, and in the 4.1.11 and newer development versions, use $regexMatch which is a syntactic sugar on top of $regexFind which can be used for regex matching and capturing.
db.tmpAll.aggregate([
{ "$group": {
"_id": {
"BrandId": "$BrandId",
"SessionId": "$SessionId"
},
"widget_count": {
"$sum": {
"$cond": [
{
"$and": [
{ "$eq": ["$Method", "POST"] },
{ "$regexMatch": {
"input": "$Url",
"regex": /widget/
} }
]
}, 1, 0
]
}
},
"session_count": { "$sum": 1 }
} },
{ "$group": {
"_id": "$_id.BrandId",
"create_widget": { "$sum": "$widget_count" },
"distinct_session": { "$sum": "$session_count" }
} }
]);
There is an open JIRA issue for this SERVER-8892 - Use $regex as the expression in a $cond. However, as a workaround, For older MongoDB versions which do not have the above features, use the following workaround in your aggregation pipeline.
It uses the $substr operator in the $project operator stage to extract the part of the URL and acts as a workaround for the regex. :
db.tmpAll.aggregate([
{ "$group": {
"_id": {
"BrandId": "$BrandId",
"SessionId": "$SessionId"
},
"widget_count": {
"$sum": {
"$cond": [
{
"$and": [
{ "$eq": ["$Method", "POST"] },
{ "$eq": [ { "$substr": [ "$Url", 8, -1 ] }, "widget"] }
]
}, 1, 0
]
}
},
"session_count": { "$sum": 1 }
} },
{ "$group": {
"_id": "$_id.BrandId",
"create_widget": { "$sum": "$widget_count" },
"distinct_session": { "$sum": "$session_count" }
} }
]);
Output
/* 1 */
{
"result" : [
{
"_id" : "a",
"create_widget" : 1,
"distinct_session" : 2
},
{
"_id" : "b",
"create_widget" : 0,
"distinct_session" : 1
}
],
"ok" : 1
}

Regex inside array in mongoDB

i want to do a query inside a array in mongodb with regex, the collections have documents like this:
{
"_id" : ObjectId("53340d07d6429d27e1284c77"),
"company" : "New Company",
"worktypes" : [
{
"name" : "Pompas",
"works" : [
{
"name" : "name 2",
"code" : "A00011",
"price" : "22,22"
},
{
"name" : "name 3",
"code" : "A00011",
"price" : "22,22"
},
{
"name" : "name 4",
"code" : "A00011",
"price" : "22,22"
},
{
"code" : "asdasd",
"name" : "asdads",
"price" : "22"
},
{
"code" : "yy",
"name" : "yy",
"price" : "11"
}
]
},
{
"name" : "name 4",
"works" : [
{
"code" : "A112",
"name" : "Nombre",
"price" : "11,2"
}
]
},
{
"name" : "ee",
works":[
{
"code" : "aa",
"name" : "aa",
"price" : "11"
},
{
"code" : "A00112",
"name" : "Nombre",
"price" : "12,22"
}
]
}
]
}
Then i need to find a document by the company name and any work inside it have match a regex in code or name work.
I have this:
var companyquery = { "company": "New Company"};
var regQuery = new RegExp('^A0011.*$', 'i');
db.categories.find({$and: [companyquery,
{$or: [
{"worktypes.works.$.name": regQuery},
{"worktypes.works.$.code": regQuery}
]}]})
But dont return any result..I think the error is try to search inside array with de dot and $..
Any idea?
Edit:
With this:
db.categories.find({$and: [{"company":"New Company"},
{$or: [
{"worktypes.works.name": {"$regex": "^A00011$|^a00011$"}},
{"worktypes.works.code": {"$regex": "^A00011$|^a00011$"}}
]}]})
This is the result:
{
"_id" : ObjectId("53340d07d6429d27e1284c77"),
"company" : "New Company",
"worktypes" : [
{
"name" : "Pompas",
"works" : [
{
"name" : "name 2",
"code" : "A00011",
"price" : "22,22"
},
{
"code" : "aa",
"name" : "aa",
"price" : "11"
},
{
"code" : "A00112",
"name" : "Nombre",
"price" : "12,22"
},
{
"code" : "asdasd",
"name" : "asdads",
"price" : "22"
},
{
"code" : "yy",
"name" : "yy",
"price" : "11"
}
]
},
{
"name" : "name 4",
"works" : [
{
"code" : "A112",
"name" : "Nombre",
"price" : "11,2"
}
]
},
{
"name" : "Bombillos"
},
{
"name" : "Pompas"
},
{
"name" : "Bombillos 2"
},
{
"name" : "Other type"
},
{
"name" : "Other new type"
}
]
}
The regex dont field the results ok..
You are using a JavaScript native RegExp object for the regular expression, however for mongo to process the regular expression it needs to be sent as part of the query document, and this is not the same thing.
Also the regex will not match the values that you want. It could actualy be ^A0111$ for the exact match, but your case insensitive match causes a problem causing a larger scan of a possible index. So there is a better way to write that. Also see the documentation link for the problems with case insensitive matches.
Use the $regex operator instead:
db.categories.find({
"$and": [
{"company":"New Company"},
{ "$or": [
{ "worktypes.works.name": { "$regex": "^A00011$|^a00011$" }},
{ "worktypes.works.code": { "$regex": "^A00011$|^a00011$" }}
]}
]
})
Also the positional $ placeholders are not valid for a query, they are only used in projection or an update or the first matching element found by the query.
But your actual problem seems to be that you are trying to only get the elements of an array that "match" your conditions. You cannot do this with .find() and for that you need to use .aggregate() instead:
db.categories.aggregate([
// Always makes sense to match the actual documents
{ "$match": {
"$and": [
{"company":"New Company"},
{ "$or": [
{ "worktypes.works.name": { "$regex": "^A00011$|^a00011$" }},
{ "worktypes.works.code": { "$regex": "^A00011$|^a00011$" }}
]}
]
}},
// Unwind the worktypes array
{ "$unwind": "$worktypes" },
// Unwind the works array
{ "$unwind": "$worktypes.works" },
// Then use match to filter only the matching entries
{ "$match": {
"$or": [
{ "worktypes.works.name": { "$regex": "^A00011$|^a00011$" } },
{ "worktypes.works.code": { "$regex": "^A00011$|^a00011$" } }
]
}},
/* Stop */
// If you "really" need the arrays back then include all the following
// Otherwise the steps up to here actually got you your results
// First put the "works" array back together
{ "$group": {
"_id": {
"_id": "$_id",
"company": "$company",
"workname": "$worktypes.name"
},
"works": { "$push": "$worktypes.works" }
}},
// Then put the "worktypes" array back
{ "$group": {
"_id": "$_id._id",
"company": { "$first": "$_id.company" },
"worktypes": {
"$push": {
"name": "$_id.workname",
"works": "$works"
}
}
}}
])
So what .aggregate() does with all of these stages is it breaks the array elements into normal document form so they can be filtered using the $match operator. In that way, only the elements that "match" are returned.
What "find" is correctly doing is matching the "document" that meets the conditions. Since documents contain the elements that match then they are returned. The two principles are very different things.
When you mean to "filter" use aggregate.
i think there is a typo :
the regex should be : ^A00011.*$
triple 0 instead of double 0
You can try aggregate method and aggregation array operators, so this query will be supported from MongoDB 4.2,
$match to match your condition
$addFields to add/edit field in document
$map to iterate loop of worktypes array
$filter to iterate loop of works array and it will return the filtered result as per provided condition
$regexMatch to match regex expression same as we did in $match stage, it will return a boolean response, so we checked $or condition here,
$mergeObjects to merge current object of worktypes and updated works array property
second $addFields for remove empty result of works array
$filter to iterate loop of worktypes array and check negative condition to remove empty works document
db.categories.aggregate([
{
$match: {
$and: [
{ "company": "New Company" },
{
$or: [
{ "worktypes.works.name": { "$regex": "^A00011$|^a00011$" } },
{ "worktypes.works.code": { "$regex": "^A00011$|^a00011$" } }
]
}
]
}
},
{
$addFields: {
worktypes: {
$map: {
input: "$worktypes",
in: {
$mergeObjects: [
"$$this",
{
works: {
$filter: {
input: "$$this.works",
cond: {
$or: [
{
$regexMatch: {
input: "$$this.name",
regex: "^A00011$|^a00011$"
}
},
{
$regexMatch: {
input: "$$this.code",
regex: "^A00011$|^a00011$"
}
}
]
}
}
}
}
]
}
}
}
}
},
{
$addFields: {
worktypes: {
$filter: {
input: "$worktypes",
cond: { $ne: ["$$this.works", []] }
}
}
}
}
])
Playground

MongoDB Regex Search on Integer Value

I want to regex search an integer value in MongoDB. Is this possible?
I'm building a CRUD type interface that allows * for wildcards on the various fields. I'm trying to keep the UI consistent for a few fields that are integers.
Consider:
> db.seDemo.insert({ "example" : 1234 });
> db.seDemo.find({ "example" : 1234 });
{ "_id" : ObjectId("4bfc2bfea2004adae015220a"), "example" : 1234 }
> db.seDemo.find({ "example" : /^123.*/ });
>
As you can see, I insert an object and I'm able to find it by the value. If I try a simple regex, I can't actually find the object.
Thanks!
If you are wanting to do a pattern match on numbers, the way to do it in mongo is use the $where expression and pass in a pattern match.
> db.test.find({ $where: "/^123.*/.test(this.example)" })
{ "_id" : ObjectId("4bfc3187fec861325f34b132"), "example" : 1234 }
I am not a big fan of using the $where query operator because of the way it evaluates the query expression, it doesn't use indexes and the security risk if the query uses user input data.
Starting from MongoDB 4.2 you can use the $regexMatch|$regexFind|$regexFindAll available in MongoDB 4.1.9+ and the $expr to do this.
let regex = /123/;
$regexMatch and $regexFind
db.col.find({
"$expr": {
"$regexMatch": {
"input": {"$toString": "$name"},
"regex": /123/
}
}
})
$regexFinAll
db.col.find({
"$expr": {
"$gt": [
{
"$size": {
"$regexFindAll": {
"input": {"$toString": "$name"},
"regex": "123"
}
}
},
0
]
}
})
From MongoDB 4.0 you can use the $toString operator which is a wrapper around the $convert operator to stringify integers.
db.seDemo.aggregate([
{ "$redact": {
"$cond": [
{ "$gt": [
{ "$indexOfCP": [
{ "$toString": "$example" },
"123"
] },
-1
] },
"$$KEEP",
"$$PRUNE"
]
}}
])
If what you want is retrieve all the document which contain a particular substring, starting from release 3.4, you can use the $redact operator which allows a $conditional logic processing.$indexOfCP.
db.seDemo.aggregate([
{ "$redact": {
"$cond": [
{ "$gt": [
{ "$indexOfCP": [
{ "$toLower": "$example" },
"123"
] },
-1
] },
"$$KEEP",
"$$PRUNE"
]
}}
])
which produces:
{
"_id" : ObjectId("579c668c1c52188b56a235b7"),
"example" : 1234
}
{
"_id" : ObjectId("579c66971c52188b56a235b9"),
"example" : 12334
}
Prior to MongoDB 3.4, you need to $project your document and add another computed field which is the string value of your number.
The $toLower and his sibling $toUpper operators respectively convert a string to lowercase and uppercase but they have a little unknown feature which is that they can be used to convert an integer to string.
The $match operator returns all those documents that match your pattern using the $regex operator.
db.seDemo.aggregate(
[
{ "$project": {
"stringifyExample": { "$toLower": "$example" },
"example": 1
}},
{ "$match": { "stringifyExample": /^123.*/ } }
]
)
which yields:
{
"_id" : ObjectId("579c668c1c52188b56a235b7"),
"example" : 1234,
"stringifyExample" : "1234"
}
{
"_id" : ObjectId("579c66971c52188b56a235b9"),
"example" : 12334,
"stringifyExample" : "12334"
}
Now, if what you want is retrieve all the document which contain a particular substring, the easier and better way to do this is in the upcoming release of MongoDB (as of this writing) using the $redact operator which allows a $conditional logic processing.$indexOfCP.
db.seDemo.aggregate([
{ "$redact": {
"$cond": [
{ "$gt": [
{ "$indexOfCP": [
{ "$toLower": "$example" },
"123"
] },
-1
] },
"$$KEEP",
"$$PRUNE"
]
}}
])