django-elasticsearch-dsl Mapping for completion suggest field not working - django

This problem has been driving me crazy for days, with no solution in sight.
I create a document as follows from my django model.
from django_elasticsearch_dsl import fields
#registry.register_document class QuestionDocument(Document):
complete = fields.CompletionField(attr='title')
class Index:
name = 'questions'
class Django:
model = QuestionModel
fields = ['text', 'title']
Now i want to perform a completion query like this:
matched_questions = list(QuestionDocument.search().suggest("suggestions", word, completion={'field': 'complete'}).execute())
But i keep getting the following error:
elasticsearch.exceptions.RequestError: RequestError(400, 'search_phase_execution_exception', 'Field [complete] is not a completion suggest field')
I think the problem is that the mapping for this field is not created correctly, but I don't know how to fix it. Can anybody help with this? It is literally driving me crazy.
UPDATE:
I realized that in my mapping, complete is created as a text field, and I don't know why this is happening or how to fix it. This is my mapping:
{
"questions" : {
"mappings" : {
"doc" : {
"properties" : {
"complete" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"text" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"title" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
}

I struggled too with a similar issue.
You should try to declare your Document like this:
#registry.register_document
class QuestionDocument(Document):
title = fields.TextField(
fields={
'raw': fields.TextField(analyzer='standard'),
'suggest': fields.CompletionField(),
}
)
class Index:
name = 'questions'
class Django:
model = QuestionModel
fields = ['text']
then to recover suggests from the title field:
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search
client = Elasticsearch("localhost:9200")
s = Search(using=client)
query = s.suggest('name_suggestion',"your_prefixword",completion={'field':'title.suggest'})
response = query.execute()
response.suggest['name_suggestion']
Hope it helps. Let me know if that does the job.

Your index is being created automatically instead of you creating it with the mappings you need. You need to, before you index any document, create the index. Not sure how this is done in django_elasticsearch_dsl but in elasticsearch_dsl it would be just calling QuestionDocument.init()
Hope this helps!

Related

AWS AppSync vtl make set null if argument is empty list

I want to update a 'person' item in my table.
I want to update the persons name and his set of skills.
It's also possible that we just use the updatePerson mutation to update the name. And we will update the skills later.
At that point the argument 'skills' is an empty list. However DynamoDB does not allow for empty sets.
Currently I am trying to work around this by first checking if the skills argument is an empty list. But it is still telling me "An string set may not be empty for key :skills".
This is my current request mapping template, but at the moment the isNullOrEmpty check does not work.
#if ($util.isNullOrEmpty($context.arguments.skills))
#set ($skills = $utils.dynamodb.toNullJson())
#else
#set ($skills = $utils.dynamodb.toStringSetJson($context.arguments.skills))
#end
{
"version" : "2018-05-29",
"operation" : "UpdateItem",
"key": {
"id" : $utils.dynamodb.toDynamoDBJson($context.arguments.id)
},
"update" : {
"expression" : "set #name = :name, #skills= :skills,
"expressionNames" : {
"#name": "name",
"#skills": "skills",
},
"expressionValues" : {
":name" : $utils.dynamodb.toDynamoDBJson($context.arguments.name),
":skills" : $skills,
}
}
}
Do you know how I can set the set of skills if the skills argument is not an empty array and not set it if the skills argument is an empty array?
Instead of setting null into a string-set attribute, I think you should just remove the attribute from the item (the equivalent of item.skills = undefined).
You can use SET, and REMOVE actions to achieve that. The update is dynamically generated based on the input of skills. Sample code (I haven't tested it myself)
#set ($update = {
"expression" : "set #name = :name remove #skills",
"expressionNames" : {
"#name": "name",
"#skills": "skills"
},
"expressionValues" : {
":name" : $utils.dynamodb.toDynamoDBJson($context.arguments.name)
}
})
#if (!$util.isNullOrEmpty($context.arguments.skills))
#set ($update = {
"expression" : "set #name = :name set #skills = $skill",
"expressionNames" : {
"#name": "name",
"#skills": "skills"
},
"expressionValues" : {
":name" : $utils.dynamodb.toDynamoDBJson($context.arguments.name),
":skills" :$utils.dynamodb.toStringSetJson($context.arguments.skills),
}
})
#end
{
"version" : "2018-05-29",
"operation" : "UpdateItem",
"key": {
"id" : $utils.dynamodb.toDynamoDBJson($context.arguments.id)
},
"update" : $update // or maybe $util.toJson($update)
}

Elasticsearch document fields type index automatically changes

I'm working on a project containing django, elasticsearch and django-elasticsearch-dsl. I'm collecting a quite large amount of data and saving it to postgres and indexing it to elasticsearch, via django-elasticsearch-dsl.
I'm bumping into a problem I don't understand, and I have no further hints about what is happening:
Relevant part of Django's models.py file:
class LinkDenorm(BaseModel):
...
link = CharField(null=True, max_length=2710, db_index=True)
link_expanded = TextField(null=True, db_index=True)
title = TextField(null=True, db_index=True)
text = TextField(null=True)
...
Relevant part of django-elasticsearch-dsl documents.py file:
#registry.register_document
class LinkDenorm(Document):
link_expanded = fields.KeywordField(attr='link_expanded')
class Index:
name = 'denorms_v10'
class Django:
model = models.LinkDenorm
fields = [
...
'link',
'title',
'text',
...
]
After data is successfully indexed, I verify that the index is containing the correct fields:
curl -X GET -u <myuser>:<mypasswd> "http://<my-hostname>/denorms_v10/?pretty"
{
"denorms_v10" : {
"mappings" : {
"properties" : {
...
"link" : {
"type" : "text"
},
"title" : {
"type" : "text"
},
"text" : {
"type" : "text"
}
"link_expanded" : {
"type" : "keyword"
},
...
}
}
}
}
After a certain amount of time (sometimes weeks, sometimes days) the index fields are changed. Executing the same CURL lookup as before gives me:
curl -X GET -u <myuser>:<mypasswd> "http://<my-hostname>/denorms_v10/?pretty"
{
"denorms_v10" : {
"mappings" : {
"properties" : {
...
"link" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"title" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"text" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"link_expanded" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
...
}
}
}
}
After the change happens, the queries fail, since the datatype is not correct. After investigating elasticsearch and django logs, there is nothing that would give a clue what happens with the index.
I'm a bit lost and running out of ideas. Any suggestions are most welcome. Thank you!
Miha, your index probably uses some kind of ILM (index lifecycle management) without any index template.
Either you are querying an alias, and the indices behind that alias are changing,
or a process on your side regularly deletes the index (depending on its size or the number of documents in it).
Then, when your app posts a document again, the index is recreated with the default Elasticsearch mapping.

Spring data mongo query with regex within an array

I have a collection with structure somewhat like this :
{
"organization" : "Org1",
"active" : true,
"fields" : [
{
"key" : "key1",
"value" : "table"
},
{
"key" : "key2",
"value" : "Harrison"
}
]
}
I need to find all documents with organization : "Org1", active : true, and regex match the 'value' in fields.
In mongo shell, it works perfectly. I tried the query:
db.collection.find({"organization" : "Org1", "active" : true, "fields" : {$elemMatch : {"key" : "key2","value" : {$regex : /iso/i}}}}).pretty()
But when I tried to convert it to a Java code with Spring, it gives wrong results.
1. This one will give documents even if it didn't match the pattern:
#Query("{'organization' : ?0, 'active' : true, 'fields' : {$elemMatch : {'key' : ?1, 'value' : {$regex : ?2}}}}")
List<ObjectCollection> findFieldDataByRegexMatch(String org, String key, String pattern);
2. This one doesn't give any documents even though it should.
MongoTemplate MONGO_TEMPLATE = null;
try {
MONGO_TEMPLATE = multipleMongoConfig.secondaryMongoTemplate();
} catch (Exception e) {
e.printStackTrace();
}
List<Criteria> criteriaListAnd = new ArrayList<Criteria>();
Criteria criteria = new Criteria();
String pattern = "/iso/i";
criteriaListAnd.add(Criteria.where("organization").is("Org1"));
criteriaListAnd.add(Criteria.where("active").is(true));
criteriaListAnd.add(Criteria.where("fields").elemMatch(Criteria.where("key").is(key).and("value").regex(pattern)));
criteria.andOperator(criteriaListAnd.toArray(new Criteria[criteriaListAnd.size()]));
Query query = new Query();
query.addCriteria(criteria);
List<ObjectCollection> objects = MONGO_TEMPLATE.find(query, ObjectCollection.class);
What am I missing here and how should I form my query?
You are making a very small mistake: you are passing the / delimiters as part of the pattern, which is the problem. It took me half an hour to identify it; I finally found it after enabling the debug log of Spring Boot.
For the first query, it should be called as below:
springDataRepository.findFieldDataByRegexMatch("Org1", "key2", "iso")
And the query in the repository should be modified as follows to handle case sensitivity:
#Query("{'organization' : ?0, 'active' : true, 'fields' : {$elemMatch : {'key' : ?1, 'value' : {$regex : ?2, $options: 'i'}}}}")
List<Springdata> findFieldDataByRegexMatch(String org, String key, String pattern);
The same issue applies to your second query: just change String pattern = "/iso/i"; to String pattern = "iso" or String pattern = "iso.*";
Both will then start working. For details, please check my GitHub repo https://github.com/krishnaiitd/learningJava/blob/master/spring-boot-sample-data-mongodb/src/main/java/sample/data/mongo/main/Application.java#L60
I hope this will resolve your problem.

How can i replace little part of profilepic url path in all documents by running single query in mongodb

{
"_id" : ObjectId("5bd6ed6a49ba281f5c54f185"),
"AvatarSet" : {
"Avatar" : [
{
"IsPrimaryAvatar" : true,
"ProfilePictureUrl" : "https://blob.blob.core.windows.net/avatarcontainer/avatardba36759-3e8e-4666-bc2b-e53ffb527716.jpeg?version=8b1b58b3-94f8-4608-b4db-05746eea8bfe"
}
]
}
Here I need to replace only the https://blob.blob.core.windows.net part of the ProfilePictureUrl (with https://blob123.blob.core.windows.net) for every candidate document in the database. Please help me write a MongoDB query for this.
I'm using the following query, but it's not working:
db.getCollection("candidate-staging")
.find({},{"AvatarSet":[0]})..forEach(function(e) {
e.ProfilePictureUrl= e.ProfilePictureUrl.replace("https://blob.blob.core.windows.net", "https://blob123.blob.core.windows.net");
db.candidate-staging.save(e);
});
The problem in your script is that the ProfilePictureUrl is not properly referred, using dot notation like in the example below should solve the problem.
In your code e.ProfilePictureUrl points to a missing field in the top level document, while doc.AvatarSet.Avatar[0].ProfilePictureUrl in the following example points to the ProfilePictureUrl field for the first element in the Avatar array under the AvatarSet field from the main document.
db.test.find({}).forEach(function(doc) {
doc.AvatarSet.Avatar[0].ProfilePictureUrl= doc.AvatarSet.Avatar[0].ProfilePictureUrl.replace("https://blob.blob.core.windows.net", "https://blob123.blob.core.windows.net");
db.test.save(doc);
});
Local test:
mongos> db.test.find()
{ "_id" : ObjectId("5bdb5e3c553c271478a9a006"), "AvatarSet" : { "Avatar" : [ { "IsPrimaryAvatar" : true, "ProfilePictureUrl" : "https://blob.blob.core.windows.net/avatarcontainer/avatardba36759-3e8e-4666-bc2b-e53ffb527716.jpeg?version=8b1b58b3-94f8-4608-b4db-05746eea8bfe" } ] } }
{ "_id" : ObjectId("5bdb5e3e553c271478a9a007"), "AvatarSet" : { "Avatar" : [ { "IsPrimaryAvatar" : true, "ProfilePictureUrl" : "https://blob.blob.core.windows.net/avatarcontainer/avatardba36759-3e8e-4666-bc2b-e53ffb527716.jpeg?version=8b1b58b3-94f8-4608-b4db-05746eea8bfe" } ] } }
mongos> db.test.find({}).forEach(function(doc) {
doc.AvatarSet.Avatar[0].ProfilePictureUrl= doc.AvatarSet.Avatar[0].ProfilePictureUrl.replace("https://blob.blob.core.windows.net", "https://blob123.blob.core.windows.net");
db.test.save(doc); });
mongos> db.test.find()
{ "_id" : ObjectId("5bdb5e3c553c271478a9a006"), "AvatarSet" : { "Avatar" : [ { "IsPrimaryAvatar" : true, "ProfilePictureUrl" : "https://blob123.blob.core.windows.net/avatarcontainer/avatardba36759-3e8e-4666-bc2b-e53ffb527716.jpeg?version=8b1b58b3-94f8-4608-b4db-05746eea8bfe" } ] } }
{ "_id" : ObjectId("5bdb5e3e553c271478a9a007"), "AvatarSet" : { "Avatar" : [ { "IsPrimaryAvatar" : true, "ProfilePictureUrl" : "https://blob123.blob.core.windows.net/avatarcontainer/avatardba36759-3e8e-4666-bc2b-e53ffb527716.jpeg?version=8b1b58b3-94f8-4608-b4db-05746eea8bfe" } ] } }
This document contains an array of objects nested inside another object. In your code, the reference to ProfilePictureUrl points to a missing field in the top-level document; because we need to access the objects inside the nested 'Avatar' array, we need to write another loop over that array, like e.AvatarSet.Avatar.forEach. It really works; it worked for me.
db.getCollection("test").find({}).forEach(function(e,i) {
e.AvatarSet.Avatar.forEach(function(url, j) {
url.ProfilePictureUrl = url.ProfilePictureUrl.replace("https://blob.blob.core.windows.net", "https://blob123.blob.core.windows.net");
e.AvatarSet.Avatar[j] = url;
});
db.getCollection("test").save(e);
eval(printjson(e));
})
thanks!! manfonton and stackoverflow

Django Haystack w/Elasticsearch returns no results using MultiValueField

I am attempting to index a large, very normalized, government database. The model I want to return from a search contains none of the fields I need indexed, so I have attempted to use MultiValueFields. My searches are yielding 0 results.
How does one see the contents of an elasticsearch index? Is it permissible to have a haystack index that only includes search fields from models other than the model being returned?
from models.py
class Person(models.Model):
per_idn = models.IntegerField(primary_key=True)
sex_cod = models.ForeignKey(Sex, to_field='sex_cod_val_cod')
...
class Name(models.Model):
name_idn = models.IntegerField(primary_key=True)
per_idn = models.ForeignKey(Person, to_field='per_idn')
name_txt = models.CharField()
...
class Birthdate(models.Model):
bd_idn = models.IntegerField(primary_key=True)
per_idn = models.ForeignKey(Person, to_field='per_idn')
bd_txt = models.CharField()
...
from search_indexes.py
class PersonIndex(indexes.SearchIndex, indexes.Indexable):
text = indexes.CharField(document=True, use_template=True)
names = indexes.MultiValueField()
dobs = indexes.MultiValueField()
def prepare_names(self,obj):
return [Name.name_txt for Name in Name.objects.filter(per_idn=obj.per_idn)]
def prepare_dobs(self,obj):
return [Birthdate.bd_txt for Birthdate in Birthdate.objects.filter(per_idn=obj.per_idn)]
def get_model(self):
return Person
def index_queryset(self, using=None):
return self.get_model().objects.filter()
I previously created a NameIndex and it worked fine, but with over 14,000,000 names for 4.8 million people, I need to be able to filter on the DOB as well. It would also make life easier to return a person object instead of a name object. I know it seems strange to have multiple names associated with a person, but this is a government database and people often have many AKAs.
./manage.py rebuild_index executes fine. I did some debugging and printed the values of name and dob to the console as it was indexing and the correct values are being returned in the prepare functions. When I use the search.html template, I get 0 results, no matter the query.
from ./manage.py shell
>>> from haystack.query import SearchQuerySet
>>> sqs = SearchQuerySet().all()
>>> len(sqs)
4799561
>>> sqs = SearchQuerySet().auto_query('jones')
>>> len(sqs)
0
>>>
from curl -XGET 'http://127.0.0.1:9200/my_index_name/_mapping?pretty=1'
{
"my_index_name" : {
"mappings" : {
"modelresult" : {
"properties" : {
"django_ct" : {
"type" : "string",
"index" : "not_analyzed",
"include_in_all" : false
},
"django_id" : {
"type" : "string",
"index" : "not_analyzed",
"include_in_all" : false
},
"dobs" : {
"type" : "string",
"analyzer" : "snowball"
},
"id" : {
"type" : "string"
},
"names" : {
"type" : "string",
"analyzer" : "snowball"
},
"text" : {
"type" : "string",
"analyzer" : "snowball"
}
}
}
}
How does one see the contents of an elasticsearch index? Is it permissible to have a haystack index that only includes search fields from models other than the model being returned?