Failed to add documents to Solr: [Reason: None] - django

I'm trying to index a model in Solr with django-haystack, but I get the following error (when running rebuild_index or update_index):
Indexing 2 jobposts
Failed to add documents to Solr: [Reason: None]
<response><lst name="responseHeader"><int name="status">400</int><int name="QTime">358</int></lst><lst name="error"><str name="msg">ERROR: [doc=jobpost.jobpost.1] unknown field 'django_id'</str><int name="code">400</int></lst></response>
This is search_indexes.py
from haystack import indexes
from haystack.indexes import SearchIndex
from jobpost.models import *
class JobIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index for the ``jobpost`` model."""

    # Primary document field; ``use_template=True`` means its content comes
    # from a template rather than from a single model attribute.
    text = indexes.CharField(document=True, use_template=True)
    # Each field below is read from the model attribute of the same name.
    post_type = indexes.CharField(model_attr='post_type')
    location = indexes.CharField(model_attr='location')
    job_type = indexes.CharField(model_attr='job_type')
    company_name = indexes.CharField(model_attr='company_name')
    title = indexes.CharField(model_attr='title')

    def get_model(self):
        """Return the model class this index covers."""
        return jobpost

    def index_queryset(self, **kwargs):
        """Return all rows of the indexed model; keyword arguments are ignored."""
        return self.get_model().objects.all()

You need to update the schema.xml of your Solr instance, as the Haystack documentation says:
"You’ll need to revise your schema. You can generate this from your application (once Haystack is installed and setup) by running ./manage.py build_solr_schema. Take the output from that command and place it in apache-solr-3.5.0/example/solr/conf/schema.xml. Then restart Solr."

Related

Haystack-Whoosh not indexing any documents

I followed the Haystack tutorial to set up for Whoosh
>>> pip install whoosh
settings.py
import os
# Single Haystack connection backed by Whoosh; the index directory
# ("whoosh_index") is placed next to this settings module.
HAYSTACK_CONNECTIONS = {
    'default': {
        'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine',
        'PATH': os.path.join(
            os.path.dirname(__file__),
            'whoosh_index',
        ),
    },
}
and I am getting an empty list
>>> list(ix.searcher().documents())
[]
Following is my code for searcher_indexes.py
from haystack import indexes
from view_links.models import Projdb
class ProjdbIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index for the ``Projdb`` model."""

    # Primary document field; ``use_template=True`` means its content comes
    # from a template rather than from a single model attribute.
    text = indexes.CharField(document=True, use_template=True)
    title = indexes.CharField(model_attr='title')
    # The index field is named ``author`` but reads the model's ``owner``.
    author = indexes.CharField(model_attr='owner')
    # pub_date = indexes.DateTimeField(model_attr='date_start')

    def get_model(self):
        """Return the model class this index covers."""
        return Projdb

    def index_queryset(self, using=None):
        """Used when the entire index for model is updated."""
        return self.get_model().objects.all()  # filter(pub_date__lte=datetime.datetime.now())
I was previously able to get results for elasticsearch but when I shifted to Whoosh I am getting no results.
Thank you for your time. If you require further information, please let me know.
EDIT:
I am getting results now and here are two things I learned.
I need to register the app whose model is being used for indexing.
If a model's class name is misspelled in search_indexes.py, running python manage.py rebuild_index does not raise any error; you simply get zero indexed objects.
Did you run the command?
./manage.py rebuild_index
Do you have any Projdb records?
You have this in your code:
text = indexes.CharField(document=True, use_template=True)
Have you set up the corresponding template (projdb_text.txt)?

Haystack SearchIndex model_attr not following relation correctly?

I'm using Django Haystack v2.0.0 and Whoosh v2.4.0. According to Haystack's documentation search indexes can use Django's related field lookup in the model_attr parameter. However, running the following code using manage.py shell command:
from haystack.query import SearchQuerySet
# Compare the indexed value with the value reached through the ORM relation.
for r in SearchQuerySet():
    print(r.recruitment_agency)  # Prints True for every job
    # Prints False if r.object.employer.recruitment_agency is False
    print(r.recruitment_agency == r.object.employer.recruitment_agency)
I have tried rebuilding the index several times, the index's directory is writeable, and I don't get any error messages. All other fields work as expected.
I have the following (simplified) models:
companies/models.py:
class Company(models.Model):
    """Company with a boolean ``recruitment_agency`` flag (defaults to False)."""

    recruitment_agency = models.BooleanField(default=False)
jobs/models.py:
class Job(models.Model):
    """Job linked to a ``Company`` through the ``employer`` foreign key."""

    # The reverse accessor on Company is ``jobs``.
    employer = models.ForeignKey(Company, related_name='jobs')
jobs/search_indexes.py:
class JobIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index for the ``Job`` model."""

    text = indexes.CharField(document=True, use_template=True)
    # Follows the ``employer`` relation to read the company's flag.
    recruitment_agency = indexes.BooleanField(model_attr='employer__recruitment_agency')

    def get_model(self):
        """Return the model class this index covers."""
        return Job
jobs/forms.py:
class JobSearchForm(SearchForm):
    """Search form with an optional "Hide recruitment agencies" checkbox."""

    no_recruitment_agencies = forms.BooleanField(label="Hide recruitment agencies", required=False)

    def search(self):
        """Run the base search, then filter on ``recruitment_agency=False``
        when the checkbox is ticked."""
        sqs = super(JobSearchForm, self).search()
        if self.cleaned_data['no_recruitment_agencies']:
            sqs = sqs.filter(recruitment_agency=False)
        return sqs
Does anyone know what could be the problem?
Meanwhile I've switched over to the ElasticSearch backend, but the problem persisted, indicating that it might be a problem in haystack, and not in Whoosh.
The problem is that the Python values True and False are not saved as boolean values but as strings, and they are not converted back to boolean values. To filter on boolean values, you have to check for the strings 'true' and 'false':
class JobSearchForm(SearchForm):
    """Search form with an optional "Hide recruitment agencies" checkbox."""

    no_recruitment_agencies = forms.BooleanField(label="Hide recruitment agencies", required=False)

    def search(self):
        """Run the base search, then filter on the string ``'false'`` when
        the checkbox is ticked."""
        sqs = super(JobSearchForm, self).search()
        if self.cleaned_data['no_recruitment_agencies']:
            sqs = sqs.filter(recruitment_agency='false')  # Change the filter here
        return sqs

Elasticsearch and auto_query

In the database, the objects are named "news" and "news test".
class ItemIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index whose model is ``Serial``."""

    # Document field declared with neither ``use_template`` nor ``model_attr``.
    text = indexes.CharField(document=True)
    name = indexes.CharField(model_attr='name')
    name_alt = indexes.CharField(model_attr='name_alt')

    def get_model(self):
        """Return the model class this index covers."""
        return Serial
>>> from haystack.query import SearchQuerySet
>>> sqs = SearchQuerySet().all()
>>> sqs.count()
4
>>> SearchQuerySet().auto_query('new') # not working all query!
[]
If I use haystack.backends.simple_backend.SimpleEngine, it works.
Django==1.5.1
Elasticsearch==0.90
django-haystack==master (2.0)
Why????
It doesn't look like you're populating the all-important document field.
Your SearchIndex class has these fields:
text = indexes.CharField(document=True)
name = indexes.CharField(model_attr='name')
name_alt = indexes.CharField(model_attr='name_alt')
You've defined the data source for name and name_alt but not for text. The output from your command line search shows that that field is empty in the search index. You have several options:
Populate that field from a model attribute
Use a prepare_FOO method to prepare the content for that field
Use a template, using the use_template argument for the text field and include any and all content in that template
Now the follow up question is why did auto_query fail but a basic curl query work? Because auto_query is searching the content - the document - and that's missing.

haystack.exceptions.SearchBackendError: No fields were found in any search_indexes. Please correct this before attempting to search

I am trying to implement Haystack with whoosh.
I keep getting this error although everything seems to be configured fine. I get the error:
haystack.exceptions.SearchBackendError: No fields were found in any search_indexes. Please correct this before attempting to search.
...when I try to do ./manage.py rebuild_index
configuration:
HAYSTACK_SITECONF = 'myproject'
HAYSTACK_SEARCH_ENGINE = 'whoosh'
HAYSTACK_WHOOSH_PATH = cwd + '/whoosh/mysite_index'
The whoosh/mysite_index directories were successfully created in the root folder of my project.
*search_sites.py*
import haystack
haystack.autodiscover()
*search_indexes.py*
from haystack.indexes import *
from haystack import site
from myproject.models import *
class ResearchersIndex(SearchIndex):
    """Search index registered for the ``Researchers`` model."""

    # Primary document field, populated from a template.
    text = CharField(document=True, use_template=True)
    # Was spelled ``CharFIeld``, which raises NameError when the module loads.
    name = CharField(model_attr='name')
class SubjectIndex(SearchIndex):
    """Search index registered for the ``Subject`` model."""

    short_name = CharField(model_attr='short_name')
    name = CharField(model_attr='name')
    # Primary document field, populated from a template.
    text = CharField(document=True, use_template=True)
class ResearchIndex(SearchIndex):
    """Search index registered for the ``Research`` model."""

    # Primary document field, populated from a template.
    text = CharField(document=True, use_template=True)
    # NOTE(review): confirm that ``TextField`` is really provided by one of the
    # star-imports; haystack.indexes may not export it (``CharField`` is the
    # documented field for text).
    abstract = TextField(model_attr='abstract')
    methodology = TextField(model_attr='methodology')
    year = IntegerField(model_attr='year')
    name = CharField(model_attr='name')
# Named ``GraphIndex`` to match ``site.register(Graph, GraphIndex)``; calling
# the index class ``Graph`` would also shadow the star-imported ``Graph`` model.
class GraphIndex(SearchIndex):
    """Search index registered for the ``Graph`` model."""

    # Primary document field, populated from a template.
    text = CharField(document=True, use_template=True)
    # NOTE(review): confirm that ``TextField`` is really provided by one of the
    # star-imports; haystack.indexes may not export it (``CharField`` is the
    # documented field for text).
    explanation = TextField(model_attr='explanation')
    type = CharField(model_attr='type')
    name = CharField(model_attr='name')
site.register(Researchers, ResearchersIndex)
site.register(Subject, SubjectIndex)
site.register(Research, ResearchIndex)
site.register(Graph, GraphIndex)
Thanks
The problem is in your HAYSTACK_SITECONF. It must be the Python path to your search_sites module. Fix this and it should work.
Make sure your search_indexes.py is in an app that you have registered in INSTALLED_APPS in settings.py.

How do I query for empty MultiValueField results in Django Haystack

Using Django 1.4.2, Haystack 2.0beta, and ElasticSearch 0.19, how do I query for results which have an empty set [] for a MultiValueField?
I'd create an integer field named num_<field> and query against it.
In this example 'emails' is the MultiValueField, so we'll create 'num_emails':
class PersonIndex(indexes.SearchIndex, indexes.Indexable):
    """Person index that stores an email count alongside the email list."""

    text = indexes.CharField(document=True, use_template=True)
    name = indexes.CharField(model_attr='name')
    emails = indexes.MultiValueField(null=True)
    # Companion counter so that "no emails" can be queried as ``num_emails=0``.
    num_emails = indexes.IntegerField()

    def prepare_num_emails(self, object):
        """Return the number of emails on ``object``."""
        return len(object.emails)
Now, in your searches you can use
SearchQuerySet().filter(num_emails=0)
You can also change the prepare_ method of your MultiValueField:
def prepare_emails(self, object):
    """Return the object's emails as a list, or ``['None']`` if there are none.

    The ``'None'`` string is a placeholder so that an object without emails
    still yields a non-empty value.
    """
    return list(object.emails) or ['None']
Then you can filter:
SearchQuerySet().filter(emails=None)