Celery raised unexpected LookUp Error - django

I am using celery for my django project.
java_path = "/your/Java/jdk/home/java.exe
os.environ['JAVAHOME'] = java_path
st = POSTagger('/your/postagger/models/path/english-bidirectional-distsim.tagger','/your/postagger/jar/file/path/stanford-postagger.jar')
tag = st.tag([key])
here the key is a list of feature words.
I got following errors, when using celery to execute:
raised unexpected:
LookupError('\n\n===========================================================================\nNLTK
`was unable to find the java file!\nUse software specific configuration paramaters or set the JAVAHOME environment`
variable.\n===========================================================================',)
Traceback (most recent call last):
File "/Users/Envs/lib/python2.7/site-packages/celery/app/trace.py", line 240, in trace_task
R = retval = fun(*args, **kwargs)
File "/Users/Envs/lib/python2.7/site-packages/celery/app/trace.py", line 438, in __protected_call__
return self.run(*args, **kwargs)
File "/Users/Envs/src/evolvelearning/tasks.py", line 772, in RunProgram
tag = st.tag([key])
File "/Users/Envs/lib/python2.7/site-packages/nltk/tag/stanford.py", line 59, in tag
return self.tag_sents([tokens])[0]
File "/Users/Envs/lib/python2.7/site-packages/nltk/tag/stanford.py", line 64, in tag_sents
config_java(options=self.java_options, verbose=False)
File "/Users/Envs/lib/python2.7/site-packages/nltk/internals.py", line 82, in config_java
_java_bin = find_binary('java', bin, env_vars=['JAVAHOME', 'JAVA_HOME'], verbose=verbose, binary_names=['java.exe'])
File "/Users/Envs/lib/python2.7/site-packages/nltk/internals.py", line 544, in find_binary
binary_names, url, verbose))
File "/Users/Envs/lib/python2.7/site-packages/nltk/internals.py", line 538, in find_binary_iter
url, verbose):
File "/Users/Envs/lib/python2.7/site-packages/nltk/internals.py", line 517, in find_file_iter
raise LookupError('\n\n%s\n%s\n%s' % (div, msg, div))
LookupError:
===========================================================================
NLTK was unable to find the java file!
Use software specific configuration paramaters or set the JAVAHOME environment variable.
===========================================================================
I have set java_path and javahome and I would like to know why still thess errors occur?

my environment is MAC.
The problem is the java_path, should be:
java_path = "/your/Java/jdk/home

Related

Django celery task unable to use cairo svg2pdf

I have a small task that reads a svg file from a path and uses cairo svg2pdf to convert the svg to pdf. If I run the function without using celery delay then the function runs fine and converts the file to pdf. If I run the function as a celery task then I get some errors:
Traceback (most recent call last):
File "/..../tasks.py", line 24, in create_download_pdf
svg2pdf(bytestring=bytestring, write_to=completeName)
File "/.../lib/python3.10/site-packages/cairosvg/__init__.py", line 67, in svg2pdf
return surface.PDFSurface.convert(
File "/.../lib/python3.10/site-packages/cairosvg/surface.py", line 131, in convert
instance = cls(
File "/.../lib/python3.10/site-packages/cairosvg/surface.py", line 202, in __init__
self.cairo, self.width, self.height = self._create_surface(
File "/.../lib/python3.10/site-packages/cairosvg/surface.py", line 242, in _create_surface
cairo_surface = self.surface_class(self.output, width, height)
File "/.../lib/python3.10/site-packages/cairocffi/surfaces.py", line 876, in __init__
Surface.__init__(self, pointer, target_keep_alive=write_func)
File "/.../lib/python3.10/site-packages/cairocffi/surfaces.py", line 158, in __init__
self._check_status()
File "/../lib/python3.10/site-packages/cairocffi/surfaces.py", line 170, in _check_status
_check_status(cairo.cairo_surface_status(self._pointer))
File "/../lib/python3.10/site-packages/cairocffi/__init__.py", line 88, in _check_status
raise exception(message, status)
OSError: [Errno cairo returned CAIRO_STATUS_WRITE_ERROR: b'error while writing to output stream'] 11
Here is the function:
#app.task(name="create_download_pdf")
def create_download_pdf(folder_path: str, svg_data=None, filename=None, file_path=None) -> None:
try:
completeName = os.path.join(folder_path, f"{filename}.pdf")
if svg_data:
svg2pdf(bytestring=svg_data, write_to=completeName)
elif file_path:
with open(file_path, 'r') as f:
bytestring=f.read()
print(bytestring)
svg2pdf(bytestring=bytestring, write_to=completeName)
except (OSError, ValueError):
logger.exception(
f"PDF creation and download exception: Unable to download | create PDF from svg data for {str(filename) or ''}. \n \
{traceback.format_exc()}"
)
pass
How can this be solved. I am not sending a file to celery task, therefore, its not that problem.

nltk lookup error in Stanford Neural Dependency Parser

I am trying to use the Stanford Neural Dependency Parser provided by nltk. The problem I'm having is that when I call st = nltk.parse.stanford.StanfordNeuralDependencyParser(), I get the following error:
>>> st = nltk.parse.stanford.StanfordNeuralDependencyParser()
Traceback (most recent call last):
File "C:\Users\<user>\Anaconda2\lib\site-packages\IPython\core\interactiveshell.py", line 2885, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-5-ca2dec4f3c1f>", line 1, in <module>
st = nltk.parse.stanford.StanfordNeuralDependencyParser()
File "C:\Users\<user>\Anaconda2\lib\site-packages\nltk\parse\stanford.py", line 378, in __init__
super(StanfordNeuralDependencyParser, self).__init__(*args, **kwargs)
File "C:\Users\<user>\Anaconda2\lib\site-packages\nltk\parse\stanford.py", line 51, in __init__
key=lambda model_name: re.match(self._JAR, model_name)
File "C:\Users\<user>\Anaconda2\lib\site-packages\nltk\internals.py", line 714, in find_jar_iter
raise LookupError('\n\n%s\n%s\n%s' % (div, msg, div))
LookupError:
===========================================================================
NLTK was unable to find stanford-corenlp-(\d+)(\.(\d+))+\.jar! Set
the CLASSPATH environment variable.
For more information, on stanford-corenlp-(\d+)(\.(\d+))+\.jar, see:
<http://nlp.stanford.edu/software/lex-parser.shtml>
===========================================================================
But, when I run os.environ.get('CLASSPATH') I get the result
`C:\nltk_data\;C:\nltk_data\stanford\;C:\nltk_data\stanford\stanford-ner\`
I know that I have the corenlp jar file in C:\nltk_data\stanford\ so I run the following and end up with a slightly different error.
>>> st = nltk.parse.stanford.StanfordNeuralDependencyParser('C:\\nltk_data\\stanford\\')
Traceback (most recent call last):
File "C:\Users\<user>\Anaconda2\lib\site-packages\IPython\core\interactiveshell.py", line 2885, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-22-28d797d702d9>", line 1, in <module>
st = StanfordNeuralDependencyParser('C:\\nltk_data\\stanford\\')
File "C:\Users\<user>\Anaconda2\lib\site-packages\nltk\parse\stanford.py", line 378, in __init__
super(StanfordNeuralDependencyParser, self).__init__(*args, **kwargs)
File "C:\Users\<user>\Anaconda2\lib\site-packages\nltk\parse\stanford.py", line 51, in __init__
key=lambda model_name: re.match(self._JAR, model_name)
File "C:\Users\<user>\Anaconda2\lib\site-packages\nltk\internals.py", line 635, in find_jar_iter
(name_pattern, path_to_jar))
LookupError: Could not find stanford-corenlp-(\d+)(\.(\d+))+\.jar jar file at C:\nltk_data\stanford\
I have downloaded the jar stanford-english-corenlp-2016-01-10-models.jar from the Stanford NLP website and also renamed it to stanford-corenlp-2016-01-10.jar to try and match the pattern but I was still end up with the same errors. I have also downloaded the Stanford Parser version 3.6.0 but it doesn't contain any corenlp files.
Is there any way to get this to work, or am I misunderstanding something?

Attempting to debug terminal applications made with Python+Blessed using ipdb breaks IPython?

I am using the Blessed library to build a simple terminal application.
My application builds upon the following simple example for a dumb editor: https://github.com/jquast/blessed/blob/master/bin/editor.py
Warning: the following steps will break your IPython, and I don't know how to fix it!
For the purposes of this question, I'll just use editor.py. Let's make a couple of changes to allow debugging:
1) import ipdb
2) put in ipdb.set_trace() on line 224
Run editor.py now: python editor.py. The following error should be produced:
Traceback (most recent call last):
File "editor.py", line 14, in <module>
from manager import Manager
File "/home/abcd/python_scripts/editor.py", line 25, in <module>
import ipdb
File "/usr/local/lib/python2.7/dist-packages/ipdb/__init__.py", line 7, in <module>
from ipdb.__main__ import set_trace, post_mortem, pm, run, runcall, runeval, launch_ipdb_on_exception
File "/usr/local/lib/python2.7/dist-packages/ipdb/__main__.py", line 47, in <module>
ipapp.initialize([])
File "<decorator-gen-110>", line 2, in initialize
File "/usr/lib/python2.7/dist-packages/IPython/config/application.py", line 92, in catch_config_error
return method(app, *args, **kwargs)
File "/usr/lib/python2.7/dist-packages/IPython/terminal/ipapp.py", line 332, in initialize
self.init_shell()
File "/usr/lib/python2.7/dist-packages/IPython/terminal/ipapp.py", line 348, in init_shell
ipython_dir=self.ipython_dir, user_ns=self.user_ns)
File "/usr/lib/python2.7/dist-packages/IPython/config/configurable.py", line 354, in instance
inst = cls(*args, **kwargs)
File "/usr/lib/python2.7/dist-packages/IPython/terminal/interactiveshell.py", line 328, in __init__
**kwargs
File "/usr/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 483, in __init__
self.init_readline()
File "/usr/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 1843, in init_readline
self.readline_startup_hook = readline.set_startup_hook
AttributeError: 'module' object has no attribute 'set_startup_hook'
If you suspect this is an IPython bug, please report it at:
https://github.com/ipython/ipython/issues
or send an email to the mailing list at ipython-dev#scipy.org
You can print a more detailed traceback right now with "%tb", or use "%debug"
to interactively debug it.
Extra-detailed tracebacks for bug-reporting purposes can be enabled via:
c.Application.verbose_crash=True
Now, whenever one runs IPython by executing the ipython command, this error will be produced:
Traceback (most recent call last):
File "/usr/bin/ipython", line 5, in <module>
start_ipython()
File "/usr/lib/python2.7/dist-packages/IPython/__init__.py", line 120, in start_ipython
return launch_new_instance(argv=argv, **kwargs)
File "/usr/lib/python2.7/dist-packages/IPython/config/application.py", line 564, in launch_instance
app.initialize(argv)
File "<decorator-gen-110>", line 2, in initialize
File "/usr/lib/python2.7/dist-packages/IPython/config/application.py", line 92, in catch_config_error
return method(app, *args, **kwargs)
File "/usr/lib/python2.7/dist-packages/IPython/terminal/ipapp.py", line 332, in initialize
self.init_shell()
File "/usr/lib/python2.7/dist-packages/IPython/terminal/ipapp.py", line 348, in init_shell
ipython_dir=self.ipython_dir, user_ns=self.user_ns)
File "/usr/lib/python2.7/dist-packages/IPython/config/configurable.py", line 354, in instance
inst = cls(*args, **kwargs)
File "/usr/lib/python2.7/dist-packages/IPython/terminal/interactiveshell.py", line 328, in __init__
**kwargs
File "/usr/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 483, in __init__
self.init_readline()
File "/usr/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 1843, in init_readline
self.readline_startup_hook = readline.set_startup_hook
AttributeError: 'module' object has no attribute 'set_startup_hook'
If you suspect this is an IPython bug, please report it at:
https://github.com/ipython/ipython/issues
or send an email to the mailing list at ipython-dev#scipy.org
You can print a more detailed traceback right now with "%tb", or use "%debug"
to interactively debug it.
Extra-detailed tracebacks for bug-reporting purposes can be enabled via:
c.Application.verbose_crash=True
So, IPython seems to be globally broken. I have gotten this issue on both Cygwin and Ubuntu.
What's going wrong?

Solr issues with searching

I was using Apache Solr for quite some time and only recently started running into some severe issues with it. I'm using it with haystack and a django project. When I do it from manage.py shell i'm getting the below:
>>> from haystack.query import SearchQuerySet
>>> emps = SearchQuerySet().filter(django_ct='web.employer').filter(name__icontains='Mi')[:10]
Traceback (most recent call last):
File "<console>", line 1, in <module>
File "/usr/local/lib/python2.7/dist-packages/haystack/query.py", line 241, in __getitem__
self._fill_cache(start, bound)
File "/usr/local/lib/python2.7/dist-packages/haystack/query.py", line 140, in _fill_cache
results = self.query.get_results(**kwargs)
File "/usr/local/lib/python2.7/dist-packages/haystack/backends/__init__.py", line 469, in get_results
self.run(**kwargs)
File "/usr/local/lib/python2.7/dist-packages/haystack/backends/solr_backend.py", line 501, in run
results = self.backend.search(final_query, **search_kwargs)
File "/usr/local/lib/python2.7/dist-packages/haystack/backends/__init__.py", line 47, in wrapper
return func(obj, query_string, *args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/haystack/backends/solr_backend.py", line 202, in search
raw_results = self.conn.search(query_string, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/pysolr.py", line 578, in search
response = self._select(params)
File "/usr/local/lib/python2.7/dist-packages/pysolr.py", line 308, in _select
return self._send_request('get', path)
File "/usr/local/lib/python2.7/dist-packages/pysolr.py", line 293, in _send_request
error_message = self._extract_error(resp)
File "/usr/local/lib/python2.7/dist-packages/pysolr.py", line 372, in _extract_error
reason, full_html = self._scrape_response(resp.headers, resp.content)
File "/usr/local/lib/python2.7/dist-packages/pysolr.py", line 404, in _scrape_response
p_nodes = body_node.cssselect('p')
AttributeError: 'NoneType' object has no attribute 'cssselect'
I tried reinstalling haystack, lxml, cssselect, pysolr and still i'm getting these errors. Is there anything else I can try for this? Thanks for any help!
I also tried reading few other SO questions including this:
XML error object has no attribute 'cssselect'
Seems like the issue is with pysolr. You might find some help here.
I had the same issue persist even after bringing up pysolr and lxml to latest version.
Turned out it was because I was not using haystack generated schema which has a few additional fields compared to the default solr one.
You can confirm if this is the case by looking at your solr logs.
It is an issue with pysolr. It hasn't been fixed till 3.3.0.
The only alternative would be to override the pysolr code and make adjustments for when Solr returns a reponse status!=200.
You can check if the response has body attribute or not and make adjustments according to that.

Scrapy cannot handle bad headers properly [ScrapyHTTPPageGetter,client] Unhandled Error

Environment:
Scrapy 0.16.2
Twisted-12.2.0
python 2.7
macosx-10.6
Okey here is my problem:
I try to run
scrapy shell http://aaa.17domn.com/bt9/file.php/MERH77V.html
Error:
[ScrapyHTTPPageGetter,client] Unhandled Error
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/Twisted-12.2.0-py2.7-macosx-10.6-intel.egg/twisted/internet/selectreactor.py", line 150, in _doReadOrWrite
why = getattr(selectable, method)()
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/Twisted-12.2.0-py2.7-macosx-10.6-intel.egg/twisted/internet/tcp.py", line 202, in doRead
return self._dataReceived(data)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/Twisted-12.2.0-py2.7-macosx-10.6-intel.egg/twisted/internet/tcp.py", line 208, in _dataReceived
rval = self.protocol.dataReceived(data)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/Twisted-12.2.0-py2.7-macosx-10.6-intel.egg/twisted/protocols/basic.py", line 564, in dataReceived
why = self.lineReceived(line)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/Scrapy-0.16.2-py2.7.egg/scrapy/core/downloader/webclient.py", line 50, in lineReceived
return HTTPClient.lineReceived(self, line.rstrip())
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/Twisted-12.2.0-py2.7-macosx-10.6-intel.egg/twisted/web/http.py", line 450, in lineReceived
self.extractHeader(self._header)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/Twisted-12.2.0-py2.7-macosx-10.6-intel.egg/twisted/web/http.py", line 406, in extractHeader
key, val = header.split(':',1)
exceptions.ValueError: need more than 1 value to unpack
I found the solution from https://groups.google.com/forum/#!msg/scrapy-users/xFKo8ggzPxs/VXDl3CZ4V4cJ
They describe this is caused by twisted. Then I patched function extractHeader in /twisted/web/http.py from http://twistedmatrix.com/trac/ticket/2842. Its WORKS
BUT BUT, Hold on NOt yet!!!
I run another web
scrapy shell http://www1.wkdown.info/fs3/file.php/M994ATR.html
Error:
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/Twisted-12.2.0-py2.7-macosx-10.6-intel.egg/twisted/internet/defer.py", line 551, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/Scrapy-0.16.2-py2.7.egg/scrapy/core/downloader/webclient.py", line 122, in _build_response
status = int(self.status)
ValueError: invalid literal for int() with base 10: 'html'
I think something happen on response headers. Scrapy cannot handle it well.
Any idea?
Thank you!