How to get the "full" async result in Celery link_error callback - django

I have Celery 3.1.18 running with Django 1.6.11 and RabbitMQ 3.5.4, and trying to test my async task in a failure state (CELERY_ALWAYS_EAGER=True). However, I cannot get the proper "result" in the error callback. The example in the Celery docs shows:
@app.task(bind=True)
def error_handler(self, uuid):
    """Print the stored exception and traceback of the task ``uuid``.

    Meant to be attached as a ``link_error`` callback; the failed task's
    result is looked up through the bound task's app.
    """
    result = self.app.AsyncResult(uuid)
    print('Task {0} raised exception: {1!r}\n{2!r}'.format(
        uuid, result.result, result.traceback))
When I do this, my result is still "PENDING", result.result = '', and result.traceback=''. But the actual result returned by my .apply_async call has the right "FAILURE" state and traceback.
My code (basically a Django Rest Framework RESTful endpoint that parses a .tar.gz file, and then sends a notification back to the user, when the file is done parsing):
views.py:
from producer_main.celery import app as celery_app
@celery_app.task()
def _upload_error_simple(uuid):
    """Debug ``link_error`` callback: dump what the backend holds for ``uuid``."""
    print(uuid)
    result = celery_app.AsyncResult(uuid)
    print(result.backend)
    print(result.state)
    print(result.result)
    print(result.traceback)
    # The message is only built here; this snippet does not send it anywhere.
    msg = 'Task {0} raised exception: {1!r}\n{2!r}'.format(uuid,
                                                           result.result,
                                                           result.traceback)
class UploadNewFile(APIView):
    """Endpoint that stores an uploaded file and queues ``import_file`` for it."""

    def post(self, request, repository_id, format=None):
        """Save the first uploaded file and start the import task.

        The task is dispatched with ``_upload_error_simple`` as its
        ``link_error`` callback; the returned async result is printed for
        debugging before an empty ``Response`` is returned.
        """
        try:
            files = self.data['files']
            # First uploaded file, whatever key it was posted under.
            # list(...) keeps this working on both Python 2 and 3.
            uploaded_file = files[list(files.keys())[0]]
            self.path = default_storage.save(
                '{0}/{1}'.format(settings.MEDIA_ROOT, uploaded_file.name),
                uploaded_file)
            print(type(import_file))
            self.async_result = import_file.apply_async(
                (self.path, request.user),
                link_error=_upload_error_simple.s())
            print('results from self.async_result:')
            print(self.async_result.id)
            print(self.async_result.backend)
            print(self.async_result.state)
            print(self.async_result.result)
            print(self.async_result.traceback)
            return Response()
        except (PermissionDenied, InvalidArgument, NotFound, KeyError) as ex:
            gutils.handle_exceptions(ex)
tasks.py:
from producer_main.celery import app
from utilities.general import upload_class
@app.task
def import_file(path, user):
    """Asynchronously import a course."""
    upload_class(path, user)
celery.py:
"""
As described in
http://celery.readthedocs.org/en/latest/django/first-steps-with-django.html
"""
from __future__ import absolute_import
import os
import logging
from celery import Celery
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'producer_main.settings')
from django.conf import settings
log = logging.getLogger(__name__)
app = Celery('producer') # pylint: disable=invalid-name
# Using a string here means the worker will not have to
# pickle the object when using Windows.
app.config_from_object('django.conf:settings')
app.autodiscover_tasks(lambda: settings.INSTALLED_APPS) # pragma: no cover
@app.task(bind=True)
def debug_task(self):
    """Print the request context of the bound task."""
    print('Request: {0!r}'.format(self.request))
My backend is configured as such:
CELERY_ALWAYS_EAGER = True
CELERY_EAGER_PROPAGATES_EXCEPTIONS = False
BROKER_URL = 'amqp://'
CELERY_RESULT_BACKEND = 'redis://localhost'
CELERY_RESULT_PERSISTENT = True
CELERY_IGNORE_RESULT = False
When I run my unittest for the link_error state, I get:
Creating test database for alias 'default'...
<class 'celery.local.PromiseProxy'>
130ccf13-c2a0-4bde-8d49-e17eeb1b0115
<celery.backends.redis.RedisBackend object at 0x10aa2e110>
PENDING
None
None
results from self.async_result:
130ccf13-c2a0-4bde-8d49-e17eeb1b0115
None
FAILURE
Non .zip / .tar.gz file passed in.
Traceback (most recent call last):
So the task results are not available in my _upload_error_simple() method, but they are available from the self.async_result returned variable...

I could not get the link and link_error callbacks to work, so I finally had to use the on_failure and on_success task methods described in the docs and this SO question. My tasks.py then looks like:
class ErrorHandlingTask(Task):
    """Abstract base task that builds a user-facing message when a task ends."""

    abstract = True

    def on_failure(self, exc, task_id, targs, tkwargs, einfo):
        """Build the failure message; ``targs[0]`` is the imported file's path."""
        # Only the file name (last path component) is shown to the user.
        msg = 'Import of {0} raised exception: {1!r}'.format(targs[0].split('/')[-1],
                                                             str(exc))

    def on_success(self, retval, task_id, targs, tkwargs):
        """Build the success message."""
        msg = "Upload successful. You may now view your course."


@app.task(base=ErrorHandlingTask)
def import_file(path, user):
    """Asynchronously import a course."""
    upload_class(path, user)

You appear to have _upload_error() as a bound method of your class - this is probably not what you want. Try making it a stand-alone task:
@celery_app.task(bind=True)
def _upload_error(self, uuid):
    """Stand-alone ``link_error`` callback: build a message for failed task ``uuid``."""
    result = celery_app.AsyncResult(uuid)
    msg = 'Task {0} raised exception: {1!r}\n{2!r}'.format(uuid,
                                                           result.result,
                                                           result.traceback)
class Whatever(object):
....
self.async_result = import_file.apply_async((self.path, request.user),
link=self._upload_success.s(
"Upload finished."),
link_error=_upload_error.s())
In fact, there's no need for the self parameter since it's not used, so you could just do this:
@celery_app.task()
def _upload_error(uuid):
    """Stand-alone ``link_error`` callback: build a message for failed task ``uuid``."""
    result = celery_app.AsyncResult(uuid)
    msg = 'Task {0} raised exception: {1!r}\n{2!r}'.format(uuid,
                                                           result.result,
                                                           result.traceback)
note the absence of bind=True and self

Be careful with UUID instance!
If you try to get the status of a task using an id of type UUID rather than a string, you will only ever get the PENDING status.
from uuid import UUID
from celery.result import AsyncResult
task_id = UUID('d4337c01-4402-48e9-9e9c-6e9919d5e282')
print(AsyncResult(task_id).state)
# PENDING
print(AsyncResult(str(task_id)).state)
# SUCCESS

Related

Issue with using django celery when django signals is being used to sent email?

I have used the default Django admin panel as my backend. I have a Blogpost model. What I am trying to do is whenever an admin user saves a blogpost object on Django admin, I need to send an email to the newsletter subscribers notifying them that there is a new blog on the website.
I have to send mass emails so I am using django-celery. Also, I am using django signals to trigger the send email function.
Right now I am sending the emails without Celery, but it is too slow.
class Subscribers(models.Model):
email = models.EmailField(unique=True)
date_subscribed = models.DateField(auto_now_add=True)
def __str__(self):
return self.email
class Meta:
verbose_name_plural = "Newsletter Subscribers"
# binding signal:
@receiver(post_save, sender=BlogPost)
def send_mails(sender, instance, created, **kwargs):
    """Email every newsletter subscriber when a new blog post is created.

    Does nothing when an existing post is merely updated.
    """
    if not created:
        return
    # ``instance`` is the post that was just saved; using it avoids an extra
    # query and cannot pick up a different post saved at the same moment.
    for address in Subscribers.objects.values_list('email', flat=True):
        send_mail('New Blog Post ', f" Checkout our new blog with title {instance.title} ",
                  EMAIL_HOST_USER, [address],
                  fail_silently=False)
Following the Celery documentation, I have written the following files.
My celery.py
from __future__ import absolute_import
import os
from celery import Celery
from django.conf import settings
os.environ.setdefault('DJANGO_SETTINGS_MODULE','travel_crm.settings')
app = Celery('travel_crm')
app.config_from_object('django.conf:settings')
app.autodiscover_tasks(lambda: settings.INSTALLED_APPS)
@app.task(bind=True)
def debug_task(self):
    """Print the request context of the bound task."""
    print('Request: {0!r}'.format(self.request))
My init file:
from __future__ import absolute_import, unicode_literals
from .celery import app as celery_app
__all__ = ('celery_app',)
Tasks file from docs:
def my_first_task(duration):
    """Sleep for ``duration`` and email the outcome.

    Sends the success message when the sleep completes, otherwise the text
    of the error that interrupted it. Always returns ``'first_task_done'``.
    """
    subject = 'Celery'
    message = 'My task done successfully'
    # The original bound this to ``receiver`` but passed ``receivers`` below,
    # which raised NameError.
    receivers = 'receiver_mail@gmail.com'
    is_task_completed = False
    error = ''
    try:
        sleep(duration)
        is_task_completed = True
    except Exception as err:
        # Best effort: the failure is reported by email instead of raised.
        error = str(err)
        logger.error(error)
    body = message if is_task_completed else error
    send_mail_to(subject, body, receivers)
    return 'first_task_done'
This task doesn't fit my case because I am using a Django signal to trigger the send-email function. How can I move that logic into tasks.py?
I think I understand your question ... I was recently faced with similar challenge which included the complexity of a multi-tenant [schema] database [proved to be an issue with Redis]. I also tried django-celery, but it is dependent on a much older version of Celery. In addition, I wanted to send mass mail initiated by model signal post_save ... using EmailMultiAlternatives with 'bcc' and 'reply-to' features.
So now I am using the latest [as of this post] Django, the latest Celery with Redis ... running on macOS localhost with Poetry virtual env & package manager. The following worked for me:
Celery: I spent several hours net searching for tutorials and advice ... among others, this one added value for me Celery w Django. Good practice to dig deeper into Celery anyway if you have not done it already.
Redis: This will depend on your OS and if you are developing local or remote. The Redis website will guide you to set up. I also tried RabbitMQ but found it [personally] more complex to set up.
The code fractions: There are 4 fractions ... myapp/signals.py, myapp/tasks.py, myproj/celery.py, myproj/settings.py Disclaimer: I'm a hobby programmer ... more experienced engineers may well improve on my code ... I've done some minor testing and all seems to work.
# myapp/signals.py
@receiver(post_save, sender=MyModel)
def post_save_handler(sender, instance, **kwargs):
    """Queue the notification email when ``instance.some_field`` is set."""
    if not instance.some_field:
        return
    # NOTE(review): the task passes this list to ``bcc=``, so it presumably
    # needs to hold email address strings rather than user objects - confirm.
    recipient_list = list(get_user_model().objects.filter('some filters'))
    # Fetch the row once; it provides both addresses.
    site = SomeModel.objects.first()
    from_email = site.site_email
    to_email = site.admin_email
    # call async task Celery
    task_send_mail.delay(instance.some_field, instance.someother_field,
                         from_email, to_email, recipient_list)
# myapp/tasks.py
@shared_task(name='task_sendmail')
def task_send_mail(some_field, someother_field, from_email, to_email, recipient_list):
    """Render the email template and send it to ``to_email`` with BCC recipients.

    ``some_field`` becomes the template's ``body`` variable and
    ``someother_field`` is appended to the subject. SMTP errors are printed,
    not raised; the connection is closed either way.
    """
    template_name = 'emails/sometemplate.html'
    # These variables are added to the email template
    html_message = render_to_string(template_name, {'body': some_field})
    plain_message = strip_tags(html_message)
    subject = f'Do Not Reply : {someother_field}'
    connection = get_connection()
    connection.open()
    message = EmailMultiAlternatives(
        subject,
        plain_message,
        from_email,
        [to_email],
        bcc=recipient_list,
        reply_to=[to_email],
        # Send through the connection opened above rather than a second one.
        connection=connection,
    )
    message.attach_alternative(html_message, "text/html")
    try:
        message.send()
    except SMTPException as e:
        print('There was an error sending email: ', e)
    finally:
        connection.close()
# myproj/celery.py
import os
from celery import Celery
# Set the default Django settings module for the 'celery' program.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'myproj.settings')
app = Celery('myproj')
# Using a string here means the worker doesn't have to serialize
# the configuration object to child processes.
# - namespace='CELERY' means all celery-related configuration keys
# should have a `CELERY_` prefix.
app.config_from_object('django.conf:settings', namespace='CELERY')
# Load task modules from all registered Django apps.
app.autodiscover_tasks()
@app.task(bind=True)
def debug_task(self):
    """Print the request context of the bound task."""
    print(f'Request: {self.request!r}')
# myproj/settings.py
...
##Celery Configuration Options
CELERY_BROKER_URL = 'redis://localhost:6379//'
CELERY_TIMEZONE = "Africa/SomeCity"
CELERY_TASK_TRACK_STARTED = True
CELERY_TASK_TIME_LIMIT = 30 * 60

Tornado on pika consumer can't run

I want to build a monitoring system using RabbitMQ and Tornado. I can run the producer, and my consumer can consume the data from the queue, but the data can't be shown on the website.
This is just my experiment before I start using the real sensor.
import pika
import tornado.ioloop
import tornado.web
import tornado.websocket
import logging
from threading import Thread
logging.basicConfig(lvl=logging.INFO)
clients=[]
credentials = pika.credentials.PlainCredentials('ayub','ayub')
connection = pika.BlockingConnection(pika.ConnectionParameters('192.168.43.101',
5672,
'/',
credentials))
channel = connection.channel()
def threaded_rmq():
channel.basic_consume('Queue',
on_message_callback= consumer_callback,
auto_ack=True,
exclusive=False,
consumer_tag=None,
arguments=None)
channel.start_consuming()
def disconnect_rmq():
    """Stop consuming and close the broker connection."""
    channel.stop_consuming()
    # The module-level object is ``connection``; ``Connection`` is undefined.
    connection.close()
    logging.info('Disconnected from broker')


# The script calls ``disconnect_rmq``; keep the old misspelled name as an alias.
disconect_rmq = disconnect_rmq
def consumer_callback(ch,method,properties,body):
for itm in clients:
itm.write_message(body)
class SocketHandler(tornado.websocket.WebSocketHandler):
def open(self):
logging.info('websocket open')
clients.remove(self)
def close(self):
logging.info('websocket closed')
clients.remove(self)
class MainHandler(tornado.web.RequestHandler):
def get(self):
self.render("websocket.html")
application = tornado.web.Application([
(r'/ws',SocketHandler),
(r"/", MainHandler),
])
def startTornado():
application.listen(8888)
tornado.ioloop.IOLoop.instance().start()
def stopTornado():
tornado.ioloop.IOLoop.instance().stop()
if __name__ == "__main__":
logging.info('starting thread RMQ')
threadRMQ = Thread(target=threaded_rmq)
threadRMQ.start()
logging.info('starting thread tornado')
threadTornado = Thread(target=startTornado)
threadTornado.start()
try:
raw_input("server ready")
except SyntaxError:
pass
try:
logging.info('disconnected')
disconnect_rmq()
except Exception, e:
pass
stopTornado()
but I got this error
WARNING:tornado.access:404 GET /favicon.ico (192.168.43.10) 0.98ms
please help me
In your SocketHandler.open function you need to add the client not remove it.
Also consider using a set for clients instead of a list because the remove operation will be faster:
clients = set()
...
class SocketHandler(tornado.websocket.WebSocketHandler):
    """Track connected websocket clients in the module-level ``clients`` set."""

    def open(self):
        logging.info('websocket open')
        clients.add(self)

    def on_close(self):
        # Tornado calls ``on_close`` when the socket closes; ``close()`` is the
        # method that initiates a close and should not be overridden for this.
        logging.info('websocket closed')
        clients.discard(self)
The message you get regarding favicon.ico is actually a warning and it's harmless (the browser is requesting an icon to show for web application but won't complain if none is available).
You might also run into threading issues because Tornado and Pika are running in different threads so you will have to synchronize them; you can use Tornado's IOLoop.add_callback method for that.

Using Memcached with Flask Blueprints

Within the application context, I can't seem to set objects in memcached. Logs indicate that I connect to memcached, but when I attempt to set an object the set function returns "0" or False. Outside of the application context, I can connect to the server IP and port, and easily get and set objects. Here is my setup:
application/__init__.py
class App(Flask):
def __init__(self):
super(App, self).__init__(__name__)
self.config.from_object('app.config')
self.config.from_object('app.deployments.Prod')
logging.basicConfig(filename=self.config['LOG_PATH'] + config.LOG_FILE, level=logging.INFO, format=config.LOG_FORMAT, datefmt='%m/%d/%Y %I:%M:%S')
self.static_folder=config.STATICS
self.before_request(self.init_dbs)
self.teardown_request(self.teardown)
self.after_request(self.teardown)
try:
self.init_session()
self.init_login()
self.init_templates()
except Exception as e:
logging.info(e)
def init_dbs(self):
g.ES = init_elasticsearch(hosts=self.config['ES_HOSTS'])
g.MEMCACHED = init_memcached(host=self.config['MEMCACHED_HOST'],port=self.config['MEMCACHED_PORT'])
...
cache/__init__.py
from werkzeug.contrib.cache import MemcachedCache
import gevent
import logging
def init_memcached(host, port):
    """Return a ``MemcachedCache`` for ``host:port``, retrying until it is built.

    Any exception raised while creating the client is logged and followed by
    a one-second ``gevent.sleep`` before the next attempt.
    """
    while True:
        try:
            return MemcachedCache([host + ':' + str(port)])
        except Exception as exc:
            logging.info("Memcached not connected")
            logging.error(exc)
            gevent.sleep(1)
controllers/page.py
from flask import Blueprint, request, render_template, url_for, flash, g, redirect
from flask.views import MethodView
from flask.ext.login import current_user
from json import dumps
from app import config
...
items = Blueprint(
'items',
__name__,
template_folder=config.TEMPLATES,
)
class Item(MethodView):
def get(self,item):
result = g.MEMCACHED.get('item')
if result is None:
...
g.MEMCACHED.set('item', result, timeout=60)
return render_template('item.html',result=result)
items.add_url_rule("/path/<item>", view_func=Item.as_view('item'))
I'm assuming this has something to do with using memcached within the g object. I'd prefer setting the connection to memcached once, as I'm doing with the dbs, but it seems like memcached doesn't respond in the same way.

Getting all queries that django run on postgresql

I am working on a django-postgresql project and I need to see every query that django run on database(so I can fine-tune queries). Is there a way to get those queries.
Update: My development environment is on ubuntu linux
Well, you could just set the pgsql server to log every query. Or just to log the slow ones. Look in the postgresql.conf file, it's pretty close to self-documenting.
Check out this Question (and the two top most answers):
django orm, how to view (or log) the executed query?
You can also have a look at the Django documentation:
https://docs.djangoproject.com/en/dev/faq/models/#how-can-i-see-the-raw-sql-queries-django-is-running
Hope this helps,
Anton
You can decorate a request handler or other function with this and it will print the SQL nicely formatted, with totals at the end.
from functools import wraps
from django.utils import termcolors
format_ok = termcolors.make_style(opts=('bold',), fg='green')
format_warning = termcolors.make_style(opts=('bold',), fg='yellow')
format_error = termcolors.make_style(opts=('bold',), fg='red')
try:
from pygments import highlight
from pygments.lexers import SqlLexer
from pygments.formatters import TerminalFormatter
pygments_sql_lexer = SqlLexer()
pygments_terminal_formatter = TerminalFormatter()
highlight_sql = lambda s: highlight(s, pygments_sql_lexer,
pygments_terminal_formatter)
except ImportError:
highlight_sql = lambda s: s
def debug_sql(f):
    """
    Turn SQL statement debugging on for a test run.

    While the wrapped function runs, ``settings.DEBUG`` is forced to True so
    queries are recorded. Afterwards each query is printed with its time
    (red above 1s, yellow above 0.3s, green otherwise), followed by totals.
    """
    @wraps(f)
    def wrapper(*a, **kw):
        from django.conf import settings
        from django.db import connection, reset_queries
        # Read the old value before the try so ``finally`` can always restore it.
        debug = settings.DEBUG
        settings.DEBUG = True
        # ``connection.queries`` cannot be assigned on Django >= 1.8.
        reset_queries()
        try:
            return f(*a, **kw)
        finally:
            # Restore first so a failure while printing cannot leak DEBUG=True.
            settings.DEBUG = debug
            queries = connection.queries
            total_time = 0
            for q in queries:
                t = float(q['time'])
                total_time += t
                if t > 1:
                    fmt = format_error
                elif t > 0.3:
                    fmt = format_warning
                else:
                    fmt = format_ok
                print('[%s] %s' % (fmt(q['time']), highlight_sql(q['sql'])))
            print("total time = %s" % total_time)
            print("num queries = %s" % len(queries))
    return wrapper
Try the django debug toolbar. It'll show you all the SQL executed over the request. When something is executing way too many queries, it becomes really slow, though. For that, I've been meaning to try out this profiler. However, I've rolled this middleware on a couple of projects:
try:
    from cStringIO import StringIO
except ImportError:
    # A plain ``import StringIO`` would bind the module, making ``StringIO()``
    # fail; import the class, falling back to ``io`` on Python 3.
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
from django.conf import settings
from django.db import connection


class DatabaseProfilerMiddleware(object):
    """Replace the response with a query profile when ``?dbprof`` is requested."""

    def can(self, request):
        """Profile only in DEBUG mode and when ``dbprof`` is in the query string."""
        return settings.DEBUG and 'dbprof' in request.GET

    def process_response(self, request, response):
        """Return the profile (slowest query first) or the untouched response."""
        if self.can(request):
            out = StringIO()
            out.write('time sql\n')
            total_time = 0
            # ``time`` is stored as a string; sort numerically, slowest first.
            slowest_first = reversed(
                sorted(connection.queries, key=lambda q: float(q['time'])))
            for query in slowest_first:
                total_time += float(query['time'])
                out.write('%s %s\n' % (query['time'], query['sql']))
            response.content = '<pre style="white-space:pre-wrap">%d queries executed in %.3f seconds\n%s</pre>' \
                % (len(connection.queries), total_time, out.getvalue())
        return response
Just go to the relevant URL for the request you are interested in and add a dbprof GET parameter, you'll see the profiling output instead of the normal response.

Django: is there a way to count SQL queries from an unit test?

I am trying to find out the number of queries executed by a utility function. I have written a unit test for this function and the function is working well. What I would like to do is track the number of SQL queries executed by the function so that I can see if there is any improvement after some refactoring.
def do_something_in_the_database():
# Does something in the database
# return result
class DoSomethingTests(django.test.TestCase):
def test_function_returns_correct_values(self):
self.assertEqual(n, <number of SQL queries executed>)
EDIT: I found out that there is a pending Django feature request for this. However the ticket is still open. In the meantime is there another way to go about this?
Since Django 1.3 there is a assertNumQueries available exactly for this purpose.
One way to use it (as of Django 3.2) is as a context manager:
# measure queries of some_func and some_func2
with self.assertNumQueries(2):
result = some_func()
result2 = some_func2()
Vinay's response is correct, with one minor addition.
Django's unit test framework actually sets DEBUG to False when it runs, so no matter what you have in settings.py, you will not have anything populated in connection.queries in your unit test unless you re-enable debug mode. The Django docs explain the rationale for this as:
Regardless of the value of the DEBUG setting in your configuration file, all Django tests run with DEBUG=False. This is to ensure that the observed output of your code matches what will be seen in a production setting.
If you're certain that enabling debug will not affect your tests (such as if you're specifically testing DB hits, as it sounds like you are), the solution is to temporarily re-enable debug in your unit test, then set it back afterward:
def test_myself(self):
    """Run the test with DEBUG on so ``connection.queries`` is populated."""
    from django.conf import settings
    from django.db import connection, reset_queries
    old_debug = settings.DEBUG
    settings.DEBUG = True
    try:
        # Assigning ``connection.queries = []`` raises on Django >= 1.8.
        reset_queries()
        # Test code as normal
        self.assertTrue(connection.queries)
    finally:
        # Restore even when the test body fails.
        settings.DEBUG = old_debug
If you are using pytest, pytest-django has django_assert_num_queries fixture for this purpose:
def test_queries(django_assert_num_queries):
with django_assert_num_queries(3):
Item.objects.create('foo')
Item.objects.create('bar')
Item.objects.create('baz')
If you don't want to use TestCase (with assertNumQueries) or change settings to DEBUG=True, you can use the context manager CaptureQueriesContext (the same one assertNumQueries uses).
from django.db import connections
from django.test.utils import CaptureQueriesContext

DB_NAME = "default"  # name of db configured in settings you want to use - "default" is standard
# Use the shared handler: a fresh ConnectionHandler() opens a separate
# connection that the ORM does not use, so nothing would be captured.
connection = connections[DB_NAME]
with CaptureQueriesContext(connection) as context:
    ...  # do your thing
# Queries executed inside the block (final count minus initial count).
num_queries = len(context.captured_queries)
assert num_queries == expected_num_queries
db settings
In modern Django (>=1.8) it's well documented (it's also documented for 1.7) here, you have the method reset_queries instead of assigning connection.queries=[] which indeed is raising an error, something like that works on django>=1.8:
class QueriesTests(django.test.TestCase):
    """Count ORM queries by temporarily enabling DEBUG."""

    def test_queries(self):
        from django.conf import settings
        from django.db import connection, reset_queries
        old_debug = settings.DEBUG
        settings.DEBUG = True
        try:
            # Start counting from zero.
            reset_queries()
            # [... your ORM code ...]
            self.assertEqual(len(connection.queries), num_of_expected_queries)
        finally:
            settings.DEBUG = old_debug
            reset_queries()
You may also consider resetting queries on setUp/tearDown to ensure queries are reset for each test instead of doing it on finally clause, but this way is more explicit (although more verbose), or you can use reset_queries in the try clause as many times as you need to evaluate queries counting from 0.
Here is the working prototype of context manager withAssertNumQueriesLessThan
import json
from contextlib import contextmanager
from django.test.utils import CaptureQueriesContext
from django.db import connections
@contextmanager
def withAssertNumQueriesLessThan(self, value, using='default', verbose=False):
    """Assert that fewer than ``value`` queries run inside the ``with`` body.

    ``using`` selects the database alias. With ``verbose`` the captured
    queries are JSON-dumped into the assertion failure message.
    """
    with CaptureQueriesContext(connections[using]) as context:
        yield  # your test will be run here
    if verbose:
        msg = "\r\n%s" % json.dumps(context.captured_queries, indent=4)
    else:
        msg = None
    self.assertLess(len(context.captured_queries), value, msg=msg)
It can be simply used in your unit tests for example for checking the number of queries per Django REST API call
with self.withAssertNumQueriesLessThan(10):
response = self.client.get('contacts/')
self.assertEqual(response.status_code, 200)
You can also pass `using` to select a specific database, and `verbose=True` to include a pretty-printed list of the actual queries in the assertion failure message.
If you have DEBUG set to True in your settings.py (presumably so in your test environment) then you can count queries executed in your test as follows:
from django.db import connection
class DoSomethingTests(django.test.TestCase):
def test_something_or_other(self):
num_queries_old = len(connection.queries)
do_something_in_the_database()
num_queries_new = len(connection.queries)
self.assertEqual(n, num_queries_new - num_queries_old)
If you want to use a decorator for that there is a nice gist:
import functools
import sys
import re
from django.conf import settings
from django.db import connection
# Raw strings: "\(" in a normal string literal is an invalid escape sequence.
_SELECT_COLUMNS_RE = re.compile(r"^SELECT(.+)FROM")
_UPDATE_VALUES_RE = re.compile(r"SET(.+)WHERE")
_PARENTHESISED_RE = re.compile(r"\((.+)\)")


def shrink_select(sql):
    """Collapse the column list of a leading SELECT into ``..``."""
    return _SELECT_COLUMNS_RE.sub("SELECT .. FROM", sql)


def shrink_update(sql):
    """Collapse the assignments between SET and WHERE into ``..``."""
    return _UPDATE_VALUES_RE.sub("SET .. WHERE", sql)


def shrink_insert(sql):
    """Collapse everything from the first ``(`` to the last ``)`` into ``(..)``."""
    return _PARENTHESISED_RE.sub("(..)", sql)


def shrink_sql(sql):
    """Apply the SELECT, INSERT and UPDATE shrinkers, in that order."""
    return shrink_update(shrink_insert(shrink_select(sql)))
def _err_msg(num, expected_num, verbose, func=None):
    """Build the failure message for a query-count mismatch.

    With ``verbose`` > 0 the SQL of the last ``num`` queries is appended;
    ``verbose`` == 1 shortens each statement with ``shrink_sql``.
    """
    func_name = "%s:" % func.__name__ if func else ""
    msg = "%s Expected number of queries is %d, actual number is %d.\n" % (func_name, expected_num, num,)
    if verbose > 0:
        # ``queries[-0:]`` is the WHOLE list, so zero needs its own case.
        queries = [query['sql'] for query in connection.queries[-num:]] if num > 0 else []
        if verbose == 1:
            queries = [shrink_sql(sql) for sql in queries]
        msg += "== Queries == \n" + "\n".join(queries)
    return msg
def assertNumQueries(expected_num, verbose=1):
    """Return an object that checks exactly ``expected_num`` queries were run.

    The result works as a decorator and as a context manager. DEBUG is
    switched on for the duration so queries are recorded, then restored.
    The count is only checked when the wrapped code raised no exception.
    """
    class DecoratorOrContextManager(object):
        def __call__(self, func):  # decorator
            @functools.wraps(func)
            def inner(*args, **kwargs):
                with self:
                    return func(*args, **kwargs)
            return inner

        def __enter__(self):
            self.old_debug = settings.DEBUG
            self.old_query_count = len(connection.queries)
            settings.DEBUG = True
            return self

        def __exit__(self, exc_type, exc_value, traceback):
            # Restore DEBUG before checking so a failed check cannot leak it.
            settings.DEBUG = self.old_debug
            if exc_type is None:
                num = len(connection.queries) - self.old_query_count
                # Explicit raise: a bare ``assert`` is stripped under ``-O``.
                if num != expected_num:
                    raise AssertionError(_err_msg(num, expected_num, verbose))

    return DecoratorOrContextManager()