Celery class-based task is run synchronously instead of asynchronously - Django

I have used cookiecutter to set up a Django project. The relevant parts of the project should be:
File config/settings/base.py:
INSTALLED_APPS += ['foo.taskapp.celery.CeleryAppConfig']
if USE_TZ:
    CELERY_TIMEZONE = TIME_ZONE
CELERY_BROKER_URL = env('CELERY_BROKER_URL', default='redis://redis:6379/0')
CELERY_RESULT_BACKEND = CELERY_BROKER_URL
CELERY_ACCEPT_CONTENT = ['json']
CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json'
CELERYD_TASK_TIME_LIMIT = 5 * 60
CELERYD_TASK_SOFT_TIME_LIMIT = 60

### EDIT: I had forgotten these: ###
CELERY_ALWAYS_EAGER = True  # that's the problem
CELERY_TASK_EAGER_PROPAGATES = True
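Since eager mode is exactly what makes .delay() run inline in the web process, those two lines should not live in base.py if async behaviour is wanted in development and production. A minimal sketch of keeping eager execution for tests only (assuming a config/settings/test.py module, which is not shown in the question):

# config/settings/test.py -- hypothetical test-only settings module
from .base import *  # noqa

# Run tasks inline and re-raise their exceptions, but only under test.
# With namespace='CELERY' these map to task_always_eager / task_eager_propagates.
CELERY_TASK_ALWAYS_EAGER = True
CELERY_TASK_EAGER_PROPAGATES = True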
File foo/taskapp/celery.py (should be the same as the cookiecutter default):
import os
from celery import Celery
from django.apps import apps, AppConfig
from django.conf import settings

if not settings.configured:
    # set the default Django settings module for the 'celery' program.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.local')  # pragma: no cover

app = Celery('foo')


class CeleryAppConfig(AppConfig):
    name = 'foo.taskapp'
    verbose_name = 'Celery Config'

    def ready(self):
        # Using a string here means the worker will not have to
        # pickle the object when using Windows.
        # - namespace='CELERY' means all celery-related configuration keys
        #   should have a `CELERY_` prefix.
        app.config_from_object('django.conf:settings', namespace='CELERY')
        installed_apps = [app_config.name for app_config in apps.get_app_configs()]
        app.autodiscover_tasks(lambda: installed_apps, force=True)

        if hasattr(settings, 'RAVEN_CONFIG'):
            # Celery signal registration
            # Since raven is required in production only,
            # imports might (most surely will) be wiped out
            # during PyCharm code clean up started
            # in other environments.
            # @formatter:off
            from raven import Client as RavenClient
            from raven.contrib.celery import register_signal as raven_register_signal
            from raven.contrib.celery import register_logger_signal as raven_register_logger_signal
            # @formatter:on

            raven_client = RavenClient(dsn=settings.RAVEN_CONFIG['dsn'])
            raven_register_logger_signal(raven_client)
            raven_register_signal(raven_client)


@app.task(bind=True)
def debug_task(self):
    print(f'Request: {self.request!r}')  # pragma: no cover
File foo/foo/tasks.py:
import time
from celery import Task
from foo.taskapp.celery import app


class TaskHandler(Task):
    name = "..."

    def run(self, *args, **kwargs):
        # this will block the main django thread
        time.sleep(10)

    def on_success(self, retval, task_id, args, kwargs):
        # enters here successfully
        ...

    def on_failure(self, exc, task_id, args, kwargs, einfo):
        # does NOT enter here if an exception is raised in run()
        ...


tasktype = app.register_task(TaskHandler())
# app.tasks.register(TaskHandler())  # also tried this
File foo/foo/views.py:
class FooCreateView(FormValidMessageMixin, CreateView):
...
def form_valid(self, form):
form.instance.user = self.request.user
with transaction.atomic():
# Save the self.object and render the success_url response page
response = super().form_valid(form)
...
kwargs = { ... } # json serializable
transaction.on_commit(lambda: tasktype.delay(**kwargs))
# transaction.on_commit(lambda: TaskHandler().delay(**kwargs)) # also tried
return response
The HTTP response is returned to the browser only after the Celery task has completed. No Celery log is produced, either in
$ celery worker -A foo.taskapp -l info
or in the devserver (runserver) console. I can only see the Django root logger's messages (no INFO messages either).
Any ideas?

Related

Correctly Setup Celery with Flask-Application Factory Pattern/Gunicorn/Nginx/Supervisor

I have a task of updating every single row of a MySQL table, but it's super slow. I rarely need to do it, and only when I change something fundamental, but I thought this would be a great chance to learn about multithreading. However, all the examples and tutorials online go over some things and not others, and I'm struggling to piece all the information together.
I know I need to make a Celery process, I just don't know if I'm doing it right. A lot of tutorials talk about dockerizing a Redis environment without explaining how to do it, so I thought I'd come here for some real human-to-human interaction to maybe help me feel less stupid about this. Here's my code so far:
/website/__init__.py
from flask import Flask, appcontext_popped, render_template
from flask_sqlalchemy import SQLAlchemy
from flask_login import LoginManager, UserMixin, login_user, login_required, logout_user, current_user
from flask_migrate import Migrate
from flask_wtf import CSRFProtect
import logging
import celery

# Path Math
import sys
import os

from . import config

db: SQLAlchemy = SQLAlchemy()
migrate = Migrate()
csrf = CSRFProtect()
celery: celery.Celery
DB_NAME = "main"


def create_app(name):
    # Flask Instance
    app = Flask(__name__)
    app.config.from_object(config.ProdTestConfig)

    # logging stuff

    # Database
    db.init_app(app)
    migrate.init_app(app, db)
    csrf.init_app(app)

    global celery
    celery = make_celery(app)

    with app.app_context():
        db.create_all()

    # Models and Blueprints here
    from .helper_functions import migration_handling as mgh
    # where you will find the thing I need to run async
    app.before_first_request(mgh.run_back_check)

    # log manager stuff

    # error page handling

    return app


def make_celery(app):
    celery = celery.Celery(
        app.import_name,
        backend=app.config['CELERY_RESULT_BACKEND'],
        broker=app.config['CELERY_BROKER_URL']
    )
    celery.conf.update(app.config)

    class ContextTask(celery.Task):
        def __call__(self, *args, **kwargs):
            with app.app_context():
                return self.run(*args, **kwargs)

    celery.Task = ContextTask
    return celery
I've read that some other setups seem to fit a bit better, like using:
celery = Celery(__name__, broker=Config.CELERY_BROKER_URL, result_backend=Config.RESULT_BACKEND)
Then in create_app() they run celery.conf.update(app.config) (a short sketch of that layout follows the task code below). The issue with this is that I don't know how to set up a Redis server on my Linode machine hosting the site or on my personal Windows machine. I have redis pip-installed. This is how the function I'm trying to run async looks:
@celery.task(name='app.tasks.campaign_pay_out_process')
def campaign_pay_out_process():
    '''
    Process Every Campaigns Pay
    '''
    campaign: Campaigns
    for campaign in Campaigns.query.filter_by():
        campaign.process_pay()
    db.session.commit()
    current_app.logger.info('Done Campaign Pay Out Processing')
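For reference, the module-level variant mentioned above usually looks roughly like this. This is a sketch only; the extensions.py module name and the Config attribute names are assumptions, not code from this project:

# website/extensions.py  (hypothetical module holding the Celery instance)
from celery import Celery
from .config import Config  # assumes Config defines the two CELERY_* attributes

celery = Celery(__name__,
                broker=Config.CELERY_BROKER_URL,
                backend=Config.CELERY_RESULT_BACKEND)


# website/__init__.py
from flask import Flask
from .config import Config
from .extensions import celery

def create_app():
    app = Flask(__name__)
    app.config.from_object(Config)
    celery.conf.update(app.config)  # pick up any remaining CELERY_* settings
    # ...init db/migrate/csrf and register blueprints as before...
    return app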
I'm running gunicorn off of Supervisor because restarting is super easy, and ridding my life of super long Linux commands to start a process has been great. I know this is the command for Celery: celery -A celery_worker.celery worker --pool=solo --loglevel=info, and I'd love to know how to include that in my workflow (a sketch of a matching Supervisor entry follows the config below). Here's my Supervisor config:
[program:paymentwebapp]
directory=/home/sai/paymentWebApp
command=/home/sai/paymentWebApp/venv/bin/gunicorn --workers 1 --threads 3 wsgi:app
user=sai
autostart=true
autorestart=true
stopasgroup=true
killasgroup=true
stderr_logfile=/var/log/paymentwebapp/paymentwebapp.err.log
stdout_logfile=/var/log/paymentwebapp/paymentwebapp.out.log
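To fold the quoted worker command into the same workflow, one option is a second Supervisor program entry next to the gunicorn one. This is a sketch reusing the paths above; the program name and log file names are made up, and it assumes Celery is installed in the same virtualenv:

[program:paymentwebapp-celery]
directory=/home/sai/paymentWebApp
command=/home/sai/paymentWebApp/venv/bin/celery -A celery_worker.celery worker --pool=solo --loglevel=info
user=sai
autostart=true
autorestart=true
stopasgroup=true
killasgroup=true
stderr_logfile=/var/log/paymentwebapp/celery.err.log
stdout_logfile=/var/log/paymentwebapp/celery.out.log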
Here's my Flask config right now:
from os import environ, path
from dotenv import load_dotenv

DB_NAME = "main"


class Config:
    """Base config."""
    # SESSION_COOKIE_NAME = environ.get('SESSION_COOKIE_NAME')
    MAX_CONTENT_LENGTH = 16 * 1000 * 1000
    RECEIPT_FOLDER = '../uploads/receipts'
    IMPORT_FOLDER = 'uploads/imports'
    UPLOAD_FOLDER = 'uploads'
    EXPORT_FOLDER = '/uploads/exports'
    UPLOAD_EXTENSIONS = ['.jpg', '.png', '.pdf', '.csv', '.xls', '.xlsx']
    STATIC_FOLDER = 'static'
    TEMPLATES_FOLDER = 'templates'


class ProdConfig(Config):
    basedir = path.abspath(path.dirname(__file__))
    load_dotenv('/home/sai/.env')
    env_dict = dict(environ)

    FLASK_ENV = 'production'
    DEBUG = False
    TESTING = False
    SQLALCHEMY_DATABASE_URI = environ.get('PROD_DATABASE_URI')
    SECRET_KEY = environ.get('SECRET_KEY')
    SERVER_NAME = environ.get('SERVER_NAME')
    SESSION_COOKIE_SECURE = True
    WTF_CSRF_TIME_LIMIT = 600
    # Uploads


class DevConfig(Config):
    basedir = path.abspath(path.dirname(__file__))
    load_dotenv('C:\saiscripts\intercept_branch\Payment Web App Project\.env')
    env_dict = dict(environ)

    FLASK_ENV = 'development'
    DEBUG = True
    SQLALCHEMY_DATABASE_URI = environ.get('DEV_DATABASE_URI')
    SECRET_KEY = environ.get('SECRET_KEY')


class ProdTestConfig(DevConfig):
    '''
    Developer config settings but production database server
    '''
    SQLALCHEMY_DATABASE_URI = environ.get('PROD_DATABASE_URI')


if __name__ == '__main__':
    print(environ.get('SQLALCHEMY_DATABASE_URI'))
This is where I copied some code from a tutorial because I'm supposed to make a celery worker:
#!/usr/bin/env python
import os
#from app import create_app, celery
from website import create_app
app = create_app()
app.app_context().push()
from website import celery

celery beat using flask task issue

I would like to run cron jobs from Flask using Celery, but I have an issue with the Celery beat schedule: it seems that my task is never loaded, and I don't know how to check where the issue is.
This is where I define my Flask app, in views.py:
from flask import Flask
from celery.schedules import crontab

app = Flask(__name__)
app.config.update(
    CELERY_BROKER_URL='redis://localhost:6379',
    CELERY_RESULT_BACKEND='redis://localhost:6379',
    CELERY_BEAT_SCHEDULE={
        'task-number-one': {
            'task': 'app.tasks.test',
            'schedule': crontab(minute="*"),
        }
    },
    CELERY_IMPORTS=('app.tasks'),
    CELERY_TASK_RESULT_EXPIRES=30,
    CELERY_TIMEZONE='UTC',
)
and this is where my celery object is created, in tasks.py:
from celery import Celery
from app.views import app
from celery import shared_task


def make_celery(app):
    celery = Celery(app.import_name, backend=app.config['CELERY_RESULT_BACKEND'],
                    broker=app.config['CELERY_BROKER_URL'])
    celery.conf.update(app.config)
    TaskBase = celery.Task

    class ContextTask(TaskBase):
        abstract = True

        def __call__(self, *args, **kwargs):
            with app.app_context():
                return TaskBase.__call__(self, *args, **kwargs)

    celery.Task = ContextTask
    return celery


celery_app = make_celery(app)


@celery_app.task()
def test():
    logger = test.get_logger()
    logger.info("Hello")
views.py and tasks.py are under the same directory, which is called app.
This is what I get when launching the celery worker (everything seems normal here):
But this is what I get when launching celery beat: it seems that my task is never sent by my schedule, and I don't know where to check:
Can you help me with this?
Best
I believe Celery beat tasks need to be configured at least after the @app.on_after_configure.connect signal is sent. You should be able to do the following in your tasks.py file:
celery_app.conf.CELERYBEAT_SCHEDULE = {
    "test-every-minute": {
        "task": "tasks.test",
        "schedule": crontab(minute="*"),
    },
}
Alternatively you can use this decorator syntax if your task is defined in the same file as your celery application instance.
@celery_app.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
    sender.add_periodic_task(5, test_two.s(), name='test-every-5')
If your tasks are defined in a separate module, you can use the @app.on_after_finalize.connect decorator after importing your tasks.
@app.on_after_finalize.connect
def setup_periodic_tasks(sender, **kwargs):
    from app.tasks import test
    sender.add_periodic_task(10.0, test.s(), name='test-every-10')
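Whichever variant you pick, the schedule only fires while a beat process is running alongside a worker, along these lines (the -A path is an assumption based on where celery_app is defined in this question):

celery -A app.tasks.celery_app worker --loglevel=info
celery -A app.tasks.celery_app beat --loglevel=info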
Celery Beat Entry Docs

celery task status showing pending

I am working with Celery and my task status is stuck at PENDING; maybe it is an implementation problem. Please check my code.
I am trying to save task info like id, name, and status in my MongoDB database; for this I am using a function which my task calls to save the data in MongoDB.
Am I getting my task stuck in PENDING because my function call happens before the return statement of the task?
settings.py
CELERY_BROKER_URL = 'mongodb://localhost:27017/jobs'
CELERY_RESULT_BACKEND = "mongodb"
CELERY_IGNORE_RESULT = False
CELERY_TRACK_STARTED = True
CELERY_MONGODB_BACKEND_SETTINGS = {
    "host": "127.0.0.1",
    "port": 27017,
    "database": "jobs",
    "taskmeta_collection": "my_taskmeta_collection",
}
CELERY_BEAT_SCHEDULE = {
    'add-every-minute-contrab': {
        'task': 'username_length_periodically',
        'schedule': crontab(minute='*/1'),
        # 'args': (2, 3),
    },
}
CELERY_ACCEPT_CONTENT = ['application/json']
CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json'
CELERY_TIMEZONE = TIME_ZONE
celery.py
from __future__ import absolute_import, unicode_literals
import os, logging
from celery import Celery
from celery.schedules import crontab

# set the default Django settings module for the 'celery' program.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'RestUserAPI.settings')

app = Celery('UserAPI')

# Using a string here means the worker doesn't have to serialize
# the configuration object to child processes.
# - namespace='CELERY' means all celery-related configuration keys
#   should have a `CELERY_` prefix.
app.config_from_object('django.conf:settings', namespace='CELERY')

# Load task modules from all registered Django app configs.
app.autodiscover_tasks()


@app.task(bind=True)
def debug_task(self):
    print('Request: {0!r}'.format(self.request))
tasks.py
from __future__ import absolute_import, unicode_literals
from celery import task, current_task, result
from django.conf import settings
import datetime

from .models import TaskMetaData


@task(name='username_length_periodically', bind=True)
def get_username_length_periodically(self):
    last_run = datetime.datetime.now()
    dict = {'name': self.name,
            'id': self.request.id,
            'status': self.AsyncResult(self.request.id).state,
            'last_run': last_run,
            }
    store_metadata(dict)
    return dict


def store_metadata(dict):
    metadata = TaskMetaData()
    metadata.task_id = dict['id']
    metadata.task_name = dict['name']
    metadata.task_status = dict['status']
    metadata.task_last_run = dict['last_run']
    metadata.save()
I think this is just a plain old logic error. If you take a look at your call to check the status of the task using AsyncResult:
'status': self.AsyncResult(self.request.id).state,
you'll notice that you are checking the status of the task while the task is still running. That means it will always show state PENDING (unless you have task_track_started set), because you are checking the status from inside the task itself and never go back to update it afterwards.
In order to update the status of the task, you should kick off a separate monitoring task that periodically checks the status and records it to the database until the task is finished or errors out, e.g.:
from celery import states
from celery.result import AsyncResult


@app.task(name='monitor')
def monitor(task_id):
    result = AsyncResult(task_id)
    if result.state in states.READY_STATES:
        # update metadata table for the task_id
        ...
    else:
        monitor.apply_async(kwargs={'task_id': task_id}, countdown=60)
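For example, since the periodic task above is bound, it could schedule the monitor for its own run instead of recording PENDING directly (a sketch reusing self.request.id from the task above):

# inside get_username_length_periodically(self), instead of storing the state immediately:
monitor.apply_async(kwargs={'task_id': self.request.id}, countdown=60)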

Why does a call to a Celery task block forever?

I am following the tutorial at http://docs.celeryproject.org/en/master/django/first-steps-with-django.html:
from __future__ import absolute_import, unicode_literals
import os
from celery import Celery
# set the default Django settings module for the 'celery' program.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'cobros.settings')
app = Celery('cobros')
# Using a string here means the worker doesn't have to serialize
# the configuration object to child processes.
# - namespace='CELERY' means all celery-related configuration keys
# should have a `CELERY_` prefix.
app.config_from_object('django.conf:settings', namespace='CELERY')
# Load task modules from all registered Django app configs.
app.autodiscover_tasks()
# Settings
CELERY_APP="cobros"
CELERY_BROKER_URL = 'redis://localhost:6379/0'
CELERY_RESULT_BACKEND = 'redis://localhost:6379'
CELERY_ACCEPT_CONTENT = ['application/json']
CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json'
CELERY_TIMEZONE = TIME_ZONE
and call my first job:
@task
def add(x, y):
    logger.info("Sum ", x, y)
    return x + y


def syncClientAsync():
    baseDir = getBaseDir(str(cobrador.id))
    print("Delay...")
    return add.delay(4, 4)  # No work
    return add.delay(4, 4).get(timeout=1)  # No work
When I run the code, Python gets stuck/blocked forever. Redis and Celery are running and do not report any error or problem.

Issues while integrating tornado app with django site

I have a simple chat application in Tornado powered by RethinkDB.
I am trying to integrate this Tornado chat application to run with a Django site.
For that reason, I have made the changes below in rechat.py so that it works with Django:
Imported tornado.wsgi and django.core.wsgi (get_wsgi_application).
Set the environment variable for Django's settings.py: os.environ['DJANGO_SETTINGS_MODULE'] = 'djangoapp.settings'.
When I try to run it after the above changes, it connects to the db server but doesn't do anything. What am I missing?
How can I make this Tornado app work with a Django 1.8 site?
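For context, those two pieces are usually wired together roughly like this, so that any URL Tornado does not handle itself falls back to the Django site (a sketch only, not part of the code below):

import tornado.web
import tornado.wsgi
from django.core.wsgi import get_wsgi_application

# DJANGO_SETTINGS_MODULE must already be set, as done above.
django_wsgi = tornado.wsgi.WSGIContainer(get_wsgi_application())

handlers = [
    # ...the Tornado chat handlers first...
    (r".*", tornado.web.FallbackHandler, dict(fallback=django_wsgi)),
]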
Below is my code for rechat.py (https://github.com/v3ss0n/rechat):
import logging
import tornado.escape
from tornado.ioloop import IOLoop
import tornado.web
import os.path
import rethinkdb as r
from tornado import httpserver
from time import time
# from tornado.concurrent import Future
from tornado import gen
from tornado.options import define, options, parse_command_line
import tornado.wsgi
from django.core.wsgi import get_wsgi_application
define("port", default=8888, help="run on the given port", type=int)
define("debug", default=True, help="run in debug mode")
def setup_db(db_name="rechat", tables=['events']):
    connection = r.connect(host="localhost")
    try:
        r.db_create(db_name).run(connection)
        for tbl in tables:
            r.db(db_name).table_create(tbl, durability="hard").run(connection)
        logging.info('Database setup completed.')
    except r.RqlRuntimeError:
        logging.warn('Database/Table already exists.')
    finally:
        connection.close()
class RechatApp(tornado.web.Application):

    def __init__(self, db):
        handlers = [
            (r"/", MainHandler),
            (r"/a/message/new", MessageNewHandler),
            (r"/a/message/updates", MessageUpdatesHandler),
        ]
        settings = dict(cookie_secret="_asdfasdaasdfasfas",
                        template_path=os.path.join(
                            os.path.dirname(__file__), "templates"),
                        static_path=os.path.join(
                            os.path.dirname(__file__), "static"),
                        xsrf_cookies=True,
                        debug=options.debug)
        self.db = db
        logging.info(db)
        tornado.web.Application.__init__(self, handlers, **settings)


class BaseHandler(tornado.web.RequestHandler):

    def initialize(self):
        self.db = self.application.db
        self.evt = r.table("events")
class MainHandler(BaseHandler):

    @gen.coroutine
    def get(self):
        curs = yield self.evt.run(self.db)
        messages = []
        while (yield curs.fetch_next()):
            item = yield curs.next()
            messages.append(item)
        self.render("index.html", messages=messages)
class MessageNewHandler(BaseHandler):

    @gen.coroutine
    def post(self):
        message = {
            "body": self.get_argument("body")
        }
        # to_basestring is necessary for Python 3's json encoder,
        # which doesn't accept byte strings.
        start = time()
        messages = (yield self.evt.insert(message).run(self.db))
        time_taken = time() - start
        logging.warn("DBINSERT: %s seconds" % time_taken)
        message['id'] = messages['generated_keys'][0]
        message["html"] = tornado.escape.to_basestring(
            self.render_string("message.html", message=message))
        if self.get_argument("next", None):
            self.redirect(self.get_argument("next"))
        else:
            self.write(message)
class MessageUpdatesHandler(BaseHandler):

    @gen.coroutine
    def post(self):
        curs = yield self.evt.changes().run(self.db)
        while (yield curs.fetch_next()):
            feed = yield curs.next()
            message = {
                'id': feed['new_val']['id'],
                'html': tornado.escape.to_basestring(
                    self.render_string("message.html",
                                       message=feed['new_val']))}
            break
        self.finish(dict(messages=[message]))
@gen.coroutine
def main():
    """Async main method. It needs to be async because r.connect is async."""
    parse_command_line()
    os.environ['DJANGO_SETTINGS_MODULE'] = 'djangoapp.settings'
    db_name = "rechat"
    setup_db(db_name)
    r.set_loop_type("tornado")
    db = yield r.connect("localhost", db=db_name)
    # Single db connection for everything, thanks a lot Ben and Jeese
    http_server = httpserver.HTTPServer(RechatApp(db))
    http_server.listen(options.port)


if __name__ == "__main__":
    IOLoop.current().run_sync(main)
    IOLoop.current().start()