How to execute multiple django celery tasks with a single worker?

Here is my celery file:
from __future__ import absolute_import
import os
from celery import Celery
# set the default Django settings module for the 'celery' program.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'ets.settings')
from django.conf import settings # noqa
app = Celery('proj',
             broker='redis://myredishost:6379/0',
             backend='redis://myredishost:6379/0',
             include=['tracking.tasks'])

# Optional configuration, see the application user guide.
app.conf.update(
    CELERY_TASK_RESULT_EXPIRES=3600,
)

if __name__ == '__main__':
    app.start()
Here is my task file:
@app.task
def escalate_to_sup(id, group):
    escalation_email, created = EscalationEmail.objects.get_or_create()
    escalation_email.send()
    return 'sup email sent to: '+str(group)

@app.task
def escalate_to_fm(id, group):
    escalation_email, created = EscalationEmail.objects.get_or_create()
    escalation_email.send()
    return 'fm email sent to: '+str(group)
I start the worker like this:
celery -A ets worker -l info
I have also tried to add concurrency like this:
celery -A ets worker -l info --concurrency=10
I attempt to call the tasks above with the following:
from tracking.tasks import escalate_to_fm, escalate_to_sup
def status_change(equipment):
    r1 = escalate_to_sup.apply_async((equipment.id, [1,2]), countdown=10)
    r2 = escalate_to_fm.apply_async((equipment.id, [3,4]), countdown=20)
    print r1.id
    print r2.id
This prints:
c2098768-61fb-41a7-80a2-f79a73570966
23959fa3-7f80-4e20-a42f-eef75e9bedeb
The escalate_to_sup and escalate_to_fm functions log to the worker intermittently. At least one executes, but never both.
If I spin up more workers, both tasks execute. I start them like this:
celery -A ets worker -l info --concurrency=10 -n worker1.%h
celery -A ets worker -l info --concurrency=10 -n worker2.%h
The problem is I don't know how many tasks might execute concurrently, so spinning up a worker for every possible task is not feasible.
Does celery expect a worker for every active task?
How do I execute multiple tasks with a single worker?
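For debugging, here is a minimal sketch (assuming the same tracking.tasks module and result backend shown above; the check_both helper is hypothetical) of polling both AsyncResults to see which of the two tasks actually completed:

from time import sleep
from tracking.tasks import escalate_to_fm, escalate_to_sup

def check_both(equipment):
    # queue both tasks exactly as in status_change above
    r1 = escalate_to_sup.apply_async((equipment.id, [1, 2]), countdown=10)
    r2 = escalate_to_fm.apply_async((equipment.id, [3, 4]), countdown=20)
    sleep(30)  # wait past both countdowns
    # state is PENDING until a worker picks the task up, then SUCCESS or FAILURE
    print(r1.state)
    print(r2.state)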

Related

Call celery tasks from different server

I have a django app + redis on one server and celery on another server. I want to call a celery task from the django app.
My task.py on the Celery server:
from celery import Celery
app = Celery('tasks')
app.conf.broker_url = 'redis://localhost:6379/0'
@app.task(bind=True)
def test(self):
    print('Testing')
Calling the celery task from the Django server:
from celery import Celery
celery = Celery()
celery.conf.broker_url = 'redis://localhost:6379/0'
celery.send_task('tasks.test')
I am running the celery worker using this command:
celery -A tasks worker --loglevel=INFO
When I call the celery task from django, it pings the celery server but I get the following error:
Received unregistered task of type 'tasks.test'. The message has been
ignored and discarded.
Did you remember to import the module containing this task? Or maybe
you're using relative imports?
How do I fix this, or is there another way to call the task?
Your task should be a shared task within celery as follows:
tasks.py
from celery import Celery, shared_task
app = Celery('tasks')
app.conf.broker_url = 'redis://localhost:6379/0'
@shared_task(name="tasks.test")
def test():
    print('Testing')
and start celery as normal:
celery -A tasks worker --loglevel=INFO
Your application can then call your test task:
main.py
from celery import Celery
celery = Celery()
celery.conf.broker_url = 'redis://localhost:6379/0'
celery.send_task('tasks.test')
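If the worker still reports the task as unregistered, one extra check (not part of the original answer) is to list what the worker has actually registered; the name shown there must match the string passed to send_task:

celery -A tasks inspect registered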

Celery + SQS is receiving same task twice, with same task id at same time

I am using celery with SQS in a flask app, but celery is receiving the same task twice, with the same task id, at the same time.
I am running the worker like this:
celery worker -A app.jobs.run -l info --pidfile=/var/run/celery/celery.pid --logfile=/var/log/celery/celery.log --time-limit=7200 --concurrency=8
Here are the celery logs:
[2019-11-29 08:07:35,464: INFO/MainProcess] Received task: app.jobs.booking.bookFlightTask[657985d5-c3a3-438d-a524-dbb129529443]
[2019-11-29 08:07:35,465: INFO/MainProcess] Received task: app.jobs.booking.bookFlightTask[657985d5-c3a3-438d-a524-dbb129529443]
[2019-11-29 08:07:35,471: WARNING/ForkPoolWorker-4] in booking funtion1
[2019-11-29 08:07:35,473: WARNING/ForkPoolWorker-3] in booking funtion1
[2019-11-29 08:07:35,537: WARNING/ForkPoolWorker-3] book_request_pp
[2019-11-29 08:07:35,543: WARNING/ForkPoolWorker-4] book_request_pp
The same task is received twice and both run simultaneously.
I am using celery==4.4.0rc4, boto3==1.9.232, kombu==4.6.6 with SQS in a python flask app.
In SQS, the default visibility timeout is 30 minutes, and my task has no ETA and does not ack.
my task.py
from app import app as flask_app
from app.jobs.run import capp
from flask import request  # needed for request.json below
from flask_sqlalchemy import SQLAlchemy

db = SQLAlchemy(flask_app)

class BookingTasks:
    def addBookingToTask(self):
        request_data = request.json
        print ('in addBookingToTask',request_data['request_id'])
        print (request_data)
        bookFlightTask.delay(request_data)
        return 'addBookingToTask added'

@capp.task(max_retries=0)
def bookFlightTask(request_data):
    task_id = capp.current_task.request.id
    try:
        print ('in booking funtion1')
----
my config file, config.py
import os
from urllib.parse import quote_plus
aws_access_key = quote_plus(os.getenv('AWS_ACCESS_KEY'))
aws_secret_key = quote_plus(os.getenv('AWS_SECRET_KEY'))
broker_url = "sqs://{aws_access_key}:{aws_secret_key}@".format(
    aws_access_key=aws_access_key, aws_secret_key=aws_secret_key,
)
imports = ('app.jobs.run',)
## Using the database to store task state and results.
result_backend = 'db' + '+' + os.getenv('SQLALCHEMY_DATABASE_URI')
and lastly my celery app file, run.py
from __future__ import absolute_import, unicode_literals
import os
from celery import Celery
from flask import Flask
from app import app as flask_app
import sqlalchemy
capp = Celery()
capp.config_from_object('app.jobs.config')
# Optional configuration, see the application user guide.
capp.conf.update(
    result_expires=3600,
)

# SQS_QUEUE_NAME is like 'celery_test.fifo' , .fifo is required
capp.conf.task_default_queue = os.getenv('FLIGHT_BOOKINNG_SQS_QUEUE_NAME')

if __name__ == '__main__':
    capp.start()
The default SQS visibility_timeout is 30s. You need to update the celery config value:
broker_transport_options={'visibility_timeout': 3600}.
When celery goes to create the queue it will set the visibility timeout to 1h.
NOTE: If you specify the task_default_queue, and the queue has already been created without specifying broker_transport_options={'visibility_timeout': 3600}, celery will not update the visibility timeout when restarted with broker_transport_options={'visibility_timeout': 3600}. You will need to delete the queue and have celery recreate it.
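As a sketch, assuming the config.py shown above, the option can sit alongside the existing broker settings (3600 seconds here is just an example value):

# config.py (sketch): raise the SQS visibility timeout so celery does not
# re-deliver a message while the task that received it is still running
broker_transport_options = {'visibility_timeout': 3600}  # seconds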

Celery + Django not working at the same time

I have a Django 2.0 project that is working fine; it is integrated with Celery 4.1.0. I am using jquery to send an ajax request to the backend, but I just realized it loads endlessly due to some issue with celery.
Celery Settings (celery.py)
from __future__ import absolute_import, unicode_literals
import os
from celery import Celery
# set the default Django settings module for the 'celery' program.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'converter.settings')
app = Celery('converter', backend='amqp', broker='amqp://guest@localhost//')
# Using a string here means the worker doesn't have to serialize
# the configuration object to child processes.
# - namespace='CELERY' means all celery-related configuration keys
# should have a `CELERY_` prefix.
app.config_from_object('django.conf:settings', namespace='CELERY')
# Load task modules from all registered Django app configs.
app.autodiscover_tasks()
@app.task(bind=True)
def debug_task(self):
    print('Request: {0!r}'.format(self.request))
Celery Tasks (tasks.py)
from __future__ import absolute_import, unicode_literals
from celery import shared_task
@shared_task(time_limit=300)
def add(number1, number2):
    return number1 + number2
Django View (views.py)
class AddAjaxView(JSONResponseMixin, AjaxResponseMixin, View):
    def post_ajax(self, request, *args, **kwargs):
        url = request.POST.get('number', '')
        task = tasks.convert.delay(url, client_ip)
        result = AsyncResult(task.id)
        data = {
            'result': result.get(),
            'is_ready': True,
        }
        if result.successful():
            return self.render_json_response(data, status=200)
When I send an ajax request to the Django app it loads endlessly, but when I terminate the Django server and run celery -A demoproject worker --loglevel=info, that is when my tasks run.
Question
How do I automate this so that when I run the Django project, my celery tasks work automatically when I send an ajax request?
In a development environment you have to run the celery worker manually, as it does not run automatically in the background, in order to process the jobs in the queue. So if you want a smooth workflow, you need both the Django development server and a celery worker running. As stated in the documentation:
In a production environment you’ll want to run the worker in the background as a daemon - see Daemonization - but for testing and development it is useful to be able to start a worker instance by using the celery worker manage command, much as you’d use Django’s manage.py runserver:
celery -A proj worker -l info
You can read their documentation for daemonization.
http://docs.celeryproject.org/en/latest/userguide/daemonizing.html
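In practice that means two processes during development, for example (using the demoproject name from the question):

# terminal 1: Django development server
python manage.py runserver
# terminal 2: celery worker to process the queued tasks
celery -A demoproject worker --loglevel=info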

Django and Celery - re-loading code into Celery after a change

If I make a change to tasks.py while celery is running, is there a mechanism by which it can re-load the updated code? Or do I have to shut Celery down and re-load it?
I read celery had an --autoreload argument in older versions, but I can't find it in the current version:
celery: error: unrecognized arguments: --autoreload
Unfortunately --autoreload doesn't work and it is deprecated.
You can use Watchdog, which provides watchmedo, a shell utility to perform actions based on file events.
pip install watchdog
You can start worker with
watchmedo auto-restart -- celery worker -l info -A foo
By default it will watch for all files in current directory. These can be changed by passing corresponding parameters.
watchmedo auto-restart -d . -p '*.py' -- celery worker -l info -A foo
Add the -R option to watch files recursively.
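Putting the options together, a typical invocation (same foo app as above) might look like:

watchmedo auto-restart -d . -R -p '*.py' -- celery worker -l info -A foo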
If you are using django and don't want to depend on watchdog, there is a simple trick to achieve this. Django has an autoreload utility which is used by runserver to restart the WSGI server when code changes.
The same functionality can be used to reload celery workers. Create a separate management command called celery. Write a function to kill the existing worker and start a new worker. Now hook this function to autoreload as follows. For Django >= 2.2:
import sys
import shlex
import subprocess
from django.core.management.base import BaseCommand
from django.utils import autoreload
class Command(BaseCommand):
    def handle(self, *args, **options):
        autoreload.run_with_reloader(self._restart_celery)

    @classmethod
    def _restart_celery(cls):
        if sys.platform == "win32":
            cls.run('taskkill /f /t /im celery.exe')
            cls.run('celery -A phoenix worker --loglevel=info --pool=solo')
        else:  # probably ok for linux2, cygwin and darwin. Not sure about os2, os2emx, riscos and atheos
            cls.run('pkill celery')
            cls.run('celery worker -l info -A foo')

    @staticmethod
    def run(cmd):
        subprocess.call(shlex.split(cmd))
For django < 2.2
import sys
import shlex
import subprocess
from django.core.management.base import BaseCommand
from django.utils import autoreload
class Command(BaseCommand):
    def handle(self, *args, **options):
        autoreload.main(self._restart_celery)

    @classmethod
    def _restart_celery(cls):
        if sys.platform == "win32":
            cls.run('taskkill /f /t /im celery.exe')
            cls.run('celery -A phoenix worker --loglevel=info --pool=solo')
        else:  # probably ok for linux2, cygwin and darwin. Not sure about os2, os2emx, riscos and atheos
            cls.run('pkill celery')
            cls.run('celery worker -l info -A foo')

    @staticmethod
    def run(cmd):
        subprocess.call(shlex.split(cmd))
Now you can run the celery worker with python manage.py celery, which will autoreload when the codebase changes.
This is only for development purposes; do not use it in production.
You could try SIGHUP on the parent worker process; it restarts the worker, but I'm not sure if it picks up new tasks. Worth a shot, though :)
FYI, for anyone using Docker, I couldn't find an easy way to make the above options work, but I found (along with others) another little script here which does use watchdog and works perfectly.
Save it as a some_name.py file in your main directory, add psutil and watchdog to requirements.txt (pip install them), update the path/cmdline variables at the top, then in the worker container of your docker-compose.yml insert:
command: python ./some_name.py
Watchmedo doesn't work for me inside a docker container.
This is the way I made it work with Django:
# worker_dev.py (put it next to manage.py)
from django.utils import autoreload
def run_celery():
    from projectname import celery_app
    celery_app.worker_main(["-Aprojectname", "-linfo", "-Psolo"])
print("Starting celery worker with autoreload...")
autoreload.run_with_reloader(run_celery)
Then run python worker_dev.py or set it as your Dockerfile CMD or docker-compose command.
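For example, a possible Dockerfile CMD (a sketch; the path depends on where the file lives in your image):

CMD ["python", "worker_dev.py"]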

celery daemon production local config file without django

I am a newbie to Celery. I created a project as per the instructions provided by the celery 4.1 docs. Below are my project folders and files:
mycelery
|
+-- test_celery
    |
    +-- celery_app.py
    +-- tasks.py
    +-- __init__.py
1-celery_app.py
from __future__ import absolute_import
import os
from celery import Celery
from kombu import Queue, Exchange
from celery.schedules import crontab
import datetime
app = Celery('test_celery',
             broker='amqp://jimmy:jimmy123@localhost/jimmy_v_host',
             backend='rpc://',
             include=['test_celery.tasks'])

# Optional configuration, see the application user guide.
app.conf.update(
    result_expires=3600,
)

if __name__ == '__main__':
    app.start()

app.name
2-tasks.py
from __future__ import absolute_import
from test_celery.celery_app import app
import time
from kombu import Queue, Exchange
from celery.schedules import crontab
import datetime
app.conf.beat_schedule = {
    'planner_1': {
        'task': 'test_celery.tasks.printTask',
        'schedule': crontab(minute='*/1'),
    },
}

@app.task
def longtime_add(x, y):
    print 'long time task begins'
    # sleep 5 seconds
    time.sleep(5)
    print 'long time task finished'
    return x + y

@app.task
def printTask():
    print 'Hello i am running'
    time=str(datetime.datetime.now())
    file=open('/home/hub9/mycelery/data.log','ab')
    file.write(time)
    file.close()
I copied the celeryd and celerybeat init scripts from the Celery github project into /etc/init.d/ and made them executable. Then I created celeryd and celerybeat config files in /etc/default/.
I- /etc/default/celeryd
# Names of nodes to start
# most will only start one node:
#CELERYD_NODES="worker1"
# but you can also start multiple and configure settings
# for each in CELERYD_OPTS (see `celery multi --help` for examples).
CELERYD_NODES="worker1 worker2 worker3"
# Absolute or relative path to the 'celery' command:
CELERY_BIN="/usr/local/bin/celery"
#CELERY_BIN="/virtualenvs/def/bin/celery"
# Where to chdir at start. path to folder containing task
CELERYD_CHDIR="/home/hub9/mycelery/test_celery/"
# App instance to use
# comment out this line if you don't use an app
#CELERY_APP = "file/locatin/of/app"
# or fully qualified:
CELERY_APP="test_celery.celery_app:app"
# Extra command-line arguments to the worker
CELERYD_OPTS="--time-limit=3000 --concurrency=3 --config=celeryconfig"
# %N will be replaced with the first part of the nodename.
CELERYD_LOG_FILE="/var/log/celery/%N.log"
CELERYD_PID_FILE="/var/run/celery/%N.pid"
# Workers should run as an unprivileged user.
# You need to create this user manually (or you can choose
# a user/group combination that already exists, e.g. nobody).
CELERYD_USER="celery"
CELERYD_GROUP="celery"
# If enabled pid and log directories will be created if missing,
# and owned by the userid/group configured.
CELERY_CREATE_DIRS=1
II- /etc/default/celerybeat
# Names of nodes to start
# most will only start one node:
#CELERYD_NODES="worker1"
# but you can also start multiple and configure settings
# for each in CELERYD_OPTS (see `celery multi --help` for examples).
CELERYD_NODES="worker1 worker2 worker3"
# Absolute or relative path to the 'celery' command:
CELERY_BIN="/usr/local/bin/celery"
#CELERY_BIN="/virtualenvs/def/bin/celery"
# Where to chdir at start. path to folder containing task
CELERYD_CHDIR="/home/hub9/mycelery/test_celery/"
# App instance to use
# comment out this line if you don't use an app
#CELERY_APP = "file/locatin/of/app"
# or fully qualified:
CELERY_APP="test_celery.celery_app:app"
# Extra command-line arguments to the worker
CELERYD_OPTS="--time-limit=3000 --concurrency=3 --config=celeryconfig"
# %N will be replaced with the first part of the nodename.
CELERYD_LOG_FILE="/var/log/celery/%N.log"
CELERYD_PID_FILE="/var/run/celery/%N.pid"
# Workers should run as an unprivileged user.
# You need to create this user manually (or you can choose
# a user/group combination that already exists, e.g. nobody).
CELERYD_USER="celery"
CELERYD_GROUP="celery"
# If enabled pid and log directories will be created if missing,
# and owned by the userid/group configured.
CELERY_CREATE_DIRS=1
After that I created the celery user and group.
Here is my problem: I can successfully run this project using the celery -A test_celery.celery_app worker -l info --beat command, but when I start it using sudo service celeryd start OR sudo service celerybeat start,
it gives me an import error: no module named test_celery.celery_app.
Please give me a hint about what I am doing wrong.
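One thing to compare against the layout above (an assumption on my part, not confirmed by the post): with CELERY_APP="test_celery.celery_app:app", the daemon can only import test_celery.celery_app if it starts from the directory that contains the test_celery package, i.e. mycelery rather than test_celery itself. A sketch of the corresponding /etc/default lines:

# sketch: chdir to the parent of the test_celery package so that
# "test_celery.celery_app" is importable by the daemonized worker
CELERYD_CHDIR="/home/hub9/mycelery/"
CELERY_APP="test_celery.celery_app:app"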