Django Huey Crontab every day at 10am

I'm using Django 4.0.4 and huey 2.4.3. What I would like to achieve is to run a task every day at 10am, using a periodic task.
My task folder path => project/apps/utils/tasks
project/
+-- apps/
|   +-- utils/
|       +-- tasks/
|           +-- __init__.py
|           +-- sms_task.py
|           +-- tasks.py
In the __init__.py file in the tasks folder I've imported all tasks:
__all__ = ["tasks", "sms_task"]
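Note that __all__ only controls what from ... import * exposes; it does not import anything by itself. A minimal sketch of an __init__.py that actually imports the submodules (assuming Huey's Django integration only auto-imports the tasks package itself, so the decorated functions still need to be pulled in here):

# project/apps/utils/tasks/__init__.py
# Importing the submodules ensures their decorated tasks are registered
# as soon as the package is imported by Huey's task autodiscovery.
from . import sms_task  # noqa: F401
from . import tasks     # noqa: F401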
Here is my Huey config in my settings.py file:
HUEY = {
    'huey_class': 'huey.RedisHuey',  # Huey implementation to use.
    'name': 'ASISPO',  # Name of the Redis connection.
    'immediate': False,
    'connection': {
        'url': env('REDIS_URL', default=None),  # Allow Redis config via a DSN.
    },
    'consumer': {
        'blocking': True,
        'loglevel': True,
        'workers': 8,  # Number of consumer workers.
        'scheduler_interval': 1,  # Check schedule every second, -s.
        'health_check_interval': 5,  # Check worker health every 5 seconds.
        'simple_log': True,
    },
}
What I've done so far in my tasks.py file:
@periodic_task(crontab(hour='10'))
def getjplus1():
    calculation = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
    filter_fields = {
        'date_ope': calculation,
        'escalade': 'False',
        'rel_dou': 'False',
        'rel_dou_ok': 'False',
        'rel_hemo': 'False',
        'rel_hemo_ok': 'False'
    }
    for key, value in filter_fields.items():
        suivis_patient = SuiviPatient.objects.filter(
            response_suivi_patient__suivi_field_name__icontains=key,
            response_suivi_patient__response__icontains=value,
            status='planifié',
            archived=False
        )
        get_suivi_patient_j1(list(set(suivis_patient)))
But it's not running at 10am. However, it works when I run the periodic task every minute, like so:
@periodic_task(crontab(minute="*/1"))
def getjplus1():
    calculation = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
    filter_fields = {
        'date_ope': calculation,
        'escalade': 'False',
        'rel_dou': 'False',
        'rel_dou_ok': 'False',
        'rel_hemo': 'False',
        'rel_hemo_ok': 'False'
    }
    for key, value in filter_fields.items():
        suivis_patient = SuiviPatient.objects.filter(
            response_suivi_patient__suivi_field_name__icontains=key,
            response_suivi_patient__response__icontains=value,
            status='planifié',
            archived=False
        )
        get_suivi_patient_j1(list(set(suivis_patient)))

Since you mentioned that you are using the huey package in your code, you can use its periodic_task decorator with a crontab schedule.
from huey import crontab

@huey.periodic_task(crontab(hour='10'))
def every_ten_in_the_morning():
    print('This task runs every day at 10 in the morning.')
You can also check its documentation to know more about huey periodic task : https://huey.readthedocs.io/en/latest/guide.html#periodic-tasks
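For the Django setup shown in the question, a minimal sketch using the Django-specific decorator from huey.contrib.djhuey would look like the following. Note that huey's crontab() defaults every unspecified field to '*', so the minute should be pinned as well to fire once a day at 10:00 (the decorator choice and task body here are assumptions, not the asker's actual code):

# tasks.py -- a sketch, assuming huey.contrib.djhuey is in use.
from huey import crontab
from huey.contrib.djhuey import db_periodic_task

@db_periodic_task(crontab(minute='0', hour='10'))
def getjplus1():
    ...  # same body as above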

Alternatively, you can install Celery (pip install django-celery) and use a Celery periodic task.
from celery.schedules import crontab
from celery.task import periodic_task

@periodic_task(run_every=crontab(hour=10, minute=0))
def every_day():
    print("This is run every day at 10:00")

Related

Executing tasks with celery at periodic schedule

I am trying to execute a task with Celery in Django. I want to execute the task at 12:30 pm every day, for which I have written this in my tasks.py:
@periodic_task(run_every=crontab(minute=30, hour=12), name="elast")
def elast():
    ...  # do something
This is not working, but if I want to schedule it every 30 seconds I write this code:
@periodic_task(run_every=(timedelta(seconds=30)), name="elast")
def elast():
    ...  # do something
This works. I wanted to know what is wrong with the first piece of code. Any help would be appreciated.
As of Celery 4.3, the code below will execute the task at 12:30 pm every day.
celery.py
from celery.schedules import crontab

app.conf.beat_schedule = {
    # Executes every day at 12:30 pm.
    'run-every-afternoon': {
        'task': 'tasks.elast',
        'schedule': crontab(hour=12, minute=30),
        'args': (),
    },
}
tasks.py
import celery

@celery.task
def elast():
    ...  # do something
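The dotted path used in beat_schedule ('tasks.elast') has to match the name Celery registers for the task. If the module layout makes that uncertain, one option is to pin the name explicitly; a rough sketch, assuming the Celery app instance is importable from proj/celery.py as app:

# tasks.py -- alternative sketch with an explicit task name (assumed layout).
from proj.celery import app

@app.task(name='tasks.elast')
def elast():
    ...  # do something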
To start the Celery beat scheduler:
celery -A proj worker -B
For older versions (around Celery 2.0):
from celery.task.schedules import crontab
from celery.decorators import periodic_task

@periodic_task(run_every=crontab(hour=12, minute=30))
def elast():
    print("code execution started.")
Please check your timezone setting.
New userguide
Old userguide
Check out the documentation, especially the parts specific to Django users. Also note that using the @periodic_task decorator is deprecated and should be replaced with beat_schedule configuration (see the code).

Import error: Python Dataflow job in Cloud Composer

I can run the single file as a Dataflow job in Cloud Composer, but when I run it as a package it fails.
pipeline_jobs/
-- __init__.py
-- run.py (main file)
-- setup.py
-- data_pipeline/
----- __init__.py
----- tasks.py
----- transform.py
----- util.py
I'm getting this error:
WARNING - File "/tmp/dataflowd232f-run.py", line 14, in <module
{gcp_dataflow_hook.py:120} WARNING - from data_pipeline.tasks import task
WARNING - ImportError: No module named data_pipeline.tasks.
This is the DAG configuration:
from datetime import datetime, timedelta
from airflow import DAG
from airflow.contrib.operators.dataflow_operator import DataFlowPythonOperator

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime.strptime("2017-11-01", "%Y-%m-%d"),
    'py_options': [],
    'dataflow_default_options': {
        'start-date': '20171101',
        'end-date': '20171101',
        'project': '<project-id>',
        'region': '<location>',
        'temp_location': 'gs://<bucket>/flow/tmp',
        'staging_location': 'gs://<bucket>/flow/staging',
        'setup_file': 'gs://<bucket>/dags/pipeline_jobs/setup.py',
        'runner': 'DataFlowRunner',
        'job_name': 'job_name_lookup',
        'task-id': 'run_pipeline'
    },
}

dag = DAG(
    dag_id='pipeline_01',
    default_args=default_args,
    max_active_runs=1,
    concurrency=1
)

task_1 = DataFlowPythonOperator(
    py_file='gs://<bucket>/dags/pipeline_jobs/run.py',
    gcp_conn_id='google_cloud_default',
    task_id='run_job',
    dag=dag)
I tried putting run.py into the dags folder but I'm still getting the same error.
Any suggestions would be really helpful.
I tried doing this as well:
from pipeline_jobs.data_pipeline.tasks import task
but still hit the same issue.
Try putting the entire pipeline_jobs/ directory in the dags folder, following this instruction, and refer to the Dataflow py file as: /home/airflow/gcs/dags/pipeline_jobs/run.py.
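Since the DAG already points dataflow_default_options['setup_file'] at gs://<bucket>/dags/pipeline_jobs/setup.py, that file also needs to declare the local package so it gets shipped to the Dataflow workers. A minimal sketch (the package metadata is assumed; only the tree above is known):

# pipeline_jobs/setup.py -- minimal sketch; find_packages() picks up
# data_pipeline/ so 'from data_pipeline.tasks import task' resolves on workers.
import setuptools

setuptools.setup(
    name='pipeline_jobs',
    version='0.0.1',
    packages=setuptools.find_packages(),
)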

Celery starts the scheduler more often than specified in the settings

What could be the problem with my Celery worker? When I run it, it executes the task more often than once a second, even though the configured interval is several minutes.
 
Running beat: "celery market_capitalizations beat -l info --scheduler django_celery_beat.schedulers:DatabaseScheduler"
Starting the worker: "celery -A market_capitalizations worker -l info -S django"
 
Maybe I'm not starting the service correctly?
Settings:
INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'exchange_rates',
    'django_celery_beat',
    'django_celery_results',
]

TIME_ZONE = 'Europe/Saratov'
USE_I18N = True
USE_L10N = True
USE_TZ = True

CELERY_BROKER_URL = 'redis://localhost:6379'
CELERY_RESULT_BACKEND = 'redis://localhost:6379'
CELERY_ACCEPT_CONTENT = ['application/json']
CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json'
CELERY_TIMEZONE = TIME_ZONE
CELERY_ENABLE_UTC = False
CELERYBEAT_SCHEDULER = 'django_celery_beat.schedulers:DatabaseScheduler'
[screenshot: running services]
When the task is started, no request is sent.
[screenshot: admin panel]
Please tell me how to make Celery pick up the task schedule from the admin page and run the task with it.
I also tried setting up the schedule in code, but it still runs more often than once a second.
 
    
from celery.schedules import crontab

app.conf.beat_schedule = {
    'add-every-5-seconds': {
        'task': 'save_exchange_rates_task',
        'schedule': 600.0,
        # 'args': (16, 16)
    },
}
 
I ran into a similar issue when using django-celery-beat, but when I turned off USE_TZ (USE_TZ = False), the issue went away.
However, that requires setting USE_TZ to False, which makes my app not timezone aware.
If you have found a solution, can you share it? Thanks.
My dev environment:
Python 3.7 + Django 2.0 + Celery 4.2 + django-celery-beat 1.4
BTW, for now I am configuring the schedule in settings and it is working well.
I am still looking for a way to use django-celery-beat so the tasks can be managed in the database (a sketch of that approach follows the config below).
CELERY_BEAT_SCHEDULE = {
    'audit-db-every-10-minutes': {
        'task': 'myapp.tasks.db_audit',
        'schedule': 600.0,  # 10 minutes
        'args': ()
    },
}
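For the database-backed variant mentioned above, the schedule can also be created through django-celery-beat's models rather than settings. A rough sketch (the task path myapp.tasks.db_audit is reused from the config above; whether this avoids the timing issue is not confirmed):

# A sketch using django_celery_beat's models to store the schedule in the DB.
from django_celery_beat.models import CrontabSchedule, PeriodicTask

schedule, _ = CrontabSchedule.objects.get_or_create(
    minute='*/10', hour='*', day_of_week='*',
    day_of_month='*', month_of_year='*',
)
PeriodicTask.objects.get_or_create(
    crontab=schedule,
    name='Audit DB every 10 minutes',
    task='myapp.tasks.db_audit',
)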

Django Celery in Docker looking for already deleted tasks

I have set up my Celery app for Django and have been using Docker to run it.
I initially ran the code with the following CELERYBEAT_SCHEDULE:
# define scheduled tasks here
CELERYBEAT_SCHEDULE = {
    'test-scheduler': {
        'task': 'users.tasks.test_print',
        'schedule': 10,  # in seconds, or timedelta(seconds=10)
    },
}
And it works fine.
Later on, I changed the name of my task as follows:
# define scheduled tasks here
CELERYBEAT_SCHEDULE = {
    'handle-email': {
        'task': 'users.tasks.handle_email_task',
        'schedule': 10,  # in seconds, or timedelta(seconds=10)
    },
}
But when I run docker-compose up --build, the following error shows up:
worker_1 | 2017-06-16T15:17:22.844376379Z KeyError: 'users.tasks.test_print'
worker_1 | 2017-06-16T15:17:52.849843783Z [2017-06-16 15:17:52,848: ERROR/MainProcess] Received unregistered task of type 'users.tasks.test_print'.
Did I miss anything? I need help solving it.
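For reference, the name in CELERYBEAT_SCHEDULE has to match a task that is actually registered in the rebuilt image, and a beat schedule persisted from the old containers can keep dispatching the old name. A minimal sketch of what users/tasks.py would need to expose (the decorator choice and body are assumptions):

# users/tasks.py -- sketch; the registered dotted path must match
# 'users.tasks.handle_email_task' in CELERYBEAT_SCHEDULE, and any
# still-running beat/worker containers need restarting so the stale
# 'users.tasks.test_print' entry is no longer dispatched.
from celery import shared_task

@shared_task
def handle_email_task():
    print('handling email')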

Python Django Celery is taking too much memory

I am running a Celery server which has 5-6 tasks that run periodically. Celery is taking too much memory after 5-6 days of continuous execution.
The Celery documentation is very confusing. I am using the following settings.
# celeryconfig.py
import os
os.environ['DJANGO_SETTINGS_MODULE'] = 'xxx.settings'

# default RabbitMQ broker
BROKER_URL = "amqp://guest:guest@localhost:5672//"

from celery.schedules import crontab

# default RabbitMQ backend
CELERY_RESULT_BACKEND = None

# 4 concurrent processes are running.
CELERYD_CONCURRENCY = 4

# specify location of log files
CELERYD_LOG_FILE = "/var/log/celery/celery.log"

CELERY_ALWAYS_EAGER = True

CELERY_IMPORTS = (
    'xxx.celerydir.cron_tasks.deprov_cron_script',
)

CELERYBEAT_SCHEDULE = {
    'deprov_cron_script': {
        'task': 'xxx.celerydir.cron_tasks.deprov_cron_script.check_deprovision_vms',
        'schedule': crontab(minute=0, hour=17),
        'args': ''
    }
}
I am running the Celery service using the nohup command (this runs it in the background):
nohup celery beat -A xxx.celerydir &
After going through the documentation, I came to know that DEBUG was True in settings.
Just change the value of DEBUG to False in settings.
REF: https://github.com/celery/celery/issues/2927
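For context, with DEBUG = True Django records every database query in memory, which grows without bound in a long-running worker. A sketch of the change, plus one additional, assumed mitigation that is not part of the linked issue:

# settings.py
DEBUG = False  # stops Django from accumulating connection.queries forever

# celeryconfig.py -- optional, assumed safeguard: recycle each worker
# process after N tasks to cap memory growth.
CELERYD_MAX_TASKS_PER_CHILD = 100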