How to trigger DAG3 after successful execution of DAG1 and DAG2 in Airflow?

I have created DAG1 and DAG2, each containing two tasks. In DAG3 I use TriggerDagRunOperator to trigger DAG1 and DAG2, with the wait_for_completion parameter set and a poke_interval of 15 seconds.
But the problem is that with these parameters the scheduler stops working: it simply halts.
Here is my code for DAG3:
from airflow import DAG
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
from airflow.utils.dates import days_ago
from airflow.operators.python_operator import PythonOperator

dag = DAG(
    dag_id="controller_dag",
    default_args={"owner": "airflow"},
    start_date=days_ago(2),
    schedule_interval="@once",
    tags=['example'],
)

def BYE():
    return "GOOD BYE from Controller_Dag"

trigger1 = TriggerDagRunOperator(
    task_id="trigger_dag_1",
    trigger_dag_id="target_dag_1",
    wait_for_completion=True,
    poke_interval=15,
    dag=dag,
)

trigger2 = TriggerDagRunOperator(
    task_id="trigger_dag_2",
    trigger_dag_id="target_dag_2",
    wait_for_completion=True,
    poke_interval=15,
    dag=dag,
)

Bye = PythonOperator(
    task_id="BYE",
    python_callable=BYE,
    dag=dag,
)

[trigger1, trigger2] >> Bye
If anyone knows why this is happening, please help me.
Thanks in advance!
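No answer was captured for this question, but one likely cause, offered as an assumption since the executor is not shown: with wait_for_completion=True each trigger task keeps occupying a worker slot while it polls, and on a single-slot setup such as the SequentialExecutor the target DAGs can never get a slot to run in, so everything deadlocks and the scheduler appears to hang. If you are on Airflow 2.6+ with a triggerer process running, one variant worth trying is to defer the wait so the slot is released while polling; a minimal sketch:

# Sketch only: deferrable mode requires Airflow 2.6+ and a running triggerer.
trigger1 = TriggerDagRunOperator(
    task_id="trigger_dag_1",
    trigger_dag_id="target_dag_1",
    wait_for_completion=True,
    poke_interval=15,
    deferrable=True,  # poll from the triggerer instead of holding a worker slot
    dag=dag,
)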

Related

Schedule an Airflow DAG for running with parameters

I am trying to trigger an Airflow DAG externally while passing some parameters to the DAG. The DAG is scheduled to run every 3 minutes. My problem is that the parameters are only being used by the first DAG run.
from pyexpat import model
from airflow import DAG
from datetime import datetime, timedelta
from airflow.operators.python import PythonOperator
import os

dag_id = "proj"
home_path = os.path.expanduser("~")
runpath = os.path.join(home_path, "airflow/data", dag_id)

def load_data(ti):
    import os
    train = os.path.join(runpath, "mnist")
    test = os.path.join(runpath, "mnist.t")
    model = os.path.join(runpath, "trained.mnist")
    if not os.path.exists(train):
        os.system(
            "curl https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/mnist.bz2 --output mnist.bz2"
        )
        os.system("bzip2 -d mnist.bz2")
    if not os.path.exists(test):
        os.system(
            "curl https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/mnist.t.bz2 --output mnist.t.bz2"
        )
        os.system("bzip2 -d mnist.t.bz2")
    ti.xcom_push(key="train_path", value=train)
    ti.xcom_push(key="test_path", value=test)
    ti.xcom_push(key="model_path", value=model)

def train(**context):
    import os
    ti = context["ti"]
    train = ti.xcom_pull(task_ids="load_data", key="train_path")
    model_path = ti.xcom_pull(task_ids="load_data", key="model_path")
    lr = context["dag_run"].conf["lr"]
    epochs = context["dag_run"].conf["epochs"]
    name = context["dag_run"].conf["name"]
    print(lr)
    print(epochs)
    ti.xcom_push(key="model_name", value=model_final_name)

def validate(**context):
    ti = context["ti"]
    test = ti.xcom_pull(task_ids="load_data", key="test_path")
    model_path = ti.xcom_pull(task_ids="train", key="model_name")
    print(test)
    print(model_path)

with DAG(
    dag_id="project",
    default_args={"owner": "airflow"},
    start_date=datetime(2022, 8, 8),
    schedule_interval=timedelta(minutes=3),
    tags=["mnist_4"],
    catchup=False,
) as dag:
    print(runpath)
    os.makedirs(runpath, exist_ok=True)
    os.chdir(runpath)
    read_file = PythonOperator(
        task_id="load_data",
        python_callable=load_data,
        provide_context=True,
    )
    process_train = PythonOperator(
        task_id="train",
        python_callable=train,
        provide_context=True,
    )
    validate = PythonOperator(
        task_id="validate", python_callable=validate, provide_context=True
    )
    read_file >> process_train >> validate
I trigger the dag with the command
airflow dags trigger project --conf '{"epochs":1,"name":"trial_3","lr":0.001}'
Except for one run, all the other runs have failed with the following error:
KeyError: 'lr'
When I look at the conf for the dag runs, only one run has the conf; the rest are empty.
If I look at the field External Trigger, only one run is true, which means that while triggering the dag only one run was actually triggered; the rest were scheduled.
I want to know how to pass the config to the scheduled dag runs as well.
I hope this can help.
Indeed, dag run conf works for manually triggered DAGs; in that case the conf can be passed.
For scheduled DAGs, you can set default params in your DAG; this post shows an example:
Airflow how to set default values for dag_run.conf
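For illustration, a minimal sketch of that suggestion (not the original poster's code; the dag_id and the default values are assumptions): declare fallbacks in the DAG's params and only override them when dag_run.conf actually carries values, so scheduled runs no longer raise KeyError: 'lr'.

from datetime import datetime, timedelta
from airflow import DAG
from airflow.operators.python import PythonOperator

def train(**context):
    # dag_run.conf is only populated for manually triggered runs,
    # so fall back to the DAG-level params for scheduled runs.
    conf = context["dag_run"].conf or {}
    lr = conf.get("lr", context["params"]["lr"])
    epochs = conf.get("epochs", context["params"]["epochs"])
    print(lr, epochs)

with DAG(
    dag_id="project_with_defaults",  # hypothetical dag_id for this sketch
    start_date=datetime(2022, 8, 8),
    schedule_interval=timedelta(minutes=3),
    catchup=False,
    params={"lr": 0.001, "epochs": 1},  # assumed defaults for scheduled runs
) as dag:
    process_train = PythonOperator(task_id="train", python_callable=train)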

Cloud Function error: runQuery() missing 1 required positional argument

I want to run a scheduled query when an HTTP Cloud Function is triggered.
This is written in Python 3.7:
import time
from google.protobuf.timestamp_pb2 import Timestamp
from google.cloud import bigquery_datatransfer_v1

def runQuery(parent, requested_run_time):
    client = bigquery_datatransfer_v1.DataTransferServiceClient()
    projectid = '917960740952'  # Enter your projectID here
    transferid = '630d5269-0000-2622-98d2-3c286d4314be'
    parent = client.project_transfer_config_path(projectid, transferid)
    start_time = bigquery_datatransfer_v1.types.Timestamp(seconds=int(time.time() + 10))
    response = client.start_manual_transfer_runs(parent, requested_run_time=start_time)
    print(response)
Any help, please?
I changed the parent path to the following and it worked for me. (Note that the handler signature also became runQuery(request): an HTTP-triggered Cloud Function is invoked with a single request argument, which is what caused the "missing 1 required positional argument" error in the title.)
parent = 'projects/917960740952/locations/europe/transferConfigs/630d5269-0000-2622-98d2-3c286d4314be'

from google.cloud import bigquery_datatransfer_v1
import time
from google.protobuf.timestamp_pb2 import Timestamp

def runQuery(request):
    client = bigquery_datatransfer_v1.DataTransferServiceClient()
    PROJECT_ID = 'YOUR_PROJECT_NAME'
    LOCATION_NAME = 'YOUR_TRANSFER_REGION'
    TRANSFER_CONFIG_ID = 'YOUR_TRANSFER_ID'
    parent = "projects/{0}/locations/{1}/transferConfigs/{2}".format(PROJECT_ID, LOCATION_NAME, TRANSFER_CONFIG_ID)
    start_time = bigquery_datatransfer_v1.types.Timestamp(seconds=int(time.time() + 10))
    response = client.start_manual_transfer_runs(parent, requested_run_time=start_time)
    print(response)

Django APScheduler: prevent multiple workers running a scheduled task

I use APScheduler in Django on Windows IIS to run my background script. The problem is that the task gets run multiple times. If I run the same program on my PC it only runs once, but when I upload it to the Windows server that hosts my Django app it runs several times. I guess it has some connection with the number of workers? The job is scheduled, but each time the job runs it starts what seems like a random number of instances: first 1, then 2, then 10, then again 2. Even though I have replace_existing=True, coalesce=True, misfire_grace_time=1, max_instances=1.
planer_zad.py
from apscheduler.schedulers.background import BackgroundScheduler
from blog.views import cron_mail_overdue

scheduler = BackgroundScheduler()

def start():
    scheduler.add_job(cron_mail_overdue, "cron", hour=7, minute=14, day_of_week='mon-sun',
                      id="task002", replace_existing=True, coalesce=True,
                      misfire_grace_time=10, max_instances=1)
    scheduler.start()
apps.py
from django.apps import AppConfig

class BlogConfig(AppConfig):
    name = 'blog'

    def ready(self):
        # print('Starting Scheduler...')
        from .planer import planer_zad
        planer_zad.start()
For a test I tried "interval":
scheduler.add_job(cron_mail_overdue, "interval", minutes=1, id="task002", replace_existing=True, coalesce=True, misfire_grace_time=10, max_instances=1)
Tried:
scheduler = BackgroundScheduler({
    'apscheduler.executors.default': {
        'class': 'apscheduler.executors.pool:ThreadPoolExecutor',
        'max_workers': '1'
    },
    'apscheduler.executors.processpool': {
        'type': 'processpool',
        'max_workers': '1'
    },
    'apscheduler.job_defaults.coalesce': 'True',
    'apscheduler.job_defaults.max_instances': '1',
    'apscheduler.timezone': 'UTC',
})
scheduler.add_job(cron_mail_overdue, "cron", hour=9, minute=3, second=0, day_of_week='mon-sun', id="task002", replace_existing=True, coalesce=True, misfire_grace_time=10, max_instances=1)
scheduler.start()
It does not work: sometimes it runs only once, then 12 times.
Just test whether the object already exists in ready():

# django/myapp/apps.py
import os

from django.apps import AppConfig
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger

class BlogConfig(AppConfig):
    default_auto_field = 'django.db.models.BigAutoField'
    name = 'blog'

    def __init__(self, app_name, app_module):
        super(BlogConfig, self).__init__(app_name, app_module)
        self.background_scheduler = None

    def ready(self):
        # Under the dev server, ready() runs in two processes;
        # only the main one (RUN_MAIN == 'true') starts the scheduler.
        if os.environ.get('RUN_MAIN', None) != 'true':
            return
        if self.background_scheduler is None:
            self.background_scheduler = BackgroundScheduler()
            self.background_scheduler.add_job(self.task1, CronTrigger.from_crontab('* * * * *'))  # every minute (debug)
            self.background_scheduler.start()

    def task1(self):
        print("cron task is working")
You can then call it later:

# api.py
from django.apps import apps

@router.get("/background-task")
def background_task(request):
    """
    Run a background task.
    """
    user = request.user
    blog_config = apps.get_app_config('blog')
    background_scheduler = blog_config.background_scheduler
    return {"status": "Success", "True": str(background_scheduler)}

Email is not sent using SendGrid / Cloud Composer

I'm trying to send an email using SendGrid, but the DAG is stuck on running.
I did the following:
set the environment variable SENDGRID_MAIL_FROM to my email
set the environment variable SENDGRID_API_KEY to the API key I generated from SendGrid after confirming my personal email (same as the sender email).
No spam in my email inbox.
Nothing found in the Activity section on the SendGrid page, and nothing is sent.
Can someone maybe point out what I am doing wrong?
My code:
from airflow.models import (DAG, Variable)
import os
from airflow.operators.email import EmailOperator
from datetime import datetime, timedelta

default_args = {
    'start_date': datetime(2020, 1, 1),
    'owner': 'Airflow',
    "email_on_failure": False,
    "email_on_retry": False,
    "emails": ['my@myemail.com']
}

PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "bigquery_default")
PROJECT_ID_GCP = os.environ.get("GCP_PROJECT_ID", "my_progect")

with DAG(
    'retries_test',
    schedule_interval=None,
    catchup=False,
    default_args=default_args
) as dag:
    send_email_notification = EmailOperator(
        task_id="send_email_notification",
        to="test@sendgrid.com",
        subject="test",
        html_content="<h3>Hello</h3>"
    )
    send_email_notification
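No answer was captured for this question, but one configuration worth checking, stated as an assumption since the environment overrides aren't shown: in Airflow 2, EmailOperator only routes mail through SendGrid when the email backend points at the SendGrid provider; the two environment variables alone are not enough. In airflow.cfg (or the equivalent Cloud Composer configuration override) that is:

[email]
email_backend = airflow.providers.sendgrid.utils.emailer.send_email

With that backend in place, SENDGRID_API_KEY and SENDGRID_MAIL_FROM are picked up as described above (in most provider versions; newer ones can also read them from a connection).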

Python - Telegram bot sendMessage at a specific date

I am terribly new to Python and my progress is like a snail's :(
I want to make a Telegram bot that sends a message at a specific date and time. I used the apscheduler and telepot libraries for that, and this is my code:
import telepot
import sys
import time
from time import sleep
from datetime import datetime
from apscheduler.scheduler import Scheduler
import logging

bot = telepot.Bot("***")
logging.basicConfig()
sched = Scheduler()
sched.start()
exec_date = datetime(2017, 9, 12, 1, 51, 0)

def handle(msg):
    content_type, chat_type, chat_id = telepot.glance(msg)
    print(content_type, chat_type, chat_id)
    if content_type == 'text':
        bot.sendMessage(chat_id, msg['text'])

def sendSimpleText():
    # content_type, chat_type, chat_id = telepot.glance(msg)
    # print(content_type, chat_type, chat_id)
    #
    # if content_type == 'text':
    chat_id = telepot.
    bot.sendMessage(chat_id, 'faez')

def main():
    job = sched.add_date_job(sendSimpleText, exec_date)
    while True:
        sleep(1)
        sys.stdout.write('.'); sys.stdout.flush()
    # bot.message_loop(handle)
    # # job = sched.add_date_job(sendSimpleText, '2017-09-11 21:35:00', ['testFuckBot'])
    # while True:
    #     time.sleep(10)

if __name__ == '__main__':
    main()
My question is: what do I pass to sendSimpleText as an argument in add_date_job, in this line?
job = sched.add_date_job(sendSimpleText, exec_date)
I know that msg is the message the user typed, so for add_date_job I have nothing to pass?
You are using an old (2.1.2) version of APScheduler.
The new version has a new syntax, and the add_date_job function is no longer available.
Here is a working solution for you:
import telepot
import sys
import time
from datetime import datetime
from apscheduler.schedulers.background import BackgroundScheduler
from telepot.loop import MessageLoop
import logging

bot = telepot.Bot("***YOUR_BOT_TOKEN***")
logging.basicConfig()
sched = BackgroundScheduler()
exec_date = datetime(2017, 9, 12, 3, 5, 0)

def handle(msg):
    content_type, chat_type, chat_id = telepot.glance(msg)
    print(content_type, chat_type, chat_id)
    if content_type == 'text':
        bot.sendMessage(chat_id, msg['text'])

def sendSimpleText(chat_id):
    bot.sendMessage(chat_id, 'faez')

def main():
    MessageLoop(bot, handle).run_as_thread()
    job = sched.add_job(sendSimpleText, run_date=exec_date, args=['**YOUR_TELEGRAM_ID**'])
    while True:
        time.sleep(1)
        sys.stdout.write('.'); sys.stdout.flush()

if __name__ == '__main__':
    sched.start()
    main()
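A short note on the add_job call above: in APScheduler 3.x, passing run_date without naming a trigger creates a date trigger, so sendSimpleText fires exactly once at exec_date, and the chat id in args is forwarded to it as its chat_id parameter.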