Why does APScheduler's get_jobs() return empty? - python-2.7

This is my test.py:
from datetime import datetime, timedelta
import sys
import os
from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
from apscheduler.jobstores.redis import RedisJobStore

jobstores = {
    #'default': SQLAlchemyJobStore(url='sqlite:///jobs.sqlite')
    'default': RedisJobStore(host='localhost', port=6379)
}
scheduler = BlockingScheduler(jobstores=jobstores)

def alarm(time):
    print('Alarm! This alarm was scheduled at %s.' % time)

if __name__ == '__main__':
    alarm_time = datetime.now() + timedelta(seconds=10)
    scheduler.add_job(alarm, 'interval', seconds=10, args=[datetime.now()], name='alarm_test')
    print('To clear the alarms, delete the example.sqlite file.')
    print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C'))
    try:
        scheduler.start()
    except (KeyboardInterrupt, SystemExit):
        pass
I run python test.py and the job runs successfully.
Then, in another terminal (via PuTTY), I start a Python shell:

python
>>> import redis
>>> from test import *
>>> r = redis.Redis()
>>> r.keys()
>>> r.zrange('apscheduler.run_times', 0, 1)

This finds the job id 57841c0ee05249efb466882265f2c495, but

>>> ret = scheduler.get_jobs(jobstore='default')

returns an empty list. Why? Thanks a lot.

Have you started the scheduler before running get_jobs()? If not, it will only list tentatively scheduled jobs. That's why you're not seeing the job.
Try this instead:
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.jobstores.redis import RedisJobStore
scheduler = BackgroundScheduler()
scheduler.add_jobstore('redis', host='localhost', port=6379)
scheduler.start(paused=True)
scheduler.print_jobs()
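
Once the scheduler has actually been started (paused is fine), the jobs persisted in Redis are loaded from the jobstore, so a check along these lines should list them (a small sketch continuing the snippet above; 'default' is the alias add_jobstore assigned):

jobs = scheduler.get_jobs(jobstore='default')
for job in jobs:
    print(job.id, job.name, job.next_run_time)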

Related

APScheduler duplicating jobs in Flask app hosted on Heroku

I have an app structured as below which allows a user to schedule tasks that query the FB API each morning, afternoon etc. to pull page/post data (3am in this case).
The problem I am experiencing is that the scheduler executes each job twice, which is obviously undesirable. Oddly, the issue doesn't seem to occur locally, only in production, where I am hosting this on Heroku with 1 web dyno and 1 worker dyno, each running a single process. I am therefore leaning towards Heroku being the issue.
I have a page that lists the currently scheduled jobs, and when I inspect it, on each refresh the same job instance flicks between two different values (screenshot below). It is as if there are two BackgroundScheduler instances running.
I suspected this had to do with the BackgroundScheduler being initialised twice (once in flasky.py and again in tasks.py), so I created a temporary workaround to stop the double initialisation, but I still experience the same issue and am now stuck. Any help would be much appreciated.
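For reference, a common way to make sure only one process ever starts a scheduler is to gate the start on something only a single dyno has, for example an environment variable (a hypothetical sketch, not code from the original app; RUN_SCHEDULER is a made-up variable you would set on exactly one dyno):

import os
from apscheduler.schedulers.background import BackgroundScheduler

scheduler = BackgroundScheduler()

def start_scheduler_once(jobstore_url):
    # Only the process whose environment opts in ever starts the scheduler.
    if os.environ.get('RUN_SCHEDULER') != '1':
        return
    if not scheduler.running:
        scheduler.add_jobstore('sqlalchemy', url=jobstore_url)
        scheduler.start()
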
-->app
    -->__init__.py
    -->decorators.py
    -->models.py
    -->tasks.py
    -->auth
        -->__init__.py
        -->errors.py
        -->forms.py
        -->views.py
    -->main
        -->__init__.py
        -->errors.py
        -->forms.py
        -->views.py
    -->static
    -->templates
-->migrations
config.py
flasky.py
Procfile
requirements.txt
app/flasky.py
from flask import Flask,render_template, session, redirect, url_for, flash
import os
from app import create_app,db
from app.models import User,Role
from datetime import datetime
from flask_migrate import Migrate,upgrade
app = create_app(os.getenv('FLASK_CONFIG') or 'default')
app/__init__.py
from flask import Flask, render_template
from flask_bootstrap import Bootstrap
from flask_moment import Moment
from flask_migrate import Migrate
from flask_sqlalchemy import SQLAlchemy
from config import config
from flask_session import Session
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
import rq
from redis import Redis
from flask_login import LoginManager
from worker import conn
import os

bootstrap = Bootstrap()
moment = Moment()
db = SQLAlchemy()
scheduler = BackgroundScheduler()
migrate = Migrate()
session = Session()
login_manager = LoginManager()
login_manager.login_view = 'auth.login'

def create_app(config_name='default'):
    app = Flask(__name__)
    app.config.from_object(config[config_name])
    db.init_app(app)
    bootstrap.init_app(app)
    moment.init_app(app)
    migrate.init_app(app, db)
    session.init_app(app)
    login_manager.init_app(app)

    if not scheduler.running:
        scheduler.start()
    jobstore_url = os.environ.get('DATABASE_URL')
    scheduler.add_jobstore(SQLAlchemyJobStore(url=jobstore_url), 'sqlalchemy')

    from .main import main as main_blueprint
    from .auth import auth as auth_blueprint
    app.register_blueprint(main_blueprint)
    app.register_blueprint(auth_blueprint, url_prefix='/auth')

    app.task_queue = rq.Queue('flasky', connection=Redis.from_url(os.environ.get('REDIS_URL')))

    if app.config['SSL_REDIRECT']:
        from flask_sslify import SSLify
        sslify = SSLify(app)

    return app
app/tasks.py
from . import create_app,db
from .models import User,Tokens,Files
from .decorators import token_getter
from flask_login import current_user
import requests
import datetime as dt
import urllib
import os
app = create_app(os.getenv('FLASK_CONFIG') or 'default')
<FUNCTIONS HERE>
app/auth/views.py
from flask import render_template,url_for,redirect,request,flash,session,current_app,Response
from .. import db,scheduler
from . import auth
from ..models import User,Role,Tokens,Files
from flask_login import login_user, logout_user, login_required
from ..decorators import admin_required, token_setter, token_getter,permission_required
import requests
import urllib
from .forms import LoginForm, SubmitConnection, ScheduleJobForm
from app.tasks import refreshed_google_client,test_context
app/main/views.py
from flask import render_template, session, redirect, url_for, flash,current_app,request
from datetime import datetime
from . import main
from .. import db,scheduler
from ..models import User,Tokens
from .forms import NameForm,AnalyticsForm
from flask_login import login_required,current_user
from ..decorators import admin_required,permission_required
import requests
import rq
from redis import Redis
from app.tasks import refreshed_google_client,load_analytics
@main.route('/ig_sync', methods=['GET', 'POST'])
@login_required
@permission_required(4)
def ig_sync():
    form = IGAnalyticsForm()
    if request.method == 'POST':
        from app.tasks import load_ig_sync
        if form.validate_on_submit():
            if form.submit_analytics_schedule.data:
                # GET VARIABLES FROM FORM
                scheduler.add_job(func=load_ig_sync, args=[#VARIABLES HERE], trigger='cron', hour=3, id=f'SYNC_IG_{page_name}', jobstore='sqlalchemy')
                return redirect(url_for('main.job_schedule'))
    return render_template('ig_sync.html', form=form)
app/Procfile
web: gunicorn flasky:app
worker: rq worker -u $REDIS_URL flasky

How to get AWS workflow details

I'm wondering how you can get the details shown on the AWS workflow details page. For instance, I'm trying to get the start time for a workflow I'm running, but I can't find the method in AWS's API to get this. Here is what I'm trying to do:
import sys
from awsglue.utils import getResolvedOptions
import logging
import pip
import os
import email.utils
import datetime
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import smtplib
import boto3
from configparser import ConfigParser
glue_client = boto3.client('glue')
args = getResolvedOptions(sys.argv,['WORKFLOW_NAME','WORKFLOW_RUN_ID'])
workflow_name = str(args['WORKFLOW_NAME'])
workflow_run_id = str(args['WORKFLOW_RUN_ID'])
workflow_start_time = str(args['WORKFLOW_START_TIME'])
print(workflow_start_time)
I think you're looking for this.
Example:
import boto3
client = boto3.client('glue')
response = client.get_workflow(
    Name='workflow_name',
    IncludeGraph=False
)
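
get_workflow returns the workflow definition; if what you need is the start time of the specific run that triggered the script, the run ID you already receive can, as far as I know, be passed to get_workflow_run, whose response includes a StartedOn timestamp. A sketch:

import sys
import boto3
from awsglue.utils import getResolvedOptions

args = getResolvedOptions(sys.argv, ['WORKFLOW_NAME', 'WORKFLOW_RUN_ID'])
glue_client = boto3.client('glue')

response = glue_client.get_workflow_run(
    Name=args['WORKFLOW_NAME'],
    RunId=args['WORKFLOW_RUN_ID']
)
# 'StartedOn' is a datetime recording when this workflow run began
workflow_start_time = response['Run']['StartedOn']
print(workflow_start_time)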

Django scheduled tasks

I want to display the current exchange rate of the USD/Bitcoin price pair on my website.
Therefore I set up Celery and a small periodic_task.
I'm currently not really able to understand how to call this periodic task or display the JSON data it returns.
This is how my Celery setup looks:
__init__.py
from __future__ import absolute_import, unicode_literals
from .celery import app as celery_app
__all__ = ('celery_app',)
celery.py
from __future__ import absolute_import, unicode_literals
from celery import Celery
import os
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'myproject.settings')
app = Celery('myproject')
app.config_from_object('django.conf:settings', namespace='CELERY')
app.autodiscover_tasks()
@app.task(bind=True)
def debug_task(self):
    print('Request: {0!r}'.format(self.request))
tasks.py
from celery import Celery
from celery.schedules import crontab
from celery.task import periodic_task
from celery.utils.log import get_task_logger
import requests
logger = get_task_logger(__name__)
app = Celery('tasks', broker='redis://127.0.0.1')
@app.task
def test():
    return "Test Successful"

@periodic_task(run_every=(crontab(minute='*/15')), name="get_btc_exchange_rate", ignore_result=True)
def get_exchange_rate():
    api_url = "https://api.coinmarketcap.com/v1/ticker/?limit=1"
    try:
        exchange_rate = requests.get(api_url).json()
        logger.info("BTC Exchange rate updated.")
    except Exception as e:
        print(e)
        exchange_rate = dict()
    return exchange_rate
I'm currently starting Celery with this script:
https://gist.github.com/psych0der/44a8994495abee1b4e832420c1c2974d
So my question is: how can I trigger that periodic_task and display the returned JSON data (the "price_usd" field) in a template?
Thanks in advance
You'll need to start a celerybeat instance. It will schedule and send off events that you can set on an interval.
http://docs.celeryproject.org/en/latest/userguide/periodic-tasks.html
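
For example, you would run a worker and a beat instance as two processes, and have the task store its result somewhere a Django view can read it. Here is a sketch (not code from the question; the cache key name and template are made up for illustration):

# Start the two processes:
#   celery -A myproject worker -l info
#   celery -A myproject beat -l info

import requests
from celery.schedules import crontab
from celery.task import periodic_task
from django.core.cache import cache
from django.shortcuts import render

@periodic_task(run_every=crontab(minute='*/15'), name="get_btc_exchange_rate", ignore_result=True)
def get_exchange_rate():
    data = requests.get("https://api.coinmarketcap.com/v1/ticker/?limit=1").json()
    # the ticker returns a list of dicts; "price_usd" is the field asked about
    cache.set('btc_price_usd', data[0]['price_usd'], 60 * 20)

def btc_rate(request):
    # an ordinary Django view that renders the most recently cached rate
    return render(request, 'rate.html', {'price_usd': cache.get('btc_price_usd')})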

list_jobs function min_creation_time error

Below is my code in the Django framework (Python 2.7) to list the jobs in BigQuery. I want to filter to just the ones from the last two weeks, but the min_creation_time parameter of the list_jobs() function does not work and errors out for some reason. Please suggest a fix.
from __future__ import unicode_literals
from django.shortcuts import render
import thd_gbq_tools as bq
# Create your views here.
from django.http import HttpResponse
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from oauth2client.client import GoogleCredentials
from google.cloud import bigquery
import uuid
import os
import logging
import time
import json
from datetime import datetime, timedelta
from django.template import loader
from django.shortcuts import render
import pandas as pd
from collections import OrderedDict
from datetime import date

def home(request):
    credentials = GoogleCredentials.get_application_default()
    # Construct the service object for interacting with the BigQuery API.
    bq_conn = build('bigquery', 'v2', credentials=credentials)
    job_query_dict = []
    import warnings
    warnings.filterwarnings("ignore")

    # Create the BigQuery client
    client = bigquery.Client(project='analytics-supplychain-thd')

    # List the jobs in the client
    jobs = client.list_jobs(all_users=True)  # API request
    for job in jobs:
        job_create_timestamp = datetime.strptime((str(job.created).replace('+', '.')).split('.')[0], '%Y-%m-%d %H:%M:%S')
        job_ended_timestamp = datetime.strptime((str(job.ended).replace('+', '.')).split('.')[0], '%Y-%m-%d %H:%M:%S')
        job_query_dict.append([job.job_id, job.user_email, job_create_timestamp, job_ended_timestamp, job.state])

    Table1 = sorted(job_query_dict, key=lambda x: (x[2]), reverse=True)
    return render(request, 'j2_response.html', {'Table1': Table1})
This is the code I am using to assign the parameter that indicates the last 10 minutes for min_creation_time:
from datetime import datetime,timedelta
from datetime import date
ten_mins_ago = datetime.utcnow() - timedelta(minutes=10)
By setting ten_mins_ago = datetime.utcnow() - timedelta(minutes=10) you are specifying that you want the BigQuery jobs that have run within the last 10 minutes.
You can try this code snippet to list the BigQuery jobs made in the last 2 weeks:
from google.cloud import bigquery
from datetime import datetime, timedelta
from pytz import timezone
client = bigquery.Client(project = '[YOUR_PROJECT]')
local_timezone = timezone('US/Eastern')
two_weeks_ago = datetime.utcnow() - timedelta(days = 14)
local_two_weeks = local_timezone.localize(two_weeks_ago)
for job in client.list_jobs(all_users=True, max_results=10, min_creation_time=local_two_weeks):
    print(job.job_id, job.user_email)
If this snippet works for you, you can integrate it into your code. Should you get any errors, please state them so we can look further into the issue.
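
If it does, plugging it into the view from the question would look roughly like this (same idea, reusing the project name that appears in the question):

from datetime import datetime, timedelta
from pytz import timezone
from google.cloud import bigquery

local_timezone = timezone('US/Eastern')
local_two_weeks = local_timezone.localize(datetime.utcnow() - timedelta(days=14))

client = bigquery.Client(project='analytics-supplychain-thd')
jobs = client.list_jobs(all_users=True, min_creation_time=local_two_weeks)  # API request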

how to use db instance in flask-apscheduler's jobs function

When I used flask-apscheduler (not plain apscheduler) in my Flask web project, I ran into some problems, especially when using the db (flask-sqlalchemy) object. The relevant setup:
JOBS = [
    {
        'id': 'job1',
        'func': 'app.monitor.views:test',
        'args': (),
        'trigger': 'interval',
        'seconds': 2
    }
]
./app/__init__.py:
from flask import Flask
from flask.ext.bootstrap import Bootstrap
from flask.ext.mail import Mail
from flask.ext.moment import Moment
from flask.ext.sqlalchemy import SQLAlchemy
from flask.ext.login import LoginManager
from flask.ext.pagedown import PageDown
from flask_apscheduler import APScheduler
from celery import Celery
# from apscheduler.schedulers.blocking import BlockingScheduler
from config import config, Config

bootstrap = Bootstrap()
mail = Mail()
moment = Moment()
db = SQLAlchemy()
pagedown = PageDown()
celery = Celery(__name__, broker=Config.CELERY_BROKER_URL)
# https://pypi.python.org/pypi/Flask-APScheduler
scheduler = APScheduler()

login_manager = LoginManager()
login_manager.session_protection = 'strong'
login_manager.login_view = 'auth.login'

def create_app(config_name):
    app = Flask(__name__)
    app.config.from_object(config[config_name])
    config[config_name].init_app(app)

    bootstrap.init_app(app)
    mail.init_app(app)
    moment.init_app(app)
    db.init_app(app)
    login_manager.init_app(app)
    pagedown.init_app(app)
    scheduler.init_app(app)
    celery.conf.update(app.config)

    if not app.debug and not app.testing and not app.config['SSL_DISABLE']:
        from flask.ext.sslify import SSLify
        sslify = SSLify(app)

    from .monitor import monitor as monitor_1_0_blueprint
    from .laser import laser as laser_1_0_blueprint
    app.register_blueprint(monitor_1_0_blueprint, url_prefix='/monitor/api')
    app.register_blueprint(laser_1_0_blueprint, url_prefix='/laser/api/v1.0')

    return app
The errors were:

Error 1: db is :
Error 2: db is : No handlers could be found for logger "apscheduler.executors.default"
Error 3: db is : raise RuntimeError('working outside of application context')
         RuntimeError: working outside of application context
The key to the problem is getting the db and app objects inside the flask-apscheduler job function (views.py):
from app import scheduler, db

def test():
    # to solve the logging error, attach a handler to APScheduler's executor logger
    import logging
    log = logging.getLogger('apscheduler.executors.default')
    log.setLevel(logging.INFO)  # DEBUG
    fmt = logging.Formatter('%(levelname)s:%(name)s:%(message)s')
    h = logging.StreamHandler()
    h.setFormatter(fmt)
    log.addHandler(h)

    # get the app object
    app = scheduler.app
    # get the db object and use it
    with app.app_context():
        print '........................', db  # the right db object
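
Once the application context is active, the db object behaves as usual; for example, a job could run a query and then release the session (a sketch only; the User model is a hypothetical name, not from the question):

from app import scheduler, db
from app.models import User  # hypothetical model, for illustration only

def count_users_job():
    app = scheduler.app
    with app.app_context():
        # any flask-sqlalchemy call works here because an app context is active
        print 'user count:', User.query.count()
        db.session.remove()  # release the scoped session when the job finishes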