Below is my code in the Django framework (Python 2.7) to list the jobs in BigQuery. I want to filter to just the ones from the last two weeks, but the min_creation_time parameter in the list_jobs() function does not work and errors out for some reason. Please suggest a fix.
from __future__ import unicode_literals
from django.shortcuts import render
import thd_gbq_tools as bq
# Create your views here.
from django.http import HttpResponse
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from oauth2client.client import GoogleCredentials
from google.cloud import bigquery
import uuid
import os
import logging
import time
import json
from datetime import datetime,timedelta
from django.template import loader
from django.shortcuts import render
import pandas as pd
from collections import OrderedDict
from datetime import date
def home(request):
    credentials = GoogleCredentials.get_application_default()
    # Construct the service object for interacting with the BigQuery API.
    bq_conn = build('bigquery', 'v2', credentials=credentials)
    job_query_dict = []
    import warnings
    warnings.filterwarnings("ignore")
    ### Create the BigQuery client
    client = bigquery.Client(project='analytics-supplychain-thd')
    ### List the jobs in the client
    jobs = client.list_jobs(all_users=True)  # API request
    for job in jobs:
        job_create_timestamp = datetime.strptime((str(job.created).replace('+', '.')).split('.')[0], '%Y-%m-%d %H:%M:%S')
        job_ended_timestamp = datetime.strptime((str(job.ended).replace('+', '.')).split('.')[0], '%Y-%m-%d %H:%M:%S')
        job_query_dict.append([job.job_id, job.user_email, job_create_timestamp, job_ended_timestamp, job.state])
    Table1 = sorted(job_query_dict, key=lambda x: x[2], reverse=True)
    return render(request, 'j2_response.html', {'Table1': Table1})
This is the code I am using to assign the parameter that indicates the last 10 minutes for min_creation_time:
from datetime import datetime,timedelta
from datetime import date
ten_mins_ago = datetime.utcnow() - timedelta(minutes=10)
When setting ten_mins_ago = datetime.utcnow() - timedelta(minutes=10), you are specifying that you want the BigQuery jobs that have run in the last 10 minutes.
You can try this code snippet to list the BigQuery jobs made in the last 2 weeks:
from google.cloud import bigquery
from datetime import datetime, timedelta
from pytz import timezone

client = bigquery.Client(project='[YOUR_PROJECT]')
local_timezone = timezone('US/Eastern')

two_weeks_ago = datetime.utcnow() - timedelta(days=14)
local_two_weeks = local_timezone.localize(two_weeks_ago)

for job in client.list_jobs(all_users=True, max_results=10, min_creation_time=local_two_weeks):
    print(job.job_id, job.user_email)
If this snippet works for you, you can integrate it into your code. Should you get any errors, please state them so we can look further into the issue.
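For reference, a minimal sketch of how the filter could slot into the home() view above (keeping your existing template and sort order, and assuming the same pytz dependency):

from pytz import timezone

def home(request):
    client = bigquery.Client(project='analytics-supplychain-thd')
    # Timezone-aware cutoff: only jobs created in the last two weeks
    two_weeks_ago = timezone('US/Eastern').localize(datetime.utcnow() - timedelta(days=14))
    job_query_dict = []
    for job in client.list_jobs(all_users=True, min_creation_time=two_weeks_ago):
        job_query_dict.append([job.job_id, job.user_email, job.created, job.ended, job.state])
    Table1 = sorted(job_query_dict, key=lambda x: x[2], reverse=True)
    return render(request, 'j2_response.html', {'Table1': Table1})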
Related
I have an app structured as below that allows a user to schedule tasks to query the FB API each morning, afternoon, etc. to pull page/post data (3am in this case).
The problem I am experiencing is that the scheduler is executing each job twice, which is obviously undesirable. Oddly, the issue doesn't seem to occur locally, only in production mode. I am hosting this on Heroku with 1 web dyno and 1 worker dyno, each running only 1 process, so I am leaning towards Heroku being the issue.
I have a page to list the current scheduled jobs and when I inspect it, upon refreshing the page the same job instance will flick between two different values (screenshot below). It is as if there are two BackgroundScheduler instances running.
I suspected this had to do with the BackgroundScheduler being initialised twice (once in flasky.py and again in tasks.py), so I created a temporary workaround to stop the double initialisation, but I still experience the same issue and am now stuck. Any help would be much appreciated.
-->app
    -->__init__.py
    -->decorators.py
    -->models.py
    -->tasks.py
    -->auth
        -->__init__.py
        -->errors.py
        -->forms.py
        -->views.py
    -->main
        -->__init__.py
        -->errors.py
        -->forms.py
        -->views.py
    -->static
    -->templates
    -->migrations
config.py
flasky.py
Procfile
requirements.txt
app/flasky.py
from flask import Flask,render_template, session, redirect, url_for, flash
import os
from app import create_app,db
from app.models import User,Role
from datetime import datetime
from flask_migrate import Migrate,upgrade
app = create_app(os.getenv('FLASK_CONFIG') or 'default')
app/__init__.py
from flask import Flask, render_template
from flask_bootstrap import Bootstrap
from flask_moment import Moment
from flask_migrate import Migrate
from flask_sqlalchemy import SQLAlchemy
from config import config
from flask_session import Session
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
import rq
from redis import Redis
from flask_login import LoginManager
from worker import conn
import os
bootstrap = Bootstrap()
moment = Moment()
db = SQLAlchemy()
scheduler = BackgroundScheduler()
migrate = Migrate()
session= Session()
login_manager = LoginManager()
login_manager.login_view = 'auth.login'
def create_app(config_name='default'):
    app = Flask(__name__)
    app.config.from_object(config[config_name])
    db.init_app(app)
    bootstrap.init_app(app)
    moment.init_app(app)
    migrate.init_app(app, db)
    session.init_app(app)
    login_manager.init_app(app)

    if not scheduler.running:
        scheduler.start()
    jobstore_url = os.environ.get('DATABASE_URL')
    scheduler.add_jobstore(SQLAlchemyJobStore(url=jobstore_url), 'sqlalchemy')

    from .main import main as main_blueprint
    from .auth import auth as auth_blueprint
    app.register_blueprint(main_blueprint)
    app.register_blueprint(auth_blueprint, url_prefix='/auth')

    app.task_queue = rq.Queue('flasky', connection=Redis.from_url(os.environ.get('REDIS_URL')))

    if app.config['SSL_REDIRECT']:
        from flask_sslify import SSLify
        sslify = SSLify(app)

    return app
app/tasks.py
from . import create_app,db
from .models import User,Tokens,Files
from .decorators import token_getter
from flask_login import current_user
import requests
import datetime as dt
import urllib
import os
app = create_app(os.getenv('FLASK_CONFIG') or 'default')
<FUNCTIONS HERE>
app/auth/views.py
from flask import render_template,url_for,redirect,request,flash,session,current_app,Response
from .. import db,scheduler
from . import auth
from ..models import User,Role,Tokens,Files
from flask_login import login_user, logout_user, login_required
from ..decorators import admin_required, token_setter, token_getter,permission_required
import requests
import urllib
from .forms import LoginForm, SubmitConnection, ScheduleJobForm
from app.tasks import refreshed_google_client,test_context
app/main/views.py
from flask import render_template, session, redirect, url_for, flash,current_app,request
from datetime import datetime
from . import main
from .. import db,scheduler
from ..models import User,Tokens
from .forms import NameForm,AnalyticsForm
from flask_login import login_required,current_user
from ..decorators import admin_required,permission_required
import requests
import rq
from redis import Redis
from app.tasks import refreshed_google_client,load_analytics
@main.route('/ig_sync', methods=['GET', 'POST'])
@login_required
@permission_required(4)
def ig_sync():
    form = IGAnalyticsForm()
    if request.method == 'POST':
        from app.tasks import load_ig_sync
        if form.validate_on_submit():
            if form.submit_analytics_schedule.data:
                # GET VARIABLES FROM FORM
                scheduler.add_job(func=load_ig_sync, args=[#VARIABLES HERE], trigger='cron', hour=3, id=f'SYNC_IG_{page_name}', jobstore='sqlalchemy')
                return(redirect(url_for('main.job_schedule')))
    return render_template('ig_sync.html', form=form)
app/Procfile
web: gunicorn flasky:app
worker: rq worker -u $REDIS_URL flasky
I'm wondering how you can get the details shown on the AWS Glue workflow details page. For instance, I'm trying to get the start time for a workflow I'm running, but I can't find a method in AWS's API to get this. Here is what I'm trying to do.
import sys
from awsglue.utils import getResolvedOptions
import logging
import pip
import os
import email.utils
import datetime
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import smtplib
import boto3
from configparser import ConfigParser
glue_client = boto3.client('glue')
args = getResolvedOptions(sys.argv,['WORKFLOW_NAME','WORKFLOW_RUN_ID'])
workflow_name = str(args['WORKFLOW_NAME'])
workflow_run_id = str(args['WORKFLOW_RUN_ID'])
workflow_start_time = str(args['WORKFLOW_START_TIME'])
print(workflow_start_time)
I think you're looking for this.
Example:
import boto3
client = boto3.client('glue')
response = client.get_workflow(
    Name='workflow_name',
    IncludeGraph=False
)
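If what you are after is specifically the start time of the current run, here is a possible sketch. It reuses the workflow name and run ID resolved from the job arguments above and calls the Glue client's get_workflow_run:

# Look up this particular run and read its start timestamp
run_response = client.get_workflow_run(
    Name=workflow_name,      # resolved from WORKFLOW_NAME above
    RunId=workflow_run_id,   # resolved from WORKFLOW_RUN_ID above
    IncludeGraph=False
)
workflow_start_time = run_response['Run']['StartedOn']
print(workflow_start_time)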
This is my test.py:
from datetime import datetime, timedelta
import sys
import os
from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
from apscheduler.jobstores.redis import RedisJobStore
jobstores = {
    # 'default': SQLAlchemyJobStore(url='sqlite:///jobs.sqlite')
    'default': RedisJobStore(host='localhost', port=6379)
}
scheduler = BlockingScheduler(jobstores=jobstores)

def alarm(time):
    print('Alarm! This alarm was scheduled at %s.' % time)

if __name__ == '__main__':
    alarm_time = datetime.now() + timedelta(seconds=10)
    scheduler.add_job(alarm, 'interval', seconds=10, args=[datetime.now()], name='alarm_test')
    print('To clear the alarms, delete the example.sqlite file.')
    print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C'))
    try:
        scheduler.start()
    except (KeyboardInterrupt, SystemExit):
        pass
I run python test.py and the job runs successfully.
Then, from another terminal (via PuTTY), I run:
python
>>> import redis
>>> from test import *
>>> r = redis.Redis()
>>> r.keys()
>>> r.zrange('apscheduler.run_times',0,1)
It finds the job id 57841c0ee05249efb466882265f2c495, but:
>>> ret = scheduler.get_jobs(jobstore='default')
ret is empty. Why?
Thanks a lot.
Have you started the scheduler before running get_jobs()? If not, it will only list tentatively scheduled jobs. That's why you're not seeing the job.
Try this instead:
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.jobstores.redis import RedisJobStore
scheduler = BackgroundScheduler()
scheduler.add_jobstore('redis', host='localhost', port=6379)
scheduler.start(paused=True)
scheduler.print_jobs()
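Once the scheduler has been started (even in paused mode), get_jobs() reads the jobs back from the Redis job store, so something like this sketch should now show the persisted job (the 'default' alias matches the jobstore added above):

# With the scheduler started, the persisted jobs are loaded from Redis
for job in scheduler.get_jobs(jobstore='default'):
    print(job.id, job.name, job.next_run_time)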
I was trying to create some steps in Abaqus using the following Python code, but I'm getting this error. Can anybody please help?
KeyError: model_name
Python Code:
from abaqus import *
from abaqusConstants import *
import __main__
import section
import regionToolset
import displayGroupMdbToolset as dgm
import part
import material
import assembly
import step
import interaction
import load
import mesh
import optimization
import job
import sketch
import visualization
import xyPlot
import displayGroupOdbToolset as dgo
import connectorBehavior
def create_step(model_name, new_step, previous_step):
    mdb.models['model_name'].StaticStep(name='new_step', previous='previous_step', initialInc=0.025,
                                        maxInc=0.025)
    session.viewports['Viewport: 1'].assemblyDisplay.setValues(step='new_step')

model_name = 'Model-' + str(0)
new_step = 'C4'
previous_step = 'C3'
create_step(model_name, new_step, previous_step)
Replace mdb.models['model_name'].Stat... with mdb.models[model_name].Stat...
def create_step(model_name, new_step, previous_step):
    mdb.models['model_name'].StaticStep(name='new_step', previous='previous_step', initialInc=0.025,
                                        maxInc=0.025)
    session.viewports['Viewport: 1'].assemblyDisplay.setValues(step='new_step')
The 2nd line should be:
    mdb.models[model_name].StaticStep(name='new_step', previous='previous_step', initialInc=0.025,
                                      maxInc=0.025)
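For completeness, if new_step and previous_step are also meant to be used as variables rather than literal strings (which the calling code suggests), the corrected function would look like this sketch:

def create_step(model_name, new_step, previous_step):
    # Use the function arguments themselves, not quoted literals
    mdb.models[model_name].StaticStep(name=new_step, previous=previous_step,
                                      initialInc=0.025, maxInc=0.025)
    session.viewports['Viewport: 1'].assemblyDisplay.setValues(step=new_step)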
I have hundreds of routes in my Flask main module, and I think I need to separate them out of the main module.
How do I do that?
#!/usr/bin/env python3
# -*- coding: utf8 -*-
from flask import request, url_for
from flask import Flask, request, jsonify
from flask_request_params import bind_request_params
from flask import g
import datetime
import pandas as pd
import pymongo
from webargs import Arg
from webargs.flaskparser import use_args, use_kwargs
import yaml
import time, functools
from pdb import set_trace
from pandas_helper import PandasHelper
import errors
from app_helper import *
from release_schedule import ReleaseSchedule
from mongo import Mongo
@app.route('/next_release', methods=["GET"])
@return_json
def next_release():
    schedules = ReleaseSchedule.next_release(DB)
    return pd.DataFrame([sche for sche in schedules])

...

@app.route('/last_release', methods=["GET"])
This is what blueprints were made to do.
Another alternative is flask-classy (which is awesome). I'm going to talk about the blueprint approach since that's what I know better.
If I were in your position, I would want to split my routes up based on common imports.
Without knowing your application, I'm going to guess at a distribution like this:
parse_user_data_views.py
from webargs import Arg
from webargs.flaskparser import use_args, use_kwargs
import yaml
push_to_db_views.py
from pandas_helper import PandasHelper
from mongo import Mongo
import pymongo
import pandas as pd
import datetime
release_views.py
from release_schedule import ReleaseSchedule
import pandas as pd
@app.route('/next_release', methods=["GET"])
@return_json
def next_release():
    schedules = ReleaseSchedule.next_release(DB)
    return pd.DataFrame([sche for sche in schedules])
That is a likely distribution, but we can't answer this for you; only you can.
But this allows you to separate out your application in some pretty nice ways.
In __init__.py:
from flask import Flask
from yourapplication.release_views import release_views
from yourapplication.push_to_db_views import push_to_db_views
from yourapplication.parse_user_data_views import parse_user_data_views
app = Flask(__name__)
app.register_blueprint(release_views)
app.register_blueprint(push_to_db_views)
app.register_blueprint(parse_user_data_views)
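Note that for register_blueprint to work, each of those view modules also needs to create its blueprint object. A minimal sketch for release_views.py (the blueprint name is an assumption, and DB comes from wherever it is defined in your current main module):

# release_views.py
from flask import Blueprint
from release_schedule import ReleaseSchedule
import pandas as pd

release_views = Blueprint('release_views', __name__)

@release_views.route('/next_release', methods=["GET"])
def next_release():
    schedules = ReleaseSchedule.next_release(DB)
    return pd.DataFrame([sche for sche in schedules])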
Create a new file called views.py and add all your routes there. Then import views.py in your __init__.py.
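A minimal sketch of that pattern (the package name yourapplication is an assumption; the key point is importing views at the bottom of __init__.py so views.py can import app without a circular import):

# yourapplication/__init__.py
from flask import Flask

app = Flask(__name__)

# Imported last so views.py can use `app` without a circular import
from yourapplication import views

# yourapplication/views.py
from yourapplication import app

@app.route('/next_release', methods=["GET"])
def next_release():
    ...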