Batch Prediction Job non-blocking - google-cloud-platform

I am running a Vertex AI batch prediction using the Python API.
The function I am using is from the Google Cloud docs:
import datetime
from typing import Sequence, Union

from google.cloud import aiplatform
from google.oauth2 import service_account

def create_batch_prediction_job_dedicated_resources_sample(
    key_path,
    project: str,
    location: str,
    model_display_name: str,
    job_display_name: str,
    gcs_source: Union[str, Sequence[str]],
    gcs_destination: str,
    machine_type: str = "n1-standard-2",
    sync: bool = True,
):
    credentials = service_account.Credentials.from_service_account_file(key_path)
    # Initialize the aiplatform client
    aiplatform.init(project=project, location=location, credentials=credentials)
    # Get a list of Models by Model name
    models = aiplatform.Model.list(filter=f'display_name="{model_display_name}"')
    model_resource_name = models[0].resource_name
    # Get the model
    my_model = aiplatform.Model(model_resource_name)

    batch_prediction_job = my_model.batch_predict(
        job_display_name=job_display_name,
        gcs_source=gcs_source,
        gcs_destination_prefix=gcs_destination,
        machine_type=machine_type,
        sync=sync,
    )

    # batch_prediction_job.wait_for_resource_creation()
    batch_prediction_job.wait()
    print(batch_prediction_job.display_name)
    print(batch_prediction_job.resource_name)
    print(batch_prediction_job.state)
    return batch_prediction_job
datetime_today = datetime.datetime.now()
model_display_name = 'test_model'
key_path = 'vertex_key.json'
project = 'my_project'
location = 'asia-south1'
job_display_name = 'batch_prediction_' + str(datetime_today)
model_name = '1234'
gcs_source = 'gs://my_bucket/Cleaned_Data/user_item_pairs.jsonl'
gcs_destination = 'gs://my_bucket/prediction'

create_batch_prediction_job_dedicated_resources_sample(key_path, project, location, model_display_name,
                                                       job_display_name, gcs_source, gcs_destination)
OUTPUT:
92 current state:
JobState.JOB_STATE_RUNNING
INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/my_project/locations/asia-south1/batchPredictionJobs/37737350127597649
The above output is printed to the terminal over and over, every few seconds.
The issue is that the Python program calling this function keeps running until it is force-stopped. I have tried both batch_prediction_job.wait() and batch_prediction_job.wait_for_resource_creation(), with the same result.
How do I start a batch prediction job without waiting for it to complete, so that the program terminates just after the job has been created?

I gave you the wrong instruction in the comments: change the parameter to sync=False and the function should return just after being executed.
The sync parameter controls whether this function call should be synchronous (wait for the pipeline run to finish before terminating) or asynchronous (return immediately), so set:
sync=False
def create_batch_prediction_job_dedicated_resources_sample(
    # ...
    sync: bool = False,
):
UPDATE - Adding more details:
Check my notebook code here, where I tested it and it's working.
You have to set sync=False AND remove/comment the following lines:
#batch_prediction_job.wait()
#print(batch_prediction_job.display_name)
#print(batch_prediction_job.resource_name)
#print(batch_prediction_job.state)
Your code, edited:
def create_batch_prediction_job_dedicated_resources_sample(
    key_path,
    project: str,
    location: str,
    model_display_name: str,
    job_display_name: str,
    gcs_source: Union[str, Sequence[str]],
    gcs_destination: str,
    machine_type: str = "n1-standard-2",
    sync: bool = False,
):
    credentials = service_account.Credentials.from_service_account_file(key_path)
    # Initialize the aiplatform client
    aiplatform.init(project=project, location=location, credentials=credentials)
    # Get a list of Models by Model name
    models = aiplatform.Model.list(filter=f'display_name="{model_display_name}"')
    model_resource_name = models[0].resource_name
    # Get the model
    my_model = aiplatform.Model(model_resource_name)

    batch_prediction_job = my_model.batch_predict(
        job_display_name=job_display_name,
        gcs_source=gcs_source,
        gcs_destination_prefix=gcs_destination,
        machine_type=machine_type,
        sync=sync,
    )

    return batch_prediction_job
datetime_today = datetime.datetime.now()
model_display_name = 'test_model'
key_path = 'vertex_key.json'
project = '<my_project_name>'
location = 'asia-south1'
job_display_name = 'batch_prediction_' + str(datetime_today)
model_name = '1234'
gcs_source = 'gs://<my_bucket_name>/Cleaned_Data/user_item_pairs.jsonl'
gcs_destination = 'gs://<my_bucket_name>/prediction'

create_batch_prediction_job_dedicated_resources_sample(key_path,
                                                       project,
                                                       location,
                                                       model_display_name,
                                                       job_display_name,
                                                       gcs_source,
                                                       gcs_destination,
                                                       sync=False)
Results sync=False:
Results sync=True:
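With sync=False the script exits as soon as the job has been created. If you later want to check on the job from a separate process, a minimal sketch (assuming you saved the job's resource name from the earlier run; the ID below is a placeholder) could look like this:

from google.cloud import aiplatform

aiplatform.init(project='my_project', location='asia-south1')

# Look up the job created earlier by its full resource name (placeholder ID)
job = aiplatform.BatchPredictionJob(
    'projects/my_project/locations/asia-south1/batchPredictionJobs/1234567890')
print(job.state)  # e.g. JobState.JOB_STATE_RUNNING or JobState.JOB_STATE_SUCCEEDED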

Related

list indices must be integers or slices, not dict in django

I just want to iterate through the list of JSON data that I get in the payload, but I am getting the error: list indices must be integers or slices, not dict.
payload:
[{"AuditorId":10,"Agents":"sa","Supervisor":"sa","TicketId":"58742","QId":150,"Answer":"Yes","TypeSelected":"CMT Mails","Comments":"na","TicketType":"Regularticket","Action":"na","AuditSubFunction":"na","AuditRegion":"na"},{"AuditorId":10,"Agents":"sa","Supervisor":"sa","TicketId":"58742","QId":151,"Answer":"Yes","TypeSelected":"CMT Mails","Comments":"na","TicketType":"Regularticket","Action":"na","AuditSubFunction":"na","AuditRegion":"na"}]
views.py:
@api_view(['POST'])
def SaveUserResponse(request):
    for ran in request.data:
        auditorid = request.data[ran].get('AuditorId')
        ticketid = request.data[ran].get('TicketId')
        qid = request.data[ran].get('QId')
        answer = request.data[ran].get('Answer')
        sid = '0'
        TicketType = request.data[ran].get('TicketType')
        TypeSelected = request.data[ran].get('TypeSelected')
        agents = request.data[ran].get('Agents')
        supervisor = request.data[ran].get('Supervisor')
        Comments = request.data[ran].get('Comments')
        action = request.data[ran].get('Action')
        subfunction = request.data[ran].get('AuditSubFunction')
        region = request.data[ran].get('AuditRegion')
        cursor = connection.cursor()
        cursor.execute('EXEC [dbo].[sp_SaveAuditResponse] @auditorid=%s,@ticketid=%s,@qid=%s,@answer=%s,@sid=%s,@TicketType=%s,@TypeSelected=%s,@agents=%s,@supervisor=%s,@Comments=%s,@action=%s,@subfunction=%s,@region=%s',
                       (auditorid, ticketid, qid, answer, sid, TicketType, TypeSelected, agents, supervisor, Comments, action, subfunction, region))
    return Response(True)
I ran this code on my machine and it works for the payload you provided.
@api_view(['POST'])
def SaveUserResponse(request):
    for ran in request.data:
        auditorid = ran.get('AuditorId')
        ticketid = ran.get('TicketId')
        qid = ran.get('QId')
        answer = ran.get('Answer')
        sid = '0'
        TicketType = ran.get('TicketType')
        TypeSelected = ran.get('TypeSelected')
        agents = ran.get('Agents')
        supervisor = ran.get('Supervisor')
        Comments = ran.get('Comments')
        action = ran.get('Action')
        subfunction = ran.get('AuditSubFunction')
        region = ran.get('AuditRegion')
If it doesn't, then the content of request.data must be different from the payload you shared in the original post.
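The key point is that iterating over a list yields its elements (here, dicts) rather than indices, so each ran is already a dict. A minimal standalone illustration of the difference:

payload = [{"QId": 150, "Answer": "Yes"}, {"QId": 151, "Answer": "Yes"}]

for item in payload:
    # item is already a dict element of the list, not an index;
    # payload[item] would raise "list indices must be integers or slices, not dict"
    print(item.get("QId"), item.get("Answer"))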

Flask app keeps on loading at the time of prediction (TensorRT)

This is in continuation to the question
"Facing issue while running Flask app with TensorRT model on Jetson Nano".
The above is resolved, but when I run the Flask app it keeps loading and does not show the video.
code:
def callback():
    cuda.init()
    device = cuda.Device(0)
    ctx = device.make_context()
    onnx_model_path = './some.onnx'
    fp16_mode = False
    int8_mode = False
    trt_engine_path = './model_fp16_{}_int8_{}.trt'.format(fp16_mode, int8_mode)
    max_batch_size = 1
    engine = get_engine(max_batch_size, onnx_model_path, trt_engine_path, fp16_mode, int8_mode)
    context = engine.create_execution_context()
    inputs, outputs, bindings, stream = allocate_buffers(engine)
    ctx.pop()
    ## callback function ends

worker_thread = threading.Thread(target=callback())
worker_thread.start()

trt_outputs = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)

def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    print("start in do_inference")
    # Transfer data from CPU to the GPU.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Run inference.
    print("before run inference in do_inference")
    context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    print("before output in do_inference")
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    # Synchronize the stream
    print("before stream synchronize in do_inference")
    stream.synchronize()
    # Return only the host outputs.
    print("before return in do_inference")
    return [out.host for out in outputs]
Your worker_thread creates the context required for do_inference, so you should call the do_inference method inside callback():
def callback():
    cuda.init()
    device = cuda.Device(0)
    ctx = device.make_context()
    onnx_model_path = './some.onnx'
    fp16_mode = False
    int8_mode = False
    trt_engine_path = './model_fp16_{}_int8_{}.trt'.format(fp16_mode, int8_mode)
    max_batch_size = 1
    engine = get_engine(max_batch_size, onnx_model_path, trt_engine_path, fp16_mode, int8_mode)
    context = engine.create_execution_context()
    inputs, outputs, bindings, stream = allocate_buffers(engine)
    trt_outputs = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    # post-process the trt_outputs
    ctx.pop()
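One more detail worth checking: threading.Thread should be given the function object itself; writing target=callback() calls callback in the main thread and passes its return value (None) as the target. A minimal sketch:

import threading

# Pass the callable itself; do not call it here
worker_thread = threading.Thread(target=callback)
worker_thread.start()
worker_thread.join()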

\u0000 cannot be converted to text in Django/PostgreSQL

I have a Django project. On the host, when I want to upload an image, an error sometimes occurs (only with specific images). The code below shows how I resize uploaded images:
def save_files_to_media(request, is_public=False, klass=None, conversation=None):
    from apps.file.models import File
    fs = FileSystemStorage()
    file_items = {}
    for data_item in request.data:
        file_match = re.search(r'^fileToUpload\[(\d+)\]$', data_item)
        if file_match and file_match.groups():
            item_index = file_match.groups()[0]
            if item_index not in file_items:
                file_items[item_index] = {}
            file_items[item_index]['file_to_upload'] = request.data[data_item]
        else:
            optimize_match = re.search(r'^optimizeType\[(\d+)\]$', data_item)
            if optimize_match and optimize_match.groups():
                item_index = optimize_match.groups()[0]
                if item_index not in file_items:
                    file_items[item_index] = {}
                file_items[item_index]['optimize_type'] = request.data[data_item]
    files = []
    for file_item_key in file_items:
        input_file = file_items[file_item_key]['file_to_upload']
        # TODO: checking validation. if input_file.name is not exist
        optimize_type = file_items[file_item_key].get('optimize_type')
        file_uuid = str(uuid4())
        if is_public:
            orig_filename, file_ext = splitext(basename(input_file.name))
            directory_name = join(settings.MEDIA_ROOT, file_uuid)
            filename = file_uuid + file_ext
        else:
            directory_name = join(settings.MEDIA_ROOT, file_uuid)
            mkdir(directory_name)
            filename = input_file.name
        filepath = join(directory_name, filename)
        fs.save(filepath, input_file)
        is_optimized = False
        if optimize_type == 'image':
            is_success, filepath = image_optimizer(filepath)
            filename = basename(filepath)
            is_optimized = is_success
        file_obj = File(
            orig_name=filename,
            uuid=file_uuid,
            md5sum=get_md5sum(filepath),
            filesize=get_filesize(filepath),
            meta=get_meta_info(filepath),
            is_optimized=is_optimized,
            creator=request.user
        )
        if is_public:
            file_obj.is_public = True
        else:
            file_obj.klass = klass
            file_obj.conversation = conversation
        file_obj.save()
        files.append(file_obj)
    return files
Here is the error I get with some images:
unsupported Unicode escape sequence
LINE 1: ..., 'ada90ead20f7994837dced344266cc51', 145216, '', '{"FileTyp...
^
DETAIL: \u0000 cannot be converted to text.
CONTEXT: JSON data, line 1: ...ecTimeDigitized": 506779, "MakerNoteUnknownText":
The funny thing is that it works on my local machine but not on the host. For more information: my local PostgreSQL version is 11.3 and the host PostgreSQL is 9.5.17. Where do you think the problem is? From the error it seems to be PostgreSQL-related. Thank you.
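For context, the error comes from PostgreSQL refusing NUL (\u0000) characters inside text/JSON values, which some images carry in their EXIF fields (e.g. MakerNoteUnknownText). A minimal, hypothetical sketch of stripping them from the metadata before it is saved (sanitize_nul is not part of the original code):

def sanitize_nul(value):
    # Recursively drop NUL characters, which PostgreSQL text/JSON cannot store
    if isinstance(value, str):
        return value.replace('\u0000', '')
    if isinstance(value, dict):
        return {k: sanitize_nul(v) for k, v in value.items()}
    if isinstance(value, list):
        return [sanitize_nul(v) for v in value]
    return value

# e.g. meta=sanitize_nul(get_meta_info(filepath)) when building the File object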

Django single object update in for loop

Hi, I'm wondering whether it is possible, and how, to update a single model object's name inside a for loop by its id, using the objects.filter(pk=id).update(name='name') function.
I tried to do this but it's not working in the for loop; it works only outside the loop.
EDIT
My edit view with the for loop:
First I prepare data based on my database. The data can be modified by other methods, so I keep it in the global list schedule_table.
When I'm saving the project object, I want to update the other data, represented by schedule_table, using a for loop.
def ProjectEditView(request, pk):
    project = get_object_or_404(Project, pk=17)
    schedule_table_load_form_db(project)  # preparing list with data
    Task_Schedule_TableView(request)
    project_form = ProjectForm(request.POST or None, instance=project)
    project_form_valid = project_form.is_valid()
    if project_form_valid:
        with transaction.atomic():
            # save form in DB
            project = project_form.save(commit=False)
            project.save()
            # modify other objects based on the changed list schedule_table
            if schedule_table.__len__() > 0:
                # my for loop
                for p in schedule_table:
                    team = Team.objects.get(name=p.team)
                    phase = Phase.objects.filter(pk=p.pk).update(name=p.name,
                                                                 project=project,
                                                                 team=team,
                                                                 order=p.order,
                                                                 duration=p.duration,
                                                                 prev=p.prev,
                                                                 start=p.start,
                                                                 end=p.end)
            # some other stuff..... outside the loop
        return redirect('Project:ProjectListView')
    context = {'project_form': project_form}
    return render(request, 'Project/test.html', context)
Function to prepare the list with data:
def schedule_table_load_form_db(project):
    global schedule_table
    schedule_table = []
    phases = Phase.objects.filter(project=project)
    for phase in phases:
        tasks_list = Task.objects.filter(phase=phase)
        tasks = []
        for task in tasks_list:
            task_dict = Task_dictionary.objects.get(pk=task.task_dictionary.pk)
            tmp = ''
            for e in task.employers.employer.all():
                user = User.objects.get(pk=e.user_id)
                tmp += user.first_name + ' ' + user.last_name + ','
            tasks.append(TableTask(pk=task.pk,
                                   order=task.order,
                                   name=task.name,
                                   duration=task.duration,
                                   employer=tmp,
                                   start=task.start,
                                   end=task.end,
                                   min_employers_nr=task_dict.min_employers_nr,
                                   max_employers_nr=task_dict.max_employers_nr,
                                   prev=task.prev_task,
                                   ))
        schedule_table.append(TablePhase(pk=phase.pk,
                                         name=phase.name,
                                         duration=0.0,
                                         start=phase.start,
                                         end=phase.end,
                                         team=phase.team.name,
                                         task=tasks,
                                         order=schedule_table.__len__(),
                                         prev=phase.prev
                                         ))
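For reference, the pattern the question is about, in isolation: QuerySet.update() applies the change directly in the database and returns the number of rows matched (the model, id, and field below are just placeholders):

# Updates the single row with the given primary key; returns the number of rows matched
rows_updated = Phase.objects.filter(pk=some_id).update(name='new name')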

Loading selected file from sftp to S3 using python

I have some files on SFTP (named a.csv, b.csv, c.csv) and I want to load those files directly to S3 using Python.
I got the code below from Stack Overflow, but the problem is that instead of ftp_priv_key_filename, I am connecting to the SFTP server using a password.
Please suggest the correct approach. Thanks.
import math
import os
from boto.s3.connection import S3Connection
import io
import paramiko
import stat
import time

chunk_size = 12428800
ftp_priv_key_filename = '/path/to/private/key'  # this script assumes private-key auth
ftp_username = 'user'
ftp_host = '100.10.86.59'
ftp_port = 22
ftp_dir = '/import/TMP'
s3_id = 'abc'
s3_key = 'xyz'
bucket_name = 'efg/mno/pqr'

s3_conn = S3Connection(s3_id, s3_key)
bucket = s3_conn.get_bucket(bucket_name)

pkey = paramiko.RSAKey.from_private_key_file(ftp_priv_key_filename)
transport = paramiko.Transport((ftp_host, ftp_port))
transport.connect(username=ftp_username, pkey=pkey)
ftp_conn = paramiko.SFTPClient.from_transport(transport)

def move_file(filepath):
    key_id = filepath.replace(ftp_dir, '').lstrip('/')
    key = bucket.get_key(key_id)
    ftp_fi = ftp_conn.file(filepath, 'r')
    source_size = ftp_fi._get_size()
    if key is not None:
        # check if we need to replace, check sizes
        if source_size == key.size:
            print('%s already uploaded' % key_id)
            ftp_fi.close()
            return
    chunk_count = int(math.ceil(source_size / float(chunk_size)))
    mp = bucket.initiate_multipart_upload(key_id)
    print('%s uploading size: %imb, %i chunks' % (
        key_id, math.ceil(source_size/1024/1024), chunk_count))
    for i in range(chunk_count):
        start = time.time()
        chunk = ftp_fi.read(chunk_size)
        end = time.time()
        seconds = end - start
        print('%s read chunk from ftp (%i/%i) %ikbs' % (
            key_id, i + 1, chunk_count,
            math.ceil((chunk_size / 1024) / seconds)))
        fp = io.BytesIO(chunk)
        start = time.time()
        mp.upload_part_from_file(fp, part_num=i + 1)
        end = time.time()
        seconds = end - start
        print('%s upload chunk to s3 (%i/%i) %ikbs' % (
            key_id, i + 1, chunk_count,
            math.ceil((chunk_size / 1024) / seconds)))
    mp.complete_upload()
    ftp_fi.close()

def move_dir(directory):
    ftp_conn.chdir(directory)
    for filename in ftp_conn.listdir():
        filepath = os.path.join(directory, filename)
        if stat.S_ISDIR(ftp_conn.stat(filepath).st_mode):
            move_dir(filepath)
        else:
            move_file(filepath)

move_dir(ftp_dir)
You just have to take out the following line:
pkey = paramiko.RSAKey.from_private_key_file(ftp_priv_key_filename)
and modify the connect call to the following (basically using the password instead of the private key):
transport.connect(username=ftp_username, password=ftp_password)
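Put together, the connection part with password authentication might look like this (ftp_password is an assumed variable holding the account password):

ftp_password = 'your_password_here'  # assumed; keep real credentials out of source control

transport = paramiko.Transport((ftp_host, ftp_port))
transport.connect(username=ftp_username, password=ftp_password)
ftp_conn = paramiko.SFTPClient.from_transport(transport)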
You can load the selected file from SFTP to S3 using Python like below.
from ftplib import FTP_TLS
import s3fs
import logging

def lambda_handler(event, context):
    s3 = s3fs.S3FileSystem(anon=False)
    ftp_url = "100.10.86.59"
    ftp_path = "/import/TMP/"
    s3Bucket = "efg/mno/pqr"
    file_name = "sample.txt"
    ftps = FTP_TLS(ftp_url)
    ftps.login('<user_name>', '<pwd>')
    ftps.prot_p()
    ftps.cwd(ftp_path)
    ftps.retrbinary('RETR ' + file_name, s3.open("{}/{}".format(s3Bucket, file_name), 'wb').write)

Reference - https://github.com/vhvinod/ftp-to-s3/blob/master/ftp-cred-to-s3.py
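Note that ftplib's FTP_TLS speaks FTPS (FTP over TLS) rather than SFTP, so it only fits if the server actually exposes FTPS. If the server is SFTP-only (as in the question, port 22 with paramiko), a minimal password-based sketch that streams a single file to S3 with boto3 could look like this (host, path, bucket, and credentials below are placeholders):

import boto3
import paramiko

transport = paramiko.Transport(('100.10.86.59', 22))
transport.connect(username='user', password='your_password_here')  # assumed credentials
sftp = paramiko.SFTPClient.from_transport(transport)

s3 = boto3.client('s3')
remote_file = sftp.open('/import/TMP/a.csv', 'rb')
# upload_fileobj streams the file-like object without loading it all into memory
s3.upload_fileobj(remote_file, 'my-bucket', 'prediction/a.csv')
remote_file.close()

sftp.close()
transport.close()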