Timeout not working in ProcessPoolExecutor map function

Timeout not working in ProcessPoolExecutor map function - concurrency

I am trying to implement a timeout in my ProccessPoolExecutor but it seems that when I run the code, the timeout just doesn't work and the process continues as if the timeout didn't exist in the first place.
Here is the code:
from concurrent.futures import ProcessPoolExecutor
import angr
from angr.analyses import (
CFGFast,
Decompiler
)
from claripy import *
import os
#import ipdb
#ipdb.set_trace()
def decompile(filename):
p = angr.Project('/Path/' + filename, auto_load_libs=False, load_debug_info=True)
cfg = p.analyses[CFGFast].prep()(data_references=True, normalize=True)
for f in cfg.functions.values():
#print(f.name + " " + filename)
name = filename + " " + f.name
filepath = os.path.join('/path/'+ name)
try:
dec = p.analyses[Decompiler].prep()(f, cfg=cfg.model)
with open(filepath, "w") as f:
f.write(dec.codegen.text)
except AttributeError:
with open('/path', "a") as a:
a.write(name + "\n")
except ValueError:
with open('/path', "a") as a:
a.write(name + "\n")
except ClaripyOperationError:
with open('/path', "a") as a:
a.write(name + "\n")
except TypeError:
with open('/path', "a") as a:
a.write(name + "\n")
if __name__ == '__main__':
with ProcessPoolExecutor(4) as executor:
for result in executor.map(decompile, os.listdir('/path'), timeout=5):
print(result)
I think there must be some problem in the for loop as the result variable doesn't print. I would appreciate any help. Thanks!

Related

django+celery+rabbitmq encode error and sig-kill

I'm now doing a little project which uses celery to turn csv and xlsx files into postgresql table.
The code below works fine without celery(except large files),but after using celery it produce some errors and bugs.
I've looked for similar questions in StackOverFlow but don't have any idea how to do and why.
Hope you guys can help me with it,thanks.
First error is as follows:
csv-1
csv-2
I think it has something to do with my encoding part, but I tried to open it with utf-8-sig and big-5, not working.(It works fine without celery)
`
# -*- coding: utf-8 -*-
from django.shortcuts import render_to_response
from django.template import RequestContext
from django.http import HttpResponseRedirect
from django.core.urlresolvers import reverse
from django.contrib import messages
from django.conf import settings
from django.db import connection
from django.views.decorators.csrf import csrf_exempt
from celery import Celery
from celery import task
import json
import csv
import sys
import random
import psycopg2
import xlrd
import openpyxl as pyxl
from .models import Document
from .forms import DocumentForm
app = Celery(
'tasks',
broker='amqp://guest:guest#localhost:5672//',
backend='rpc://'
)
CELERY_RESULT_BACKEND = 'rpc://'
CELERY_RESULT_PERSISTENT = False
#app.task()
def csvwritein(doc):# Transform csv to table
doc = doc
conn = psycopg2.connect("dbname='apidb' user='api' host='localhost'
password='eric40502' port='5432'")
readcur = conn.cursor()
readcur.execute("select exists(select * from
information_schema.tables where table_name='%s')" % doc.tablename) # check if
same file is already in database
check = readcur.fetchone()[0]
try:
fr = open(doc.path,encoding = 'utf-8-sig')
dr.delay(fr,doc,check)
fr.close()
except Exception as e:
fr = open(doc.path,encoding = 'big5')
dr.delay(fr,doc,check)
fr.close()
conn.commit()
readcur.close()
#app.task()
def dr(fr,doc,check): # make datareader as function to keep code 'dry'
csvt = 0 #count csv reader loop time
row_id = 1 # used for following id field
conn = psycopg2.connect("dbname='apidb' user='api' host='localhost'
password='eric40502' port='5432'")
maincur = conn.cursor()
writecur = conn.cursor()
datareader = csv.reader(fr, delimiter=',')
for row in datareader:
if csvt == 0: # first time in loop(create field) and check no
same file exists
if check == True:
app =
''.join([random.SystemRandom().choice('abcdefghijklmnopqrstuvwxyz0123456789')
for i in range(6)])
tname = '%s-%s' % (doc.tablename,app
tablename = '"%s-%s"' % (doc.tablename,app)
doc.tablename = tname
doc.save()
else:
tablename = '"%s"' % doc.tablename
maincur.execute("CREATE TABLE %s (id SERIAL PRIMARY
KEY);" % tablename)
row_count = sum(1 for line in datareader)
col_count = len(row)
frow = row
for i in range(0,col_count,1):
row[i] = '"%s"' % row[i] # change number to
string
maincur.execute("ALTER TABLE %s ADD %s
CITEXT;" % (tablename,row[i]))
csvt = csvt+1
fr.seek(0)
next(datareader)
elif csvt > 0: # not first time(insert data) and check no
same file exists
for j in range(0,col_count,1):
if j == 0:
writecur.execute("INSERT INTO %s (%s)
VALUES ('%s');" % (tablename,frow[j],row[j]))
else:
writecur.execute("UPDATE %s SET %s =
'%s' WHERE id = '%d';" %(tablename,frow[j],row[j],row_id))
csvt = csvt+1
row_id = row_id+1
else:
break
conn.commit()
maincur.close()
writecur.close()
conn.close()
csvt = 0
doc = Document.objects.all()
`
Second error is about turning a xlsx file(about 130,000 rows) into postgresql table, and the worker got sig-kill after 2-3 minutes.
Debug Message:
[2016-10-27 06:17:05,227: ERROR/MainProcess] Process 'Worker-1' pid:13829 exited with 'signal 9 (SIGKILL)' [2016-10-27 06:17:05,328:ERROR/MainProcess] Task data.tasks.xlsxwritein[5aec4679-c48b-4d07-a0a9-5e4e37fcd24b] raised unexpected: WorkerLostError('Worker exited prematurely: signal 9 (SIGKILL).',) Traceback (most recent call last): File "/usr/local/lib/python3.4/dist-packages/billiard/pool.py", line 1175, in mark_as_worker_lost human_status(exitcode)), billiard.exceptions.WorkerLostError: Worker exited prematurely: signal 9 (SIGKILL).
#The code continues from the above task.py file
#app.task()
def xlsxwritein(doc): # write into database for file type xlsx
xlsxt = 0
conn = psycopg2.connect("dbname='apidb' user='api' host='localhost'
password='eric40502' port='5432'")
maincur = conn.cursor()
readcur = conn.cursor()
writecur = conn.cursor()
readcur.execute("select exists(select * from
information_schema.tables where table_name='%s')" % doc.tablename) # check if
same file is already in database
check = readcur.fetchone()[0]
row_id = 1 # used for following id field
wb = pyxl.load_workbook(doc.path)
sheetnames = wb.get_sheet_names()
ws = wb.get_sheet_by_name(sheetnames[0])
for rown in range(ws.get_highest_row()):
if xlsxt == 0:
if check == True:
app =
''.join([random.SystemRandom().choice('abcdefghijklmnopqrstuvwxyz0123456789')
for i in range(6)])
tname = '%s-%s' % (doc.tablename,app)
tablename = '"%s-%s"' % (doc.tablename,app)
doc.tablename = tname
doc.save()
else:
tablename = '"%s"' % doc.tablename
field = [ws.cell(row=1,column=col_index).value for
col_index in range(1,ws.get_highest_column()+1)]
maincur.execute("CREATE TABLE %s (id SERIAL PRIMARY
KEY);" % tablename)
for coln in range(ws.get_highest_column()):
field[coln] = '"%s"' % field[coln] # change
number to string
if field[coln] == 'ID':
field[coln] = 'original_id'
maincur.execute("ALTER TABLE %s ADD %s
CITEXT;" % (tablename,field[coln]))
xlsxt = xlsxt+1
elif xlsxt > 0 and check == False: # not first time(insert
data) and check no same file exists
for coln in range(ws.get_highest_column()):
if coln == 0:
writecur.execute("INSERT INTO %s (%s)
VALUES ('%s');"
%(tablename,field[coln],str(ws.cell(row=rown,column=coln+1).value)))
else:
writecur.execute("UPDATE %s SET %s =
'%s' WHERE id = '%d';"
%(tablename,field[coln],str(ws.cell(row=rown+1,column=coln+1).value),row_id))
xlsxt = xlsxt+1
row_id = row_id+1
else:
break
conn.commit()
maincur.close()
readcur.close()
writecur.close()
conn.close()
xlsxt = 0

Probably something is going wrong during arguments deserialization. Instead of passing doc object try instead passing filename and then read file inside of task.

Python Value error

I'm getting an error when trying to split a piped output into python.
Error is need more than 3 values to unpack although I'm using 8 values
import subprocess, sys
from datetime import datetime
from time import sleep as sleep
multimon_ng = subprocess.Popen("multimon-ng -a FLEX -t wav flex.wav",
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
shell=True)
while True:
nextline = multimon_ng.stdout.readline()
flex, mdate, mtime, bitrate, other, capcode, o2, msg = nextline.split(" ", 7) # error here
if nextline is " ":
print "napping"
else:
print mdate + " " + mtime + " " + capcode + " " + msg
multimon_ng.poll()
sys.stdout.flush()
any help would be great

3 in error message indicated length of iterable in right hand side argument.
Minimal wrong examples:
a, = [] # ValueError: need more than 0 values to unpack
a, b = [1] # ValueError: need more than 1 value to unpack
a, b, c = [1, 2] # ValueError: need more than 2 values to unpack
# etc ...
Minimal correct examples:
a, = [1]
a, b = [1, 2]
a, b, c = [1, 2, 3]
Smallest change to fix and expose issue would be to wrap unpacking iterable in try-except block.
while True:
nextline = multimon_ng.stdout.readline()
if not nextline:
print "napping"
else:
try:
flex, mdate, mtime, bitrate, other, capcode, o2, msg = nextline.split(" ", 7)
except ValueError:
print "invalid line", nextline
else:
print mdate + " " + mtime + " " + capcode + " " + msg
multimon_ng.poll()
sys.stdout.flush()
As you can see, I also moved check for empty line before unpacking. If line is empty, unpacking would also fail.

XLRDError: No sheet named <'Sheet1'> in python

I am trying to convert the xls into csv file using pandas in python. But I am getting the following error like 'XLRDError: No sheet named <'Sheet1'>'. I have verified the sheet name and it is same as specified above, but I don't how to correct this error. Please find my code below.
CODE:
def xls_2_csv():
import pandas as pd
data_xls = pd.read_excel(r'c:\delivery\file1.xls','Sheet1', index_col=None)
data_xls.to_csv(r'C:\test\file1.csv', encoding='utf-8',index=None)
xls_2_csv()
Please help me in solving this error. Thanks in advance.

I found the same problem in python 3.6 and pandas version is 0.25.1.
The following should work:
import pandas as pd
file = 'your excel file path'
# the file is endswith '.xls' and there is multiple sheets
# error method
df_sheet1 = pd.read_excel(file, sheet_name='Sheet1')
df_sheet2 = pd.read_excel(file, sheet_name='Sheet2')
# when read Sheet1 had no error, but when read Sheet2, had an error:
# xlrd.biffh.XLRDError: No sheet named <'Sheet2'>
# right method
with pd.ExcelFile(file) as xls:
for sheet_name in xls.sheet_names:
df = pd.read_excel(xls, sheet_name=sheet_name)
print(df.head())

Hi I tried the following code it worked for me.
CODE:
import logging
import time
import traceback
import xlrd
import csv
import sys
import re
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
xls = input file path
target = output file path
logging.info("Start converting: From '" + xls + "' to '" + target + "'. ")
try:
start_time = time.time()
wb = xlrd.open_workbook(xls)
sh = wb.sheet_by_index(0)
csvFile = open(target, 'wb')
wr = csv.writer(csvFile, quoting=csv.QUOTE_ALL)
for row in xrange(sh.nrows):
rowValues = sh.row_values(row)
newValues = []
for s in rowValues:
if isinstance(s, unicode):
strValue = (str(s.encode("utf-8")))
else:
strValue = (str(s))
isInt = bool(re.match("^([0-9]+)\.0$", strValue))
if isInt:
strValue = int(float(strValue))
else:
isFloat = bool(re.match("^([0-9]+)\.([0-9]+)$", strValue))
isLong = bool(re.match("^([0-9]+)\.([0-9]+)e\+([0-9]+)$", strValue))
if isFloat:
strValue = float(strValue)
if isLong:
strValue = int(float(strValue))
newValues.append(strValue)
wr.writerow(newValues)
csvFile.close()
logging.info("Finished in %s seconds", time.time() - start_time)
except Exception as e:
print (str(e) + " " + traceback.format_exc())

print if list index out of range

hi all im trying to create a handle for "list index out of range" but seem not to be having any luck.
import json, urllib, re
from urllib import urlencode
import googlemaps
import tempfile
import win32api
import win32print
start = "Adelaide, South Australia"
finish = " ghkjffzh, south Australia "
url = 'http://maps.googleapis.com/maps/api/directions/json?%s' % urlencode((
('origin', start),
('destination', finish)
))
ur = urllib.urlopen(url)
result = json.load(ur)
filename = "output.txt"
with open(filename, 'w') as output:
for i in range(0, len(result['routes'][0]['legs'][0]['steps'])):
try:
s = (result['routes'][0]['legs'][0]['steps'][i]['html_instructions'])
d = (result['routes'][0]['legs'][0]['steps'][i]['distance']['text'])
l = (result['routes'][0]['legs'][0]['steps'][i]['duration']['text'])
s = re.sub('<[A-Za-z\/][^>]*>', '', s)
output.writelines(s + " " + d + " " + l + '\n')
except Exception:
print "Directions could not be printed"
output.write("Directions could not be given due to the format of page or the address type")
but nothing is written to .txt and still get error.
ive tried to replace Exception with IndexError and VauleError but no change

Solved used by exploring the returned json result and found a Status result so I passed that first.
with open(filename, 'w') as output:
if result ['status'] == "NOT_FOUND"
output.write( " no directions avalible")
else:
for i in range(0, len(result['routes'][0]['legs'][0]['steps'])):
s = (result['routes'][0]['legs'][0]['steps'][i]['html_instructions'])
d = (result['routes'][0]['legs'][0]['steps'][i]['distance']['text'])
l = (result['routes'][0]['legs'][0]['steps'][i]['duration']['text'])
s = re.sub('<[A-Za-z\/][^>]*>', '', s)
output.writelines(s + " " + d + " " + l + '\n')

Save results in the files with same name as input files

I am working on a python code which is as follows:
import os
count = 0
for doc in os.listdir('/home/krupa/Krupa/Mirellas_Image_Annotation_Data/Test/Html_Files/Texts'):
if doc.endswith(".txt"):
with open(doc, 'r') as f:
single_line = ''.join([line for line in f])
single_space = ' '.join(single_line.split())
with open(doc.format(count) , "w") as doc:
doc.write(single_space)
count += 1
else:
continue
Here I want to write the output in the same file name but with different extension (say .key). How do I do it? Please help. Thanks in advance

import os
count = 0
for doc in os.listdir('/home/krupa/Krupa/Mirellas_Image_Annotation_Data/Test/Html_Files/Texts'):
doc1 = "doc_path" + doc
doc2 = "new_path_where_new_file_with_new_ext_needs_to_be_saved" + doc1
if doc1.endswith(".txt"):
with open(doc, 'r') as f:
single_line = ''.join([line for line in f])
single_space = ' '.join(single_line.split())
with open(doc2.format(count) , "w") as doc2:
doc2.write(single_space)
count += 1
else:
continue

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

Timeout not working in ProcessPoolExecutor map function - concurrency

Related

django+celery+rabbitmq encode error and sig-kill

Python Value error

XLRDError: No sheet named <'Sheet1'> in python

print if list index out of range

Save results in the files with same name as input files

Categories

Resources