I am unexpectedly getting IOError: bad message length error when trying to share pyodbc connection across multiple processes, especially when N is more than 4 (no. of cores). Sometimes I also get cPickle.UnpicklingError: invalid load key, '#'., pyodbc.ProgrammingError: ('24000', '[24000] [FreeTDS][SQL Server]Invalid cursor state (0) (SQLExecDirectW)') as errors.
# Import custom python packages
import multiprocessing
import multiprocessing.managers as mm
import pathos.multiprocessing as mp
import pyodbc, datetime, time
class MyConn(object):
def __init__(self):
self.conn = None
self.cursor = None
def connect_to_db(self):
self.conn = pyodbc.connect("DSN=cpmeast;UID=dntcore;PWD=dntcorevs2")
self.cursor = self.conn.cursor()
def run_qry(self, data):
print 'Running query', data
self.cursor.execute("WAITFOR DELAY '00:00:01';select GETDATE(), '"+str(data)+"';")
l = self.cursor.fetchall()
_l = []
for i in l:
_l.append(list(i))
print 'Result for query', data, _l
return _l
class MyManagerClass(object):
def __init__(self):
self.result = multiprocessing.Manager().list()
def read_data(self, *args):
conn = args[0][0]
data = args[0][1]
l = conn.run_qry(data)
self.result.append(l)
class MyManager(mm.BaseManager):
pass # Pass is really enough. Nothing needs to be done here.
def main():
time_start = time.time()
MyManager.register("MyConn", MyConn)
manager = MyManager()
manager.start()
a = manager.MyConn()
a.connect_to_db()
dbm = MyManagerClass()
pool = mp.ProcessingPool(4)
jobs = []
N = 5
for i in range(N):
jobs.append((a, str(i)))
for i in pool.imap(dbm.read_data, jobs):
print 'result'
pool.close()
pool.join()
print 'Result', dbm.result
print 'Closed'
time_stop = time.time()
msg = 'runtime: {0}'.format(str(datetime.timedelta
(seconds=time_stop-time_start)))
print msg
if __name__ == '__main__':
main()
Related
I have working code below.
I have a set of machines operated with Python. I have a gui in Tkinter but very often these machines are run headless with the python code auto-starting at boot.
I really like the design pattern of using root.after() to start multiple tasks and keep them going. My problem is that this comes from the Tkinter library and when running headless the line "root=Tk()" will throw an error.
I have two questions
Can I perform some trick to have the code ignore the fact there is no display?
OR
Is there a library that will match the design pattern of Tkinter "root.after(time_in_ms,function_to_call)".
I did try to poke around in the underlying code of Tkinter to see if there was simply another library wrapped by Tkinter but I don't have the skill to decode what is going on in that library.
This code works with a display connected: (it prints hello 11 times then ends)
from Tkinter import *
# def __init__(self, screenName=None, baseName=None, className='Tk', useTk=1, sync=0, use=None):
root = Tk() # error is thrown here if starting this command in headless hardware setup
h = None
count = 0
c = None
def stop_saying_hello():
global count
global h
global c
if count > 10:
root.after_cancel(h)
print "counting cancelled"
else:
c = root.after(200, stop_saying_hello)
def hello():
global h
global count
print "hello " + str(count)
count += 1
h = root.after(1000, hello)
h = root.after(1000, hello) # time in ms, function
c = root.after(200, stop_saying_hello)
root.mainloop()
If this is run headless - in an ssh session from a remote computer then this error message is returned
Traceback (most recent call last): File "tkinter_headless.py", line
5, in
root = Tk() File "/usr/lib/python2.7/lib-tk/Tkinter.py", line 1813, in init
self.tk = _tkinter.create(screenName, baseName, className, interactive, wantobjects, useTk, sync, use)
_tkinter.TclError: no display name and no $DISPLAY environment variable
You can use
threading and threating.timer()
shed
APSheduler
or create own taks manager with own after() and mainloop()
Simple example
import time
class TaskManager():
def __init__(self):
self.tasks = dict()
self.index = 0
self.running = True
def after(self, delay, callback):
# calcuate time using delay
current_time = time.time()*1000
run_time = current_time + delay
# add to tasks
self.index += 1
self.tasks[self.index] = (run_time, callback)
# return index
return self.index
def after_cancel(self, index):
if index in self.tasks:
del self.tasks[index]
def mainloop(self):
self.running = True
while self.running:
current_time = time.time()*1000
# check all tasks
# Python 3 needs `list(self.tasks.keys())`
# because `del` changes `self.tasks.keys()`
for key in self.tasks.keys():
if key in self.tasks:
run_time, callback = self.tasks[key]
if current_time >= run_time:
# execute task
callback()
# remove from list
del self.tasks[key]
# to not use all CPU
time.sleep(0.1)
def quit(self):
self.running = False
def destroy(self):
self.running = False
# --- function ---
def stop_saying_hello():
global count
global h
global c
if count > 10:
root.after_cancel(h)
print "counting cancelled"
else:
c = root.after(200, stop_saying_hello)
def hello():
global count
global h
print "hello", count
count += 1
h = root.after(1000, hello)
# --- main ---
count = 0
h = None
c = None
root = TaskManager()
h = root.after(1000, hello) # time in ms, function
c = root.after(200, stop_saying_hello)
d = root.after(12000, root.destroy)
root.mainloop()
#!/usr/bin/env python
from __future__ import print_function
import sys
import time
import getopt
import alsaaudio
import numpy
from time import sleep
class A_weight():
def __init__(self):
skaler = 2.361E-14
fix_cur = 0.20565360419770495
A = []
hPa = 4e-11
card = 'default'
array_float = numpy.dtype(float)
stream = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NORMAL, card)
stream.setchannels(1)
stream.setrate(48000)
stream.setformat(alsaaudio.PCM_FORMAT_S16_LE)
stream.setperiodsize(128)
def A(f):
return (12200**2*f**4/((f**2+20.6**2)*(f**2+12200**2)*numpy.sqrt(f**2+107.7**2)*numpy.sqrt(f**2+737.9**2)))+fix_cur
def listen(self):
glob_leq = 0
liczba_ramek = 0
index_ramek = 0
while True:
try:
l, data = stream.read()
except IOError, e:
error_count += 1
print(" (%d) Error recording: %s" % (error_count, e))
else:
if l==128:
decoded_block = numpy.frombuffer(data, dtype='int16' )
else:
continue
Y = numpy.fft.fft(decoded_block) # fft computing and normalization
Aw = A(numpy.arange(20.,20000,(19980./len(Y))))
Na = Aw*Y
inverse = numpy.fft.ifft(Y)
maks = 32768
array_float = numpy.divide(inverse.real ,float( maks))
array_float = array_float**2
sum_array = numpy.sum(array_float, dtype=float)
glob_leq = glob_leq + sum_array
liczba_ramek += 1
index_ramek += 1
if index_ramek == 375:
index_ramek=0
cis_chwil = numpy.divide(glob_leq, liczba_ramek * 128)
leq =10*numpy.log10(numpy.divide(cis_chwil, hPa))
print (leq)
#A.append(leq)
#print(max(A))
A_weight().listen()
So i trying writing program compute sound pressure level with weighting A.
All work correct but when i want close may code in class I have problem. Because something wrong with invoke to importing class in this case is it alsaaudio.
I get this feedback:
Traceback (most recent call last):
File "rec_A.py", line 64, in <module>
A_weight().listen()
File "rec_A.py", line 37, in listen
l, data = stream.read()
NameError: global name 'stream' is not defined
Do you have any idea
Change each occurrence of stream to self.stream:
class A_weight():
def __init__(self):
skaler = 2.361E-14
fix_cur = 0.20565360419770495
A = []
hPa = 4e-11
card = 'default'
array_float = numpy.dtype(float)
self.stream = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NORMAL, card)
self.stream.setchannels(1)
self.stream.setrate(48000)
self.stream.setformat(alsaaudio.PCM_FORMAT_S16_LE)
self.stream.setperiodsize(128)
...
def listen(self):
glob_leq = 0
liczba_ramek = 0
index_ramek = 0
while True:
try:
l, data = self.stream.read()
...
This will make it an instance variable, and all other methods of that class (as long as they are passed the self argument) will have access to it through self.stream. See this bit of documentation for more details on instance variables.
Also, this is merely an aesthetic point, but the convention in Python is to use upper camel case for class names, i.e., AWeight instead of A_weight - but this will not affect how your code runs.
I'm now doing a little project which uses celery to turn csv and xlsx files into postgresql table.
The code below works fine without celery(except large files),but after using celery it produce some errors and bugs.
I've looked for similar questions in StackOverFlow but don't have any idea how to do and why.
Hope you guys can help me with it,thanks.
First error is as follows:
csv-1
csv-2
I think it has something to do with my encoding part, but I tried to open it with utf-8-sig and big-5, not working.(It works fine without celery)
`
# -*- coding: utf-8 -*-
from django.shortcuts import render_to_response
from django.template import RequestContext
from django.http import HttpResponseRedirect
from django.core.urlresolvers import reverse
from django.contrib import messages
from django.conf import settings
from django.db import connection
from django.views.decorators.csrf import csrf_exempt
from celery import Celery
from celery import task
import json
import csv
import sys
import random
import psycopg2
import xlrd
import openpyxl as pyxl
from .models import Document
from .forms import DocumentForm
app = Celery(
'tasks',
broker='amqp://guest:guest#localhost:5672//',
backend='rpc://'
)
CELERY_RESULT_BACKEND = 'rpc://'
CELERY_RESULT_PERSISTENT = False
#app.task()
def csvwritein(doc):# Transform csv to table
doc = doc
conn = psycopg2.connect("dbname='apidb' user='api' host='localhost'
password='eric40502' port='5432'")
readcur = conn.cursor()
readcur.execute("select exists(select * from
information_schema.tables where table_name='%s')" % doc.tablename) # check if
same file is already in database
check = readcur.fetchone()[0]
try:
fr = open(doc.path,encoding = 'utf-8-sig')
dr.delay(fr,doc,check)
fr.close()
except Exception as e:
fr = open(doc.path,encoding = 'big5')
dr.delay(fr,doc,check)
fr.close()
conn.commit()
readcur.close()
#app.task()
def dr(fr,doc,check): # make datareader as function to keep code 'dry'
csvt = 0 #count csv reader loop time
row_id = 1 # used for following id field
conn = psycopg2.connect("dbname='apidb' user='api' host='localhost'
password='eric40502' port='5432'")
maincur = conn.cursor()
writecur = conn.cursor()
datareader = csv.reader(fr, delimiter=',')
for row in datareader:
if csvt == 0: # first time in loop(create field) and check no
same file exists
if check == True:
app =
''.join([random.SystemRandom().choice('abcdefghijklmnopqrstuvwxyz0123456789')
for i in range(6)])
tname = '%s-%s' % (doc.tablename,app
tablename = '"%s-%s"' % (doc.tablename,app)
doc.tablename = tname
doc.save()
else:
tablename = '"%s"' % doc.tablename
maincur.execute("CREATE TABLE %s (id SERIAL PRIMARY
KEY);" % tablename)
row_count = sum(1 for line in datareader)
col_count = len(row)
frow = row
for i in range(0,col_count,1):
row[i] = '"%s"' % row[i] # change number to
string
maincur.execute("ALTER TABLE %s ADD %s
CITEXT;" % (tablename,row[i]))
csvt = csvt+1
fr.seek(0)
next(datareader)
elif csvt > 0: # not first time(insert data) and check no
same file exists
for j in range(0,col_count,1):
if j == 0:
writecur.execute("INSERT INTO %s (%s)
VALUES ('%s');" % (tablename,frow[j],row[j]))
else:
writecur.execute("UPDATE %s SET %s =
'%s' WHERE id = '%d';" %(tablename,frow[j],row[j],row_id))
csvt = csvt+1
row_id = row_id+1
else:
break
conn.commit()
maincur.close()
writecur.close()
conn.close()
csvt = 0
doc = Document.objects.all()
`
Second error is about turning a xlsx file(about 130,000 rows) into postgresql table, and the worker got sig-kill after 2-3 minutes.
Debug Message:
[2016-10-27 06:17:05,227: ERROR/MainProcess] Process 'Worker-1' pid:13829 exited with 'signal 9 (SIGKILL)' [2016-10-27 06:17:05,328:ERROR/MainProcess] Task data.tasks.xlsxwritein[5aec4679-c48b-4d07-a0a9-5e4e37fcd24b] raised unexpected: WorkerLostError('Worker exited prematurely: signal 9 (SIGKILL).',) Traceback (most recent call last): File "/usr/local/lib/python3.4/dist-packages/billiard/pool.py", line 1175, in mark_as_worker_lost human_status(exitcode)), billiard.exceptions.WorkerLostError: Worker exited prematurely: signal 9 (SIGKILL).
#The code continues from the above task.py file
#app.task()
def xlsxwritein(doc): # write into database for file type xlsx
xlsxt = 0
conn = psycopg2.connect("dbname='apidb' user='api' host='localhost'
password='eric40502' port='5432'")
maincur = conn.cursor()
readcur = conn.cursor()
writecur = conn.cursor()
readcur.execute("select exists(select * from
information_schema.tables where table_name='%s')" % doc.tablename) # check if
same file is already in database
check = readcur.fetchone()[0]
row_id = 1 # used for following id field
wb = pyxl.load_workbook(doc.path)
sheetnames = wb.get_sheet_names()
ws = wb.get_sheet_by_name(sheetnames[0])
for rown in range(ws.get_highest_row()):
if xlsxt == 0:
if check == True:
app =
''.join([random.SystemRandom().choice('abcdefghijklmnopqrstuvwxyz0123456789')
for i in range(6)])
tname = '%s-%s' % (doc.tablename,app)
tablename = '"%s-%s"' % (doc.tablename,app)
doc.tablename = tname
doc.save()
else:
tablename = '"%s"' % doc.tablename
field = [ws.cell(row=1,column=col_index).value for
col_index in range(1,ws.get_highest_column()+1)]
maincur.execute("CREATE TABLE %s (id SERIAL PRIMARY
KEY);" % tablename)
for coln in range(ws.get_highest_column()):
field[coln] = '"%s"' % field[coln] # change
number to string
if field[coln] == 'ID':
field[coln] = 'original_id'
maincur.execute("ALTER TABLE %s ADD %s
CITEXT;" % (tablename,field[coln]))
xlsxt = xlsxt+1
elif xlsxt > 0 and check == False: # not first time(insert
data) and check no same file exists
for coln in range(ws.get_highest_column()):
if coln == 0:
writecur.execute("INSERT INTO %s (%s)
VALUES ('%s');"
%(tablename,field[coln],str(ws.cell(row=rown,column=coln+1).value)))
else:
writecur.execute("UPDATE %s SET %s =
'%s' WHERE id = '%d';"
%(tablename,field[coln],str(ws.cell(row=rown+1,column=coln+1).value),row_id))
xlsxt = xlsxt+1
row_id = row_id+1
else:
break
conn.commit()
maincur.close()
readcur.close()
writecur.close()
conn.close()
xlsxt = 0
Probably something is going wrong during arguments deserialization. Instead of passing doc object try instead passing filename and then read file inside of task.
So I am trying to multi process function F. Which is accessed by a button press with tkinter.
def f(x):
global doom,results,info
doom = doom + 1
if check(x) == True:
results.add(x)
info.append(get_column_number(x))
j.step(1)
texx = "1/"+doom
s.configure(text=texx)
root.update()
The function is called within a function like so:
def dojob():
index = ['URLS'...]
pool = Pool(processes=4)
s.configure(text="Shifting Workload to cores..")
root.update()
pool.map(f, index)
The button is inside root window.
I get the following error:
Exception in thread Thread-2:
Traceback (most recent call last):
File "C:\Python27\lib\threading.py", line 808, in __bootstrap_inner
self.run()
File "C:\Python27\lib\threading.py", line 761, in run
self.__target(*self.__args, **self.__kwargs)
File "C:\Python27\lib\multiprocessing\pool.py", line 342, in _handle_tasks
put(task)
UnpickleableError: Cannot pickle <type 'tkapp'> objects
I do not even know what a pickle does? Help?
Here is the complete code:
from Tkinter import *
from ttk import *
from tkMessageBox import showinfo
from multiprocessing import Pool
import random
emails = set()
import urllib2
import urllib2 as urllib
########
CONSTANT_PAGECOUNT = 20
######
def f(x):
global doom,emails,info
doom = doom + 1
if check(x) == True:
print "",
emails.add(x)
info.append(get_column_number(x))
j.step(1)
texx = "Sk1nn1n "+str(doom)+'/'+str(CONSTANT_PAGECOUNT)+""
s.configure(text=texx)
root.update()
return 0
def f(x):
print ""
def showFile(site,info):
top = Toplevel()
top.title('Sites')
x = Text(top)
x.pack()
i=0
for site_url in site:
x.insert(END,site_url)
i=i+1
def get_column_number(url):
return True
def check(url):
return True
def getgoogleurl(search,siteurl=False,startr=0):
if siteurl==False:
return 'http://www.google.com/search?q='+urllib2.quote(search)+'&start='+str(startr)+'&oq='+urllib2.quote(search)
else:
return 'http://www.google.com/search?q=site:'+urllib2.quote(siteurl)+'%20'+urllib2.quote(search)+'&oq=site:'+urllib2.quote(siteurl)+'%20'+urllib2.quote(search)
def getgooglelinks(search,siteurl=False,startr=0):
#google returns 403 without user agent
headers = {'User-agent':'Mozilla/11.0'}
req = urllib2.Request(getgoogleurl(search,siteurl,startr),None,headers)
site = urllib2.urlopen(req)
data = site.read()
site.close()
#no beatifulsoup because google html is generated with javascript
start = data.find('<div id="res">')
end = data.find('<div id="foot">')
if data[start:end]=='':
#error, no links to find
return False
else:
links =[]
data = data[start:end]
start = 0
end = 0
while start>-1 and end>-1:
#get only results of the provided site
if siteurl==False:
start = data.find('<a href="/url?q=')
else:
start = data.find('<a href="/url?q='+str(siteurl))
data = data[start+len('<a href="/url?q='):]
end = data.find('&sa=U&ei=')
if start>-1 and end>-1:
link = urllib2.unquote(data[0:end])
data = data[end:len(data)]
if link.find('http')==0:
links.append(link)
return links
def rip(results=15,accuracy=16):
global e
keyword = ''+str(e.get())
if keyword.strip()=="":
s.configure(text="Please enter a keyword")
root.update()
return 0
linklist = []
counter = 0
doom = 0
while counter < results:
links = getgooglelinks(keyword,startr=counter)
for link in links:
if len(linklist) > CONSTANT_PAGECOUNT:
s.configure(text="Proccessing..")
root.update()
return linklist
else:
doom = doom + 1
linklist.append(link)
texx = str(doom)+"/"+str(CONSTANT_PAGECOUNT)
s.configure(text=texx)
root.update()
root.update()
counter = counter+accuracy
return linklist
def flip():
global e
emails = set()
info = []
keyword = ''+str(e.get())
if keyword.strip()=="":
s.configure(text="Please enter a keyword")
root.update()
return 0
s.configure(text="Generating index..")
root.update()
doom = -1
index = rip(CONSTANT_PAGECOUNT,10)
if 1:
try:
pool = Pool(processes=4)
#s.configure(text="Shifting Workload to cores..")
#root.update()
pool.map(f, index)
pool.close()
except:
print "The errors there.."
j.config(value=CONSTANT_PAGECOUNT)
if len(emails) > 0:
filepath='relavant_list_'+str(random.randint(1,9999))+'.emList.txt'
#print len(emails),
#print "emails found."
ggg = open(filepath,'a+')
for x in emails:
ggg.write(x+"\n")
showinfo(
str(len(emails))+" key word related sites found!",
" sites are saved in "+str(filepath)
)
showFile(emails,info)
s.configure(text=filepath)
else:
s.configure(text='No related sites found : (')
if __name__ == '__main__':
### CONSTANTS
version = '1.0'
### END CONSTANTS
root = Tk()
root.title('Program v'+version)
s = Style()
s.theme_use('default')
#print s.theme_names()
s.configure("black.Horizontal.TProgressbar", foreground='blue', background='blue')
j = Progressbar(root, style="black.Horizontal.TProgressbar", orient="vertical", length=200, mode="determinate", maximum=CONSTANT_PAGECOUNT, value=0)
j.pack(side='right',fill='y')
f = Frame(root)
x = Frame(f)
e = Entry(x,width=51)
s = Label(x,width=50,anchor='center',text='Waiting for task..')
Button(f,text='Generate List!',width=50,command=flip).pack(fill='both',expand=True)
s.pack(side='bottom',fill='y',expand=True)
e.pack(side='top',fill='both',expand=True)
x.pack(side='top',fill='y',expand=True)
f.pack(side='left',expand=True,fill="both")
root.mainloop()
You are leaking a tkinter object. Most likely because you are trying to update the interface from another process with the last line of f()
Update based on code
You have a name collision between your function f() and a variable f in your __main__ which gets assigned to your main window and causes the tkapp pickle error. Rename the function to def myfunc() or something. Also need to call pool.join() after pool.close()
I am Trying to call a another method within my class, for some reason I am getting the AttributeError: portfinder instance has no attribute 'generatePortNumber' See my code below:
when I tried to call the generatePortNumber I'm getting the same error. I have never come across this issue.
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sqlite3 as lite
import sys
import random
class portfinder:
"""docstring for ClassName"""
def __init__(self):
self.portsToCheck = ['agentport','BatchProcessingAgentPort','databaseport','indexserviceport','monitorport','servicefacadeport','webdriverport']
self.dataBasePort = (u'60025',)
self.portInUse = False
self.x = 0
def generatePortNumber(self):
self.newPortNumber = random.randrange(8000, 9000)
print self.newPortNumber
return self.newPortNumber
def findUsedPortsinDB(self):
con = lite.connect('D:\play\Opes\db.sqlite3')
with con:
cur = con.cursor()
sqlStatement = "Select " + self.portsToCheck[2] +' From Jobs_jobs'
print sqlStatement
cur.execute(sqlStatement)
rows = cur.fetchall()
for row in rows:
print row
if row == self.dataBasePort:
self.portInUse = "true"
self.generatePortNumber()
if __name__ == "__main__":
m = portfinder()
m.findUsedPortsinDB()
Found what was wrong I had a extra indentation in my method