Cannot Pool.map() function because of UnpickleableError? - python-2.7

So I am trying to multi process function F. Which is accessed by a button press with tkinter.
def f(x):
global doom,results,info
doom = doom + 1
if check(x) == True:
results.add(x)
info.append(get_column_number(x))
j.step(1)
texx = "1/"+doom
s.configure(text=texx)
root.update()
The function is called within a function like so:
def dojob():
index = ['URLS'...]
pool = Pool(processes=4)
s.configure(text="Shifting Workload to cores..")
root.update()
pool.map(f, index)
The button is inside root window.
I get the following error:
Exception in thread Thread-2:
Traceback (most recent call last):
File "C:\Python27\lib\threading.py", line 808, in __bootstrap_inner
self.run()
File "C:\Python27\lib\threading.py", line 761, in run
self.__target(*self.__args, **self.__kwargs)
File "C:\Python27\lib\multiprocessing\pool.py", line 342, in _handle_tasks
put(task)
UnpickleableError: Cannot pickle <type 'tkapp'> objects
I do not even know what a pickle does? Help?
Here is the complete code:
from Tkinter import *
from ttk import *
from tkMessageBox import showinfo
from multiprocessing import Pool
import random
emails = set()
import urllib2
import urllib2 as urllib
########
CONSTANT_PAGECOUNT = 20
######
def f(x):
global doom,emails,info
doom = doom + 1
if check(x) == True:
print "",
emails.add(x)
info.append(get_column_number(x))
j.step(1)
texx = "Sk1nn1n "+str(doom)+'/'+str(CONSTANT_PAGECOUNT)+""
s.configure(text=texx)
root.update()
return 0
def f(x):
print ""
def showFile(site,info):
top = Toplevel()
top.title('Sites')
x = Text(top)
x.pack()
i=0
for site_url in site:
x.insert(END,site_url)
i=i+1
def get_column_number(url):
return True
def check(url):
return True
def getgoogleurl(search,siteurl=False,startr=0):
if siteurl==False:
return 'http://www.google.com/search?q='+urllib2.quote(search)+'&start='+str(startr)+'&oq='+urllib2.quote(search)
else:
return 'http://www.google.com/search?q=site:'+urllib2.quote(siteurl)+'%20'+urllib2.quote(search)+'&oq=site:'+urllib2.quote(siteurl)+'%20'+urllib2.quote(search)
def getgooglelinks(search,siteurl=False,startr=0):
#google returns 403 without user agent
headers = {'User-agent':'Mozilla/11.0'}
req = urllib2.Request(getgoogleurl(search,siteurl,startr),None,headers)
site = urllib2.urlopen(req)
data = site.read()
site.close()
#no beatifulsoup because google html is generated with javascript
start = data.find('<div id="res">')
end = data.find('<div id="foot">')
if data[start:end]=='':
#error, no links to find
return False
else:
links =[]
data = data[start:end]
start = 0
end = 0
while start>-1 and end>-1:
#get only results of the provided site
if siteurl==False:
start = data.find('<a href="/url?q=')
else:
start = data.find('<a href="/url?q='+str(siteurl))
data = data[start+len('<a href="/url?q='):]
end = data.find('&sa=U&ei=')
if start>-1 and end>-1:
link = urllib2.unquote(data[0:end])
data = data[end:len(data)]
if link.find('http')==0:
links.append(link)
return links
def rip(results=15,accuracy=16):
global e
keyword = ''+str(e.get())
if keyword.strip()=="":
s.configure(text="Please enter a keyword")
root.update()
return 0
linklist = []
counter = 0
doom = 0
while counter < results:
links = getgooglelinks(keyword,startr=counter)
for link in links:
if len(linklist) > CONSTANT_PAGECOUNT:
s.configure(text="Proccessing..")
root.update()
return linklist
else:
doom = doom + 1
linklist.append(link)
texx = str(doom)+"/"+str(CONSTANT_PAGECOUNT)
s.configure(text=texx)
root.update()
root.update()
counter = counter+accuracy
return linklist
def flip():
global e
emails = set()
info = []
keyword = ''+str(e.get())
if keyword.strip()=="":
s.configure(text="Please enter a keyword")
root.update()
return 0
s.configure(text="Generating index..")
root.update()
doom = -1
index = rip(CONSTANT_PAGECOUNT,10)
if 1:
try:
pool = Pool(processes=4)
#s.configure(text="Shifting Workload to cores..")
#root.update()
pool.map(f, index)
pool.close()
except:
print "The errors there.."
j.config(value=CONSTANT_PAGECOUNT)
if len(emails) > 0:
filepath='relavant_list_'+str(random.randint(1,9999))+'.emList.txt'
#print len(emails),
#print "emails found."
ggg = open(filepath,'a+')
for x in emails:
ggg.write(x+"\n")
showinfo(
str(len(emails))+" key word related sites found!",
" sites are saved in "+str(filepath)
)
showFile(emails,info)
s.configure(text=filepath)
else:
s.configure(text='No related sites found : (')
if __name__ == '__main__':
### CONSTANTS
version = '1.0'
### END CONSTANTS
root = Tk()
root.title('Program v'+version)
s = Style()
s.theme_use('default')
#print s.theme_names()
s.configure("black.Horizontal.TProgressbar", foreground='blue', background='blue')
j = Progressbar(root, style="black.Horizontal.TProgressbar", orient="vertical", length=200, mode="determinate", maximum=CONSTANT_PAGECOUNT, value=0)
j.pack(side='right',fill='y')
f = Frame(root)
x = Frame(f)
e = Entry(x,width=51)
s = Label(x,width=50,anchor='center',text='Waiting for task..')
Button(f,text='Generate List!',width=50,command=flip).pack(fill='both',expand=True)
s.pack(side='bottom',fill='y',expand=True)
e.pack(side='top',fill='both',expand=True)
x.pack(side='top',fill='y',expand=True)
f.pack(side='left',expand=True,fill="both")
root.mainloop()

You are leaking a tkinter object. Most likely because you are trying to update the interface from another process with the last line of f()
Update based on code
You have a name collision between your function f() and a variable f in your __main__ which gets assigned to your main window and causes the tkapp pickle error. Rename the function to def myfunc() or something. Also need to call pool.join() after pool.close()

Related

Run Python with Tkinter (sometimes) headless OR replacement for root.after()

I have working code below.
I have a set of machines operated with Python. I have a gui in Tkinter but very often these machines are run headless with the python code auto-starting at boot.
I really like the design pattern of using root.after() to start multiple tasks and keep them going. My problem is that this comes from the Tkinter library and when running headless the line "root=Tk()" will throw an error.
I have two questions
Can I perform some trick to have the code ignore the fact there is no display?
OR
Is there a library that will match the design pattern of Tkinter "root.after(time_in_ms,function_to_call)".
I did try to poke around in the underlying code of Tkinter to see if there was simply another library wrapped by Tkinter but I don't have the skill to decode what is going on in that library.
This code works with a display connected: (it prints hello 11 times then ends)
from Tkinter import *
# def __init__(self, screenName=None, baseName=None, className='Tk', useTk=1, sync=0, use=None):
root = Tk() # error is thrown here if starting this command in headless hardware setup
h = None
count = 0
c = None
def stop_saying_hello():
global count
global h
global c
if count > 10:
root.after_cancel(h)
print "counting cancelled"
else:
c = root.after(200, stop_saying_hello)
def hello():
global h
global count
print "hello " + str(count)
count += 1
h = root.after(1000, hello)
h = root.after(1000, hello) # time in ms, function
c = root.after(200, stop_saying_hello)
root.mainloop()
If this is run headless - in an ssh session from a remote computer then this error message is returned
Traceback (most recent call last): File "tkinter_headless.py", line
5, in
root = Tk() File "/usr/lib/python2.7/lib-tk/Tkinter.py", line 1813, in init
self.tk = _tkinter.create(screenName, baseName, className, interactive, wantobjects, useTk, sync, use)
_tkinter.TclError: no display name and no $DISPLAY environment variable
You can use
threading and threating.timer()
shed
APSheduler
or create own taks manager with own after() and mainloop()
Simple example
import time
class TaskManager():
def __init__(self):
self.tasks = dict()
self.index = 0
self.running = True
def after(self, delay, callback):
# calcuate time using delay
current_time = time.time()*1000
run_time = current_time + delay
# add to tasks
self.index += 1
self.tasks[self.index] = (run_time, callback)
# return index
return self.index
def after_cancel(self, index):
if index in self.tasks:
del self.tasks[index]
def mainloop(self):
self.running = True
while self.running:
current_time = time.time()*1000
# check all tasks
# Python 3 needs `list(self.tasks.keys())`
# because `del` changes `self.tasks.keys()`
for key in self.tasks.keys():
if key in self.tasks:
run_time, callback = self.tasks[key]
if current_time >= run_time:
# execute task
callback()
# remove from list
del self.tasks[key]
# to not use all CPU
time.sleep(0.1)
def quit(self):
self.running = False
def destroy(self):
self.running = False
# --- function ---
def stop_saying_hello():
global count
global h
global c
if count > 10:
root.after_cancel(h)
print "counting cancelled"
else:
c = root.after(200, stop_saying_hello)
def hello():
global count
global h
print "hello", count
count += 1
h = root.after(1000, hello)
# --- main ---
count = 0
h = None
c = None
root = TaskManager()
h = root.after(1000, hello) # time in ms, function
c = root.after(200, stop_saying_hello)
d = root.after(12000, root.destroy)
root.mainloop()

How I can invoke importing class in other class Python

#!/usr/bin/env python
from __future__ import print_function
import sys
import time
import getopt
import alsaaudio
import numpy
from time import sleep
class A_weight():
def __init__(self):
skaler = 2.361E-14
fix_cur = 0.20565360419770495
A = []
hPa = 4e-11
card = 'default'
array_float = numpy.dtype(float)
stream = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NORMAL, card)
stream.setchannels(1)
stream.setrate(48000)
stream.setformat(alsaaudio.PCM_FORMAT_S16_LE)
stream.setperiodsize(128)
def A(f):
return (12200**2*f**4/((f**2+20.6**2)*(f**2+12200**2)*numpy.sqrt(f**2+107.7**2)*numpy.sqrt(f**2+737.9**2)))+fix_cur
def listen(self):
glob_leq = 0
liczba_ramek = 0
index_ramek = 0
while True:
try:
l, data = stream.read()
except IOError, e:
error_count += 1
print(" (%d) Error recording: %s" % (error_count, e))
else:
if l==128:
decoded_block = numpy.frombuffer(data, dtype='int16' )
else:
continue
Y = numpy.fft.fft(decoded_block) # fft computing and normalization
Aw = A(numpy.arange(20.,20000,(19980./len(Y))))
Na = Aw*Y
inverse = numpy.fft.ifft(Y)
maks = 32768
array_float = numpy.divide(inverse.real ,float( maks))
array_float = array_float**2
sum_array = numpy.sum(array_float, dtype=float)
glob_leq = glob_leq + sum_array
liczba_ramek += 1
index_ramek += 1
if index_ramek == 375:
index_ramek=0
cis_chwil = numpy.divide(glob_leq, liczba_ramek * 128)
leq =10*numpy.log10(numpy.divide(cis_chwil, hPa))
print (leq)
#A.append(leq)
#print(max(A))
A_weight().listen()
So i trying writing program compute sound pressure level with weighting A.
All work correct but when i want close may code in class I have problem. Because something wrong with invoke to importing class in this case is it alsaaudio.
I get this feedback:
Traceback (most recent call last):
File "rec_A.py", line 64, in <module>
A_weight().listen()
File "rec_A.py", line 37, in listen
l, data = stream.read()
NameError: global name 'stream' is not defined
Do you have any idea
Change each occurrence of stream to self.stream:
class A_weight():
def __init__(self):
skaler = 2.361E-14
fix_cur = 0.20565360419770495
A = []
hPa = 4e-11
card = 'default'
array_float = numpy.dtype(float)
self.stream = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NORMAL, card)
self.stream.setchannels(1)
self.stream.setrate(48000)
self.stream.setformat(alsaaudio.PCM_FORMAT_S16_LE)
self.stream.setperiodsize(128)
...
def listen(self):
glob_leq = 0
liczba_ramek = 0
index_ramek = 0
while True:
try:
l, data = self.stream.read()
...
This will make it an instance variable, and all other methods of that class (as long as they are passed the self argument) will have access to it through self.stream. See this bit of documentation for more details on instance variables.
Also, this is merely an aesthetic point, but the convention in Python is to use upper camel case for class names, i.e., AWeight instead of A_weight - but this will not affect how your code runs.

multiprocessing - pyodbc IOError: bad message length

I am unexpectedly getting IOError: bad message length error when trying to share pyodbc connection across multiple processes, especially when N is more than 4 (no. of cores). Sometimes I also get cPickle.UnpicklingError: invalid load key, '#'., pyodbc.ProgrammingError: ('24000', '[24000] [FreeTDS][SQL Server]Invalid cursor state (0) (SQLExecDirectW)') as errors.
# Import custom python packages
import multiprocessing
import multiprocessing.managers as mm
import pathos.multiprocessing as mp
import pyodbc, datetime, time
class MyConn(object):
def __init__(self):
self.conn = None
self.cursor = None
def connect_to_db(self):
self.conn = pyodbc.connect("DSN=cpmeast;UID=dntcore;PWD=dntcorevs2")
self.cursor = self.conn.cursor()
def run_qry(self, data):
print 'Running query', data
self.cursor.execute("WAITFOR DELAY '00:00:01';select GETDATE(), '"+str(data)+"';")
l = self.cursor.fetchall()
_l = []
for i in l:
_l.append(list(i))
print 'Result for query', data, _l
return _l
class MyManagerClass(object):
def __init__(self):
self.result = multiprocessing.Manager().list()
def read_data(self, *args):
conn = args[0][0]
data = args[0][1]
l = conn.run_qry(data)
self.result.append(l)
class MyManager(mm.BaseManager):
pass # Pass is really enough. Nothing needs to be done here.
def main():
time_start = time.time()
MyManager.register("MyConn", MyConn)
manager = MyManager()
manager.start()
a = manager.MyConn()
a.connect_to_db()
dbm = MyManagerClass()
pool = mp.ProcessingPool(4)
jobs = []
N = 5
for i in range(N):
jobs.append((a, str(i)))
for i in pool.imap(dbm.read_data, jobs):
print 'result'
pool.close()
pool.join()
print 'Result', dbm.result
print 'Closed'
time_stop = time.time()
msg = 'runtime: {0}'.format(str(datetime.timedelta
(seconds=time_stop-time_start)))
print msg
if __name__ == '__main__':
main()

Why is my Python code returning an error when I try to fetch YouTube videos for a given keyword?

Whenever I try to run my code, I receive the following error: "comment_content error! 'nonetype' object has no attribute 'href'" I am new to Python, and did not write this code myself; it was given to me to use. My understanding is that it was functioning properly before? Could this have to do with changes in the YouTube Data API since it was written?
import pdb
import gdata.youtube
import gdata.youtube.service
import codecs
import time
client = gdata.youtube.service.YouTubeService()
query = gdata.youtube.service.YouTubeVideoQuery()
### the input words are here
query.vq = "4b hair"
#######
# the out put file are here
viewFile = codecs.open('views4b_hair.csv', 'w')
commentFile=codecs.open('comments4b_hair.csv', 'w')
##########
query.max_results = 50
query.start_index = 0
query.safesearch = "moderate"
#query.format = 5
query.orderby = "relevance"
#query.author = "hawaiinani"
#pdb.set_trace()
for i in range(19):
#pdb.set_trace()
query.start_index=str(int(query.start_index)+50)
feed = client.YouTubeQuery(query)
print len(feed.entry)
youtubeid=[]
youtubetitle=[]
for entry in feed.entry:
#youtubetitle.append(entry.title.text)
youtubeid.append(entry.id.text[38:])
print entry.id.text[38:],i
try:
entry_comment = client.GetYouTubeVideoEntry(video_id=entry.id.text[38:])
comment_feed = client.GetYouTubeVideoCommentFeed(video_id=entry.id.text[38:])
viewFile.write(','.join([entry.id.text[38:],entry_comment.published.text,
str(entry_comment.media.duration.seconds), str(entry_comment.statistics.view_count),comment_feed.total_results.text,entry_comment.media.title.text.decode('ascii', errors='ignore').encode('ascii', 'ignore')]) + '\n')
#videop.append("%s, %s,%s, %s, %s, %s" % (search_result["id"]["videoId"],entry.published.text,
# entry.media.duration.seconds, entry.statistics.view_count,comment_feed.total_results.text,entry.media.title.text))
#
#time.sleep(3)
except Exception, ex:
print 'View_content Error', ex
time.sleep(10)
try:
comment_content = client.GetYouTubeVideoCommentFeed(video_id=entry.id.text[38:])
indexh=0
#while comment_content:
while indexh<10:
indexh=indexh+1
for comment_entry in comment_content.entry:
pubText = comment_entry.published.text
#print pubText
titleText = comment_entry.content.text.decode('ascii', errors='ignore').encode('ascii', 'ignore')
#print titleText
#print 'Got title'
#pubText, titleText = comment_entry.published.text, comment_entry.title.text
commentFile.write(','.join([entry.id.text[38:],pubText,titleText]) + '\n'+'\n')
#commentFile.write(u',')
#commentFile.write(pubText + u',')
#print 'About to write title'
#print titleText
#print 'Wrote title'
#commentlist.append("%s, %s,%s" % (search_result["id"]["videoId"],pubText, titleText))
comment_content=client.Query(comment_content.GetNextLink().href)
#time.sleep(3)
#time.sleep(3)
except Exception, ex:
print 'Comment_content Error!', ex
time.sleep(5)
#pdb.set_trace()
viewFile.close()
commentFile.close()
The error occurs when comment_content.GetNextLink() becomes None. In order to fix it, replace:
while indexh < 10:
with:
while indexh < 10 and comment_content:
also replace:
comment_content=client.Query(comment_content.GetNextLink().href)
with:
next_link = comment_content.GetNextLink()
if next_link:
comment_content = client.Query(next_link.href)
else:
comment_content = None
Hope that helps.

Access std::complex<> from gdb python macro

I'm trying to write a python script that evaluates a vector Complex numbers so I can do things like plot it via matplotlib. Python code below with comment where code breaks:
import sys
import gdb
import matplotlib.pyplot as plt
class Plotter (gdb.Command):
""" Plots vector"""
# _iterator pulled directly from
# /usr/share/gdb/python/libstdcxx/v6/printers.py
class _iterator:
def __init__ (self, start, finish):
self.item = start
self.finish = finish
self.count = 0
def __iter__(self):
return self
def next(self):
if self.item == self.finish:
raise StopIteration
count = self.count
self.count = self.count + 1
elt = self.item.dereference()
self.item = self.item + 1
return ('[%d]' % count, elt)
def __init__(self):
super(Plotter, self).__init__("plot_test", gdb.COMMAND_OBSCURE)
def invoke(self, arg, from_tty):
frame = gdb.selected_frame()
try:
val = gdb.Frame.read_var(frame, arg)
if str(val.type).find("vector") != -1:
print "Plot vector:", str(val.type)
if (str(val.type).find("complex") != -1):
self.plot_complex_vector(val)
else:
self.plot_vector(val)
else:
print "Not a vector..."
except:
print "No such variable:", arg
return
def plot_complex_vector(self, val):
try:
it = self._iterator(val['_M_impl']['_M_start'],
val['_M_impl']['_M_finish'])
vector = []
while(True):
x = it.next()
vector.append(complex(x[1])) # doesn't work...
return
except StopIteration:
pass
except:
print sys.exc_info()[0]
print vector
plt.plot(vector)
plt.show()
# works...
def plot_vector(self, val):
try:
it = self._iterator(val['_M_impl']['_M_start'],
val['_M_impl']['_M_finish'])
vector = []
while(True):
x = it.next()
vector.append(float(x[1]))
except StopIteration:
pass
except:
print sys.exc_info()[0]
print vector
plt.plot(vector)
plt.show()
Plotter()
So the question is, how do I access the real/imaginary parts of a a std::complex value?
It looks like doing a
print x[1]
Will print values like : {_M_value = 0 + 1 * I}
Update: It looks like I can do a little string editing before doing a typecast:
while(True):
x = it.next()
s = str(x[1]['_M_value'])
# convert string to complex format that python understands.
c = complex(s.replace('I','j').replace(' ','').replace('*',''))
vector.append(c) # Works now...
But... is there a better way to do this?
try:
it = self._iterator(val['_M_impl']['_M_start'],
val['_M_impl']['_M_finish'])
vector = []
while(True):
x = it.next()
vector.append(complex(x[1])) # doesn't work...
return
except StopIteration:
pass
except:
print sys.exc_info()[0]
Is not how iterators are meant to be used in python. Use
try:
it = self._iterator(val['_M_impl']['_M_start'],
val['_M_impl']['_M_finish'])
vector = [complex(x[1]) for x in it]
except Exception as e:
print e, sys.exc_info()[0]
If you really still want to wrap it in a try...except block.
Edit: Try complex(x.real() + x.imag()). What does print x.type.fields() show?