multiprocessing pool hangs in jupyter notebook - python-2.7

I have a very simple script which is the following:
import multiprocessing as multi

def call_other_thing_with_multi():
    P = multi.Pool(3)
    P.map(other_thing, range(0, 5))
    P.join()

def other_thing(arg):
    print(arg)
    return arg**2.

call_other_thing_with_multi()
When I call this, my code hangs in perpetuity. This is on Windows with Python 2.7.
Thanks for any guidance!

As per the documentation, you need to call close() before join():
import multiprocessing as multi

def call_other_thing_with_multi():
    P = multi.Pool(3)
    P.map(other_thing, range(0, 5))
    P.close()  # <-- calling close() before P.join()
    P.join()
    print('END')

def other_thing(arg):
    print(arg)
    return arg**2.

call_other_thing_with_multi()
Prints:
0
1
2
3
4
END
EDIT: Better still is to use a context manager, so you cannot forget to call close():
def call_other_thing_with_multi():
    with multi.Pool(3) as P:
        P.map(other_thing, range(0, 5))
    print('END')
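Note that Pool only became a context manager in Python 3.3, so the with form above will not work on the Python 2.7 setup from the question. A rough 2.7-friendly equivalent, a minimal sketch using contextlib.closing to guarantee the close() call, could look like this:
import multiprocessing as multi
from contextlib import closing

def other_thing(arg):
    print(arg)
    return arg**2.

def call_other_thing_with_multi():
    # closing() calls P.close() on exit, even if map() raises
    with closing(multi.Pool(3)) as P:
        P.map(other_thing, range(0, 5))
    P.join()
    print('END')

if __name__ == '__main__':
    call_other_thing_with_multi()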

Related

PyQt4/QProcess issues with Nuke v9

I am trying to utilize a QProcess to run renders in Nuke at my workplace. The reason I want to use a QProcess is that I've set up this Task Manager with the help of the community at Stack Overflow, which takes a list of commands, runs them sequentially one by one, and also lets me display the output. You can view the question I posted here:
How to update UI with output from QProcess loop without the UI freezing?
Now I am basically trying to run Nuke renders through this "Task Manager". But every time I do, it just gives me an error that the QProcess was destroyed while still running. I tested this with subprocess and that worked totally fine, so I am not sure why the renders are not working through QProcess.
So to do more testing I wrote a simplified version at home. The first issue I ran into, though, is that PyQt4 apparently couldn't be found from Nuke's python.exe, even though I have PyQt4 for my main Python installation. Apparently there is a compatibility issue with my installed PyQt4, since my main Python version is 2.7.12 while Nuke's Python version is 2.7.3. So I thought, "fine, then I'll just install PyQt4 directly inside my Nuke directory", grabbed the link below, and installed this PyQt version into my Nuke directory:
http://sourceforge.net/projects/pyqt/files/PyQt4/PyQt-4.10.3/PyQt4-4.10.3-gpl-Py2.7-Qt4.8.5-x64.exe
So I ran my little test, and it seems to do the same thing as it does at my workplace: the QProcess just gets destroyed. I thought adding "waitForFinished()" might do something different, but then it gives me this error:
The procedure entry point ??4QString@@QEAAAEAV0@$$QEAV0@@Z could not be located in the dynamic link library QtCore4.dll
And gives me this error as well:
ImportError: Failed to load C:\Program Files\Nuke9.0v8\nuke-9.0.8.dll
Now at this point I can't really do any more testing at home, and my studio is closed for the holidays. So I have two questions I'd like to ask:
1) What is this error I am seeing about "procedure entry point"? It only happens when I try to call something on a QProcess instance.
2) Why is my QProcess being destroyed before the render is finished? How come this doesn't happen with subprocess? How can I submit a Nuke render job while achieving the same results as with subprocess?
Here is my test code:
import os
import sys
import subprocess
import PyQt4
from PyQt4 import QtCore

class Task:
    def __init__(self, program, args=None):
        self._program = program
        self._args = args or []

    @property
    def program(self):
        return self._program

    @property
    def args(self):
        return self._args

class SequentialManager(QtCore.QObject):
    started = QtCore.pyqtSignal()
    finished = QtCore.pyqtSignal()
    progressChanged = QtCore.pyqtSignal(int)
    dataChanged = QtCore.pyqtSignal(str)
    # ^ this is how we can send a signal and can declare what type
    #   of information we want to pass with this signal

    def __init__(self, parent=None):
        # super(SequentialManager, self).__init__(parent)
        # QtCore.QObject.__init__(self, parent)
        QtCore.QObject.__init__(self)
        self._progress = 0
        self._tasks = []
        self._process = QtCore.QProcess(self)
        self._process.setProcessChannelMode(QtCore.QProcess.MergedChannels)
        self._process.finished.connect(self._on_finished)
        self._process.readyReadStandardOutput.connect(self._on_readyReadStandardOutput)

    def execute(self, tasks):
        self._tasks = iter(tasks)
        # this 'iter()' method creates an iterator object
        self.started.emit()
        self._progress = 0
        self.progressChanged.emit(self._progress)
        self._execute_next()

    def _execute_next(self):
        try:
            task = next(self._tasks)
        except StopIteration:
            return False
        else:
            print 'starting %s' % task.args
            self._process.start(task.program, task.args)
            return True

    def _on_finished(self):
        self._process_task()
        if not self._execute_next():
            self.finished.emit()

    def _on_readyReadStandardOutput(self):
        output = self._process.readAllStandardOutput()
        result = output.data().decode()
        self.dataChanged.emit(result)

    def _process_task(self):
        self._progress += 1
        self.progressChanged.emit(self._progress)

class outputLog(QtCore.QObject):
    def __init__(self, parent=None, parentWindow=None):
        QtCore.QObject.__init__(self)
        self._manager = SequentialManager(self)

    def startProcess(self, tasks):
        # self._manager.progressChanged.connect(self._progressbar.setValue)
        self._manager.dataChanged.connect(self.on_dataChanged)
        self._manager.started.connect(self.on_started)
        self._manager.finished.connect(self.on_finished)
        self._manager.execute(tasks)

    @QtCore.pyqtSlot()
    def on_started(self):
        print 'process started'

    @QtCore.pyqtSlot()
    def on_finished(self):
        print 'finished'

    @QtCore.pyqtSlot(str)
    def on_dataChanged(self, message):
        if message:
            print message

def nukeTestRender():
    import nuke
    nuke.scriptOpen('D:/PC6/Documents/nukeTestRender/nukeTestRender.nk')
    writeNode = None
    for node in nuke.allNodes():
        if node.Class() == 'Write':
            writeNode = node
    framesList = [1, 20, 30, 40]
    fr = nuke.FrameRanges(framesList)
    # nuke.execute(writeNode, fr)
    for x in range(20):
        print 'random'

def run():
    nukePythonEXE = 'C:/Program Files/Nuke9.0v8/python.exe'
    thisFile = os.path.dirname(os.path.abspath("__file__"))
    print thisFile
    cmd = '"%s" %s renderCheck' % (nukePythonEXE, __file__)
    cmd2 = [__file__, 'renderCheck']
    cmdList = [Task(nukePythonEXE, cmd2)]
    # subprocess.call(cmd, stdin=None, stdout=None, stderr=None, shell=False)
    taskManager = outputLog()
    taskManager.startProcess(cmdList)
    taskManager._manager._process.waitForFinished()

if __name__ == "__main__":
    print sys.argv
    if len(sys.argv) == 1:
        run()
    elif len(sys.argv) == 2:
        nukeTestRender()
I have managed to come up with an answer, so I will write the details below:
Basically, I was getting the error with the installed PyQt4 because it was not compatible with my version of Nuke, so it is apparently recommended to use the PySide included with Nuke instead. However, Nuke's Python executable cannot natively find PySide, so a few paths needed to be added to sys.path:
paths = ['C:\\Program Files\\Nuke9.0v8\\lib\\site-packages',
         'C:\\Users\\Desktop02\\.nuke',
         'C:\\Program Files\\Nuke9.0v8\\plugins',
         'C:\\Program Files\\Nuke9.0v8\\pythonextensions\\site-packages\\setuptools-0.6c11-py2.6.egg',
         'C:\\Program Files\\Nuke9.0v8\\pythonextensions\\site-packages\\protobuf-2.5.0-py2.6.egg',
         'C:\\Program Files\\Nuke9.0v8\\pythonextensions\\site-packages',
         'C:\\Program Files\\Nuke9.0v8\\plugins\\modules',
         'C:\\Program Files\\Nuke9.0v8\\configs\\Python\\site-packages',
         'C:\\Users\\Desktop02\\.nuke\\Python\\site-packages']

for path in paths:
    sys.path.append(path)
I found the missing paths by opening up both Nuke in GUI mode and the Python executable, and comparing both sys.path to see what the Python executable was lacking.
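Something along these lines works for that comparison (the dump path here is just an example):
import sys

# Run this once from Nuke's Script Editor and once from Nuke's python.exe,
# writing to two different files, then diff them to see what the bare
# interpreter is missing.
with open('C:/temp/syspath_dump.txt', 'w') as f:
    f.write('\n'.join(sys.path))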
And to answer my own main question: calling waitForFinished(-1) on the QProcess instance ignores the function's default 30-second timeout... The answer came from this thread:
QProcess and shell : Destroyed while process is still running
So here is my resulting working code:
import os
import sys
import subprocess

sysArgs = sys.argv

try:
    import nuke
    from PySide import QtCore
except ImportError:
    raise ImportError('nuke not currently importable')

class Task:
    def __init__(self, program, args=None):
        self._program = program
        self._args = args or []

    @property
    def program(self):
        return self._program

    @property
    def args(self):
        return self._args

class SequentialManager(QtCore.QObject):
    started = QtCore.Signal()
    finished = QtCore.Signal()
    progressChanged = QtCore.Signal(int)
    dataChanged = QtCore.Signal(str)
    # ^ this is how we can send a signal and can declare what type
    #   of information we want to pass with this signal

    def __init__(self, parent=None):
        # super(SequentialManager, self).__init__(parent)
        # QtCore.QObject.__init__(self, parent)
        QtCore.QObject.__init__(self)
        self._progress = 0
        self._tasks = []
        self._process = QtCore.QProcess(self)
        self._process.setProcessChannelMode(QtCore.QProcess.MergedChannels)
        self._process.finished.connect(self._on_finished)
        self._process.readyReadStandardOutput.connect(self._on_readyReadStandardOutput)

    def execute(self, tasks):
        self._tasks = iter(tasks)
        # this 'iter()' method creates an iterator object
        self.started.emit()
        self._progress = 0
        self.progressChanged.emit(self._progress)
        self._execute_next()

    def _execute_next(self):
        try:
            task = next(self._tasks)
        except StopIteration:
            return False
        else:
            print 'starting %s' % task.args
            self._process.start(task.program, task.args)
            return True

    def _on_finished(self):
        self._process_task()
        if not self._execute_next():
            self.finished.emit()

    def _on_readyReadStandardOutput(self):
        output = self._process.readAllStandardOutput()
        result = output.data().decode()
        self.dataChanged.emit(result)

    def _process_task(self):
        self._progress += 1
        self.progressChanged.emit(self._progress)

class outputLog(QtCore.QObject):
    def __init__(self, parent=None, parentWindow=None):
        QtCore.QObject.__init__(self)
        self._manager = SequentialManager(self)

    def startProcess(self, tasks):
        # self._manager.progressChanged.connect(self._progressbar.setValue)
        self._manager.dataChanged.connect(self.on_dataChanged)
        self._manager.started.connect(self.on_started)
        self._manager.finished.connect(self.on_finished)
        self._manager.execute(tasks)

    @QtCore.Slot()
    def on_started(self):
        print 'process started'

    @QtCore.Slot()
    def on_finished(self):
        print 'finished'

    @QtCore.Slot(str)
    def on_dataChanged(self, message):
        if message:
            print message

def nukeTestRender():
    import nuke
    nuke.scriptOpen('D:/PC6/Documents/nukeTestRender/nukeTestRender.nk')
    writeNode = None
    for node in nuke.allNodes():
        if node.Class() == 'Write':
            writeNode = node
    framesList = [1, 20, 30, 40]
    fr = nuke.FrameRanges(framesList)
    nuke.execute(writeNode, fr)
    # nuke.execute(writeNode, start=1, end=285)
    for x in range(20):
        print 'random'

def run():
    nukePythonEXE = 'C:/Program Files/Nuke9.0v8/python.exe'
    thisFile = os.path.dirname(os.path.abspath("__file__"))
    print thisFile
    cmd = '"%s" %s renderCheck' % (nukePythonEXE, sysArgs[0])
    cmd2 = [sysArgs[0], 'renderCheck']
    cmdList = [Task(nukePythonEXE, cmd2)]
    # subprocess.call(cmd, stdin=None, stdout=None, stderr=None, shell=False)
    taskManager = outputLog()
    taskManager.startProcess(cmdList)
    taskManager._manager._process.waitForFinished(-1)

if __name__ == "__main__":
    print sys.argv
    if len(sysArgs) == 1:
        run()
    elif len(sysArgs) == 2:
        nukeTestRender()
For whatever reason, PySide refuses to load for me unless the nuke module is imported first. There is also a known issue where importing nuke clears sys.argv, so the arguments have to be stored somewhere before the nuke import...

Redirecting stderr when using multiprocessing.Pool

I want to spawn multiple processes using multiprocessing.Pool (Python 2.7.13) and redirect the stdout / stderr of each process to a file. The problem is that it works for stdout, but not for stderr. Here's an example with a single process.
import sys
import multiprocessing as mp

def foo():
    sys.stdout = open('a.out', 'w')
    sys.stderr = open('a.err', 'w')
    print("this must go to a.out.")
    raise Exception('this must go to a.err.')
    return True

def run():
    pool = mp.Pool(4)
    _retvals = []
    _retvals.append(pool.apply_async(foo))
    retvals = [r.get(timeout=10.) for r in _retvals]

if __name__ == '__main__':
    run()
Running python stderr.py in a terminal (on a MacBook) produces a.out with the correct message ("this must go to a.out"). But it produces an empty a.err, and the error message appears in the terminal window instead.
If I don't use multiprocessing.Pool and run it directly in the main thread, it produces the correct messages in both files. This means replacing run() with the following snippet:
def run():
    foo()
When using Pools, unhandled exceptions are handled by the main process. You should either redirect stderr in main(), or wrap your functions like this:
from __future__ import print_function  # needed on Python 2.7 for print(..., file=...)
import sys
import traceback

def foo():
    sys.stdout = open('x.out', 'a')
    sys.stderr = open('x.err', 'a')
    try:
        print("this goes to x.out.")
        print("this goes to x.err.", file=sys.stderr)
        raise ValueError('this must go to x.err.')
    except Exception:
        traceback.print_exc()
        raise  # optional
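The other option mentioned above, redirecting stderr in the parent process instead of in the worker, might look roughly like this (a minimal sketch; the file names follow the question's example):
import sys
import traceback
import multiprocessing as mp

def foo():
    raise ValueError('this traceback should end up in a.err')

def run():
    sys.stderr = open('a.err', 'w')   # redirect in the parent, not the worker
    pool = mp.Pool(4)
    result = pool.apply_async(foo)
    try:
        result.get(timeout=10.)
    except Exception:
        traceback.print_exc()         # written to the parent's redirected stderr
    pool.close()
    pool.join()

if __name__ == '__main__':
    run()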

How to synchronize wxpython Gauge widget with threads in python?

My python project has multiple threads running. I want to add a gauge widget from the wxpython library to show the progress. I want the gauge to fill until my first thread completes. How do I achieve this? I am using Python 2.7 on Windows.
Use wx.CallAfter
def add_to_the_gauge(value):
    your_gauge.Value += value

...

# in some thread
def some_thread_running():
    wx.CallAfter(add_to_the_gauge, 2)
You need to post events from your worker thread to the main thread asking it to update the gauge, see this overview.
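For the threaded case in the question, a minimal self-contained sketch (all names here are illustrative) of a worker thread driving the gauge through wx.CallAfter could look like this:
import threading
import time
import wx

class GaugeFrame(wx.Frame):
    def __init__(self):
        wx.Frame.__init__(self, None, title='Progress')
        self.gauge = wx.Gauge(self, range=100)
        self.Show()
        worker = threading.Thread(target=self.long_task)
        worker.daemon = True
        worker.start()

    def long_task(self):
        # runs in the worker thread; never touch widgets directly from here
        for i in range(100):
            time.sleep(0.05)                          # stand-in for real work
            wx.CallAfter(self.gauge.SetValue, i + 1)  # marshalled to the GUI thread

if __name__ == '__main__':
    app = wx.App(False)
    GaugeFrame()
    app.MainLoop()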
You can use something simple: remember to import the module:
import subprocess
and then put this on the frame:
def __init__(self, *a, **k):
    filename = 'game.exe'
    self.timer = wx.Timer(self)
    self.Bind(wx.EVT_TIMER, self.OnUpdateGauge, self.timer)
    self.timer.Start(100)
    self.proc = subprocess.Popen([filename])  # start the process

def OnUpdateGauge(self, event):
    if self.proc.poll() is None:  # poll() returns None while still running, the exit code once stopped
        '''do something with the gauge'''
    else:
        self.timer.Stop()
hope this helps

Pooling a member function in python 2.7

I am getting a weird error with this code. I am trying to pool instances of a worker function which is a member of the class invoking the pool. While I had my doubts about whether this would work, I am not sure of the exact reason why it doesn't. The error thrown when I run this is a PicklingError. Can someone explain why?
import multiprocessing
import time

class Pooler(multiprocessing.Process):
    def __init__(self):
        multiprocessing.Process.__init__(self)

    def run(self):
        pool = multiprocessing.Pool(10)
        print "starting pool"
        pool.map(self.worker, xrange(10), chunksize=10)

    def worker(self, arg):
        print "worker - arg - {}".format(arg)

if __name__ == '__main__':
    jobs = []
    for i in range(5):
        proc = Pooler()
        jobs.append(proc)
        proc.start()
    for j in jobs:
        j.join()
    print "...ending"
UPDATE
I changed the code to look as follows:
import multiprocessing
import time

class Pooler(multiprocessing.Process):
    def __init__(self):
        multiprocessing.Process.__init__(self)

    def run(self):
        pool = multiprocessing.Pool(1)
        print "starting pool"
        obj = Worker()
        pool.map(obj.run, range(10), chunksize=1)

class Worker(object):
    def __init__(self):
        pass

    def run(self, nums):
        print "worker - arg - {}".format(nums)

if __name__ == '__main__':
    jobs = []
    for i in range(1):
        proc = Pooler()
        jobs.append(proc)
        proc.start()
    for j in jobs:
        j.join()
    print "...ending"
but I am still getting the following error:
starting pool
Process Pooler-1:
Traceback (most recent call last):
  File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "pool_test.py", line 13, in run
    pool.map(obj.run, range(10), chunksize=1)
  File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/multiprocessing/pool.py", line 251, in map
    return self.map_async(func, iterable, chunksize).get()
  File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/multiprocessing/pool.py", line 567, in get
    raise self._value
PicklingError: Can't pickle <type 'instancemethod'>: attribute lookup __builtin__.instancemethod failed
...ending
The answer is simple. multiprocessing uses pickle to serialize objects and pass those objects between the different processes -- and as the error states, pickle can't serialize an instancemethod. You need to use a better serializer, like dill if you want to serialize an instancemethod (see https://stackoverflow.com/a/21345273/2379433).
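You can reproduce the limitation with pickle alone, without multiprocessing involved at all; roughly (the exact exception type differs between the pickle and cPickle modules on 2.7):
import pickle

class Foo(object):
    def bar(self):
        pass

# On Python 2.7 this fails with a pickling error: bound methods
# ('instancemethod' objects) are not picklable by the stdlib pickler.
pickle.dumps(Foo().bar)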
So what can you do about multiprocessing? Fortunately, there is a fork of multiprocessing called multiprocess that uses dill, and if you use it, your object will serialize and your code will work. It's a one-line change that comes with the bonus of being able to run from the interpreter as well as serialize almost all the objects in python. (The link I posted above is for pathos and dill, but pathos is built on top of multiprocess, so it's still very relevant.)
>>> import multiprocess as multiprocessing
>>> import time
>>> class Pooler(multiprocessing.Process):
...     def __init__(self):
...         multiprocessing.Process.__init__(self)
...     def run(self):
...         pool = multiprocessing.Pool(1)
...         print "starting pool"
...         obj = Worker()
...         pool.map(obj.run, range(10), chunksize=1)
...
>>> class Worker(object):
...     def __init__(self):
...         pass
...     def run(self, nums):
...         print "worker - arg - {}".format(nums)
...
>>> if __name__ == '__main__':
...     jobs = []
...     for i in range(1):
...         proc = Pooler()
...         jobs.append(proc)
...         proc.start()
...     for j in jobs:
...         j.join()
...     print "...ending"
...
starting pool
worker - arg - 0
worker - arg - 1
worker - arg - 2
worker - arg - 3
worker - arg - 4
worker - arg - 5
worker - arg - 6
worker - arg - 7
worker - arg - 8
worker - arg - 9
...ending
>>>

Python user input in child process

I'm trying to create a child process that can take input through raw_input() or input(), but I'm getting an end-of-file error, EOFError: EOF when asking for input.
I'm doing this to experiment with multiprocessing in Python, and I remember this easily working in C. Is there a workaround without using pipes or queues from the main process to its child? I'd really like the child to deal with user input.
from multiprocessing import Process

def child():
    print 'test'
    message = raw_input()  # this is where this process fails
    print message

def main():
    p = Process(target=child)
    p.start()
    p.join()

if __name__ == '__main__':
    main()
I wrote some test code that hopefully shows what I'm trying to achieve.
My answer is taken from here: Is there any way to pass 'stdin' as an argument to another process in python?
I have modified your example and it seems to work:
from multiprocessing.process import Process
import sys
import os

def child(newstdin):
    sys.stdin = newstdin
    print 'test'
    message = raw_input()  # this is where this process doesn't fail anymore
    print message

def main():
    newstdin = os.fdopen(os.dup(sys.stdin.fileno()))
    p = Process(target=child, args=(newstdin,))
    p.start()
    p.join()

if __name__ == '__main__':
    main()