from itertools import product
from multiprocessing import Pool

with Pool(4) as pool:
    pool.map(lambda x: run_test_function(x, arg2, arg3, arg4), arg1)
I get the error below when I execute the code above. There is other code as well that I can't post here, but the actual problem comes from this piece of code.
Traceback (most recent call last):
File "modProfileChange_test.py", line 347, in <module>
main(sys.argv[1:])
File "modProfileChange_test.py", line 336, in main
test_run_code(arg1, arg2, arg3, arg4, arg5, arg6)
File "modProfileChange_test.py", line 23, in test_run_code
with Pool(4) as pool:
AttributeError: __exit__
In Python 2.7, multiprocessing.Pool is not a context manager, so it can't be used in a with statement (context-manager support was only added in Python 3.3).
Solution: create the pool with a regular assignment to a variable:
my_pool = Pool(4)
my_pool.map(...)
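For example, a minimal Python 2.7 sketch along those lines (square and my_pool are illustrative names, not from the original code); contextlib.closing calls pool.close() on exit, so the pool is shut down cleanly even if map raises:

from contextlib import closing
from multiprocessing import Pool

def square(x):  # module-level function, so it can be pickled
    return x * x

if __name__ == "__main__":
    with closing(Pool(4)) as my_pool:
        print my_pool.map(square, [1, 2, 3, 4])  # prints [1, 4, 9, 16]
    my_pool.join()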
lambda functions don't work with multiprocessing.Pool, even in Python 3, because the function passed to map must be picklable and lambdas are not.
Solution: emulate a closure with functools.partial:
from functools import partial

def run_test_function(x, fun_arg2, fun_arg3, fun_arg4):
    # your code here
    ...

process_func = partial(run_test_function, fun_arg2=arg2, fun_arg3=arg3, fun_arg4=arg4)
Putting this together:
from multiprocessing import Pool
from functools import partial

def run_test_function(x, fun_arg2, fun_arg3, fun_arg4):
    # this is an example
    print x, fun_arg2, fun_arg3, fun_arg4

if __name__ == "__main__":
    arg1 = 1, 2, 3, 4
    arg2 = "hello"
    arg3 = "world"
    arg4 = "!"
    process_func = partial(run_test_function, fun_arg2=arg2, fun_arg3=arg3, fun_arg4=arg4)
    my_pool = Pool(4)
    my_pool.map(process_func, arg1)
Output:
~/test $ python2.7 so10.py
1 hello world !
2 hello world !
3 hello world !
4 hello world !
Related
I wrote a simple program to see whether I could naively fill an HDF5 file with dummy data.
import sys
import time
import multiprocessing as mp
import numpy as np
import h5py

def parallel_h5write(hdf, key, data):
    dset = hdf.create_dataset(key, shape=data.shape, dtype=data.dtype)
    dset[:] = data
    hdf.flush()
    return None

large_data = np.array([
    float(j)*np.ones(20000) for j in xrange(1, 20001)
])

start = time.time()
h5file = h5py.File('serial.h5', 'w')
for j, data in enumerate(large_data):
    nm = "name{0}".format(j)
    dset = h5file.create_dataset(nm, shape=data.shape, dtype=data.dtype)
    dset[:] = data
    h5file.flush()
h5file.close()
finish = time.time()
print "serial write took {0} seconds".format(finish - start)

start = time.time()
h5file = h5py.File('parallel.h5', 'w')
arglist = [(h5file, 'name{0}'.format(j), data) for j, data in enumerate(large_data)]
p = mp.Pool(4)
try:
    p.map(parallel_h5write, arglist)
except Exception, err:
    p.close()
    p.join()
    print err
    sys.exit()
p.close()
p.join()
finish = time.time()
print "parallel write took {0} seconds".format(finish - start)
I got the following error message:
Exception KeyError: KeyError(0,) in 'h5py._objects.ObjectID.__dealloc__' ignored
Process PoolWorker-1:
Traceback (most recent call last):
File "/home/user/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
self.run()
File "/home/user/anaconda2/lib/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "/home/user/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
task = get()
File "/home/user/anaconda2/lib/python2.7/multiprocessing/queues.py", line 376, in get
return recv()
File "stringsource", line 5, in h5py.h5f.__pyx_unpickle_FileID
File "h5py/_objects.pyx", line 178, in h5py._objects.ObjectID.__cinit__
TypeError: __cinit__() takes exactly 1 positional argument (0 given)
Traceback (most recent call last):
File "h5py/_objects.pyx", line 200, in h5py._objects.ObjectID.__dealloc__
KeyError: 0
Needless to say I am not sure what to do with this error message. What am I doing wrong? What could I do to fix this? Can I actually do this?
I have a class called ftrl_proximal() which fits a model to data. It is a self-written classifier (not sklearn's).
The algorithm works perfectly when I run it on a single CPU, but as soon as I try to run it with multiprocessing (a sort of cross-validation) I get the error described below.
The code is:
from FTRL import ftrl_proximal
from sklearn.externals import joblib
from sklearn.base import clone
import multiprocessing
from sklearn.cross_validation import StratifiedKFold

def ftrl_train(estimator, X, y, train_index, test_index):
    y_ftrl_pred_test = []
    y_ftrl_true = []
    # Split the data to train and test
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # Fit a model on a sample of the data
    for idx, x in enumerate(X_train):
        # predict
        _p = estimator.predict(x.indices)
        # update
        estimator.update(x.indices, _p, y_train[idx])
    for idx, x in enumerate(X_test):
        _v = estimator.predict(x.indices)
        y_ftrl_pred_test.append(_v)  # Predicted
        y_ftrl_true.append(y_test[idx])  # True
    return y_ftrl_pred_test, y_ftrl_true

cv_fold = 3  # determines the number of folds.
skf = StratifiedKFold(y, n_folds=cv_fold, random_state=0)
ftrl = ftrl_proximal(alpha, beta, L1, L2, D, interaction)  # initialize a learner
parallel = joblib.Parallel(n_jobs=num_cores, verbose=0, pre_dispatch='2*n_jobs')
preds_blocks = parallel(joblib.delayed(ftrl_train)(clone(ftrl), X, y,
                        train_index, test_index, verbose=0, fit_params=None)
                        for train_index, test_index in skf)
The error:
Traceback (most recent call last):
File "/home/workspace/Predictor/modelSelection.py", line 61, in <module>
class Main():
File "/home/workspace/Predictor/modelSelection.py", line 199, in Main
for train_index, test_index in skf)
File "/home/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 658, in __call__
for function, args, kwargs in iterable:
File "/home/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 184, in next
return next(self._it)
File "/home/workspace/Predictor/modelSelection.py", line 199, in <genexpr>
for train_index, test_index in skf)
NameError: global name '_ftrl' is not defined
I'm trying to write a general-purpose paraloop class so that multiprocessing jobs can be run easily. Basically, the user must define every iteration step as def iteration(index) inside a with statement, as in the example below.
Here's my implementation:
import multiprocessing as mp
import types

class paraloop(object):
    def __init__(self, ncores, niterations):
        self.niterations = niterations
        self.ncores = min(ncores, self.niterations)

    def __enter__(self, *args, **kwargs):
        self.pool = mp.Pool(processes=self.ncores)
        self.iterated = 0
        # create results dict
        self.result = {}
        return self

    def __exit__(self, *args, **kwargs):
        print isinstance(iteration, types.MethodType)
        def ITER():
            self.iterated += 1
            self.result[self.iterated] = iteration(self.iterated)
            if self.iterated < self.niterations:
                self.pool.apply_async(ITER).get()
        print isinstance(ITER, types.MethodType)
        # run iterations in parallel
        [self.pool.apply_async(ITER).get() for idx in xrange(self.ncores)]

# usage example
import numpy as np

ITERATIONS = 10
ARRAY = np.ones(1000000)

with paraloop(ncores=4, niterations=ITERATIONS) as p:
    def iteration(index):
        print 'this is an iteration %i' % index
        s = 0
        for n in ARRAY:
            s += n
        return s
The print statements are there to make sure that I am passing picklable functions and not methods.
>> False
>> False
>> Traceback (most recent call last):
>> File "paraloop.py", line 48, in <module>
>> def iteration(index):
>> File "paraloop.py", line 29, in __exit__
>> [self.pool.apply_async( ITER ).get() for idx in xrange(self.ncores)]
>> File "c:\Python27\lib\multiprocessing\pool.py", line 558, in get
>> raise self._value
>> cPickle.PicklingError: Can't pickle <type 'function'>: attribute lookup __builtin__.function failed
Any idea why I am getting this error message?
As a rule of thumb: if you can't import it, you can't pickle it, because pickle serializes functions by reference (module and attribute name) and checks that the lookup succeeds before serializing. Just don't define the target function inside another function or a with block; define it at module level.
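For example, a minimal sketch of the same kind of parallel loop with the work function defined at module level (illustrative names and structure, not the original paraloop API), which pickles without trouble:

import multiprocessing as mp
import numpy as np

ARRAY = np.ones(1000000)

def iteration(index):
    # defined at module level, so it can be imported and therefore pickled
    print 'this is iteration %i' % index
    return ARRAY.sum()

if __name__ == '__main__':
    pool = mp.Pool(processes=4)
    results = pool.map(iteration, range(10))  # a list of 10 sums
    pool.close()
    pool.join()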
Why isn't the constructor called the second time?
from datetime import datetime

class Time(datetime):
    def __init__(self, *args):
        print 5, args
        try:
            d = args[0]
            print 8, d
            datetime.__init__(self,
                d.year, d.month, t.day, t.hour, t.minute, t.second)
        except Exception:
            print 12, args
            datetime.__init__(self, args)

if __name__ == '__main__':
    t = Time(1965, 1, 10)
    print 17, t
    u = Time(t)
    print 19, u
Using python 2.7.2, here's the output:
bash-4.1$ python tmp.py
5 (1965, 1, 10)
8 1965
12 (1965, 1, 10)
17 1965-01-10 00:00:00
Traceback (most recent call last):
File "tmp.py", line 18, in <module>
u = Time(t)
TypeError: an integer is required
I expected to see:
5 Time(1965, 1, 10)
What function is being called if not the constructor?
It's the type's __new__ method.
__init__ isn't the first thing that happens when you create an object. First, the type's __new__ method is called to actually produce the object, and then __init__ is called to initialize it. For mutable types, __new__ doesn't usually do much, but for immutable types like datetime, __new__ generally creates the object pre-initialized, since otherwise __init__ would have to mutate the object to initialize it.
If you want to inherit from datetime, you have to implement __new__ as well as __init__:
def __new__(cls, *args):
    print 5, args
    try:
        d = args[0]
        print 8, d
        return datetime.__new__(cls,
            d.year, d.month, d.day, d.hour, d.minute, d.second)
    except Exception:
        print 12, args
        return datetime.__new__(cls, *args)
If you want to see what datetime's __new__ does, it's visible in Modules/datetimemodule.c. You'll have to know C, and know or look up a fair amount of the Python C API, to understand it, though.
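For completeness, a minimal sketch of the subclass with that __new__ in place; the __init__ can be dropped entirely, since __new__ already builds the object fully:

from datetime import datetime

class Time(datetime):
    def __new__(cls, *args):
        try:
            d = args[0]
            # copy path: build from another datetime-like object
            return datetime.__new__(cls,
                d.year, d.month, d.day, d.hour, d.minute, d.second)
        except AttributeError:
            # normal path: forward year, month, day, ... unchanged
            return datetime.__new__(cls, *args)

t = Time(1965, 1, 10)
u = Time(t)  # now reaches __new__'s copy path instead of raising TypeError
print 17, t
print 19, u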
I'm new to Python and am trying to write code that deals poker hands and checks for pat flushes. Below is my code and the shell output when I try to run it. According to my professor, this should return True if there is only one suit in the hand, i.e. only one entry in the set suits, and False otherwise, but I keep getting this error message. Can someone help explain this to me?
from random import *

suits = {'H', 'C', 'D', 'S'}  # hearts, clubs, diamonds, spades
ranks = {'a', '2', '3', '4', '5', '6', '7', '8', '9', '10', 'j', 'q', 'k'}  # card values
deck = [r + s for r in ranks for s in suits]
hand = []

def deal(n):
    '''deals n hands'''
    for n in range(0, n):
        hand = sample(deck, 5)
        for x in hand:
            deck.remove(x)
        print(hand)

def is_flush(hand):
    '''checks for pat flush hands'''
    suits = {c[-1] for c in hand}
    return len(suits) == 1
RUN
>>> is_flush (5)
['10S', 'qD', '8H', '8D', '3S']
['5C', 'jC', 'kS', '4C', '2H']
['2S', '7C', '7H', '7S', '9S']
['8C', '8S', 'aH', '5S', '2D']
['9D', '6S', '4D', 'qS', '9H']
Traceback (most recent call last):
File "<pyshell#17>", line 1, in <module>
is_flush (5)
File "K:/stalter_3.py", line 19, in is_flush
suits = {c[-1] for c in hand}
TypeError: 'NoneType' object is not iterable
>>>
You're calling is_flush(5). If I understand you correctly, that value 5 becomes the variable hand, which you then try to iterate over (as if it were a hand) in c[-1] for c in hand, and you can't iterate over an integer. The reason I'm confused is that I would expect the error to mention an int, not a NoneType. Note that deal() only prints the hands and never returns them (it has no return statement), so if your real code does something like hand = deal(5), hand will be None, which would explain the NoneType error.
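If the goal is to deal some hands and then test each one, a minimal sketch of one way to wire it up (an assumption about the intended usage, not your professor's spec) is to have deal return the hands and pass a single hand to is_flush:

from random import sample

suits = {'H', 'C', 'D', 'S'}
ranks = {'a', '2', '3', '4', '5', '6', '7', '8', '9', '10', 'j', 'q', 'k'}
deck = [r + s for r in ranks for s in suits]

def deal(n):
    '''deals n hands and returns them as a list of lists'''
    hands = []
    for _ in range(n):
        hand = sample(deck, 5)
        for x in hand:
            deck.remove(x)
        hands.append(hand)
    return hands

def is_flush(hand):
    '''checks whether a single hand is a pat flush'''
    return len({c[-1] for c in hand}) == 1

for hand in deal(5):
    print hand, is_flush(hand)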