python multiprocessing PicklingError - python-2.7

I'm trying to write a multi-purpose paraloop class that makes it easy to run multiprocessing jobs. The idea is that the user defines each iteration step as def iteration(index) inside a with statement, as in the example below.
Here's my implementation:
import multiprocessing as mp
import types

class paraloop(object):
    def __init__(self, ncores, niterations):
        self.niterations = niterations
        self.ncores = min(ncores, self.niterations)

    def __enter__(self, *args, **kwargs):
        self.pool = mp.Pool(processes=self.ncores)
        self.iterated = 0
        # create results dict
        self.result = {}
        return self

    def __exit__(self, *args, **kwargs):
        print isinstance(iteration, types.MethodType)
        def ITER():
            self.iterated += 1
            self.result[self.iterated] = iteration(self.iterated)
            if self.iterated < self.niterations:
                self.pool.apply_async(ITER).get()
        print isinstance(ITER, types.MethodType)
        # run iterations in parallel
        [self.pool.apply_async(ITER).get() for idx in xrange(self.ncores)]
# usage example
import numpy as np

ITERATIONS = 10
ARRAY = np.ones(1000000)

with paraloop(ncores=4, niterations=ITERATIONS) as p:
    def iteration(index):
        print 'this is an iteration %i' % index
        s = 0
        for n in ARRAY:
            s += n
        return s
The print statements are there to make sure that I am passing picklable functions and not methods. The output is:
>> False
>> False
>> Traceback (most recent call last):
>> File "paraloop.py", line 48, in <module>
>> def iteration(index):
>> File "paraloop.py", line 29, in __exit__
>> [self.pool.apply_async( ITER ).get() for idx in xrange(self.ncores)]
>> File "c:\Python27\lib\multiprocessing\pool.py", line 558, in get
>> raise self._value
>> cPickle.PicklingError: Can't pickle <type 'function'>: attribute lookup __builtin__.function failed
Any idea why I am getting this error message?

As a rule of thumb: if you can't import it, you can't pickle it, because pickle locates a function by its module and name before serializing anything, and a function defined inside another function (or inside a with block) can't be looked up that way. Just don't define the target function inside another function!
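For illustration, here is a minimal sketch of the same summing job with the worker defined at module level, so pickle can find it by name. A plain pool.map over indices replaces the question's recursive apply_async scheduling; this is a sketch, not the original class:

import multiprocessing as mp
import numpy as np

ARRAY = np.ones(1000000)

# module-level function: child processes can import it, so pickle can find it
def iteration(index):
    print 'this is an iteration %i' % index
    return ARRAY.sum()

if __name__ == '__main__':
    pool = mp.Pool(processes=4)
    # map each index to the module-level worker; results come back in order
    results = pool.map(iteration, range(10))
    pool.close()
    pool.join()
    print dict(enumerate(results))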

Related

Multiple time "Init failure" error witth attribute error "__dict__"

I have a bunch of code. The program is written in Python 2 and uses an old version of PyMC, probably version 2.x.
When I run
python run.py
the error I am facing is:
Init failure
Init failure
Init failure
Init failure
Init failure
Init failure
Init failure
Init failure
No previous MCMC data found.
Traceback (most recent call last):
  File "run.py", line 106, in <module>
    M=run_MCMC(ms)
  File "run.py", line 94, in run_MCMC
    mcmc = pm.MCMC(model, db=db, name=name)
  File "/home/divyadeep/miniconda3/envs/detrital/lib/python2.7/site-packages/pymc/MCMC.py", line 90, in __init__
    **kwds)
  File "/home/divyadeep/miniconda3/envs/detrital/lib/python2.7/site-packages/pymc/Model.py", line 191, in __init__
    Model.__init__(self, input, name, verbose)
  File "/home/divyadeep/miniconda3/envs/detrital/lib/python2.7/site-packages/pymc/Model.py", line 92, in __init__
    ObjectContainer.__init__(self, input)
  File "/home/divyadeep/miniconda3/envs/detrital/lib/python2.7/site-packages/pymc/Container.py", line 605, in __init__
    input_to_file = input.__dict__
AttributeError: 'NoneType' object has no attribute '__dict__'
I have tried to comment out some of the __init__ calls in the program, but am still not able to run it.
The run.py is as follows:
def InitExhumation(settings):
    """Initialize piece-wise linear exhumation model"""
    #Check that erosion and age break priors are meaningful
    if (settings.erate_prior[0] >= settings.erate_prior[1]):
        print "\nInvalid range for erate_prior."
        sys.exit()
    if (settings.abr_prior[0] >= settings.abr_prior[1]):
        print "\nInvalid range for abr_prior."
        sys.exit()
    #Create erosion rate parameters (e1, e2, ...)
    e = []
    for i in range(1, settings.breaks+2):
        e.append(pm.Uniform("e%i" % i, settings.erate_prior[0], settings.erate_prior[1]))
    #Create age break parameters (abr1, ...)
    abr_i = settings.abr_prior[0]
    abr = []
    for i in range(1, settings.breaks+1):
        abr_i = pm.Uniform("abr%i" % i, abr_i, settings.abr_prior[1])
        abr.append(abr_i)
    return e, abr

def ExhumationModel(settings):
    """Set up the exhumation model"""
    #Check that error rate priors are meaningful
    if (settings.error_prior[0] >= settings.error_prior[1]):
        print "\nInvalid range for error_prior."
        sys.exit()
    err = pm.Uniform('RelErr', settings.error_prior[0], settings.error_prior[1])
    #Closure elevation priors
    hc_parms = {'AFT': [3.7, 0.8, 6.0, 2.9], 'AHe': [2.2, 0.5, 3.7, 1.6]}
    e, abr = InitExhumation(settings)
    nodes = [err, e, abr]
    hc = {}
    for sample in settings.samples:
        parms = e[:]
        h_mu = np.mean(sample.catchment.z)
        if sample.tc_type not in hc.keys():
            hc[sample.tc_type] = pm.TruncatedNormal("hc_%s" % sample.tc_type, h_mu-hc_parms[sample.tc_type][0],
                                                    1/hc_parms[sample.tc_type][1]**2,
                                                    h_mu-hc_parms[sample.tc_type][2],
                                                    h_mu-hc_parms[sample.tc_type][3])
            nodes.append(hc[sample.tc_type])
        parms.append(hc[sample.tc_type])
        parms.extend(abr)
        if isinstance(sample, DetritalSample):
            idx_i = pm.Categorical("Index_" + sample.sample_name, p=sample.catchment.bins['w'], size=len(sample.dt_ages))
            nodes.extend([idx_i])
            exp_i = pm.Lambda("ExpAge_" + sample.sample_name, lambda parm=parms, idx=idx_i: ba.h2a(sample.catchment.bins['h'][idx], parm))
            value = sample.dt_ages
        else:
            idx_i = None
            exp_i = pm.Lambda("ExpAge_" + sample.sample_name, lambda parm=parms: ba.h2a(sample.br_elevation, parm), plot=False)
            value = sample.br_ages
        obs_i = pm.Normal("ObsAge_" + sample.sample_name, mu=exp_i, tau=1./(err*exp_i)**2, value=value, observed=True)
        sim_i = pm.Lambda("SimAge_" + sample.sample_name, lambda ta=exp_i, err=err: pm.rnormal(mu=ta, tau=1./(err*ta)**2))
        nodes.extend([exp_i, obs_i, sim_i])
    return nodes

def run_MCMC(settings):
    """Run MCMC algorithm"""
    burn = settings.iterations/2
    thin = (settings.iterations-burn) / settings.finalChainSize
    name = "%s" % settings.model_name + "_%ibrk" % settings.breaks
    attempt = 0
    model = None
    while attempt < 5000:
        try:
            model = ExhumationModel(settings)
            break
        except pm.ZeroProbability, ValueError:
            attempt += 1
            #print "Init failure %i" % attemp
            print "Init failure "
    try:
        #The following creates text files for the chains rather than hdf5
        db = pm.database.txt.load(name + '.txt')
        #db = pm.database.hdf5.load(name + '.hdf5')
        print "\nExisting MCMC data loaded.\n"
    except AttributeError:
        print "\nNo previous MCMC data found.\n"
        db = 'txt'
    mcmc = pm.MCMC(model, db=db, name=name)
    #mcmc.use_step_method(pm.AdaptiveMetropolis, M.parm)
    if settings.iterations > 1:
        mcmc.sample(settings.iterations, burn=burn, thin=thin)
    return mcmc

if __name__ == '__main__':
    sys.path[0:0] = './'  # Puts current directory at the start of path
    import model_setup as ms
    if len(sys.argv) > 1: ms.iterations = int(sys.argv[1])
    M = run_MCMC(ms)
    #import pdb; pdb.set_trace()
    #Output and diagnostics
    try:
        ba.statistics(M, ms.samples)
    except TypeError:
        print "\nCannot compute stats without resampling (PyMC bug?).\n"
    ps.chains(M, ms.finalChainSize, ms.iterations, ms.samples, ms.output_format)
    ps.summary(M, ms.samples, ms.output_format)
    ps.ks_gof(M, ms.samples, ms.output_format)
    ps.histograms(ms.samples, ms.show_histogram, ms.output_format)
    ps.discrepancy(M, ms.samples, ms.output_format)
    ## ps.unorthodox_ks(M, ms.output_format)
    ## try:
    ##     ps.catchment(M.catchment_dem, format=ms.output_format)
    ## except KeyError:
    ##     print "\nUnable to generate catchment plot."
    M.db.close()

Understanding __init__ and method calling in Python

class Vehicle:
    def __init__(self, number_of_wheels, type_of_tank, seating_capacity, maximum_velocity):
        self.number_of_wheels = number_of_wheels
        self.type_of_tank = type_of_tank
        self.seating_capacity = seating_capacity
        self.maximum_velocity = maximum_velocity

    @property
    def number_of_wheels(self):
        return self.number_of_wheels

    @number_of_wheels.setter
    def number_of_wheels(number):
        self.number_of_wheels = number

    def make_noise(self):
        print('VRUUUUUUUM')

tesla_model_s = Vehicle(4, 'electric', 5, 250)
print('number of wheels', tesla_model_s.number_of_wheels)  # 4
tesla_model_s.number_of_wheels = 2  # setting number of wheels to 2
print('updated number of wheels', tesla_model_s.number_of_wheels)  # 2
tesla_model_s.make_noise()  # VRUUUUUUUM
When I run this code I get the error below. I think it might be because of the getter and setter. Can anyone please help me sort this out?
Traceback (most recent call last):
  File "classes.py", line 23, in <module>
    tesla_model_s.make_noise() # VRUUUUUUUM
AttributeError: Vehicle instance has no attribute 'make_noise'
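For comparison, here is a sketch (not the original poster's code) of how a working getter/setter pair is usually written in Python 2: the class inherits from object so @property works, the stored attribute gets a different name from the property, and the setter takes self plus the new value:

class Vehicle(object):  # new-style class: required for @property in Python 2
    def __init__(self, number_of_wheels):
        self._number_of_wheels = number_of_wheels  # backing attribute, distinct name

    @property
    def number_of_wheels(self):
        return self._number_of_wheels

    @number_of_wheels.setter
    def number_of_wheels(self, number):  # setter takes self and the new value
        self._number_of_wheels = number

    def make_noise(self):
        print('VRUUUUUUUM')

tesla_model_s = Vehicle(4)
tesla_model_s.number_of_wheels = 2
print(tesla_model_s.number_of_wheels)  # 2
tesla_model_s.make_noise()             # VRUUUUUUUM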

Invalid literal for float in k nearest neighbor

I am having the hardest time figuring out why I am getting this error. I have searched a lot but have been unable to find any solution.
import numpy as np
import warnings
from collections import Counter
import pandas as pd

def k_nearest_neighbors(data, predict, k=3):
    if len(data) >= k:
        warnings.warn('K is set to a value less than total voting groups!')
    distances = []
    for group in data:
        for features in data[group]:
            euclidean_distance = np.linalg.norm(np.array(features) - np.array(predict))
            distances.append([euclidean_distance, group])
    votes = [i[1] for i in sorted(distances)[:k]]
    vote_result = Counter(votes).most_common(1)[0][0]
    return vote_result

df = pd.read_csv("data.txt")
df.replace('?', -99999, inplace=True)
df.drop(['id'], 1, inplace=True)
full_data = df.astype(float).values.tolist()
print(full_data)
After running, it gives this error:
Traceback (most recent call last):
  File "E:\Jazab\Machine Learning\Lec18(Testing K Neatest Nerighbors Classifier)\Lec18(Testing K Neatest Nerighbors Classifier)\Lec18_Testing_K_Neatest_Nerighbors_Classifier_.py", line 25, in <module>
    full_data = df.astype(float).values.tolist()
  File "C:\Python27\lib\site-packages\pandas\util\_decorators.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "C:\Python27\lib\site-packages\pandas\core\generic.py", line 3299, in astype
    **kwargs)
  File "C:\Python27\lib\site-packages\pandas\core\internals.py", line 3224, in astype
    return self.apply('astype', dtype=dtype, **kwargs)
  File "C:\Python27\lib\site-packages\pandas\core\internals.py", line 3091, in apply
    applied = getattr(b, f)(**kwargs)
  File "C:\Python27\lib\site-packages\pandas\core\internals.py", line 471, in astype
    **kwargs)
  File "C:\Python27\lib\site-packages\pandas\core\internals.py", line 521, in _astype
    values = astype_nansafe(values.ravel(), dtype, copy=True)
  File "C:\Python27\lib\site-packages\pandas\core\dtypes\cast.py", line 636, in astype_nansafe
    return arr.astype(dtype)
ValueError: invalid literal for float(): 3) <-----Reappears in Group 8 as:
Press any key to continue . . .
If I remove astype(float), the program runs fine.
What do I need to do?
There is bad data (3)), so you need to_numeric with apply, because all columns need to be processed.
Non-numeric values are converted to NaNs, which are then replaced by fillna with some scalar, e.g. 0:
full_data = df.apply(pd.to_numeric, errors='coerce').fillna(0).values.tolist()
Sample:
df = pd.DataFrame({'A':[1,2,7], 'B':['3)',4,5]})
print (df)
   A   B
0  1  3)
1  2   4
2  7   5
full_data = df.apply(pd.to_numeric, errors='coerce').fillna(0).values.tolist()
print (full_data)
[[1.0, 0.0], [2.0, 4.0], [7.0, 5.0]]
It looks like you have 3) as an entry in your CSV file, and Pandas is complaining because it can't cast it to a float because of the ).
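If you first want to see exactly which rows contain unparseable values, one way is to coerce a copy and inspect where the coercion produced NaN. A sketch, reusing the sample DataFrame from above:

import pandas as pd

df = pd.DataFrame({'A': [1, 2, 7], 'B': ['3)', 4, 5]})

# coerce everything to numbers; entries that fail to parse become NaN
coerced = df.apply(pd.to_numeric, errors='coerce')

# rows where any original cell failed to parse
bad_rows = df[coerced.isnull().any(axis=1)]
print(bad_rows)  # shows the row containing '3)'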

How can I invoke an imported class inside another class in Python

#!/usr/bin/env python
from __future__ import print_function
import sys
import time
import getopt
import alsaaudio
import numpy
from time import sleep

class A_weight():
    def __init__(self):
        skaler = 2.361E-14
        fix_cur = 0.20565360419770495
        A = []
        hPa = 4e-11
        card = 'default'
        array_float = numpy.dtype(float)
        stream = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NORMAL, card)
        stream.setchannels(1)
        stream.setrate(48000)
        stream.setformat(alsaaudio.PCM_FORMAT_S16_LE)
        stream.setperiodsize(128)

    def A(f):
        return (12200**2*f**4/((f**2+20.6**2)*(f**2+12200**2)*numpy.sqrt(f**2+107.7**2)*numpy.sqrt(f**2+737.9**2)))+fix_cur

    def listen(self):
        glob_leq = 0
        liczba_ramek = 0
        index_ramek = 0
        while True:
            try:
                l, data = stream.read()
            except IOError, e:
                error_count += 1
                print(" (%d) Error recording: %s" % (error_count, e))
            else:
                if l == 128:
                    decoded_block = numpy.frombuffer(data, dtype='int16')
                else:
                    continue
                Y = numpy.fft.fft(decoded_block)  # fft computing and normalization
                Aw = A(numpy.arange(20., 20000, (19980./len(Y))))
                Na = Aw*Y
                inverse = numpy.fft.ifft(Y)
                maks = 32768
                array_float = numpy.divide(inverse.real, float(maks))
                array_float = array_float**2
                sum_array = numpy.sum(array_float, dtype=float)
                glob_leq = glob_leq + sum_array
                liczba_ramek += 1
                index_ramek += 1
                if index_ramek == 375:
                    index_ramek = 0
                    cis_chwil = numpy.divide(glob_leq, liczba_ramek * 128)
                    leq = 10*numpy.log10(numpy.divide(cis_chwil, hPa))
                    print(leq)
                    #A.append(leq)
                    #print(max(A))

A_weight().listen()
So I am trying to write a program that computes the sound pressure level with A-weighting.
Everything works correctly, but when I want to enclose my code in a class I have a problem: something goes wrong with invoking the imported class, in this case alsaaudio.
I get this feedback:
Traceback (most recent call last):
  File "rec_A.py", line 64, in <module>
    A_weight().listen()
  File "rec_A.py", line 37, in listen
    l, data = stream.read()
NameError: global name 'stream' is not defined
Do you have any idea?
Change each occurrence of stream to self.stream:
class A_weight():
    def __init__(self):
        skaler = 2.361E-14
        fix_cur = 0.20565360419770495
        A = []
        hPa = 4e-11
        card = 'default'
        array_float = numpy.dtype(float)
        self.stream = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NORMAL, card)
        self.stream.setchannels(1)
        self.stream.setrate(48000)
        self.stream.setformat(alsaaudio.PCM_FORMAT_S16_LE)
        self.stream.setperiodsize(128)
        ...

    def listen(self):
        glob_leq = 0
        liczba_ramek = 0
        index_ramek = 0
        while True:
            try:
                l, data = self.stream.read()
                ...
This will make it an instance variable, and all other methods of that class (as long as they are passed the self argument) will have access to it through self.stream. See this bit of documentation for more details on instance variables.
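As a standalone illustration of the difference between a local variable and an instance variable (a sketch unrelated to alsaaudio):

class Recorder(object):
    def __init__(self):
        local_name = 'not visible elsewhere'  # plain local: gone when __init__ returns
        self.source = 'microphone'            # instance variable: stored on the object

    def describe(self):
        # self.source is reachable from any method that receives self
        print('reading from %s' % self.source)

Recorder().describe()  # prints: reading from microphone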
Also, this is merely an aesthetic point, but the convention in Python is to use upper camel case for class names, i.e., AWeight instead of A_weight - but this will not affect how your code runs.

ValueError: This solver needs samples of at least 2 classes in the data, but the data contains only one class: 0.0

I applied Logistic Regression on the training set after splitting the data set into test and train sets, but I got the above error. I tried to work it out, and when I print my response vector y_train in the console it shows integer values like 0 or 1. But when I write it to a file, I find the values are floats like 0.0 and 1.0. If that's the problem, how can I overcome it?
lenreg = LogisticRegression()
print y_train[0:10]
y_train.to_csv(path='ytard.csv')
lenreg.fit(X_train, y_train)
y_pred = lenreg.predict(X_test)
print metrics.accuracy_score(y_test, y_pred)
The stack trace is as follows:
Traceback (most recent call last):
  File "/home/amey/prog/pd.py", line 82, in <module>
    lenreg.fit(X_train, y_train)
  File "/usr/lib/python2.7/dist-packages/sklearn/linear_model/logistic.py", line 1154, in fit
    self.max_iter, self.tol, self.random_state)
  File "/usr/lib/python2.7/dist-packages/sklearn/svm/base.py", line 885, in _fit_liblinear
    " class: %r" % classes_[0])
ValueError: This solver needs samples of at least 2 classes in the data, but the data contains only one class: 0.0
Meanwhile I've gone through the link, which was unanswered. Is there a solution?
The problem here is that your y_train vector, for whatever reason, only has zeros. It is actually not your fault, and it's kind of a bug (I think). The classifier needs 2 classes or else it throws this error.
It makes sense: if your y_train vector only has zeros (i.e. only 1 class), then the classifier doesn't really need to do any work, since all predictions should just be the one class.
In my opinion the classifier should still complete, just predict the one class (all zeros in this case), and throw a warning, but it doesn't. It throws the error instead.
A way to check for this condition is like this:
import numpy as np

lenreg = LogisticRegression()
print y_train[0:10]
y_train.to_csv(path='ytard.csv')
# for a 0/1 vector, the sum equals 0 (all zeros) or len (all ones) when only one class is present
if np.sum(y_train) in [len(y_train), 0]:
    print "all one class"
    #do something else
else:
    #OK to proceed
    lenreg.fit(X_train, y_train)
    y_pred = lenreg.predict(X_test)
    print metrics.accuracy_score(y_test, y_pred)
To overcome the problem more easily, I would recommend just including more samples in your test set, like 100 or 1000 instead of 10.
I had the same problem using learning_curve:
train_sizes, train_scores, test_scores = learning_curve(estimator, X, y, cv=cv, n_jobs=n_jobs,
                                                        train_sizes=train_sizes, scoring="f1",
                                                        random_state=RANDOM_SEED, shuffle=True)
Add the shuffle parameter, which will randomize the sets.
This doesn't prevent the error from happening, but it's a way to increase the chances of having both classes in the subsets used by the function.
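A self-contained version of that call might look like the following sketch; the logistic-regression estimator and the random data are stand-ins, not from the original answer:

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import learning_curve

X = np.random.rand(100, 3)         # placeholder features
y = np.random.randint(0, 2, 100)   # placeholder binary labels

train_sizes, train_scores, test_scores = learning_curve(
    LogisticRegression(), X, y,
    cv=5, scoring="f1",
    train_sizes=np.linspace(0.1, 1.0, 5),
    shuffle=True, random_state=42)  # shuffle the data before splitting into folds
print(test_scores.mean(axis=1))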
I found it to be because only 1's or 0's wound up in my y_test, since my sample size was really small. Try changing your test_size value.
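Another option, not mentioned above but commonly used for this situation, is a stratified split, which guarantees that both classes appear on each side. A sketch with placeholder data:

import numpy as np
from sklearn.model_selection import train_test_split

X = np.random.rand(100, 3)         # placeholder features
y = np.random.randint(0, 2, 100)   # placeholder binary labels

# stratify=y keeps the class proportions the same in train and test
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42)
print(np.unique(y_train), np.unique(y_test))  # both contain 0 and 1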
# python3
import numpy as np
from sklearn.svm import LinearSVC

def upgrade_to_work_with_single_class(SklearnPredictor):
    class UpgradedPredictor(SklearnPredictor):
        def __init__(self, *args, **kwargs):
            self._single_class_label = None
            super().__init__(*args, **kwargs)

        @staticmethod
        def _has_only_one_class(y):
            return len(np.unique(y)) == 1

        def _fitted_on_single_class(self):
            return self._single_class_label is not None

        def fit(self, X, y=None):
            if self._has_only_one_class(y):
                self._single_class_label = y[0]
            else:
                super().fit(X, y)
            return self

        def predict(self, X):
            if self._fitted_on_single_class():
                return np.full(X.shape[0], self._single_class_label)
            else:
                return super().predict(X)
    return UpgradedPredictor

LinearSVC = upgrade_to_work_with_single_class(LinearSVC)
or the hard way (more correct):
import numpy as np
from sklearn.svm import LinearSVC
from copy import deepcopy, copy
from functools import wraps

def copy_class(cls):
    copy_cls = type(f'{cls.__name__}', cls.__bases__, dict(cls.__dict__))
    for name, attr in cls.__dict__.items():
        try:
            hash(attr)
        except TypeError:
            # Assume lack of __hash__ implies mutability. This is NOT
            # a bullet proof assumption but good in many cases.
            setattr(copy_cls, name, deepcopy(attr))
    return copy_cls

def upgrade_to_work_with_single_class(SklearnPredictor):
    SklearnPredictor = copy_class(SklearnPredictor)
    original_init = deepcopy(SklearnPredictor.__init__)
    original_fit = deepcopy(SklearnPredictor.fit)
    original_predict = deepcopy(SklearnPredictor.predict)

    @staticmethod
    def _has_only_one_class(y):
        return len(np.unique(y)) == 1

    def _fitted_on_single_class(self):
        return self._single_class_label is not None

    @wraps(SklearnPredictor.__init__)
    def new_init(self, *args, **kwargs):
        self._single_class_label = None
        original_init(self, *args, **kwargs)

    @wraps(SklearnPredictor.fit)
    def new_fit(self, X, y=None):
        if self._has_only_one_class(y):
            self._single_class_label = y[0]
        else:
            original_fit(self, X, y)
        return self

    @wraps(SklearnPredictor.predict)
    def new_predict(self, X):
        if self._fitted_on_single_class():
            return np.full(X.shape[0], self._single_class_label)
        else:
            return original_predict(self, X)

    setattr(SklearnPredictor, '_has_only_one_class', _has_only_one_class)
    setattr(SklearnPredictor, '_fitted_on_single_class', _fitted_on_single_class)
    SklearnPredictor.__init__ = new_init
    SklearnPredictor.fit = new_fit
    SklearnPredictor.predict = new_predict
    return SklearnPredictor

LinearSVC = upgrade_to_work_with_single_class(LinearSVC)
You can find the indexes of the first (or any) occurrence of each of the classes, concatenate them on top of the arrays, and delete them from their original positions; that way there will be at least one instance of each class in the training set.
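A rough sketch of that idea with placeholder data (the rows are moved to the front rather than duplicated):

import numpy as np

X = np.random.rand(10, 3)                      # placeholder features
y = np.array([0, 0, 1, 0, 1, 0, 0, 1, 0, 0])   # placeholder labels

# index of the first occurrence of each class
first_idx = [np.where(y == c)[0][0] for c in np.unique(y)]

# put those rows on top, and remove them from their original positions
X = np.vstack([X[first_idx], np.delete(X, first_idx, axis=0)])
y = np.concatenate([y[first_idx], np.delete(y, first_idx)])

# now any training prefix that includes the first rows sees every class
print(y[:2])  # [0 1]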
This error is related to the dataset you are using: the dataset contains only one class, for example 1/benign, whereas it must contain two classes, 1 and 0, or Benign and Attack.