I am running on Windows 7, Python 2.7 via Anaconda 4.3.17, Luigi 2.4.0, Pandas 0.18, sklearn version 0.18. Per below, I am trying to have a luigi.LocalTarget output be a pickle to store a few different objects (using firstJob) and then read from that pickle in a dependent job (secondJob). firstJob completes successfully if I run the following from the command line:
"python -m luigi --module luigiPickle firstJob --date 2017-06-07 --local-scheduler"
However, if I try running secondJob i.e.,
"python -m luigi --module luigiPickle secondJob --date 2017-06-07 --local-scheduler"
I get
Traceback (most recent call last):
  File "C:\Anaconda2\lib\site-packages\luigi-2.4.0-py2.7.egg\luigi\worker.py", line 191, in run
    new_deps = self._run_get_new_deps()
  File "C:\Anaconda2\lib\site-packages\luigi-2.4.0-py2.7.egg\luigi\worker.py", line 129, in _run_get_new_deps
    task_gen = self.task.run()
  File "luigiPickle.py", line 41, in run
    ret2 = pickle.load(inFile)
  File "C:\Anaconda2\lib\pickle.py", line 1384, in load
    return Unpickler(file).load()
  File "C:\Anaconda2\lib\pickle.py", line 864, in load
    dispatch[key](self)
  File "C:\Anaconda2\lib\pickle.py", line 1096, in load_global
    klass = self.find_class(module, name)
  File "C:\Anaconda2\lib\pickle.py", line 1130, in find_class
    __import__(module)
ImportError: No module named frame
It appears that luigi is having trouble reading the pickle due to not recognizing the pandas.DataFrame() object (perhaps a scope issue?).
import luigi
import pandas as pd
import pickle
from sklearn.linear_model import LinearRegression
class firstJob(luigi.Task):
    date = luigi.DateParameter()

    def requires(self):
        return None

    def output(self):
        return luigi.LocalTarget('%s_first.pickle' % self.date)

    def run(self):
        ret = {}
        ret['a'] = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
        ret['b'] = pd.DataFrame({'a': [3, 4], 'd': [0, 0]})
        ret['c'] = LinearRegression()
        outFile = self.output().open('wb')
        pickle.dump(ret, outFile, protocol=pickle.HIGHEST_PROTOCOL)
        outFile.close()

class secondJob(luigi.Task):
    date = luigi.DateParameter()

    def requires(self):
        return firstJob(self.date)

    def output(self):
        return luigi.LocalTarget('%s_second.pickle' % self.date)

    def run(self):
        inFile = self.input().open('rb')
        ret2 = pickle.load(inFile)
        inFile.close()

if __name__ == '__main__':
    luigi.run()
The luigi open() call doesn't work with the b flag for binary: it strips it out of the mode string (not sure why). It's better to use the standard open() with the target's path attribute:
open(self.input().path, 'rb') and open(self.output().path, 'wb').
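For illustration, here is a minimal sketch of the two tasks from the question with that change applied (same class and file names as above; nothing else is changed):

import pickle

import luigi
import pandas as pd
from sklearn.linear_model import LinearRegression

class firstJob(luigi.Task):
    date = luigi.DateParameter()

    def output(self):
        return luigi.LocalTarget('%s_first.pickle' % self.date)

    def run(self):
        ret = {'a': pd.DataFrame({'a': [1, 2], 'b': [3, 4]}),
               'b': pd.DataFrame({'a': [3, 4], 'd': [0, 0]}),
               'c': LinearRegression()}
        # plain open() on the target path, so the 'b' flag is honoured
        with open(self.output().path, 'wb') as outFile:
            pickle.dump(ret, outFile, protocol=pickle.HIGHEST_PROTOCOL)

class secondJob(luigi.Task):
    date = luigi.DateParameter()

    def requires(self):
        return firstJob(self.date)

    def output(self):
        return luigi.LocalTarget('%s_second.pickle' % self.date)

    def run(self):
        # plain open() again, so the pickle is read back in binary mode
        with open(self.input().path, 'rb') as inFile:
            ret2 = pickle.load(inFile)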
d6tflow solves this; see its sklearn model pickle example, which answers this question. Plus, you don't need to write all that boilerplate code.
import d6tflow

class firstJob(d6tflow.tasks.TaskPickle):

    def run(self):
        # your code
        self.save(ret)

class secondJob(TaskClass):
    date = luigi.DateParameter()

    def requires(self):
        return firstJob(self.date)

    def run(self):
        inFile = self.input().load()
        # use inFile

d6tflow.run([secondJob])
Related
I have built a horse/human detector using a Keras CNN on Google Colab; the model worked and loaded perfectly there. Now I am building a Flask application, and while loading the .h5 model file I was getting the error
TypeError: __init__() got an unexpected keyword argument 'ragged'
I reinstalled Keras 2.3.1 using pip, and now I am getting a library error:
NameError: name 'six' is not defined
My app.py:
# Import necessary libraries
from flask import Flask, render_template, request
import numpy as np
import os
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.models import load_model

# load model
model = load_model("predictor.h5")
print("## model loaded")

def pred_human_horse(model, horse_or_human):
    test_image = load_img(horse_or_human, target_size=(150, 150))  # resize
    print("## Got image for prediction")
    test_image = img_to_array(test_image) / 255  # numpy array between 0-1
    test_image = np.expand_dims(test_image, axis=0)  # 4 dimensions
    result = model.predict(test_image).round(3)  # rounding off
    pred = np.argmax(result)
    print("## Raw results = ", result)
    print("## class = ", pred)
    if pred == 0:
        return "Horse"
    else:
        return "Human"

# Create flask app
app = Flask(__name__)

@app.route("/", methods=["GET", "POST"])
def home():
    return render_template("index.html")

@app.route("/predict", methods=["GET", "POST"])
def predict():
    if request.method == "POST":
        # get input image file
        file = request.files["image"]
        filename = file.filename
        print("## File received", filename)

        # save the file
        file_path = os.path.join("static/user_uploaded", filename)
        file.save(file_path)

        print("## Prediction...")
        pred = pred_human_horse(model, horse_or_human=file_path)

        return render_template("predict.html", pred_output=pred, user_image=file_path)

if __name__ == "__main__":
    app.run(threaded=False)
The error I am getting:
runfile('F:/INTERNSHIP/Crisp-Metric-MAY21/Human-horse-prediction/app.py', wdir='F:/INTERNSHIP/Crisp-Metric-MAY21/Human-horse-prediction')
Traceback (most recent call last):
File "<ipython-input-26-df590f092cb6>", line 1, in <module>
runfile('F:/INTERNSHIP/Crisp-Metric-MAY21/Human-horse-prediction/app.py', wdir='F:/INTERNSHIP/Crisp-Metric-MAY21/Human-horse-prediction')
File "C:\Users\DANIA NIAZI\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
execfile(filename, namespace)
File "C:\Users\DANIA NIAZI\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "F:/INTERNSHIP/Crisp-Metric-MAY21/Human-horse-prediction/app.py", line 13, in <module>
model = load_model("predictor.h5" )
File "C:\Users\DANIA NIAZI\Anaconda3\lib\site-packages\keras\engine\saving.py", line 492, in load_wrapper
File "C:\Users\DANIA NIAZI\Anaconda3\lib\site-packages\keras\engine\saving.py", line 582, in load_model
File "C:\Users\DANIA NIAZI\Anaconda3\lib\site-packages\keras\utils\io_utils.py", line 211, in is_supported_type
NameError: name 'six' is not defined
Maybe you should try installing the six package (it is also pulled in when installing Django). In any case, you can install it with:
pip install six
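As a quick sanity check (my own suggestion, not part of the original answer), you can confirm that six imports cleanly in the same interpreter that runs the Flask app before retrying load_model():

# Verify six is importable in this environment; if this fails,
# the NameError raised from keras will not go away.
import six
print(six.__version__)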
When I ran the below code,
import numpy as np
import tensorflow as tf

class Config:
    activation = tf.nn.tanh

class Sample:
    def function(self, x):
        return self.config.activation(x)

    def __init__(self, config):
        self.config = config

if __name__ == "__main__":
    with tf.Graph().as_default():
        config = Config()
        sample = Sample(config)
        with tf.Session() as sess:
            a = tf.constant(2.0)
            print sess.run(sample.function(a))
I get this error message:
Traceback (most recent call last):
File "test.py", line 27, in <module>
print sess.run(sample.function(a))
File "test.py", line 11, in function
return self.config.activation(x)
File "/Users/byungwookang/anaconda/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 2019, in tanh
with ops.name_scope(name, "Tanh", [x]) as name:
File "/Users/byungwookang/anaconda/lib/python2.7/contextlib.py", line 17, in __enter__
return self.gen.next()
File "/Users/byungwookang/anaconda/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 4185, in name_scope
with g.as_default(), g.name_scope(n) as scope:
File "/Users/byungwookang/anaconda/lib/python2.7/contextlib.py", line 17, in __enter__
return self.gen.next()
File "/Users/byungwookang/anaconda/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2839, in name_scope
if name:
File "/Users/byungwookang/anaconda/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 541, in __nonzero__
raise TypeError("Using a `tf.Tensor` as a Python `bool` is not allowed. "
TypeError: Using a `tf.Tensor` as a Python `bool` is not allowed. Use `if t is not None:` instead of `if t:` to test if a tensor is defined, and use TensorFlow ops such as tf.cond to execute subgraphs conditioned on the value of a tensor.
In contrast, this code works as expected.
import numpy as np
import tensorflow as tf

class Config:
    activation = np.tanh

class Sample:
    def function(self, x):
        return self.config.activation(x)

    def __init__(self, config):
        self.config = config

if __name__ == "__main__":
    config = Config()
    sample = Sample(config)
    print sample.function(2.0)
    print np.tanh(2.0)
It gives
0.964027580076
0.964027580076
I am curious why one can't pass a tensorflow built-in function as a variable (as done in the first code above), and whether there is a way to avoid the above error. Especially given the second code where a numpy function is passed nicely as a variable, it seems very strange to me that tensorflow doesn't allow this.
The reason this does not work is that in your first case
print sample.function  # <bound method Sample.function of <__main__.Sample instance at 0xXXX>>
print tf.nn.tanh       # <function tanh at 0xXXX>
are not the same, whereas in your second case they match. So when you run sample.function(a), it is not tanh that gets executed but something else.
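To make the difference concrete, here is a small sketch (mine, not part of the original answer) of the Python 2 binding behaviour involved: a plain function stored as a class attribute becomes a method, so the instance is silently passed as its first argument, whereas a numpy ufunc such as np.tanh is not bound at all. That is why tf.nn.tanh ends up receiving the Config instance and the tensor lands in its name argument.

import numpy as np

def plain(x):
    return x

class Config:
    bound = plain      # plain Python function -> becomes a method of Config
    ufunc = np.tanh    # ufunc object -> no binding happens

c = Config()
print c.bound       # <bound method Config.plain of <__main__.Config instance at 0x...>>
print c.ufunc       # <ufunc 'tanh'>
print c.ufunc(2.0)  # 0.964027580076
# c.bound(2.0) would call plain(c, 2.0) and fail, just as tf.nn.tanh
# receives the Config instance as its first positional argument.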
It is hard for me to understand the purpose of all these classes and functions for such a simple job, so I just made the smallest modification to the original code that gets it to work:
import numpy as np
import tensorflow as tf

def config():
    return {'activation': tf.nn.tanh}

class Sample:
    def function(self, x):
        return self.config['activation'](x)

    def __init__(self, config):
        self.config = config

if __name__ == "__main__":
    with tf.Graph().as_default():  # this is also not needed
        sample = Sample(config())
        with tf.Session() as sess:
            a = tf.constant(2.0)
            print sess.run(sample.function(a))
I'm running my app locally and I'm currently getting an IOError while creating a file from data stored in the database. I am using Django 1.10, MongoDB as my database, and Celery 4.0.2 for my background tasks. The problem occurs in tasks.py, since that is where I access the db and then store the result in my Django subfolder 'analysis_samples'.
Here is the traceback:
[2017-04-15 15:31:08,798: ERROR/PoolWorker-2] Task tasks.process_sample_input[0619194e-4300-4a1d-91b0-20766e048c4a] raised unexpected: IOError(2, 'No such file or directory')
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/celery/app/trace.py", line 367, in trace_task
R = retval = fun(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/celery/app/trace.py", line 622, in __protected_call__
return self.run(*args, **kwargs)
File "/home/user/django_apps/myapp/analysis/tasks.py", line 218, in process_sample_input
with open(sample_file_path, "w+") as f:
IOError: [Errno 2] No such file or directory: u'/home/user/django_apps/myapp/myapp/analysis_samples/58f1cc3c45015d127c3d68c1'
And here is the snippet of tasks.py:
import base64
import os, sys

from django.core.files import File

sys.path.append(settings.ANALYSIS_SAMPLES)

@shared_task(name='tasks.process_sample_input')
def process_sample_input(instance_id):
    instance = Sample.objects.get(pk=instance_id)
    # many lines of code here..
    try:
        conn = pymongo.MongoClient(settings.MONGO_HOST, settings.MONGO_PORT)
        db = conn.thugfs  # connect to GridFS db of thug
        thugfs_db = GridFS(db)
    except pymongo.errors.ConnectionFailure, e:
        logger.error("Could not connect to ThugFS MongoDB: %s" % e)

    sample_file_folder = settings.ANALYSIS_SAMPLES
    for sample_fs_id in sample_fs_ids:
        sample_file = thugfs_db.get(ObjectId(sample_fs_id)).read()
        sample_file = base64.b64decode(sample_file)  # decode file from database
        sample_file_path = os.path.join(sample_file_folder, sample_fs_id)
        with open(sample_file_path, "w+") as f:
            fileOut = File(f)
            fileOut.write(sample_file)
settings.py:
ANALYSIS_SAMPLES = os.path.join(BASE_DIR, 'myapp/analysis_samples')
Can anyone see what is causing the error? Any help would be appreciated.
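One thing worth checking (my own guess, not something stated in the question): open(path, "w+") only creates the file itself, never missing parent directories, so if myapp/analysis_samples does not exist on disk the exact IOError above is raised. A minimal sketch of guarding against that inside the task, reusing the names from the snippet above:

import os

# Make sure the destination folder exists before opening the file;
# open(..., "w+") will not create intermediate directories.
sample_file_folder = settings.ANALYSIS_SAMPLES
if not os.path.isdir(sample_file_folder):
    os.makedirs(sample_file_folder)

sample_file_path = os.path.join(sample_file_folder, sample_fs_id)
with open(sample_file_path, "w+") as f:
    f.write(sample_file)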
I have submitted a training job to Cloud ML, but it can't find the CSV file, even though it is there in the bucket. This is the code:
# Use scikit-learn to grid search the batch size and epochs
import numpy
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier

def create_model():
    model = Sequential()
    model.add(Dense(12, input_dim=11, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='nadam', metrics=['accuracy'])
    return model

seed = 7
numpy.random.seed(seed)

FIL = "gs://bubbly-hexagon-112008-ml/dataset/mixed.csv"
dataset = numpy.loadtxt(FIL, delimiter=",")
X = dataset[:, 0:11]
Y = dataset[:, 11]

model = KerasClassifier(build_fn=create_model, verbose=1)

batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100, 500, 1000]
param_grid = dict(batch_size=batch_size, nb_epoch=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(X, Y)

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
After submitting the job I get this error:
Traceback (most recent call last):
  File "/usr/lib/python2.7/runpy.py", line 162, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/usr/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/root/.local/lib/python2.7/site-packages/trainer/task.py", line 18, in <module>
    dataset = numpy.loadtxt(FIL, delimiter=",")
  File "/root/.local/lib/python2.7/site-packages/numpy/lib/npyio.py", line 803, in loadtxt
    fh = iter(open(fname, 'U'))
IOError: [Errno 2] No such file or directory: 'gs://bubbly-hexagon-112008-ml/dataset/mixed.csv'
- The file is in the specified bucket and its permissions include Cloud ML as reader.
- I also used gcloud beta ml init-project to initialize the project.
- And I created a new bucket and put the file in there, but got the same error.
- My bucket is in the same region as my submitted job.
Thanks
file_io from tensorflow works great:
from io import BytesIO  # needed below to wrap the raw bytes
from tensorflow.python.lib.io import file_io
import numpy as np
import json
To read a numpy array:
with file_io.FileIO(path_npx, 'rb') as f:
    np_arr = np.load(BytesIO(f.read()))
    print(np_arr)
To read a json file:
with file_io.FileIO(path_json, 'r') as f:
    print(json.loads(f.read()))
You can't read directly from GCS like that; you need to go through some sort of IO library.
from io import BytesIO
import tensorflow as tf
import numpy as np
from tensorflow.python.lib.io import file_io
FIL = "gs://bubbly-hexagon-112008-ml/dataset/mixed.csv"
f = BytesIO(file_io.read_file_to_string(FIL, binary_mode=True))
data = np.load(f)
I don't think you can read GCS files directly with numpy.
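For the CSV in the question specifically, one approach that should work (a sketch, untested against that bucket) is to open the object with file_io and hand the file handle to numpy.loadtxt, which is the CSV-aware reader here:

import numpy as np
from tensorflow.python.lib.io import file_io

FIL = "gs://bubbly-hexagon-112008-ml/dataset/mixed.csv"

# file_io.FileIO returns a file-like object that numpy.loadtxt can consume,
# so the gs:// path never reaches the built-in open().
with file_io.FileIO(FIL, 'r') as f:
    dataset = np.loadtxt(f, delimiter=",")

X = dataset[:, 0:11]
Y = dataset[:, 11]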
I am new to Python and as my first project I am attempting to convert a Python2 script to Python3.
The script is failing when it attempts to serialize a class using pickle.
It seems to be failing because I am trying to save a class which uses the Cmd CLI.
This code works using Python2.
Can anyone tell me what is wrong with the script and how I fix it?
import sys
import cmd
try:
    import pickle as pickle
except:
    import pickle
import os.path

def main():
    app = Labyrinth()
    turnfile = "turn0.lwot"
    app.Save(turnfile)

class CLI(cmd.Cmd):
    def __init__(self):
        cmd.Cmd.__init__(self)

class Labyrinth(cmd.Cmd):
    def __init__(self):
        cmd.Cmd.__init__(self)

    def Save(self, fname):
        with open(fname, 'wb') as f:
            pickle.dump(self, f, 2)
        f.close()
        print ("Save Successful!")
        sys.exit()

if __name__ == '__main__':
    main()
Not all objects are picklable. In particular, file objects are problematic because you can't generally restore their state later. cmd.Cmd holds stdin and stdout file objects, and that should make it unpicklable. I was quite surprised that it worked in Python 2, but it didn't really... Even though stdin and stdout pickled, the unpickled object you get back later doesn't work, as in this example:
>>> import sys
>>> import pickle
>>> sys.stdout.write('foo\n')
foo
>>> serialized = pickle.dumps(sys.stdout, 2)
>>> stdout = pickle.loads(serialized)
>>> stdout.write('bar\n')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ValueError: I/O operation on closed file
>>>
So, even though this bit of code didn't fail, the object shouldn't be usable later. You can add a few special methods to an object that let you fix objects so they can be serialized. Here, I've stripped the bad attributes on save and added them back on restore. Now you can pickle, unpickle and it actually works when you are done.
import sys
import cmd
try:
    import cPickle as pickle
except:
    import pickle
import os.path

def main():
    app = Labyrinth()
    turnfile = "turn0.lwot"
    app.Save(turnfile)

class CLI(cmd.Cmd):
    def __init__(self):
        cmd.Cmd.__init__(self)

class Labyrinth(cmd.Cmd):
    def __init__(self):
        cmd.Cmd.__init__(self)

    def Save(self, fname):
        with open(fname, 'wb') as f:
            pickle.dump(self, f, pickle.HIGHEST_PROTOCOL)
        f.close()
        print ("Save Successful!")
        sys.exit()

    def __getstate__(self):
        # stdin/out are unpicklable. We'll get new ones on load
        return tuple(((k, v) for k, v in self.__dict__.items()
                      if k not in ('stdin', 'stdout')))

    def __setstate__(self, state):
        self.__dict__.update(state)
        self.stdin = sys.stdin
        self.stdout = sys.stdout

if __name__ == '__main__':
    main()
Playing with the protocol doesn't help. The full error message (which you should have included) is:
1027:~/mypy$ python3 stack41334887.py
Traceback (most recent call last):
File "stack41334887.py", line 33, in <module>
main()
File "stack41334887.py", line 14, in main
app.Save(turnfile)
File "stack41334887.py", line 27, in Save
pickle.dump(self,f, 3, fix_imports=True)
TypeError: cannot serialize '_io.TextIOWrapper' object
Python 3 made some major changes to the io system. This TextIOWrapper is, I think, new to Py3.
https://docs.python.org/3.1/library/io.html#io.TextIOWrapper
The question Can I use multiprocessing.Pool in a method of a class? also ran into problems serializing a TextIOWrapper.
=========
So, inspired by @tdelaney, I checked stdin for my Py3 session:
In [1212]: sys.stdin
Out[1212]: <_io.TextIOWrapper name='<stdin>' mode='r' encoding='UTF-8'>
So that's the thing that can't be serialized.
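As a quick confirmation (my own check, assuming a Python 3 session), trying to pickle that wrapper directly reproduces the failure:

import sys
import pickle

# Pickling the TextIOWrapper behind sys.stdin fails outright in Python 3,
# which is exactly what pickle.dump(self, ...) runs into via cmd.Cmd.
try:
    pickle.dumps(sys.stdin)
except TypeError as err:
    print(err)  # e.g. "cannot serialize '_io.TextIOWrapper' object"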