Slow inference times for Tensorflow Object Detection API

Slow inference times for Tensorflow Object Detection API - amazon-web-services

I've been working with the Tensorflow Object Detection API - In my case, I'm attempting to detect vehicles in still images using the kitti-trained model (faster_rcnn_resnet101_kitti_2018_01_28) from the model zoo and I am using code modified from the object_detection_tutorial jupyter notebook included in the github repository .
I have included my modified code below but am finding the same results with the original notebook from github.
When running on a Jupyter notebook server on an Amazon AWS g3.4xlarge (GPU) instance with the Deep Learning AMI, it takes just shy of 4 seconds to process a single image. The time for the inference function is 1.3-1.5 seconds (see code below), which seems abnormally high compared with the reported inference time for the model (20ms). While I don't expect to hit the reported mark, my times seem out of line and are impractical for my needs. I'm looking at processing 1-million+ images at a time and can't afford 46 days of processing time. Given that the model is used on video frame captures, I would think it should be possible to cut the time per image to under 1 second, at least.
My questions are:
1) What explanations/solutions exist to reduce inference time?
2) Is 1.5 seconds to convert an image to a numpy array (prior to processing) out of line?
3) If this is the best performance I can expect, how much of a speedup could I hope to gain from reworking the model to batch process images?
Thanks for any help!
Code from python notebook:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import json
import collections
import os.path
import datetime
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
# This is needed to display the images.
get_ipython().magic('matplotlib inline')
#Setup variables
PATH_TO_TEST_IMAGES_DIR = 'test_images'
MODEL_NAME = 'faster_rcnn_resnet101_kitti_2018_01_28'
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'kitti_label_map.pbtxt')
NUM_CLASSES = 2
from utils import label_map_util
from utils import visualization_utils as vis_util
def get_scores(
        boxes,
        classes,
        scores,
        category_index,
        min_score_thresh=.5
):
    """Build a text label for every detection that passes the score threshold.

    Args:
        boxes: array whose first dimension indexes the detections; only
            ``boxes.shape[0]`` is read.
        classes: per-detection class ids, indexable by detection index.
        scores: per-detection confidences, or ``None``. With ``None`` every
            detection is kept and its label carries no percentage.
        category_index: mapping from class id to a dict with a ``'name'``
            entry. Ids missing from the mapping are labelled ``'N/A'``.
        min_score_thresh: a detection is kept only when its score is
            strictly greater than this value.

    Returns:
        A ``collections.defaultdict(list)`` keyed by detection index. Each
        kept detection maps to a one-element list holding
        ``'<name>: <pct>%'`` (or just ``'<name>'`` when ``scores`` is
        ``None``), where ``<pct>`` is ``int(100 * score)``.
    """
    box_to_display_str_map = collections.defaultdict(list)
    for i in range(boxes.shape[0]):
        # `not (a > b)` rather than `a <= b` so NaN scores are dropped, as before.
        if scores is not None and not scores[i] > min_score_thresh:
            continue

        class_id = classes[i]
        if class_id in category_index:
            class_name = category_index[class_id]['name']
        else:
            class_name = 'N/A'
        display_str = str(class_name)

        # The previous `scores is None` branch wrote to colour-map names that
        # do not exist in this function and raised NameError; unscored
        # detections now simply get the class name as their label.
        if scores is not None:
            percent = int(100 * scores[i])
            if display_str:
                display_str = '{}: {}%'.format(display_str, percent)
            else:
                display_str = '{}%'.format(percent)

        box_to_display_str_map[i].append(display_str)
    return box_to_display_str_map
def load_image_into_numpy_array(image):
    """Convert an image object into a ``(height, width, 3)`` uint8 array.

    Args:
        image: object exposing ``size`` as ``(width, height)`` and a
            ``getdata()`` method yielding one 3-value entry per pixel.

    Returns:
        ``np.uint8`` array of shape ``(height, width, 3)``.

    NOTE(review): in the sample timings further down, the gap between the
    "Numpy" and "Expand" stamps (about 1.9 s per image) is spent in this call.
    """
    width, height = image.size
    flat_pixels = np.array(image.getdata())
    as_rows = flat_pixels.reshape((height, width, 3))
    return as_rows.astype(np.uint8)
def run_inference_for_single_image(image, graph, sess=None):
    """Run the detection graph on one image and return its outputs.

    Args:
        image: image array; a leading batch axis is added with
            ``np.expand_dims(image, 0)`` before it is fed to
            ``image_tensor:0``.
        graph: ``tf.Graph`` holding the imported detection model.
        sess: optional ``tf.Session`` that was opened on ``graph``. When
            given it is used as-is and left open, so a caller processing
            many images can pay the session start-up cost once. When
            omitted, a session is opened and closed inside this call.

    Returns:
        dict with ``num_detections`` (int), ``detection_classes`` (uint8
        array), ``detection_boxes``, ``detection_scores`` and, if the graph
        exposes it, ``detection_masks`` -- each with the batch axis removed.
    """
    with graph.as_default():
        if sess is not None:
            return _detect_with_session(image, sess)
        # NOTE: without a caller-supplied session, a new tf.Session is
        # created and torn down for every single image.
        with tf.Session() as own_sess:
            return _detect_with_session(image, own_sess)


def _detect_with_session(image, sess):
    """Fetch the detection outputs for `image` via `sess` on the default graph."""
    default_graph = tf.get_default_graph()

    # Get handles to input and output tensors
    ops = default_graph.get_operations()
    all_tensor_names = {output.name for op in ops for output in op.outputs}
    tensor_dict = {}
    for key in [
            'num_detections', 'detection_boxes', 'detection_scores',
            'detection_classes', 'detection_masks'
    ]:
        tensor_name = key + ':0'
        if tensor_name in all_tensor_names:
            tensor_dict[key] = default_graph.get_tensor_by_name(tensor_name)

    if 'detection_masks' in tensor_dict:
        # `utils_ops` was referenced but never imported in this listing;
        # import it next to its only use so the mask path no longer raises
        # NameError.
        from utils import ops as utils_ops

        # NOTE: these ops are added to the graph on every call that
        # reaches this branch.
        # The following processing is only for single image
        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
        # Reframe is required to translate mask from box coordinates to
        # image coordinates and fit the image size.
        real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
        detection_boxes = tf.slice(
            detection_boxes, [0, 0], [real_num_detection, -1])
        detection_masks = tf.slice(
            detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image.shape[0], image.shape[1])
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        # Follow the convention by adding back the batch dimension
        tensor_dict['detection_masks'] = tf.expand_dims(
            detection_masks_reframed, 0)

    image_tensor = default_graph.get_tensor_by_name('image_tensor:0')

    # Run inference
    output_dict = sess.run(
        tensor_dict,
        feed_dict={image_tensor: np.expand_dims(image, 0)})

    # all outputs are float32 numpy arrays, so convert types as appropriate
    output_dict['num_detections'] = int(output_dict['num_detections'][0])
    output_dict['detection_classes'] = output_dict[
        'detection_classes'][0].astype(np.uint8)
    output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
    output_dict['detection_scores'] = output_dict['detection_scores'][0]
    if 'detection_masks' in output_dict:
        output_dict['detection_masks'] = output_dict['detection_masks'][0]
    return output_dict
# Walk the test image directory recursively and keep every file whose name
# ends in the wanted extension (compared case-insensitively).
exten = '.jpg'
TEST_IMAGE_PATHS = [
    os.path.join(dirpath, name)
    for dirpath, _dirnames, files in os.walk(PATH_TO_TEST_IMAGES_DIR)
    for name in files
    if name.lower().endswith(exten)
]
print((len(TEST_IMAGE_PATHS), 'Images To Process'))
# Load the serialized model from PATH_TO_CKPT into its own tf.Graph.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
    # The bytes are fully read above, so parsing and importing do not need
    # the file handle to stay open.
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')

# Build the class-id -> category lookup that is passed to get_scores().
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True,
)
category_index = label_map_util.create_category_index(categories)
# Per-step wall-clock stamps (myTimings) and per-image labels (myResults).
myTimings = []
myX = 1  # 1-based counter of the image currently being processed
myResults = collections.defaultdict(list)

# OUTPUTS_BASENAME names the two JSON files written at the end but is never
# assigned in this listing; fall back to the model name unless an earlier
# notebook cell already defined it.
try:
    OUTPUTS_BASENAME
except NameError:
    OUTPUTS_BASENAME = MODEL_NAME


def _stamp(step):
    """Print and record the current wall-clock time for `step` of image `myX`."""
    now = datetime.datetime.now().time()
    print(myX, step + ":", now)
    myTimings.append((myX, step, str(now)))


for image_path in TEST_IMAGE_PATHS:
    if not os.path.exists(image_path):
        continue
    print(myX, "--------------------------------------",
          datetime.datetime.now().time())
    print(myX, "Image:", image_path)
    myTimings.append((myX, "Image", image_path))

    _stamp("Open")
    # Use the image as a context manager so the file is closed once the
    # pixel data has been copied into the array.
    with Image.open(image_path) as image:
        _stamp("Numpy")
        image_np = load_image_into_numpy_array(image)

    # The "Expand" stamp is kept so the records keep the layout shown in the
    # sample timings; the batch dimension itself is added inside
    # run_inference_for_single_image, so no expanded copy is built here.
    _stamp("Expand")

    # Actual detection.
    _stamp("Detect")
    output_dict = run_inference_for_single_image(image_np, detection_graph)

    # Turn the raw detections into per-box label strings.
    _stamp("Export")
    op = get_scores(
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        min_score_thresh=.2)
    myResults[image_path].append(op)

    _stamp("Done")
    myX = myX + 1

# Save results
with open(OUTPUTS_BASENAME + '_Results.json', 'w') as fout:
    json.dump(myResults, fout)
with open(OUTPUTS_BASENAME + '_Timings.json', 'w') as fout:
    json.dump(myTimings, fout)
Example Of Timings:
[1, "Image", "test_images/DE4T_11Jan2018/MFDC4612.JPG"]
[1, "Open", "19:20:08.029423"]
[1, "Numpy", "19:20:08.052679"]
[1, "Expand", "19:20:09.977166"]
[1, "Detect", "19:20:09.977250"]
[1, "Export", "19:23:13.902443"]
[1, "Done", "19:23:13.903012"]
[2, "Image", "test_images/DE4T_11Jan2018/MFDC4616.JPG"]
[2, "Open", "19:23:13.903885"]
[2, "Numpy", "19:23:13.906320"]
[2, "Expand", "19:23:15.756308"]
[2, "Detect", "19:23:15.756597"]
[2, "Export", "19:23:17.153233"]
[2, "Done", "19:23:17.153699"]
[3, "Image", "test_images/DE4T_11Jan2018/MFDC4681.JPG"]
[3, "Open", "19:23:17.154510"]
[3, "Numpy", "19:23:17.156576"]
[3, "Expand", "19:23:19.012935"]
[3, "Detect", "19:23:19.013013"]
[3, "Export", "19:23:20.323839"]
[3, "Done", "19:23:20.324307"]
[4, "Image", "test_images/DE4T_11Jan2018/MFDC4697.JPG"]
[4, "Open", "19:23:20.324791"]
[4, "Numpy", "19:23:20.327136"]
[4, "Expand", "19:23:22.175578"]
[4, "Detect", "19:23:22.175658"]
[4, "Export", "19:23:23.472040"]
[4, "Done", "19:23:23.472297"]

1) What you can do is load the video directly instead of images, then change "run_inference_for_single_image()" to create the session once and feed the images/video frames through it (re-creating the session and graph handles for every image is very slow). Furthermore, you can edit the pipeline config file to reduce the number of proposals, which will directly speed up inference. Note that you have to re-export the graph afterwards (https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/exporting_models.md). Batching also helps (though I am sorry, I forgot by how much), and finally, you can employ multiprocessing to offload CPU-specific operations (drawing bounding boxes, loading data) to utilize the GPU better.
2) Is 1.5 seconds to convert an image to a numpy (prior to processing) out-of-line <- yes, that is insanely slow and there is plenty of room for improvement.
3) While I don't know the exact gpu at AWS (k80?), you should be able to get over 10fps on a geforce 1080TI with all fixes, which is in line with the 79ms time they reported (where did you get 20ms for faster-rcnn_resnet_101?? )

You could also try OpenVINO for better performance of the inference. It optimizes the inference time by e.g. graph pruning and fusing some operations. OpenVINO is optimized for Intel hardware but it should work with any CPU (even with Cloud).
Here are some performance benchmarks for the Faster RCNN Resnet model and various CPUs.
It's rather straightforward to convert the Tensorflow model to OpenVINO unless you have fancy custom layers. The full tutorial on how to do it can be found here. Some snippets below.
Install OpenVINO
The easiest way to do it is using PIP. Alternatively, you can use this tool to find the best way in your case.
pip install openvino-dev[tensorflow2]
Use Model Optimizer to convert SavedModel model
The Model Optimizer is a command-line tool that comes from OpenVINO Development Package. It converts the Tensorflow model to IR, which is a default format for OpenVINO. You can also try the precision of FP16, which should give you better performance without a significant accuracy drop (just change data_type). Run in the command line:
mo --saved_model_dir "model" --input_shape "[1, 3, 224, 224]" --data_type FP32 --output_dir "model_ir"
Run the inference
The converted model can be loaded by the runtime and compiled for a specific device e.g. CPU or GPU (integrated into your CPU like Intel HD Graphics). If you don't know what is the best choice for you, just use AUTO.
# Load the network
ie = Core()
model_ir = ie.read_model(model="model_ir/model.xml")
compiled_model_ir = ie.compile_model(model=model_ir, device_name="CPU")
# Get output layer
output_layer_ir = compiled_model_ir.output(0)
# Run inference on the input image
result = compiled_model_ir([input_image])[output_layer_ir]
There is even [OpenVINO Model Server][5] which is very similar to Tensorflow Serving.
Disclaimer: I work on OpenVINO.

Related

Strange behavior of Inception_v3

I am trying to create a generative network based on the pre-trained Inception_v3.
1) I fix all the weights in the model
2) create a Variable whose size is (2, 3, 299, 299)
3) create targets of size (2, 1000) that I want my final layer activations to become as close as possible to by optimizing the Variable.
(I do not set the batchsize of 1, because unlike VGG16, Inception_v3 doesn't take batchsize=1, but that's not the point).
The following code should work, but gives me the error: «RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation».
# minimalist code with Inception_v3 that throws the error:
import torch
from torch.autograd import Variable
import torch.optim as optim
import torch.nn as nn
import torchvision
torch.set_default_tensor_type('torch.FloatTensor')
Iv3 = torchvision.models.inception_v3(pretrained=True)
for i in Iv3.parameters():
i.requires_grad = False
criterion = nn.CrossEntropyLoss()
x = Variable(torch.randn(2, 3, 299, 299), requires_grad=True)
target = torch.empty(2, dtype=torch.long).random_(1000)
output = Iv3(x)
loss = criterion(output[0], target)
loss.backward()
print(x.grad)
This is very strange, because if I do the same thing with VGG16, everything works fine:
# minimalist working code with VGG16:
import torch
from torch.autograd import Variable
import torch.optim as optim
import torch.nn as nn
import torchvision
# torch.cuda.empty_cache()
# vgg16 = torchvision.models.vgg16(pretrained=True).cuda()
# torch.set_default_tensor_type('torch.cuda.FloatTensor')
torch.set_default_tensor_type('torch.FloatTensor')
vgg16 = torchvision.models.vgg16(pretrained=True)
for i in vgg16.parameters():
i.requires_grad = False
criterion = nn.CrossEntropyLoss()
x = Variable(torch.randn(2, 3, 229, 229), requires_grad=True)
target = torch.empty(2, dtype=torch.long).random_(1000)
output = vgg16(x)
loss = criterion(output, target)
loss.backward()
print(x.grad)
Please help.

Thanks to @iacolippo the issue is solved. It turns out the problem was due to PyTorch 1.0.0; there is no problem with PyTorch 0.4.1.

Saving data from traceplot in PyMC3

Below is the code for a simple Bayesian Linear regression. After I obtain the trace and the plots for the parameters, is there any way in which I can save the data that created the plots in a file so that if I need to plot it again I can simply plot it from the data in the file rather than running the whole simulation again?
import pymc3 as pm
import matplotlib.pyplot as plt
import numpy as np
x = np.linspace(0,9,5)
y = 2*x + 5
yerr=np.random.rand(len(x))
def soln(x, p1, p2):
    """Evaluate the straight line with intercept `p1` and slope `p2` at `x`."""
    slope_term = p2 * x
    return p1 + slope_term
with pm.Model() as model:
# Define priors
intercept = pm.Normal('Intercept', 15, sd=5)
slope = pm.Normal('Slope', 20, sd=5)
# Model solution
sol = soln(x, intercept, slope)
# Define likelihood
likelihood = pm.Normal('Y', mu=sol,
sd=yerr, observed=y)
# Sampling
trace = pm.sample(1000, nchains = 1)
pm.traceplot(trace)
print pm.summary(trace, ['Slope'])
print pm.summary(trace, ['Intercept'])
plt.show()

There are two easy ways of doing this:
Use a version after 3.4.1 (currently this means installing from master, with pip install git+https://github.com/pymc-devs/pymc3). There is a new feature that allows saving and loading traces efficiently. Note that you need access to the model that created the trace:
...
pm.save_trace(trace, 'linreg.trace')
# later
with model:
trace = pm.load_trace('linreg.trace')
Use cPickle (or pickle in python 3). Note that pickle is at least a little insecure, don't unpickle data from untrusted sources:
import cPickle as pickle # just `import pickle` on python 3
...
with open('trace.pkl', 'wb') as buff:
pickle.dump(trace, buff)
#later
with open('trace.pkl', 'rb') as buff:
trace = pickle.load(buff)

Update for someone like me who is still coming over to this question:
The load_trace and save_trace functions have been removed. As of version 4.0, even the deprecation warning for these functions is gone.
The way to do it is now to use arviz:
with model:
trace = pymc.sample(return_inferencedata=True)
trace.to_netcdf("filename.nc")
And it can be loaded with:
trace = arviz.from_netcdf("filename.nc")

This way works for me :
# saving trace
pm.save_trace(trace=trace_nb, directory=r"c:\Users\xxx\Documents\xxx\traces\trace_nb")
# loading saved traces
with model_nb:
t_nb = pm.load_trace(directory=r"c:\Users\xxx\Documents\xxx\traces\trace_nb")

Both eager and graph execution in tensorflow tests

I have some tests that work with graph and sessions. I also want to write some small tests with eager mode to test easily some functionalities. For example:
def test_normal_execution():
matrix_2x4 = np.array([[1, 2, 3, 4], [6, 7, 8, 9]])
dataset = tf.data.Dataset.from_tensor_slices(matrix_2x4)
iterator = dataset.make_one_shot_iterator()
first_elem = iterator.get_next()
with tf.Session() as sess:
result = sess.run(first_elem)
assert (result == [1, 2, 3, 4]).all()
sess.close()
In another file:
def test_eager_execution():
matrix_2x4 = np.array([[1, 2, 3, 4], [6, 7, 8, 9]])
tf.enable_eager_execution()
dataset = tf.data.Dataset.from_tensor_slices(matrix_2x4)
iterator = dataset.__iter__()
first_elem = iterator.next()
assert (first_elem.numpy() == [1, 2, 3, 4]).all()
Is there a way to do this? I get ValueError: tf.enable_eager_execution must be called at program startup. when I try to run the eagerly executed test. I am using pytest to run my tests.
edit:
With little assistance from the accepted response I created a decorator, that works nicely with eager mode and pytest's fixtures:
def run_eagerly(func):
    """Decorate `func` so that calling it runs `func` via ``tfe.py_func`` in a session.

    The returned wrapper opens a fresh ``tf.Session`` on each call and runs
    ``tfe.py_func(func, ...)`` in it. Only the *values* of the keyword
    arguments are forwarded (as ``inp``); positional arguments are accepted
    but not passed on, and the wrapper returns ``None``.

    ``functools.wraps`` copies the name and docstring of `func` onto the
    wrapper, so the decorated test still reports under its own name rather
    than as ``eager_fun``.
    """
    import functools  # local import: this snippet has no import block of its own

    @functools.wraps(func)
    def eager_fun(*args, **kwargs):
        with tf.Session() as sess:
            sess.run(tfe.py_func(func, inp=list(kwargs.values()), Tout=[]))
    return eager_fun

With the caveat that anything in the tf.contrib namespace is subject to change between releases, you can decorate your test with @tf.contrib.eager.run_test_in_graph_and_eager_modes. Some other projects, like TensorFlow Probability, seem to use this.
For non-tests, some things to look into are:
tf.contrib.eager.defun: Is useful when you have eager execution enabled but want to "compile" some computation into a graph to benefit from memory and/or performance optimizations.
tf.contrib.eager.py_func: Is useful when do not have eager execution enabled but want to execute some computation in the graph as a Python function.
One may question the reasoning behind not allowing a call to tf.enable_eager_execution() to be undone. The idea is that library authors should not invoke it; only the end user should invoke it in main(). This reduces the chances that libraries are written in incompatible ways (where, say, functions in one library disable eager execution and return symbolic tensors while functions in another library enable eager execution and expect concrete-valued tensors, which would make mixing the libraries problematic).
Hope that helps

There is an official way to use eager execution in a graph environment. But I'm not sure if this is good and convenient enough for you because you need to write quite some code to wrap and run your test function. Anyway, here is your example which should at least work:
import numpy as np
import tensorflow as tf
def test_normal_execution():
matrix_2x4 = np.array([[1, 2, 3, 4], [6, 7, 8, 9]])
dataset = tf.data.Dataset.from_tensor_slices(matrix_2x4)
iterator = dataset.make_one_shot_iterator()
first_elem = iterator.get_next()
with tf.Session() as sess:
result = sess.run(first_elem)
assert (result == [1, 2, 3, 4]).all()
sess.close()
def test_eager_execution():
matrix_2x4 = np.array([[1, 2, 3, 4], [6, 7, 8, 9]])
dataset = tf.data.Dataset.from_tensor_slices(matrix_2x4)
iterator = dataset.__iter__()
first_elem = iterator.next()
assert (first_elem.numpy() == [1, 2, 3, 4]).all()
test_normal_execution()
# test_eager_execution() # Instead, you have to use the following three lines.
with tf.Session() as sess:
tfe = tf.contrib.eager
sess.run(tfe.py_func(test_eager_execution, [], []))

Somewhat undocumented but tensorflow 2 has a function run_all_in_graph_and_eager_modes used for decorating test classes and
run_in_graph_and_eager_modes used for decorating test methods:
import tensorflow as tf
from tensorflow.python.framework import test_util
# Class-level decorator: applies to the test methods of the whole class.
@test_util.run_all_in_graph_and_eager_modes
class MyTestCase(tf.test.TestCase):
    ...  # test methods go here
import tensorflow as tf
from tensorflow.python.framework import test_util
class MyTestCase(tf.test.TestCase):

    # Method-level decorator: applies to this one test method only.
    @test_util.run_in_graph_and_eager_modes
    def test_something(self):
        ...  # test body goes here

Python Keras - How to generate right image dimension

I'm trying to use Keras for image recognition, but I keep getting errors like:
ValueError: Error when checking input: expected input_9 to have 4 dimensions, but got array with shape (100, 300, 300)
I tried to change values for params that relate to dimensions, also tried to reshape images, but still got errors.
In fact, I don't understand why I got this error. Why does it expect 4 dimensions?
Here's my code:
import os
import numpy as np
import pandas as pd
import scipy
import sklearn
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Convolution2D, Flatten, MaxPooling2D, Reshape, InputLayer
import cv2
from skimage import io
import urllib2
from PIL import Image
import numpy as np
%matplotlib inline
I chose 50 rose images and 50 sunflower images from imagenet:
rose_file = "http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=n04971313"
sunflower_file = "http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=n11978713"
images = []
image_num = 50
rose_urls = urllib2.urlopen(rose_file)
rose_ct = 0
for rose_url in rose_urls:
try:
resp = urllib2.urlopen(rose_url)
rose_image = np.asarray(bytearray(resp.read()), dtype="uint8")
images.append(rose_image)
rose_ct += 1
if rose_ct == image_num: # only use 50 images here, otherwise, loading time is too long
break
except: # some images are no longer available
pass
sunflower_urls = urllib2.urlopen(sunflower_file)
sunflower_ct = 0
for sunflower_url in sunflower_urls:
try:
resp = urllib2.urlopen(sunflower_url)
sunflower_image = np.asarray(bytearray(resp.read()), dtype="uint8")
images.append(sunflower_image)
sunflower_ct += 1
if sunflower_ct == image_num: # only use 50 images here, otherwise, loading time is too long
break
except: # some images are no longer available
pass
Resize training images to 300*300:
from keras.utils.np_utils import to_categorical
for i in range(len(images)):
images[i]=cv2.resize(np.array(images[i]),(300,300))
images = np.array(images)
labels = [0 for i in range(image_num)]
labels.extend([1 for j in range(image_num)])
labels = np.array(labels)
labels = to_categorical(labels)
Build the model:
filters=10
filtersize=(5,5)
epochs=7
batchsize=128
input_shape=(300,300, 3)
model = Sequential()
model.add(keras.layers.InputLayer(input_shape=input_shape))
model.add(keras.layers.convolutional.Conv2D(filters, filtersize, strides=(1, 1),
padding='valid', data_format="channels_last", activation='relu'))
model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(units=2, input_dim=10, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(images, labels, epochs=epochs, batch_size=batchsize, validation_split=0.3)
model.summary()
Here, I tried to change input_shape=(300,300, 3) into input_shape=(300,300, 3, 0), hoping this means 4 dimensions, but got errors saying:
Input 0 is incompatible with layer conv2d_13: expected ndim=4, found ndim=5
Do you know why I got these errors, and how to deal with this problem?

Tensorflow high-level Estimator with input_fn from external file reader

[short summary: how to use TF high-level Estimator on Python with an external file reader? or with feed_dict?]
Been struggling with this for few days, couldn't find any solution on-line...
I'm using TF high-level modules (tf.contrib.learn.Estimator on tf1.0, or tf.estimator.Estimator on tf1.1),
features and targets (x/y) inputted through an input_fn, and the graph built on the model_fn.
Already trained a nn on 'small' data sets, in which the whole input is the part of the graph, using slice_input_producer etc. (I can push an example to github if it serves ppl here).
I try to train a larger nn on 'heavier' data-sets (10s-100s GB).
I have an external Python reader that does some nasty binary file reading, which I really don't want to get into.
This reader has its own queue.Queue with m1 samples. When I use it to extract the m1 {features} & {targets}, the net simply saves all these samples as const. in the first layer of the graph... completely undesired.
I try to either -
feed the output of the external file reader as input to my graph.
define a proper tf queue object that will keep updating the queue (each time a sample is dequeued, i want a completely other sample to be enqueued).
Reminding that I use the "high level", e.g.
self.Estimator = tf.contrib.learn.Estimator(
model_fn=self.model_fn,
model_dir=self.config['model_dir'],
config=tf.contrib.learn.RunConfig( ... ) )
def input_fn(self, mode):
batch_data = self.data[mode].next() # pops out a batch of samples, as numpy 4D matrices
... # some processing of batch data
features_dict = dict(data=batch_data.pop('data'))
targets_dict = batch_data
return features_dict, targets_dict
self.Estimator.fit(input_fn=lambda: self.input_fn(modekeys.TRAIN))

Attached is a final solution for integrating an external reader into the high-level TF api (tf.contrib.learn.Estimator / tf.estimator.Estimator).
Please note:
the architecture and "logic" is not important. it's a stupid simple net.
the external reader outputs a dictionary of numpy matrices.
the input_fn is using this reader.
In order to verify that the reader "pulls new values", I both
save the recent value to self.status (should be > 1.0)
save a summary, to be viewed in tensorboard.
Code example is in gist, and below.
import tensorflow as tf
import numpy as np
modekeys = tf.contrib.learn.ModeKeys
tf.logging.set_verbosity(tf.logging.DEBUG)
# Tested on python 2.7.9, tf 1.1.0
class inputExample:
def __init__(self):
self.status = 0.0 # tracing which value was recently 'pushed' to the net
self.model_dir = 'temp_dir'
self.get_estimator()
def input_fn(self):
# returns features and labels dictionaries as expected by tf Estimator's model_fn
data, labels = tf.py_func(func=self.input_fn_np, inp=[], Tout=[tf.float32, tf.float32], stateful=True)
data.set_shape([1,3,3,1]) # shapes are unknown and need to be set for integrating into the network
labels.set_shape([1,1,1,1])
return dict(data=data), dict(labels=labels)
def input_fn_np(self):
# returns a dictionary of numpy matrices
batch_data = self.reader()
return batch_data['data'], batch_data['labels']
def model_fn(self, features, labels, mode):
# using tf 2017 convention of dictionaries of features/labels as inputs
features_in = features['data']
labels_in = labels['labels']
pred_layer = tf.layers.conv2d(name='pred', inputs=features_in, filters=1, kernel_size=3)
tf.summary.scalar(name='label', tensor=tf.squeeze(labels_in))
tf.summary.scalar(name='pred', tensor=tf.squeeze(pred_layer))
loss = None
if mode != modekeys.INFER:
loss = tf.losses.mean_squared_error(labels=labels_in, predictions=pred_layer)
train_op = None
if mode == modekeys.TRAIN:
train_op = tf.contrib.layers.optimize_loss(
loss=loss,
learning_rate = 0.01,
optimizer = 'SGD',
global_step = tf.contrib.framework.get_global_step()
)
predictions = {'estim_exp': pred_layer}
return tf.contrib.learn.ModelFnOps(mode=mode, predictions=predictions, loss=loss, train_op=train_op)
def reader(self):
self.status += 1
if self.status > 1000.0:
self.status = 1.0
return dict(
data = np.random.randn(1,3,3,1).astype(dtype=np.float32),
labels = np.sin(np.ones([1,1,1,1], dtype=np.float32)*self.status)
)
def get_estimator(self):
self.Estimator = tf.contrib.learn.Estimator(
model_fn = self.model_fn,
model_dir = self.model_dir,
config = tf.contrib.learn.RunConfig(
save_checkpoints_steps = 10,
save_summary_steps = 10,
save_checkpoints_secs = None
)
)
if __name__ == '__main__':
ex = inputExample()
ex.Estimator.fit(input_fn=ex.input_fn)

You can use tf.constant if you have the training data already in python memory as shown in the abalone TF example: https://github.com/tensorflow/tensorflow/blob/r1.1/tensorflow/examples/tutorials/estimators/abalone.py#L138-L141
Note: copying the data from disk to Python to TensorFlow is often less efficient than constructing an input pipeline in TensorFlow (i.e. loading data from disk directly into TensorFlow Tensors), such as using tf.contrib.learn.datasets.base.load_csv_without_header.

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

Slow inference times for Tensorflow Object Detection API - amazon-web-services

Related

Strange behavior of Inception_v3

Saving data from traceplot in PyMC3

Both eager and graph execution in tensorflow tests

Python Keras - How to generate right image dimension

Tensorflow high-level Estimator with input_fn from external file reader

Categories

Resources