RNN regression using Tensorflow? - python-2.7

I am currently trying to implement a RNN for regression.
I need to create a neural network capable of converting audio samples into vector of mfcc feature. I've already know what the feature for each audio samples is, so the task it self is to create a neural network that is capable of converting a list of audio samples in to the desired MFCC feature.
The second problem I am facing is that since the audio files I am sampling has different length, will the list with the audio sample also have different length, which would cause problem with the number of input I need to feed into to the neural network. I found this post on how to handle variable sequence length, and tried to incorporate into my implementation of a RNN, but seem to not be able to get a lot of errors for unexplainable reasons..
Could anyone see what is going wrong with my implementation?
Here is the code:
def length(sequence): ##Zero padding to fit the max lenght... Question whether that is a good idea.
used = tf.sign(tf.reduce_max(tf.abs(sequence), reduction_indices=2))
length = tf.reduce_sum(used, reduction_indices=1)
length = tf.cast(length, tf.int32)
return length
def cost(output, target):
# Compute cross entropy for each frame.
cross_entropy = target * tf.log(output)
cross_entropy = -tf.reduce_sum(cross_entropy, reduction_indices=2)
mask = tf.sign(tf.reduce_max(tf.abs(target), reduction_indices=2))
cross_entropy *= mask
# Average over actual sequence lengths.
cross_entropy = tf.reduce_sum(cross_entropy, reduction_indices=1)
cross_entropy /= tf.reduce_sum(mask, reduction_indices=1)
return tf.reduce_mean(cross_entropy)
def last_relevant(output):
max_length = int(output.get_shape()[1])
relevant = tf.reduce_sum(tf.mul(output, tf.expand_dims(tf.one_hot(length, max_length), -1)), 1)
return relevant
files_train_path = [dnn_train+f for f in listdir(dnn_train) if isfile(join(dnn_train, f))]
files_test_path = [dnn_test+f for f in listdir(dnn_test) if isfile(join(dnn_test, f))]
files_train_name = [f for f in listdir(dnn_train) if isfile(join(dnn_train, f))]
files_test_name = [f for f in listdir(dnn_test) if isfile(join(dnn_test, f))]
os.chdir(dnn_train)
train_name,train_data = generate_list_of_names_data(files_train_path)
train_data, train_names, train_output_data, train_class_output = load_sound_files(files_train_path,train_name,train_data)
max_length = 0 ## Used for variable sequence input
for element in train_data:
if element.size > max_length:
max_length = element.size
NUM_EXAMPLES = len(train_data)/2
test_data = train_data[NUM_EXAMPLES:]
test_output = train_output_data[NUM_EXAMPLES:]
train_data = train_data[:NUM_EXAMPLES]
train_output = train_output_data[:NUM_EXAMPLES]
print("--- %s seconds ---" % (time.time() - start_time))
#----------------------------------------------------------------------#
#----------------------------Main--------------------------------------#
### Tensorflow neural network setup
batch_size = None
sequence_length_max = max_length
input_dimension=1
data = tf.placeholder(tf.float32,[batch_size,sequence_length_max,input_dimension])
target = tf.placeholder(tf.float32,[None,14])
num_hidden = 24 ## Hidden layer
cell = tf.nn.rnn_cell.LSTMCell(num_hidden,state_is_tuple=True) ## Long short term memory
output, state = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32,sequence_length = length(data)) ## Creates the Rnn skeleton
last = last_relevant(output)#tf.gather(val, int(val.get_shape()[0]) - 1) ## Appedning as last
weight = tf.Variable(tf.truncated_normal([num_hidden, int(target.get_shape()[1])]))
bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))
prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
cross_entropy = cost(output,target)# How far am I from correct value?
optimizer = tf.train.AdamOptimizer() ## TensorflowOptimizer
minimize = optimizer.minimize(cross_entropy)
mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
error = tf.reduce_mean(tf.cast(mistakes, tf.float32))
## Training ##
init_op = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init_op)
batch_size = 1000
no_of_batches = int(len(train_data)/batch_size)
epoch = 5000
for i in range(epoch):
ptr = 0
for j in range(no_of_batches):
inp, out = train_data[ptr:ptr+batch_size], train_output[ptr:ptr+batch_size]
ptr+=batch_size
sess.run(minimize,{data: inp, target: out})
print "Epoch - ",str(i)
incorrect = sess.run(error,{data: test_data, target: test_output})
print('Epoch {:2d} error {:3.1f}%'.format(i + 1, 100 * incorrect))
sess.close()
Error message:
Traceback (most recent call last):
File "tensorflow_test.py", line 177, in <module>
last = last_relevant(output)#tf.gather(val, int(val.get_shape()[0]) - 1) ## Appedning as last
File "tensorflow_test.py", line 132, in last_relevant
relevant = tf.reduce_sum(tf.mul(output, tf.expand_dims(tf.one_hot(length, max_length), -1)), 1)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/array_ops.py", line 2778, in one_hot
name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 1413, in _one_hot
axis=axis, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 454, in apply_op
as_ref=input_arg.is_ref)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 621, in convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/constant_op.py", line 180, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/constant_op.py", line 163, in constant
tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/tensor_util.py", line 421, in make_tensor_proto
tensor_proto.string_val.extend([compat.as_bytes(x) for x in proto_values])
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/util/compat.py", line 45, in as_bytes
(bytes_or_text,))
TypeError: Expected binary or unicode string, got <function length at 0x7f51a7a3ede8>
Edit:
Changing the tf.one_hot(lenght(output),max_length) gives me this error message:
Traceback (most recent call last):
File "tensorflow_test.py", line 184, in <module>
cross_entropy = cost(output,target)# How far am I from correct value?
File "tensorflow_test.py", line 121, in cost
cross_entropy = target * tf.log(output)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_ops.py", line 754, in binary_op_wrapper
return func(x, y, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_ops.py", line 903, in _mul_dispatch
return gen_math_ops.mul(x, y, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_math_ops.py", line 1427, in mul
result = _op_def_lib.apply_op("Mul", x=x, y=y, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2312, in create_op
set_shapes_for_outputs(ret)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1704, in set_shapes_for_outputs
shapes = shape_func(op)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_ops.py", line 1801, in _BroadcastShape
% (shape_x, shape_y))
ValueError: Incompatible shapes for broadcasting: (?, 14) and (?, 138915, 24)

tf.one_hot(length, ...)
here length is a function, not a tensor. Try length(something) instead.

Related

Buffer underrun and ResourceExhausted errors with tensorflow

I'm in high school and I'm trying to do a project involving neural networks. I am using Ubuntu and trying to do reinforcement learning with tensorflow, but I consistently get lots of underrun warnings when I train a neural network. They take the form of ALSA lib pcm.c:7963:(snd_pcm_recover) underrun occurred. This message is printed to the screen more and more frequently as training progresses. Eventually, I get a ResourceExhaustedError and the program terminates. Here is the full error message:
W tensorflow/core/framework/op_kernel.cc:975] Resource exhausted: OOM when allocating tensor with shape[320000,512]
Traceback (most recent call last):
File "./train.py", line 121, in <module>
loss, _ = model.train(minibatch, gamma, sess) # Train the model based on the batch, the discount factor, and the tensorflow session.
File "/home/perrin/neural/dqn.py", line 174, in train
return sess.run([self.loss, self.optimize], feed_dict=self.feed_dict) # Runs the training. This is where the underrun errors happen
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 766, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 964, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1014, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1034, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.ResourceExhaustedError: OOM when allocating tensor with shape[320000,512]
[[Node: gradients/fully_connected/MatMul_grad/MatMul_1 = MatMul[T=DT_FLOAT, transpose_a=true, transpose_b=false, _device="/job:localhost/replica:0/task:0/cpu:0"](dropout/mul, gradients/fully_connected/BiasAdd_grad/tuple/control_dependency)]]
Caused by op u'gradients/fully_connected/MatMul_grad/MatMul_1', defined at:
File "./train.py", line 72, in <module>
model = AC_Net([None, 201, 201, 3], 5, trainer) # This creates the neural network using the imported AC_Net class.
File "/home/perrin/neural/dqn.py", line 128, in __init__
self.optimize = trainer.minimize(self.loss) # This tells the trainer to adjust the weights in such a way as to minimize the loss. This is what actually
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 269, in minimize
grad_loss=grad_loss)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 335, in compute_gradients
colocate_gradients_with_ops=colocate_gradients_with_ops)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gradients_impl.py", line 482, in gradients
in_grads = grad_fn(op, *out_grads)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_grad.py", line 731, in _MatMulGrad
math_ops.matmul(op.inputs[0], grad, transpose_a=True))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_ops.py", line 1729, in matmul
a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_math_ops.py", line 1442, in _mat_mul
transpose_b=transpose_b, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 759, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2240, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1128, in __init__
self._traceback = _extract_stack()
...which was originally created as op u'fully_connected/MatMul', defined at:
File "./train.py", line 72, in <module>
model = AC_Net([None, 201, 201, 3], 5, trainer) # This creates the neural network using the imported AC_Net class.
File "/home/perrin/neural/dqn.py", line 63, in __init__
net = slim.fully_connected(net, 512, activation_fn=tf.nn.elu, scope='fully_connected') # Feeds the input through a fully connected layer
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 177, in func_with_args
return func(*args, **current_args)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1350, in fully_connected
outputs = standard_ops.matmul(inputs, weights)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_ops.py", line 1729, in matmul
a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_math_ops.py", line 1442, in _mat_mul
transpose_b=transpose_b, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 759, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2240, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1128, in __init__
self._traceback = _extract_stack()
ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[320000,512]
[[Node: gradients/fully_connected/MatMul_grad/MatMul_1 = MatMul[T=DT_FLOAT, transpose_a=true, transpose_b=false, _device="/job:localhost/replica:0/task:0/cpu:0"](dropout/mul, gradients/fully_connected/BiasAdd_grad/tuple/control_dependency)]]
I researched these problems but didn't get a clear idea of how I could fix them. I am pretty new to programming so I don't know much about how buffers and data reading/writing works. I am very perplexed by these errors. Does anyone know what parts of my code might be causing this and how to fix it? Thanks for taking the time to consider this question!
Here is my code for defining the neural network (based on this tutorial):
#! /usr/bin/python
import numpy as np
import tensorflow as tf
slim = tf.contrib.slim
# The neural network
class AC_Net:
# This defines the actual neural network.
# output_size: the number of outputs of the policy
# trainer: the tensorflow training optimizer used by the network
def __init__(self, input_shape, output_size, trainer):
with tf.name_scope('input'):
self.input = tf.placeholder(shape=list(input_shape), dtype=tf.float32, name='input')
net = tf.image.per_image_standardization(self.input[0])
net = tf.expand_dims(net, [0])
with tf.name_scope('convolution'):
net = slim.conv2d(net, 32, [8, 8], activation_fn=tf.nn.elu, scope='conv')
net = slim.max_pool2d(net, [2, 2], scope='pool')
net = slim.flatten(net)
net = tf.nn.dropout(net, .5)
net = slim.fully_connected(net, 512, activation_fn=tf.nn.elu, scope='fully_connected')
net = tf.nn.dropout(net, .5)
with tf.name_scope('LSTM'):
cell = tf.nn.rnn_cell.BasicLSTMCell(256, state_is_tuple=True, activation=tf.nn.elu)
with tf.name_scope('state_in'):
state_in = cell.zero_state(tf.shape(net)[0], tf.float32)
net = tf.expand_dims(net, [0])
step_size = tf.shape(self.input)[:1]
output, state = tf.nn.dynamic_rnn(cell, net, initial_state=state_in, sequence_length=step_size, time_major=False, scope='LSTM')
out = tf.reshape(output, [-1, 256])
out = tf.nn.dropout(out, .5)
self.policy = slim.fully_connected(out, output_size, activation_fn=tf.nn.softmax, scope='policy')
self.value = slim.fully_connected(out, 1, activation_fn=None, scope='value')
# Defines the loss functions
with tf.name_scope('loss_function'):
self.target_values = tf.placeholder(dtype=tf.float32, name='target_values') # The target value is the discounted reward.
self.actions = tf.placeholder(dtype=tf.int32, name='actions') # This is the network's policy.
# The advantage is the difference between what the network thought the value of an action was, and what it actually was.
# It is computed as R - V(s), where R is the discounted reward and V(s) is the value of being in state s.
self.advantages = tf.placeholder(dtype=tf.float32, name='advantages')
with tf.name_scope('entropy'):
entropy = -tf.reduce_sum(tf.log(self.policy + 1e-10) * self.policy)
with tf.name_scope('responsible_actions'):
actions_onehot = tf.one_hot(self.actions, output_size, dtype=tf.float32)
responsible_actions = tf.reduce_sum(self.policy * actions_onehot, [1]) # This returns only the actions that were selected.
with tf.name_scope('loss'):
with tf.name_scope('value_loss'):
self.value_loss = tf.reduce_sum(tf.square(self.target_values - tf.reshape(self.value, [-1])))
with tf.name_scope('policy_loss'):
self.policy_loss = -tf.reduce_sum(tf.log(responsible_actions + 1e-10) * self.advantages)
with tf.name_scope('total_loss'):
self.loss = self.value_loss + self.policy_loss - entropy * .01
tf.summary.scalar('loss', self.loss)
with tf.name_scope('gradient_clipping'):
tvars = tf.trainable_variables()
grads = tf.gradients(self.loss, tvars)
grads, _ = tf.clip_by_global_norm(grads, 20.)
self.optimize = trainer.apply_gradients(zip(grads, tvars))
def predict(self, inputs, sess):
return sess.run([self.policy, self.value], feed_dict={self.input:inputs})
def train(self, train_batch, gamma, sess):
inputs = train_batch[:, 0]
actions = train_batch[:, 1]
rewards = train_batch[:, 2]
values = train_batch[:, 4]
discounted_rewards = rewards[::-1]
for i, j in enumerate(discounted_rewards):
if i > 0:
discounted_rewards[i] += discounted_rewards[i - 1] * gamma
discounted_rewards = np.array(discounted_rewards, np.float32)[::-1]
advantages = discounted_rewards - values
self.feed_dict = {
self.input:np.vstack(inputs),
self.target_values:discounted_rewards,
self.actions:actions,
self.advantages:advantages
}
return sess.run([self.loss, self.optimize], feed_dict=self.feed_dict)
Here is my code for training the neural network:
#! /usr/bin/python
import game_env, move_right, move_right_with_obs, random, inspect, os
import tensorflow as tf
import numpy as np
from dqn import AC_Net
def process_outputs(x):
a = [int(x > 2), int(x%2 == 0 and x > 0)*2-int(x > 0)]
return a
environment = game_env # The environment to use
env_name = str(inspect.getmodule(environment).__name__) # The name of the environment
ep_length = 2000
num_episodes = 20
total_steps = ep_length * num_episodes # The total number of steps
model_path = '/home/perrin/neural/nn/' + env_name
learning_rate = 1e-4 # The learning rate
trainer = tf.train.AdamOptimizer(learning_rate=learning_rate) # The gradient descent optimizer used
first_epsilon = 0.6 # The initial chance of random action
final_epsilon = 0.01 # The final chance of random action
gamma = 0.9
anneal_steps = 35000 # The number of steps it takes to go from initial to random
count = 0 # Keeps track of the number of steps we've run
experience_buffer = [] # Stores the agent's experiences in a list
buffer_size = 10000 # How large the experience buffer can be
train_step = 256 # How often to train the model
batches_per_train = 10
save_step = 500 # How often to save the trained model
batch_size = 256 # How many experiences to train on at once
env_size = 500 # How many pixels tall and wide the environment should be.
load_model = True # Whether or not to load a pretrained model
train = True # Whether or not to train the model
test = False # Whether or not to test the model
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = AC_Net([None, 201, 201, 3], 5, trainer)
env = environment.Env(env_size)
action = [0, 0]
state, _ = env.step(True, action)
saver = tf.train.Saver() # This saves the model
epsilon = first_epsilon
tf.global_variables_initializer().run()
if load_model:
ckpt = tf.train.get_checkpoint_state(model_path)
saver.restore(sess, ckpt.model_checkpoint_path)
print 'Model loaded'
prev_out = None
while count <= total_steps and train:
if random.random() < epsilon or count == 0:
if prev_out is not None:
out = prev_out
if random.randint(0, 100) == 100 or prev_out is None:
out = np.random.rand(5)
out = np.array([val/np.sum(out) for val in out])
_, value = model.predict(state, sess)
prev_out = out
else:
out, value = model.predict(state, sess)
out = out[0]
act = np.random.choice(out, p=out)
act = np.argmax(out == act)
act1 = process_outputs(act)
action[act1[0]] = act1[1]
_, reward = env.step(True, action)
new_state = env.get_state()
experience_buffer.append((state, act, reward, new_state, value[0, 0]))
state = new_state
if len(experience_buffer) > buffer_size:
experience_buffer.pop(0)
if count % train_step == 0 and count > 0:
print "Training model"
for i in range(batches_per_train):
# Get a random sample of experiences and train the model based on it.
x = random.randint(0, len(experience_buffer)-batch_size)
minibatch = np.array(experience_buffer[x:x+batch_size])
loss, _ = model.train(minibatch, gamma, sess)
print "Loss for batch", str(i+1) + ":", loss
if count % save_step == 0 and count > 0:
saver.save(sess, model_path+'/model-'+str(count)+'.ckpt')
print "Model saved"
if count % ep_length == 0 and count > 0:
print "Starting new episode"
env = environment.Env(env_size)
if epsilon > final_epsilon:
epsilon -= (first_epsilon - final_epsilon)/anneal_steps
count += 1
while count <= total_steps and test:
out, _ = model.predict(state, sess)
out = out[0]
act = np.random.choice(out, p=out)
act = np.argmax(out == act)
act1 = process_outputs(act)
action[act1[0]] = act1[1]
state, reward = env.step(True, action)
new_state = env.get_state()
count += 1
# Write log files to create tensorboard visualizations
merged = tf.summary.merge_all()
writer = tf.summary.FileWriter('/home/perrin/neural/summaries', sess.graph)
if train:
summary = sess.run(merged, feed_dict=model.feed_dict)
writer.add_summary(summary)
writer.flush()
You are running out of memory. It's possible that your network requires more memory than you have to run, so the first step to tracking down excessive memory usage is to figure out what is using so much memory.
Here's one approach that uses timeline and statssummarizer:
https://gist.github.com/yaroslavvb/08afccbe087171881ceafc0c98abca05
This will print out several tables, one of the tables is the tensors sorted by top memory usage. You should check that you don't have something unusually large in there.
You can also see memory timeline using Chrome visualizer, as detailed here
A more advanced technique is to plot a timeline of memory allocations/deallocations, as done in this issue
Theoretically your memory usage shouldn't grow between steps if you aren't creating new stateful ops (Variables), but I found that global memory allocation can grow if sizes of your tensors change between steps.
A work-around is to periodically save your parameters to checkpoint and restart your script.

Why is my output in such a high dimension?

I am currently trying to make a RNN network for regression purposes capable of taking in an arbitraty number of samples and output a 14 length feature vector using tensorflow.
The network isn't running properly at the moment, for which I am trying to debug the issue.. Here is the code:
def length(sequence): ##Zero padding to fit the max lenght... Question whether that is a good idea.
used = tf.sign(tf.reduce_max(tf.abs(sequence), reduction_indices=2))
length = tf.reduce_sum(used, reduction_indices=1)
length = tf.cast(length, tf.int32)
return length
def cost(output, target):
# Compute cross entropy for each frame.
print output
cross_entropy = target * tf.log(output)
print "Hello world"
cross_entropy = -tf.reduce_sum(cross_entropy, reduction_indices=2)
mask = tf.sign(tf.reduce_max(tf.abs(target), reduction_indices=2))
cross_entropy *= mask
# Average over actual sequence lengths.
cross_entropy = tf.reduce_sum(cross_entropy, reduction_indices=1)
cross_entropy /= tf.reduce_sum(mask, reduction_indices=1)
return tf.reduce_mean(cross_entropy)
def last_relevant(output):
max_length = int(output.get_shape()[1])
relevant = tf.reduce_sum(tf.mul(output, tf.expand_dims(tf.one_hot(length(output), max_length), -1)), 1)
return relevant
files_train_path = [dnn_train+f for f in listdir(dnn_train) if isfile(join(dnn_train, f))]
files_test_path = [dnn_test+f for f in listdir(dnn_test) if isfile(join(dnn_test, f))]
files_train_name = [f for f in listdir(dnn_train) if isfile(join(dnn_train, f))]
files_test_name = [f for f in listdir(dnn_test) if isfile(join(dnn_test, f))]
os.chdir(dnn_train)
train_name,train_data = generate_list_of_names_data(files_train_path)
train_data, train_names, train_output_data, train_class_output = load_sound_files(files_train_path,train_name,train_data)
max_length = 0 ## Used for variable sequence input
for element in train_data:
if element.size > max_length:
max_length = element.size
NUM_EXAMPLES = len(train_data)/2
test_data = train_data[NUM_EXAMPLES:]
test_output = train_output_data[NUM_EXAMPLES:]
train_data = train_data[:NUM_EXAMPLES]
train_output = train_output_data[:NUM_EXAMPLES]
print("--- %s seconds ---" % (time.time() - start_time))
#----------------------------------------------------------------------#
#----------------------------Main--------------------------------------#
### Tensorflow neural network setup
batch_size = None
sequence_length_max = max_length
input_dimension=1
data = tf.placeholder(tf.float32,[batch_size,sequence_length_max,input_dimension])
target = tf.placeholder(tf.float32,[None,14])
num_hidden = 24 ## Hidden layer
cell = tf.nn.rnn_cell.LSTMCell(num_hidden,state_is_tuple=True) ## Long short term memory
output, state = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32,sequence_length = length(data)) ## Creates the Rnn skeleton
last = last_relevant(output)#tf.gather(val, int(val.get_shape()[0]) - 1) ## Appedning as last
weight = tf.Variable(tf.truncated_normal([num_hidden, int(target.get_shape()[1])]))
bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))
prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
cross_entropy = cost(output,target)# How far am I from correct value?
optimizer = tf.train.AdamOptimizer() ## TensorflowOptimizer
minimize = optimizer.minimize(cross_entropy)
mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
error = tf.reduce_mean(tf.cast(mistakes, tf.float32))
## Training ##
init_op = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init_op)
batch_size = 1000
no_of_batches = int(len(train_data)/batch_size)
epoch = 5000
for i in range(epoch):
ptr = 0
for j in range(no_of_batches):
inp, out = train_data[ptr:ptr+batch_size], train_output[ptr:ptr+batch_size]
ptr+=batch_size
sess.run(minimize,{data: inp, target: out})
print "Epoch - ",str(i)
incorrect = sess.run(error,{data: test_data, target: test_output})
print('Epoch {:2d} error {:3.1f}%'.format(i + 1, 100 * incorrect))
sess.close()
The code doesn't fully execute due to an error in the cross_entropy function.
Tensor("RNN/transpose:0", shape=(?, 138915, 24), dtype=float32)
Traceback (most recent call last):
File "tensorflow_test.py", line 186, in <module>
cross_entropy = cost(output,target)# How far am I from correct value?
File "tensorflow_test.py", line 122, in cost
cross_entropy = target * tf.log(output)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_ops.py", line 754, in binary_op_wrapper
return func(x, y, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_ops.py", line 903, in _mul_dispatch
return gen_math_ops.mul(x, y, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_math_ops.py", line 1427, in mul
result = _op_def_lib.apply_op("Mul", x=x, y=y, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2312, in create_op
set_shapes_for_outputs(ret)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1704, in set_shapes_for_outputs
shapes = shape_func(op)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_ops.py", line 1801, in _BroadcastShape
% (shape_x, shape_y))
ValueError: Incompatible shapes for broadcasting: (?, 14) and (?, 138915, 24)
It seem to me that the output I am receiving from the RNN has a quite high dimensionality. I was only expecting a vector with 14 elements so a 1 dimensional vector. But somehow am I ending up with quite a large dimensionality? Why? I guess something in my setup of the neural network must be incorrect.
Output of dynamic_rnn is of shape [batch_size, num_steps, dim_hidden]. In your case, number of timesteps in the RNN is apparently 138915.

tensorflow.python.framework.errors.OutOfRangeError:

Hi I am trying to run a conv. neural network addapted from MINST2 tutorial in tensorflow.
I am having the following error, but i am not sure what is going on:
W tensorflow/core/framework/op_kernel.cc:909] Invalid argument: Shape mismatch in tuple component 0. Expected [784], got [6272]
W tensorflow/core/framework/op_kernel.cc:909] Invalid argument: Shape mismatch in tuple component 0. Expected [784], got [6272]
Traceback (most recent call last):
File "4_Treino_Rede_Neural.py", line 161, in <module>
train_accuracy = accuracy.eval(feed_dict={keep_prob: 1.0})
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 555, in eval
return _eval_using_default_session(self, feed_dict, self.graph, session)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 3498, in _eval_using_default_session
return session.run(tensors, feed_dict)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 372, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 636, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 708, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 728, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.OutOfRangeError: RandomShuffleQueue '_0_input/shuffle_batch/random_shuffle_queue' is closed and has insufficient elements (requested 100, current size 0)
[[Node: input/shuffle_batch = QueueDequeueMany[_class=["loc:#input/shuffle_batch/random_shuffle_queue"], component_types=[DT_FLOAT, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](input/shuffle_batch/random_shuffle_queue, input/shuffle_batch/n)]]
Caused by op u'input/shuffle_batch', defined at:
File "4_Treino_Rede_Neural.py", line 113, in <module>
x, y_ = inputs(train=True, batch_size=FLAGS.batch_size, num_epochs=FLAGS.num_epochs)
File "4_Treino_Rede_Neural.py", line 93, in inputs
min_after_dequeue=1000)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/input.py", line 779, in shuffle_batch
dequeued = queue.dequeue_many(batch_size, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/data_flow_ops.py", line 400, in dequeue_many
self._queue_ref, n=n, component_types=self._dtypes, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 465, in _queue_dequeue_many
timeout_ms=timeout_ms, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/op_def_library.py", line 704, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2260, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1230, in __init__
self._traceback = _extract_stack()
My program is:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os.path
import time
import numpy as np
import tensorflow as tf
# Basic model parameters as external flags.
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_integer('num_epochs', 2, 'Number of epochs to run trainer.')
flags.DEFINE_integer('batch_size', 100, 'Batch size.')
flags.DEFINE_string('train_dir', '/root/data', 'Directory with the training data.')
#flags.DEFINE_string('train_dir', '/root/data2', 'Directory with the training data.')
# Constants used for dealing with the files, matches convert_to_records.
TRAIN_FILE = 'train.tfrecords'
VALIDATION_FILE = 'validation.tfrecords'
# Set-up dos pacotes
sess = tf.InteractiveSession()
def read_and_decode(filename_queue):
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
serialized_example,
# Defaults are not specified since both keys are required.
features={
'image_raw': tf.FixedLenFeature([], tf.string),
'label': tf.FixedLenFeature([], tf.int64),
})
# Convert from a scalar string tensor (whose single string has
# length mnist.IMAGE_PIXELS) to a uint8 tensor with shape
# [mnist.IMAGE_PIXELS].
image = tf.decode_raw(features['image_raw'], tf.uint8)
image.set_shape([784])
# OPTIONAL: Could reshape into a 28x28 image and apply distortions
# here. Since we are not applying any distortions in this
# example, and the next step expects the image to be flattened
# into a vector, we don't bother.
# Convert from [0, 255] -> [-0.5, 0.5] floats.
image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
# Convert label from a scalar uint8 tensor to an int32 scalar.
label = tf.cast(features['label'], tf.int32)
return image, label
def inputs(train, batch_size, num_epochs):
"""Reads input data num_epochs times.
Args:
train: Selects between the training (True) and validation (False) data.
batch_size: Number of examples per returned batch.
num_epochs: Number of times to read the input data, or 0/None to
train forever.
Returns:
A tuple (images, labels), where:
* images is a float tensor with shape [batch_size, 30,26,1]
in the range [-0.5, 0.5].
* labels is an int32 tensor with shape [batch_size] with the true label,
a number in the range [0, char letras).
Note that an tf.train.QueueRunner is added to the graph, which
must be run using e.g. tf.train.start_queue_runners().
"""
if not num_epochs: num_epochs = None
filename = os.path.join(FLAGS.train_dir,
TRAIN_FILE if train else VALIDATION_FILE)
with tf.name_scope('input'):
filename_queue = tf.train.string_input_producer(
[filename], num_epochs=num_epochs)
# Even when reading in multiple threads, share the filename
# queue.
image, label = read_and_decode(filename_queue)
# Shuffle the examples and collect them into batch_size batches.
# (Internally uses a RandomShuffleQueue.)
# We run this in two threads to avoid being a bottleneck.
images, sparse_labels = tf.train.shuffle_batch(
[image, label], batch_size=batch_size, num_threads=2,
capacity=1000 + 3 * batch_size,
# Ensures a minimum amount of shuffling of examples.
min_after_dequeue=1000)
return images, sparse_labels
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
#Variaveis
x, y_ = inputs(train=True, batch_size=FLAGS.batch_size, num_epochs=FLAGS.num_epochs)
#onehot_y_ = tf.one_hot(y_, 36, dtype=tf.float32)
#y_ = tf.string_to_number(y_, out_type=tf.int32)
#Layer 1
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
x_image = tf.reshape(x, [-1,28,28,1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
#Layer 2
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
#Densely Connected Layer
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
#Dropout - reduz overfitting
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
#Readout layer
W_fc2 = weight_variable([1024, 36])
b_fc2 = bias_variable([36])
#y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
#Train and evaluate
#cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))
#cross_entropy = tf.reduce_mean(-tf.reduce_sum(onehot_y_ * tf.log(y_conv), reduction_indices=[1]))
cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y_conv, y_))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
sess.run(tf.initialize_all_variables())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
for i in range(20000):
if i%100 == 0:
train_accuracy = accuracy.eval(feed_dict={keep_prob: 1.0})
print("step %d, training accuracy %g"%(i, train_accuracy))
train_step.run(feed_dict={keep_prob: 0.5})
x, y_ = inputs(train=True, batch_size=2000)
#y_ = tf.string_to_number(y_, out_type=tf.int32)
print("test accuracy %g"%accuracy.eval(feed_dict={keep_prob: 1.0}))
coord.join(threads)
sess.close()
Can anyone explain me whats going on? And how to fix it?
Thanks!
Marcelo V
I had similar problems in the past, and it was due to that I was storing and reading the data in incorrect data types. For example, I had casted the data first as type float when converting original png data to tfrecords. Then when I read the data out from tfrecords, I once again casted it as float (assuming the data coming out was uint8. Hence I had mismatch of 3136 (784*4) when expected 784. I'm guessing that may also be the case for you here.
In the line:
filename_queue = tf.train.string_input_producer([filename], num_epochs=num_epochs)
You specify the number of epochs the queue will run through the filenames. The documentation explains it well:
num_epochs: An integer (optional). If specified, string_input_producer produces each string from num_epochs times before generating an OutOfRange error. If not specified, string_input_producer can cycle through the strings in string_tensor an unlimited number of times.
In flags.DEFINE_integer('num_epochs', 2, 'Number of epochs to run trainer.'), you specify a default number of epochs 2. You should either increase that, or remove the num_epochs argument in string_input_producer.

Tensorflow concat/split issue in recurrent network example

Consider the following example code:
import tensorflow as tf
import math
import numpy as np
INPUTS = 10
HIDDEN_1 = 20
BATCH_SIZE = 3
def iterate_state(prev_state_tuple, input):
with tf.name_scope('h1'):
weights = tf.get_variable('W', shape=[INPUTS, HIDDEN_1], initializer=tf.truncated_normal_initializer(stddev=1.0 / math.sqrt(float(INPUTS))))
biases = tf.get_variable('bias', shape=[HIDDEN_1], initializer=tf.constant_initializer(0.0))
matmuladd = tf.matmul(inputs, weights) + biases
print("prev state: ",prev_state_tuple.get_shape())
unpacked_state, unpacked_out = tf.split(0,2,prev_state_tuple)
prev_state = unpacked_state
state = 0.9* prev_state + 0.1*matmuladd
output = tf.nn.relu(state)
print(" state: ", state.get_shape())
print(" output: ", output.get_shape())
concat_result = tf.concat(0,[state, output])
print (" concat return: ", concat_result.get_shape())
return concat_result
def data_iter():
while True:
idxs = np.random.rand(BATCH_SIZE, INPUTS)
yield idxs
with tf.Graph().as_default():
inputs = tf.placeholder(tf.float32, shape=(BATCH_SIZE, INPUTS))
with tf.variable_scope('states'):
initial_state = tf.zeros([HIDDEN_1],
name='initial_state')
initial_out = tf.zeros([HIDDEN_1],
name='initial_out')
concat_tensor = tf.concat(0,[initial_state, initial_out])
print(" init state: ",initial_state.get_shape())
print(" init out: ",initial_out.get_shape())
print(" concat: ",concat_tensor.get_shape())
scanout = tf.scan(iterate_state, inputs, initializer=concat_tensor, name='state_scan')
print ("scanout shape: ", scanout.get_shape())
state, output = tf.split(0,2,scanout, name='split_scan_output')
sess = tf.Session()
# Run the Op to initialize the variables.
sess.run(tf.initialize_all_variables())
iter_ = data_iter()
for i in xrange(0, 2):
print ("iteration: ",i)
input_data = iter_.next()
out,st = sess.run([output,state], feed_dict={ inputs: input_data})
I am trying to concatenate and split the internal state and output tensors together so that it can conform to the tf.scan interface.
However, when running this example, I get this error:
(' init state: ', TensorShape([Dimension(20)]))
(' init out: ', TensorShape([Dimension(20)]))
(' concat: ', TensorShape([Dimension(40)]))
('prev state: ', TensorShape([Dimension(40)]))
(' state: ', TensorShape([Dimension(3), Dimension(20)]))
(' output: ', TensorShape([Dimension(3), Dimension(20)]))
(' concat return: ', TensorShape([Dimension(6), Dimension(20)]))
('scanout shape: ', TensorShape(None))
('iteration: ', 0)
Traceback (most recent call last):
File "cycles_in_graphs_with_scan.py", line 57, in <module>
out,st = sess.run([output,state], feed_dict={ inputs: input_data})
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 340, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 564, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 637, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 659, in _do_call
e.code)
tensorflow.python.framework.errors.InvalidArgumentError: Number of ways to split should evenly divide the split dimension, but got split_dim 0 (size = 3) and num_split 2
[[Node: states/split_scan_output = Split[T=DT_FLOAT, num_split=2, _device="/job:localhost/replica:0/task:0/cpu:0"](states/split_scan_output/split_dim, states/state_scan/TensorArrayPack)]]
Caused by op u'states/split_scan_output', defined at:
File "cycles_in_graphs_with_scan.py", line 46, in <module>
state, output = tf.split(0,2,scanout, name='split_scan_output')
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/array_ops.py", line 525, in split
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 1428, in _split
num_split=num_split, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/op_def_library.py", line 655, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2154, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1154, in __init__
self._traceback = _extract_stack()
while the return tensor has clearly dimensions (6,20) as shown, the return from tf.scan seems to have a shape of None, while the error says that it is finding an object of length 3
Any idea what might be causing this error?
It looks like the tf.scan() function is unable to infer a static shape for the output, and thus you're getting a runtime failure when you try to split scanout into 2 tensors on the 0th dimension.
In cases like this, the best thing to do is to evaluate scanout to see what its actual shape is:
sess = tf.Session()
sess.run(tf.initialize_all_variables())
iter_ = data_iter()
input_data = iter_.next()
scanout_val = sess.run(scanout, feed_dict={inputs: input_data})
print("Actual shape of scanout:", scanout_val.shape)
From the error message it looks like it has size 3 in the 0th dimension, which I suspect comes from the batch size, because the 0th dimension of tf.scan()'s input and output will have the same size. One possibility is that you actually want to split on the 1st dimension:
state, output = tf.split(1, 2, scanout, name='split_scan_output')

python - Error with Mariana/Theano neural network

I am facing a problem when I start my trainer and I can't figure out the cause.
My input data is of dimension 42 and my output should be one value out of 4.
This is the shape of my training and test set:
Training set:
input = (1152, 42) target = (1152,)
Training set: input = (1152, 42) target = (1152,)
Test set: input = (384, 42) target = (384,)
This is the construction of my network:
ls = MS.GradientDescent(lr=0.01)
cost = MC.CrossEntropy()
i = ML.Input(42, name='inp')
h = ML.Hidden(23, activation=MA.Sigmoid(), initializations=[MI.GlorotTanhInit()], name="hid")
o = ML.SoftmaxClassifier(4, learningScenario=ls, costObject=cost, name="out")
mlp = i > h > o
And this is the construction of the datasets, trainers and recorders:
trainData = MDM.RandomSeries(distances = train_set[0], next_state = train_set[1])
trainMaps = MDM.DatasetMapper()
trainMaps.mapInput(i, trainData.distances)
trainMaps.mapOutput(o, trainData.next_state)
testData = MDM.RandomSeries(distances = test_set[0], next_state = test_set[1])
testMaps = MDM.DatasetMapper()
testMaps.mapInput(i, testData.distances)
testMaps.mapOutput(o, testData.next_state)
earlyStop = MSTOP.GeometricEarlyStopping(testMaps, patience=100, patienceIncreaseFactor=1.1, significantImprovement=0.00001, outputFunction="score", outputLayer=o)
epochWall = MSTOP.EpochWall(1000)
trainer = MT.DefaultTrainer(
trainMaps=trainMaps,
testMaps=testMaps,
validationMaps=None,
stopCriteria=[earlyStop, epochWall],
testFunctionName="testAndAccuracy",
trainMiniBatchSize=MT.DefaultTrainer.ALL_SET,
saveIfMurdered=False
)
recorder = MREC.GGPlot2("MLP", whenToSave = [MREC.SaveMin("test", o.name, "score")], printRate=1, writeRate=1)
trainer.start("MLP", mlp, recorder = recorder)
But the following error is being produced:
Traceback (most recent call last):
File "nn-mariana.py", line 82, in <module>
trainer.start("MLP", mlp, recorder = recorder)
File "SUPRESSED/Mariana/Mariana/training/trainers.py", line 226, in start
Trainer_ABC.start( self, runName, model, recorder, trainingOrder, moreHyperParameters )
File "SUPRESSED/Mariana/Mariana/training/trainers.py", line 110, in start
return self.run(runName, model, recorder, *args, **kwargs)
File "SUPRESSED/Mariana/Mariana/training/trainers.py", line 410, in run
outputLayers
File "SUPRESSED/Mariana/Mariana/training/trainers.py", line 269, in _trainTest
res = modelFct(output, **kwargs)
File "SUPRESSED/Mariana/Mariana/network.py", line 47, in __call__
return self.callTheanoFct(outputLayer, **kwargs)
File "SUPRESSED/Mariana/Mariana/network.py", line 44, in callTheanoFct
return self.outputFcts[ol](**kwargs)
File "SUPRESSED/Mariana/Mariana/wrappers.py", line 110, in __call__
return self.run(**kwargs)
File "SUPRESSED/Mariana/Mariana/wrappers.py", line 102, in run
fres = iter(self.theano_fct(*self.fctInputs.values()))
File "SUPRESSED/Theano/theano/compile/function_module.py", line 871, in __call__
storage_map=getattr(self.fn, 'storage_map', None))
File "SUPRESSED/Theano/theano/gof/link.py", line 314, in raise_with_op
reraise(exc_type, exc_value, exc_trace)
File "SUPRESSED/Theano/theano/compile/function_module.py", line 859, in __call__
outputs = self.fn()
ValueError: Input dimension mis-match. (input[0].shape[1] = 1152, input[1].shape[1] = 4)
Apply node that caused the error: Elemwise{Composite{((i0 * i1) + (i2 * log(i3)))}}[(0, 1)](InplaceDimShuffle{x,0}.0, LogSoftmax.0, Elemwise{sub,no_inplace}.0, Elemwise{sub,no_inplace}.0)
Toposort index: 18
Inputs types: [TensorType(int32, row), TensorType(float64, matrix), TensorType(int32, row), TensorType(float64, matrix)]
Inputs shapes: [(1, 1152), (1152, 4), (1, 1152), (1152, 4)]
Inputs strides: [(4608, 4), (32, 8), (4608, 4), (32, 8)]
Inputs values: ['not shown', 'not shown', 'not shown', 'not shown']
Outputs clients: [[Sum{axis=[1], acc_dtype=float64}(Elemwise{Composite{((i0 * i1) + (i2 * log(i3)))}}[(0, 1)].0)]]
Versions:
Mariana (1.0.1rc1, /media/guilhermevrs/Data/Documentos/Academico/TCC-code/Mariana)
Theano (0.8.0.dev0, SUPRESSED/Theano)
This code was produced having as base the tutorial code from the mnist example.
Could you please help me to figure out what's going on?
Thank you in advance
I talked directly to the authors of Mariana and the cause and solution is explained in this issue