caffe2 - loss not working and accuracy crashes

I am ramping up on caffe2 and trying a toy problem:
# Read the data from the database
def read_input(model, batch_size, db, db_type):
    # load the data
    data, binary = model.TensorProtosDBInput(
        [], ['data', 'binary'], batch_size=batch_size,
        db=db, db_type=db_type)
    # don't backpropagate into the input blobs
    data = model.StopGradient(data, data)
    binary = model.StopGradient(binary, binary)
    return data, binary

def define_network(model, inp, b_classifier):
    fc1 = brew.fc(model, inp, 'fc1', dim_in=2, dim_out=10)
    fc1 = brew.relu(model, fc1, fc1)
    fc2 = brew.fc(model, fc1, 'fc2', dim_in=10, dim_out=20)
    fc2 = brew.relu(model, fc2, fc2)
    fc3 = brew.fc(model, fc2, 'fc3', dim_in=20, dim_out=10)
    fc3 = brew.relu(model, fc3, fc3)
    nDimOut = 1 + int(b_classifier)
    predict = brew.fc(model, fc3, 'predict', dim_in=10, dim_out=nDimOut)
    if b_classifier:
        softmax = brew.softmax(model, predict, 'softmax')
        return softmax
    return predict

def add_classifier_loos_and_acc(model, softmax, gt):
    crossEnt = model.LabelCrossEntropy([softmax, gt], 'crossEnt')
    loss = model.AveragedLoss(crossEnt, "loss")
    accuracy = brew.accuracy(model, [softmax, gt], "accuracy")
    return loss, accuracy

def add_training_ops(model, loss):
    model.AddGradientOperators([loss])
    optimizer.build_rms_prop(model, base_learning_rate=0.001)

batchSize = 128
# Create a training network
train_model = model_helper.ModelHelper(name="binary_train")
data, binary = read_input(train_model, batch_size=batchSize, db='Data/' + folder + '/train.minidb', db_type='minidb')
softmax = define_network(train_model, data, True)
loss, _ = add_classifier_loos_and_acc(train_model, softmax, binary)
add_training_ops(train_model, loss)
# Train and check.
workspace.RunNetOnce(train_model.param_init_net)
# creating the network
workspace.CreateNet(train_model.net, overwrite=True)
# Train once
workspace.RunNet(train_model.net)
testRes = workspace.FetchBlob('softmax')
gt = workspace.FetchBlob('binary')
crossEnt = workspace.FetchBlob('crossEnt')
avgCrossEnt = np.mean(crossEnt)
loss = workspace.FetchBlob('loss')
When I run the code and reach the workspace.RunNet(train_model.net) line, it crashes:
RuntimeError: [enforce fail at accuracy_op.cc:29] label.ndim() == 1. 2 vs 1 Error from operator:
input: "softmax" input: "binary" output: "accuracy" name: "" type: "Accuracy"
We've got an error while stopping in post-mortem: <type 'exceptions.KeyboardInterrupt'>
I have been trying to figure this out for about two days and have gotten nowhere.
Does anyone have an idea what I am doing wrong?
Thanks!

I found the problem.
The problem was that binary had shape (batchSize, 1), while the Accuracy operator expects it to have shape (batchSize,).
Adding FlattenToVec() solved the problem.
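For reference, a minimal sketch of where the flatten could go, assuming the blob names from the question (gt_vec is a name introduced here for illustration; the exact placement in the original net may differ):

def add_classifier_loos_and_acc(model, softmax, gt):
    # FlattenToVec reshapes the (batchSize, 1) label blob to (batchSize,)
    gt_vec = model.FlattenToVec(gt, 'gt_vec')
    crossEnt = model.LabelCrossEntropy([softmax, gt_vec], 'crossEnt')
    loss = model.AveragedLoss(crossEnt, "loss")
    accuracy = brew.accuracy(model, [softmax, gt_vec], "accuracy")
    return loss, accuracy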

Related

Is this method of calculating the top-5 accuracy in pytorch correct?

I am trying to validate the findings of a paper by testing it on the same model architecture and the same dataset reported by the paper. I have been using the imagenet script provided in the official pytorch repository's examples section to do so.
class AverageMeter(object):
    """Computes and stores the average and current value
    Imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262
    """
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k"""
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul(100.0 / batch_size))
    return res
top1 = AverageMeter()
top5 = AverageMeter()

# switch to evaluate mode
model.eval()

with torch.no_grad():
    for batch_idx, (inputs, targets) in enumerate(test_loader):
        # measure data loading time
        print(f"Processing {batch_idx+1}/{len(test_loader)}")
        inputs, targets = inputs.cuda(), targets.cuda()
        inputs, targets = torch.autograd.Variable(inputs, volatile=True), torch.autograd.Variable(targets)
        # compute output
        outputs = model(inputs)
        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        print(prec1, prec5)
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

print(top1)
print(top5)
However, the top-5 error I am getting from this script does not match the one reported in the paper. Can anyone tell me what is wrong in this particular snippet?
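As a side note, the top-k logic in accuracy() can be sanity-checked in isolation on a toy batch; the values below are made up (not data from the paper), and the accuracy function is assumed to be defined as above:

import torch

# two samples, three classes; both true classes get the highest score
output = torch.tensor([[0.1, 0.9, 0.0],
                       [0.8, 0.1, 0.1]])
target = torch.tensor([1, 0])

prec1, prec2 = accuracy(output, target, topk=(1, 2))
print(prec1.item(), prec2.item())  # expect 100.0 100.0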

Why do loss values increase after some epochs in sampled_softmax_loss

I'm using Tensorflow to train a word2vec skip gram model. The computation graph is in the code below:
# training data
self.dataset = tf.data.experimental.make_csv_dataset(file_name, batch_size=self.batch_size, column_names=['input', 'output'], header=False, num_epochs=self.epochs)
self.datum = self.dataset.make_one_shot_iterator().get_next()
self.inputs, self.labels = self.datum['input'], self.datum['output']
# embedding layer
self.embedding_g = tf.Variable(tf.random_uniform((self.n_vocab, self.n_embedding), -1, 1))
self.embed = tf.nn.embedding_lookup(self.embedding_g, self.inputs)
# softmax layer
self.softmax_w_g = tf.Variable(tf.truncated_normal((self.n_context, self.n_embedding)))
self.softmax_b_g = tf.Variable(tf.zeros(self.n_context))
# Calculate the loss using negative sampling
self.labels = tf.reshape(self.labels, [-1, 1])
self.loss = tf.nn.sampled_softmax_loss(
weights=self.softmax_w_g,
biases=self.softmax_b_g,
labels=self.labels,
inputs=self.embed,
num_sampled=self.n_sampled,
num_classes=self.n_context)
self.cost = tf.reduce_mean(self.loss)
self.optimizer = tf.train.AdamOptimizer().minimize(self.cost)
But after 25 epochs, loss values begin to increase. Is there any reason for this?

Google colab : RuntimeError: Expected object of backend CPU but got backend CUDA for argument #4 'mat1'

I am training my model on the MNIST dataset, using Google Colab for the GPU.
The device is cuda, yet I still get the error. I have tried other solutions and they do not work, even though the same code runs fine on my local PC. Does Colab require anything different?
I have previously trained on AWS and there was no issue with the code.
epoch = 22
steps = 0
print_every_step = 5
total_train_loss, total_test_loss = [], []

for e in range(epoch):
    train_loss = 0
    test_loss = 0
    accuracy = 0

    for images, labels in train_loader:
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        steps += 1
        images, labels = images.to(device), labels.to(device)
        # forward pass: compute predicted outputs by passing inputs to the model
        log_ps = model(images)
        loss = criterion(log_ps, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

        if steps % print_every_step == 0:
            model.eval()
            with torch.no_grad():
                for images, labels in test_loader:
                    images, labels = images.to(device), labels.to(device)
                    log_ps = model(images)
                    loss = criterion(log_ps, labels)
                    test_loss += loss.item()
                    # calculate accuracy
                    ps = torch.exp(log_ps)
                    top_p, top_class = ps.topk(1, dim=1)
                    equals = top_class == labels.view(*top_class.shape)
                    accuracy += torch.mean(equals.type(torch.FloatTensor)).item()
            print(f"Epoch {e+1}/{epoch}.. "
                  f"Train loss: {train_loss/print_every_step:.3f}.. "
                  f"Test loss: {test_loss/len(test_loader):.3f}.. "
                  f"Test accuracy: {accuracy/len(test_loader):.3f}")
            model.train()

    total_train_loss.append(train_loss/print_every_step)
    total_test_loss.append(test_loss/len(test_loader))
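For context, this particular RuntimeError usually means the model's parameters are still on the CPU while the input batch is on CUDA. A minimal hedged check, assuming model and device are defined earlier in the notebook as in the question:

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Moving the data alone is not enough: the model's weights must be moved
# too, otherwise mat1 (the input) and the layer weights live on different
# backends and the matmul inside the first linear layer fails.
model = model.to(device)
print(next(model.parameters()).device)  # should print cuda:0 before training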

tensorflow error: InvalidArgumentError: Shape mismatch in tuple component 1. Expected [1], got [5]

I am trying to construct batches of (wav_file, label) pairs.
The wav file labels and paths are listed in dev.csv.
The code below is not working:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import tensorflow as tf

FLAGS = tf.app.flags.FLAGS
threads = 1
batch_size = 5
global record_defaults
record_defaults = [['/Users/phoenix/workspace/dataset/data_thchs30/dev/A11_101.wav'], ['8.26'], ['七十 年代 末 我 外出 求学 母亲 叮咛 我 吃饭 要 细嚼慢咽 学习 要 深 钻 细 研']]

def read_record(filename_queue, num_records):
    reader = tf.TextLineReader()
    key, value = reader.read_up_to(filename_queue, num_records)
    wav_filename, duration, transcript = tf.decode_csv(value, record_defaults, field_delim=",")
    wav_reader = tf.WholeFileReader()
    wav_key, wav_value = wav_reader.read_up_to(tf.train.string_input_producer(wav_filename, shuffle=False, capacity=num_records), num_records)
    return [wav_key, transcript]  # throws errors
    # return [wav_key, wav_value]  # works
    # return [wav_filename, duration, transcript]  # works

data_queue = tf.train.string_input_producer(tf.train.match_filenames_once('dev.csv'), shuffle=False)
batch_data = [read_record(data_queue, batch_size) for _ in range(threads)]
capacity = threads * batch_size
batch_values = tf.train.batch_join(batch_data, batch_size=batch_size, capacity=capacity, enqueue_many=True)

init = tf.initialize_all_variables()
with tf.Session() as sess:
    sess.run(init)
    sess.run(tf.initialize_local_variables())
    coord = tf.train.Coordinator()
    print(coord)
    threads = tf.train.start_queue_runners(coord=coord)
    print("threads num: " + str(threads))
    try:
        step = 0
        while not coord.should_stop():
            step += 1
            feat = sess.run([batch_values])
            print("line:", step, feat)
    except tf.errors.OutOfRangeError:
        print(' training for 1 epochs, %d steps', step)
    finally:
        coord.request_stop()
        coord.join(threads)
It throws the error quoted in the title; how can I fix it?
The dev.csv content is as follows:
/Users/phoenix/workspace/dataset/data_thchs30/dev/A11_101.wav,8.26,qi shi nian dai mo wo wai chu qiu xue
/Users/phoenix/workspace/dataset/data_thchs30/dev/A11_119.wav,6.9,chen yun tong shi yao qiu gan bu men ren zhen xue xi
I tried to rewrite your code like this.
These are my observations.
The error is no longer thrown and values are returned.
An obvious discrepancy is that the batch of transcripts is double the size specified, so 4 instead of 2, likely because read_up_to already returns batch_size transcripts per queue element and batch_join (with enqueue_many=False) then stacks batch_size such elements. There is no such problem for the audio binary.
The shapes=[tf.TensorShape(()), tf.TensorShape(batch_size,)] argument is based on an error I saw which said I have to specify this using TensorShape. I didn't find the documentation much help, but it is mentioned there:
shapes: (Optional.) A list of fully-defined TensorShape objects with the same length as dtypes, or None.
import tensorflow as tf

tf.logging.set_verbosity(tf.logging.DEBUG)

FLAGS = tf.app.flags.FLAGS
threads = 1
batch_size = 2
record_defaults = [['D:/male.wav'], ['8.26'], ['七十 年代 末 我 外出 求学 母亲 叮咛 我 吃饭 要 细嚼慢咽 学习 要 深 钻 细 研']]

def readbatch(data_queue):
    reader = tf.TextLineReader()
    _, rows = reader.read_up_to(data_queue, batch_size)
    wav_filename, duration, transcript = tf.decode_csv(rows, record_defaults, field_delim=",")
    audioreader = tf.WholeFileReader()
    _, audio = audioreader.read(tf.train.string_input_producer(wav_filename))
    return [audio, transcript]

data_queue = tf.train.string_input_producer(
    tf.train.match_filenames_once('D:/Book1.csv'), shuffle=False)
batch_data = [readbatch(data_queue) for _ in range(threads)]
capacity = threads * batch_size
batch_values = tf.train.batch_join(batch_data, shapes=[tf.TensorShape(()), tf.TensorShape(batch_size,)], capacity=capacity, batch_size=batch_size, enqueue_many=False)

init = tf.initialize_all_variables()
with tf.Session() as sess:
    sess.run(init)
    sess.run(tf.initialize_local_variables())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    try:
        step = 0
        while not coord.should_stop():
            step += 1
            feat = sess.run([batch_values])
            audio = feat[0][0]
            print('Size of audio is ' + str(audio.size))
            script = feat[0][1]
            print('Size of script is ' + str(script.size))
    except tf.errors.OutOfRangeError:
        print(' training for 1 epochs, %d steps', step)
    finally:
        coord.request_stop()
        coord.join(threads)
A sample dataset proves that there is an extra pair.
[[array([b'Text2', b'Text1'], dtype=object), array([[b'Translation-1', b'Translation-2'],
[b'Translation-1', b'Translation-2']], dtype=object)]]

Joining of curve fitting models

I have these 7 quasi-Lorentzian curves fitted to my data, and I would like to join them to make one connected curve. Do you have any ideas how to do this? I've read about CompositeModel in the lmfit documentation, but it's not clear how to use it.
Here is a sample of my code with two fitted curves.
for dataset in [Bxfft]:
    dataset = np.asarray(dataset)
    freqs, psd = signal.welch(dataset, fs=266336/300, window='hamming', nperseg=16192, scaling='spectrum')
    plt.semilogy(freqs[0:-7000], psd[0:-7000]/dataset.size**0, color='r', label='Bx')

    x = freqs[100:-7900]
    y = psd[100:-7900]

    # 8 Hz
    model = Model(lorentzian)
    params = model.make_params(amp=6, cen=5, sig=1, e=0)
    result = model.fit(y, params, x=x)
    final_fit = result.best_fit
    print "8 Hz mode"
    print(result.fit_report(min_correl=0.25))
    plt.plot(x, final_fit, 'k-', linewidth=2)

    # 14 Hz
    x2 = freqs[220:-7780]
    y2 = psd[220:-7780]
    model2 = Model(lorentzian)
    pars2 = model2.make_params(amp=6, cen=10, sig=3, e=0)
    pars2['amp'].value = 6
    result2 = model2.fit(y2, pars2, x=x2)
    final_fit2 = result2.best_fit
    print "14 Hz mode"
    print(result2.fit_report(min_correl=0.25))
    plt.plot(x2, final_fit2, 'k-', linewidth=2)
UPDATE!!!
I've used some hints from user @MNewville, who posted an answer, and using his code I got this:
My code is similar to his, but extended with a model for each peak. What I'm struggling with now is replacing the ready-made LorentzianModel with my own.
The problem is that when I do this, the code gives me an error like this:
C:\Python27\lib\site-packages\lmfit\printfuncs.py:153: RuntimeWarning: invalid value encountered in double_scalars
  spercent = '({0:.2%})'.format(abs(par.stderr/par.value))
About my own model:
def lorentzian(x, amp, cen, sig, e):
    return (amp*(1-e)) / ((pow((1.0 * x - cen), 2)) + (pow(sig, 2)))

peak1 = Model(lorentzian, prefix='p1_')
peak2 = Model(lorentzian, prefix='p2_')
peak3 = Model(lorentzian, prefix='p3_')

# make composite by adding (or multiplying, etc) components
model = peak1 + peak2 + peak3

# make parameters for the full model, setting initial values
# using the prefixes
params = model.make_params(p1_amp=6, p1_cen=8, p1_sig=1, p1_e=0,
                           p2_amp=16, p2_cen=14, p2_sig=3, p2_e=0,
                           p3_amp=16, p3_cen=21, p3_sig=3, p3_e=0)

The rest of the code is similar to @MNewville's answer.
A composite model for 3 Lorentzians would look like this:
from lmfit.models import LorentzianModel

peak1 = LorentzianModel(prefix='p1_')
peak2 = LorentzianModel(prefix='p2_')
peak3 = LorentzianModel(prefix='p3_')

# make composite by adding (or multiplying, etc) components
model = peak1 + peak2 + peak3

# make parameters for the full model, setting initial values
# using the prefixes
params = model.make_params(p1_amplitude=10, p1_center=8, p1_sigma=3,
                           p2_amplitude=10, p2_center=15, p2_sigma=3,
                           p3_amplitude=10, p3_center=20, p3_sigma=3)
# perhaps set bounds to prevent peaks from swapping or crazy values
params['p1_amplitude'].min = 0
params['p2_amplitude'].min = 0
params['p3_amplitude'].min = 0
params['p1_sigma'].min = 0
params['p2_sigma'].min = 0
params['p3_sigma'].min = 0
params['p1_center'].min = 2
params['p1_center'].max = 11
params['p2_center'].min = 10
params['p2_center'].max = 18
params['p3_center'].min = 17
params['p3_center'].max = 25
# then do a fit over the full data range
result = model.fit(y, params, x=x)
I think the key parts you were missing were: a) just add models together, and b) use prefix to avoid name collisions of parameters.
I hope that is enough to get you started...
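To connect this back to the custom model from the update: the same composite pattern works with the user-defined lorentzian. A minimal sketch, assuming x and y are the frequency and PSD arrays from the question, with parameter names following the custom function's signature:

from lmfit import Model

def lorentzian(x, amp, cen, sig, e):
    return (amp * (1 - e)) / (pow(1.0 * x - cen, 2) + pow(sig, 2))

peak1 = Model(lorentzian, prefix='p1_')
peak2 = Model(lorentzian, prefix='p2_')
peak3 = Model(lorentzian, prefix='p3_')
model = peak1 + peak2 + peak3

params = model.make_params(p1_amp=6, p1_cen=8, p1_sig=1, p1_e=0,
                           p2_amp=16, p2_cen=14, p2_sig=3, p2_e=0,
                           p3_amp=16, p3_cen=21, p3_sig=3, p3_e=0)

# bounding sig away from zero helps avoid degenerate peaks, which can
# lead to the stderr/value RuntimeWarning shown in the update
for name in ('p1_sig', 'p2_sig', 'p3_sig'):
    params[name].min = 1e-6

result = model.fit(y, params, x=x)
print(result.fit_report(min_correl=0.25))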