Google colab : RuntimeError: Expected object of backend CPU but got backend CUDA for argument #4 'mat1' - computer-vision

I am training my model on the MNIST dataset, using Google Colab for its GPU.
The device is cuda, but I am still getting this error. I have tried other solutions and they do not work, while the same code works fine on my local PC. Does Colab require anything different?
I have previously trained on AWS and there was no issue with the code.
# Train for `epoch` passes over `train_loader`, evaluating on `test_loader`
# every `print_every_step` batches and printing running statistics.
# NOTE(review): `model`, `criterion`, `optimizer`, `device`, `train_loader` and
# `test_loader` are expected to be defined earlier; `optimizer` in particular is
# not visible in this excerpt — confirm its name.
epoch = 22  # total number of epochs
steps = 0
print_every_step = 5
total_train_loss, total_test_loss = [], []

# The weights must live on the same device as the inputs; leaving the model on
# the CPU while feeding CUDA tensors raises the "Expected object of backend CPU
# but got backend CUDA" error.
model.to(device)

for e in range(epoch):
    train_loss = 0
    test_loss = 0
    accuracy = 0
    for images, labels in train_loader:
        steps += 1
        images, labels = images.to(device), labels.to(device)
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        log_ps = model(images)
        loss = criterion(log_ps, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

        if steps % print_every_step == 0:
            # Start each evaluation from zero so the averages below describe
            # this evaluation only.
            test_loss = 0
            accuracy = 0
            model.eval()  # disable dropout etc. while measuring
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    test_images = test_images.to(device)
                    test_labels = test_labels.to(device)
                    log_ps = model(test_images)
                    test_loss += criterion(log_ps, test_labels).item()
                    # calculate accuracy
                    ps = torch.exp(log_ps)
                    top_p, top_class = ps.topk(1, dim=1)
                    equals = top_class == test_labels.view(*top_class.shape)
                    accuracy += torch.mean(equals.float()).item()
            print(f"Epoch {e+1}/{epoch}.. "
                  f"Train loss: {train_loss/print_every_step:.3f}.. "
                  f"Test loss: {test_loss/len(test_loader):.3f}.. "
                  f"Test accuracy: {accuracy/len(test_loader):.3f}")
            # The train loss is averaged over the last `print_every_step`
            # batches, so restart the running sum after reporting it.
            train_loss = 0
            model.train()


Why do loss values increase after some epochs with sampled_softmax_loss?

I'm using Tensorflow to train a word2vec skip gram model. The computation graph is in the code below:
# Builds the skip-gram training graph: input pipeline, embedding lookup,
# sampled-softmax loss and an Adam training op, all stored on `self`.
# training data
# NOTE(review): the right-hand side of the next line is truncated in this copy
# (the call that precedes ", batch_size=..." is missing) — restore it from the
# original before running.
self.dataset =, batch_size=self.batch_size, column_names=['input', 'output'], header=False, num_epochs=self.epochs)
# A one-shot iterator yields dict-like batches keyed by the column names above.
self.datum = self.dataset.make_one_shot_iterator().get_next()
self.inputs, self.labels = self.datum['input'], self.datum['output']
# embedding layer
# Embedding matrix of shape (n_vocab, n_embedding), initialised uniformly in [-1, 1).
self.embedding_g = tf.Variable(tf.random_uniform((self.n_vocab, self.n_embedding), -1, 1))
self.embed = tf.nn.embedding_lookup(self.embedding_g, self.inputs)
# softmax layer
# Output weights of shape (n_context, n_embedding) and one bias per context class.
self.softmax_w_g = tf.Variable(tf.truncated_normal((self.n_context, self.n_embedding)))
self.softmax_b_g = tf.Variable(tf.zeros(self.n_context))
# Calculate the loss using negative sampling
# Labels are reshaped into a single column; presumably the [batch, num_true]
# layout the sampled-softmax op expects — TODO confirm.
self.labels = tf.reshape(self.labels, [-1, 1])
# NOTE(review): the argument list of the call below is missing in this copy
# (the line ends at the opening parenthesis) — restore it before running.
self.loss = tf.nn.sampled_softmax_loss(
# Mean of the per-example losses is the scalar that Adam minimises.
self.cost = tf.reduce_mean(self.loss)
self.optimizer = tf.train.AdamOptimizer().minimize(self.cost)
But after 25 epochs, loss values begin to increase. Is there any reason for this?

My neural network takes too much time to train one epoch

I am training a neural network to classify traffic signs, but a single epoch takes far too long — 30+ minutes. The batch size is 64 and the learning rate is 0.002; the input is 20x20 pixels with 3 channels, and the model summary reports 173,931 trainable parameters. Is that too many, or is it reasonable?
Here is the network architecture
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
class Network(nn.Module):
    """Small CNN mapping 3x20x20 images to 43 class scores.

    Two conv + max-pool stages halve the spatial size twice (20 -> 10 -> 5),
    then two fully connected layers produce the raw (un-normalised) scores.
    """

    def __init__(self):
        # nn.Module must be initialised before any sub-module is assigned;
        # otherwise the assignments below raise AttributeError.
        super().__init__()
        # Convolutional Layers
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        # Max Pooling Layers
        self.pool = nn.MaxPool2d(2, 2)
        # Linear Fully connected layers
        self.fc1 = nn.Linear(32 * 5 * 5, 200)
        self.fc2 = nn.Linear(200, 43)
        self.dropout = nn.Dropout(p=0.25)

    def forward(self, x):
        """Return class scores of shape (batch, 43) for input (batch, 3, 20, 20)."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten the 32 feature maps of size 5x5 into one vector per sample.
        x = x.view(-1, 32 * 5 * 5)
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x
Here is the optimizer instance
import torch.optim as optim
# Cross-entropy criterion used to score the model's outputs against the targets.
criterion = nn.CrossEntropyLoss()
# Plain SGD over all model parameters with learning rate 0.002.
# NOTE(review): this rebinds the name `optim` from the torch.optim module to the
# SGD instance, so the module is no longer reachable through `optim` afterwards.
optim = optim.SGD(model.parameters(),lr = 0.002)
Here is the training code
# Train for `epochs` epochs, validating after each one and reporting the
# per-sample average losses. Relies on `model`, `criterion`, `optim`,
# `train_data`, `valid_data` and `gpu_available` defined earlier.
import torch

epochs = 20
valid_loss_min = np.inf  # best (lowest) validation loss seen so far
print("Training the network")
for epoch in range(1, epochs + 1):
    train_loss = 0
    valid_loss = 0

    model.train()  # enable dropout for training
    for data, target in train_data:
        if gpu_available:
            data, target = data.cuda(), target.cuda()
        # Without these three calls the weights are never updated.
        optim.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optim.step()
        # loss.item() is the batch mean; weight it by the batch size so the
        # division by the dataset size below gives a per-sample average.
        train_loss += loss.item() * data.size(0)

    ###### Validate #########
    model.eval()  # disable dropout while measuring
    with torch.no_grad():  # no gradients needed, saves time and memory
        for data, target in valid_data:
            if gpu_available:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            loss = criterion(output, target)
            valid_loss += loss.item() * data.size(0)

    train_loss = train_loss / len(train_data.dataset)
    valid_loss = valid_loss / len(valid_data.dataset)
    print("Epoch {}.....Train Loss = {:.6f}....Valid Loss = {:.6f}".format(epoch, train_loss, valid_loss))
    if valid_loss < valid_loss_min:
        # NOTE(review): the original save call and its file name were lost in
        # this copy; 'model.pt' is a placeholder — confirm the intended path.
        torch.save(model.state_dict(), 'model.pt')
        print("Valid Loss min {:.6f} >>> {:.6f}".format(valid_loss_min, valid_loss))
        valid_loss_min = valid_loss
I am using GPU through google colab

Joining of curve fitting models

I have these 7 quasi-Lorentzian curves which are fitted to my data,
and I would like to join them to make one connected curve. Do you have any ideas how to do this? I've read about CompositeModel in the lmfit documentation, but it's not clear to me how to use it.
Here is a sample of my code of two fitted curves.
# For each dataset: estimate the power spectrum with Welch's method, plot it,
# then fit one Lorentzian per frequency window and overlay each best fit.
for dataset in [Bxfft]:
    dataset = np.asarray(dataset)
    # 300.0 keeps the sampling rate a float under Python 2, where 266336/300
    # would silently truncate to 887.
    freqs, psd = signal.welch(dataset, fs=266336 / 300.0, window='hamming',
                              nperseg=16192, scaling='spectrum')
    # dataset.size**0 is 1, so the division leaves psd unscaled.
    plt.semilogy(freqs[0:-7000], psd[0:-7000] / dataset.size**0, color='r', label='Bx')

    # 8 Hz
    x = freqs[100:-7900]
    y = psd[100:-7900]
    model = Model(lorentzian)
    params = model.make_params(amp=6, cen=5, sig=1, e=0)
    # Model.fit takes the data first, then the parameters and the
    # independent variable by keyword.
    result = model.fit(y, params, x=x)
    final_fit = result.best_fit
    print("8 Hz mode")
    plt.plot(x, final_fit, 'k-', linewidth=2)

    # 14 Hz
    x2 = freqs[220:-7780]
    y2 = psd[220:-7780]
    model2 = Model(lorentzian)
    pars2 = model2.make_params(amp=6, cen=10, sig=3, e=0)
    pars2['amp'].value = 6
    result2 = model2.fit(y2, pars2, x=x2)
    final_fit2 = result2.best_fit
    print("14 Hz mode")
    plt.plot(x2, final_fit2, 'k-', linewidth=2)
I've used some hints from user @MNewville, who posted an answer, and using his code I got this:
So my code is similar to his, but extended to cover each peak. What I'm struggling with now is replacing the ready-made LorentzianModel with my own.
The problem is that when I do this, the code gives me an error like this:
C:\Python27\lib\site-packages\lmfit\ RuntimeWarning:
invalid value encountered in double_scalars [[Model]] spercent =
About my own model:
def lorentzian(x, amp, cen, sig, e):
    """Evaluate a Lorentzian-shaped peak at *x*.

    Computes ``amp * (1 - e) / ((x - cen)**2 + sig**2)``.

    Args:
        x: position(s) at which to evaluate the curve.
        amp: amplitude factor of the numerator.
        cen: centre of the peak.
        sig: width term; its square is added to the squared distance from
            the centre.
        e: scales the numerator by ``(1 - e)``.

    Returns:
        The curve value at *x*. The denominator is zero when ``sig == 0``
        and ``x == cen``.
    """
    # 1.0 * x forces float arithmetic even for integer input.
    offset = 1.0 * x - cen
    return (amp * (1 - e)) / (offset ** 2 + sig ** 2)
# One component per peak; the prefix keeps each component's parameter names
# (amp, cen, sig, e) distinct inside the composite.
peak1 = Model(lorentzian, prefix='p1_')
peak2 = Model(lorentzian, prefix='p2_')
peak3 = Model(lorentzian, prefix='p3_')
# make composite by adding (or multiplying, etc) components
model = peak1 + peak2 + peak3
# make parameters for the full model, setting initial values
# using the prefixes
# Every keyword must match "<prefix><argument name>" exactly: the earlier
# misspelling `p2_ampe` left `p2_amp` without an initial value.
params = model.make_params(p1_amp=6, p1_cen=8, p1_sig=1, p1_e=0,
                           p2_amp=16, p2_cen=14, p2_sig=3, p2_e=0,
                           p3_amp=16, p3_cen=21, p3_sig=3, p3_e=0)
The rest of the code is similar to @MNewville's.
[![enter image description here][3]][3]
A composite model for 3 Lorentzians would look like this:
from lmfit import Model, LorentzianModel
# One built-in Lorentzian per peak; prefixes keep the parameter names distinct.
peak1 = LorentzianModel(prefix='p1_')
peak2 = LorentzianModel(prefix='p2_')
peak3 = LorentzianModel(prefix='p3_')
# make composite by adding (or multiplying, etc) components
model = peak1 + peak2 + peak3
# make parameters for the full model, setting initial values
# using the prefixes
params = model.make_params(p1_amplitude=10, p1_center=8, p1_sigma=3,
                           p2_amplitude=10, p2_center=15, p2_sigma=3,
                           p3_amplitude=10, p3_center=20, p3_sigma=3)
# perhaps set bounds to prevent peaks from swapping or crazy values
for prefix in ('p1_', 'p2_', 'p3_'):
    params[prefix + 'amplitude'].min = 0
    params[prefix + 'sigma'].min = 0
# Overlapping centre windows keep each peak near its expected frequency.
params['p1_center'].min = 2
params['p1_center'].max = 11
params['p2_center'].min = 10
params['p2_center'].max = 18
params['p3_center'].min = 17
params['p3_center'].max = 25
# then do a fit over the full data range
# (`y` is the measured data and `x` the matching independent variable)
result = model.fit(y, params, x=x)
I think the key parts you were missing were: a) just add models together, and b) use prefix to avoid name collisions of parameters.
I hope that is enough to get you started...

Tensorflow Deep Learning - model size and parameters

According to Andrej's blog -
Where he says that for a Convolutional Layer, with parameter sharing, it introduces F x F x D weights per filter, for a total of (F x F x D) x K weights and K biases.
In my tensorflow code, I have an architecture like this (where D=1)
conv1 : F = 3, K = 32, S = 1, P = 1.
pool1 :
and so on...
According to the formula,
A model generated with F=3 for conv1 should have 9K weights ,i.e. smaller model, and
A model generated with F=5 should have 25K weights i.e. bigger model
In my code, when I write out the model files for both these cases, I see that the .ckpt file is about 380MB (F=3) and 340MB (F=5). Am I missing something?
Here's the reference code for saving the variables to a model and printing its size.
''' Run the session and save the model'''
#Add a saver here
saver = tf.train.Saver()
# Run session
# 201 iterations, each drawing a mini-batch of 50 training examples.
for i in range(201):
batch = mnist.train.next_batch(50)
# Every 100th step, report accuracy on the current batch with keep_prob 1.0.
if i%100 == 0:
train_accuracy = accuracy.eval(feed_dict={x:batch[0], y_: batch[1], keep_prob: 1.0})
# NOTE(review): the next line is damaged in this copy — the text after the
# print call is the tail of a separate statement whose beginning is missing
# (presumably the training step run with this feed dict — confirm).
print("step %d, training accuracy %g"%(i, train_accuracy)){x: batch[0], y_: batch[1], keep_prob: 1.0})
# Save model
# NOTE(review): the call before the path argument is missing in this copy
# (presumably the saver's save call — confirm).
save_path =, "/Users/voladoddi/Desktop/dropmodel.ckpt")
print("Model saved in file: %s" % save_path)
# Test
print("test accuracy %g"%accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
# Print model size.
# NOTE(review): `vars` shadows the Python builtin of the same name.
vars = 0
for v in tf.all_variables():
# NOTE(review): the right-hand side of the accumulation is missing in this copy.
vars +=

Plot in TensorBoard always closes on itself, like a circle

I was trying to plot a loss curve, but it always looks abnormal (the line loops back on itself like a circle; I don't really know how to describe it properly in English). I have found many topics about problems like this but still can't solve it. My TensorFlow version is 0.10.0.
import tensorflow as tf
from tensorflow.core.util.event_pb2 import SessionLog
import os
# initialize variables/model parameters
# define the training loop operations
def inputs():
# read/generate input training data X and expected outputs Y
# Returns the pair (weight_age, blodd_fat_content), each converted to a float
# tensor with tf.to_float. weight_age holds two-element rows; the second list
# holds one number per entry.
# NOTE(review): both list literals below are cut off in this copy (no closing
# bracket) — restore the full data before running.
weight_age = [[84,46],[73,20],[65,52],[70,30],[76,57],[69,25],[63,28],[72,36],[79,57],[75,44],[27,24]
blodd_fat_content = [354,190,405,263,451,302,288,385,402,365,209,290,346,
return tf.to_float(weight_age), tf.to_float(blodd_fat_content)
def inference(X):
    """Apply the linear model to *X*: the matrix product with W, plus b.

    W and b are the module-level variables created in the session script.
    """
    projected = tf.matmul(X, W)
    return projected + b
def loss(X, Y):
    """Return the sum of squared errors between the model output and *Y*.

    Args:
        X: input rows passed to ``inference``.
        Y: expected outputs, one value per row of *X* (a rank-1 tensor, as
            produced by ``inputs``).

    Returns:
        A scalar tensor: the summed squared difference over all rows.
    """
    # inference() yields one column per sample set, i.e. shape (N, 1), because
    # W is (2, 1). Subtracting that from a rank-1 Y of shape (N,) would
    # broadcast to an (N, N) matrix and sum N*N terms instead of N.
    # Transposing to (1, N) makes the difference element-wise.
    Y_predicted = tf.transpose(inference(X))
    return tf.reduce_sum(tf.squared_difference(Y, Y_predicted))
def train(total_loss):
    """Build the op that takes one gradient-descent step on *total_loss*.

    Uses a fixed learning rate of 1e-7.
    """
    step_size = 1e-7
    optimizer = tf.train.GradientDescentOptimizer(step_size)
    return optimizer.minimize(total_loss)
def evaluate(sess, X, Y):
    """Print the trained model's predictions for two fixed sample inputs.

    Args:
        sess: the session in which the model variables live.
        X: unused; kept for interface compatibility.
        Y: unused; kept for interface compatibility.
    """
    # Each sample is one [weight, age] row, matching the rows built in inputs().
    print(sess.run(inference([[80., 25.]])))
    print(sess.run(inference([[60., 25.]])))
# Session script: builds the graph in g1, optionally restores a checkpoint,
# runs the training loop while writing loss summaries, and saves checkpoints.
# NOTE(review): several lines below lost part of their text in this copy (the
# call that should precede a bare argument list is missing); each is flagged.
g1 = tf.Graph()
with tf.Session(graph=g1) as sess:
# Model parameters: a 2x1 weight matrix and a scalar bias, both starting at 0.
W = tf.Variable(tf.zeros([2,1]), name="weights")
b = tf.Variable(0., name="bias")
X, Y = inputs()
# NOTE(review): the argument of this print is missing. No variable
# initialisation is visible anywhere in this excerpt — confirm it was here.
print (
total_loss = loss(X, Y)
train_op = train(total_loss)
# Record the loss as a scalar summary and merge all summaries into one op.
tf.scalar_summary("loss", total_loss)
summaries = tf.merge_all_summaries()
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
# Event files are written to the 'linear' directory.
# NOTE(review): every run appends to this same directory, and a restored run
# restarts at a later step; overlapping runs in one plot are presumably what
# makes the curve loop back on itself — confirm by clearing the directory.
summary_writer = tf.train.SummaryWriter('linear', g1)
summary_writer.add_session_log(session_log= SessionLog(status=SessionLog.START), global_step=1)
# actual training loop
training_steps = 100
tolerance = 100
total_loss_last = 0
initial_step = 0
# Create a saver.
saver = tf.train.Saver()
# verify if we don't have a checkpoint saved already
# NOTE(review): os.path.dirname('my_model') evaluates to '' and the name uses
# an underscore, while checkpoints below are saved as 'my-model' — confirm
# this lookup finds the intended checkpoint.
ckpt = tf.train.get_checkpoint_state(os.path.dirname('my_model'))
if ckpt and ckpt.model_checkpoint_path:
# Restores from checkpoint
saver.restore(sess, ckpt.model_checkpoint_path)
# The step number is the text after the last '-' in the checkpoint path.
initial_step = int(ckpt.model_checkpoint_path.rsplit('-', 1)[1])
# summary_writer.add_session_log(SessionLog(status=SessionLog.START), global_step=initial_step)
# NOTE(review): the call that takes [train_op] is missing after the colon.
for step in range(initial_step, training_steps):[train_op])
# NOTE(review): the checkpoint-saving call before these arguments is missing.
if step%20 == 0:, 'my-model', global_step=step)
# NOTE(review): the current-loss expression is missing on the next three lines.
gap = abs( - total_loss_last)
total_loss_last =
summary_writer.add_summary(, step)
# for debugging and learning purposes, see how the loss gets decremented thru training steps
if step % 10 == 0:
# NOTE(review): the call wrapping [total_loss] is missing.
print ("loss: ",[total_loss]))
print("step: ", step)
# NOTE(review): the body of this condition is not visible in this copy.
if gap < tolerance:
# evaluation...
evaluate(sess, X, Y)
# NOTE(review): two statements are fused here; the call that should precede
# the 'my-model' arguments is missing.
coord.join(threads), 'my-model', global_step=training_steps)