I have this code. I save weights after every epoch and the code saves it. But when I load the weights the loss value starts from the initial loss value which means the loading is failing somehow.
net = torchvision.models.resnet18(pretrained=True)
num_ftrs = net.fc.in_features
net.fc = nn.Linear(num_ftrs, 136)
def train():
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
criterion = L1Loss(reduction='sum')
lr = 0.0000001
optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=0.0005)
net.to(device)
state = torch.load('face2.txt')
net.load_state_dict(state['state_dict'])
optimizer.load_state_dict(state['optimizer'])
for epoch in range(int(0), 200000):
for batch, data in enumerate(trainloader, 0):
torch.cuda.empty_cache()
inputs, labels = data
inputs, labels = inputs.to(device), labels.to(device)
optimizer.zero_grad()
outputs = net(inputs).reshape(-1, 68, 2)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
state = {
'epoch': epoch,
'state_dict': net.state_dict(),
'optimizer': optimizer.state_dict(),
}
torch.save(state, 'face2.txt')
if __name__ == '__main__':
train()
The initial loss is 50k plus and after some thousand epochs the loss becomes 50-60. Now when I re-run the code I expect it to start from a near loss value but it starts from around 50k again.
The code you wrote:
net = torchvision.models.resnet18(pretrained=True)
means you start over again with the same net - pretrained resnet18. Instead, you should load the last state if present, this would fix your problem.
I will slightly update your notation:
state = {
'epoch': epoch,
'model_state_dict': net.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
...
}
torch.save(state, filepath)
Learnable parameters are the first state_dict (model state dict).
The second state_dict is the optimizer state dict. You recall that the optimizer is used to improve our learnable parameters. But the optimizer state_dict is fixed. Nothing to learn in there.
Your code at some point should look like:
model.load_state_dict(state['model_state_dict'])
optimizer.load_state_dict(state['optimizer_state_dict'])
Related
I have defined two different functions, one for getting the prediction of the user using the Teachable Machine from Google and then a countdown function which is initiated by the 'S' key within the loop.
My issue is that when I click the 'S' key my Video Capture freeze frames and begins the countdown, when all I want it to be doing is allowing the user to ready themselves for creating one of three gestures.
The code is below:
def get_user_choice(self):
user_choice = ''
while True:
ret, frame = cap.read()
resized_frame = cv2.resize(frame, (224, 224), interpolation = cv2.INTER_AREA)
image_np = np.array(resized_frame)
normalized_image = (image_np.astype(np.float32) / 127.0) - 1 # Normalize the image
data[0] = normalized_image
prediction = model.predict(data, verbose=0)
cv2.imshow('frame', frame)
k = cv2.waitKey(1)
if k == ord('s'):
countdown()
user_index = np.argmax(prediction[0])
user_choice = user_options[user_index]
print('You chose:', user_choice)
return user_choice
def countdown():
print('Get your answer ready!')
prev = time.time() + 1
TIMER = int(4)
while TIMER >= 1:
cur = time.time()
if cur - prev >= 1:
prev = cur
TIMER = TIMER -1
print(TIMER)
It works almost perfectly apart from the countdown function which has created a freeze frame issue.
I've tried to mess around with the countdown function, and put the videocapture into the countdown function. Nothing seems to have worked for me so far, and thus is causing frustration.
I am restoring an object with tensor flow. However, I am getting this error
return [dim.value for dim in self._dims]
TypeError: 'NoneType' object is not iterable
when I define the optimzer:
train = optimizer.minimize(lossBatch)
I tested the random generation of weights and it worked well.
def init_weights(shape):
return tf.Variable(tf.random_uniform(shape, -0.01, 0.01, seed=0))
So I am concluding that the problem is related to the restoration of weights.
To restore the weights I am doing this:
with tf.Session() as sess:
new_saver = tf.train.import_meta_graph('my-model-88500.meta')
new_saver.restore(sess, 'my-model-88500')
w_h1= tf.get_default_graph().get_tensor_by_name("w_h1:0")
b_h1 = tf.get_default_graph().get_tensor_by_name("b_h1:0")
w_h2 = tf.get_default_graph().get_tensor_by_name("w_h2:0")
b_h2 = tf.get_default_graph().get_tensor_by_name("b_h2:0")
w_h3 = tf.get_default_graph().get_tensor_by_name("w_h3:0")
b_h3 = tf.get_default_graph().get_tensor_by_name("b_h3:0")
w_o = tf.get_default_graph().get_tensor_by_name("w_o:0")
b_o = tf.get_default_graph().get_tensor_by_name("b_o:0")
w_h1=tf.reshape(w_h1,[numberInputs,numberHiddenUnits1],'w_h1')
b_h1=tf.reshape(b_h1,[numberHiddenUnits1],'b_h1')
w_h2=tf.reshape(w_h2,[numberHiddenUnits1,numberHiddenUnits2],'w_h2')
b_h2=tf.reshape(b_h2,[numberHiddenUnits2],'b_h2')
w_h3=tf.reshape(w_h3,[numberHiddenUnits2,numberHiddenUnits3],'w_h3')
b_h3=tf.reshape(b_h3,[numberHiddenUnits3],'b_h3')
w_o=tf.reshape(w_o,[numberHiddenUnits3,numberOutputs],'w_o')
b_o=tf.reshape(b_o,[numberOutputs],'b_o')
init = tf.initialize_all_variables()
sess.run(init)
Then I redefine the network:
numberEpochs=1500000
batchSize=25000
learningRate=0.000001
numberOutputs=np.shape(theTrainOutput)[1]
numberTrainSamples=np.shape(theTrainInput)[0]
numberInputs=np.shape(theTrainInput)[1]
xTrain=tf.placeholder("float",[numberTrainSamples,numberInputs])
yTrain=tf.placeholder("float",[numberTrainSamples,numberOutputs])
yTrainModel=model(xTrain,w_h1,b_h1,w_h2,b_h2,w_h3,b_h3,w_o,b_o)
xBatch=tf.placeholder("float",[batchSize,numberInputs])
yBatch=tf.placeholder("float",[batchSize,numberOutputs])
yBatchModel=model(xBatch,w_h1,b_h1,w_h2,b_h2,w_h3,b_h3,w_o,b_o)
lossBatch = tf.reduce_mean(tf.abs(yBatch-yBatchModel))
optimizer = tf.train.AdamOptimizer(learningRate)
train = optimizer.minimize(lossBatch)
I get an error in this last line above! Note that before I redefined the entire network to reuse the weights.
It is worth mentioning that I am able to get the shape of a weight, namely
w_h1.get_shape()
TensorShape([Dimension(13), Dimension(50)])
On the other hand,
w_h1.dtype
tf.float32
furthermore, I am also able to print the weights:
print sess.run(w_h1)
trying to fit a peak to some data like this:
import scipy
from lmfit.models import GaussianModel
x = shm.PTR_P
y = shm.PORE_V_P
mod = LorentzianModel()
pars = mod.guess(y, x=x)
out = mod.fit(y, pars, x=x)
print(out.fit_report(min_correl=0.25))
and while I can do the fit if I generate the data, if I try to read them from another source it doesn't work. Am not too good at this and have no idea what the issue is. Here is the output:
[x,y]
[(34.145490000000002, 3.4599999999999999e-08),
(29.286449999999999, 4.8399999999999997e-08),
(25.118860000000002, 0.026773140000000001),
(21.544350000000001, 0.068791409999999997),
(18.4785, 0.083200979999999994),
(15.848929999999999, 0.02123179),
(11.659139999999999, 0.01551077),
(10.0, 0.084493879999999993),
(6.3095739999999996, 2.0899999999999998e-07),
(5.4116949999999999, 0.045209140000000002),
(4.6415889999999997, 0.054789360000000002),
(3.4145489999999996, 8.9399999999999993e-08),
(2.9286449999999999, 0.01100814),
(2.5118860000000001, 0.088990659999999999),
(1.84785, 3.5799999999999995e-07),
(1.5848930000000001, 0.099999009999999999),
(1.359356, 0.075139280000000003),
(1.1659139999999999, 0.167379),
(1.0, 0.57693050000000001),
(0.85769590000000007, 1.8658159999999999),
(0.73564230000000008, 8.4961369999999992),
(0.6309574, 25.299099999999999),
(0.54116949999999997, 21.413350000000001),
(0.46415889999999999, 13.408829999999998),
(0.39810719999999999, 8.3584750000000003),
(0.34145490000000006, 5.3982010000000002),
(0.29286440000000002, 3.7518540000000002),
(0.25118859999999998, 2.5325389999999999),
(0.21544349999999998, 1.7722470000000001),
(0.18478499999999998, 1.445808),
(0.15848929999999997, 1.182083),
(0.13593559999999999, 0.94957730000000007),
(0.1165914, 0.67620849999999999),
(0.10000000000000001, 0.46631620000000001),
(0.085769590000000007, 0.41001890000000002),
(0.07356422, 0.30625920000000001),
(0.063095730000000003, 0.24040219999999998),
(0.054116949999999997, 0.1942596),
(0.046415890000000001, 0.11306760000000002),
(0.039810720000000001, 0.099998470000000006),
(0.034145490000000001, 0.099998470000000006),
(0.029286449999999999, 0.02246857),
(0.025118870000000001, 0.077529909999999994)]
I would guess that either there are NaNs in your data or that the initial guess is so far off as to produce NaNs. The initial guess of 0 for sigma seems suspicious -- this should be > 0.
Either way, plotting the data and using np.isnan() would probably help isolate the problem.
Now I am making the leveldb to train caffe framework.So I use "convert_imageset.cpp". This cpp file writes the char-type data only to leveldb.
But I have the float data to write it to leveldb. This data is pre-proceed image data so it is float type data.
how can I write or convert this float data to leveldb.
This float data is a set of vector with 4096 dimensions.
Please help me.
Or not how to convert it to HDF5Data?
HDF5 stands for hierarchical data format. You can manipulate such data format for example with R (RHDF5 documentation)
Other software that can process HDF5 are Matlab and Mathematica.
EDIT
A new set of tools called HDFql has been recently released to simplify "managing HDF files through a high-level language like C/C++". You can check it out here
def del_and_create(dname):
if os.path.exists(dname):
shutil.rmtree(dname)
os.makedirs(dname)
def get_img_datum(image_fn):
img = cv.imread(image_fn, cv.IMREAD_COLOR)
img = img.swapaxes(0, 2).swapaxes(1, 2)
datum = caffe.io.array_to_datum(img, 0)
return datum
def get_jnt_datum(joint_fn):
joint = np.load(joint_fn)
datum = caffe.io.caffe_pb2.Datum()
datum.channels = len(joint)
datum.height = 1
datum.width = 1
datum.float_data.extend(joint.tolist())
return datum
def create_dataset():
img_db_fn = 'img.lmdb'
del_and_create(img_db_fn)
img_env = lmdb.Environment(img_db_fn, map_size=1099511627776)
img_txn = img_env.begin(write=True, buffers=True)
jnt_db_fn = 'joint.lmdb'
del_and_create(jnt_db_fn)
jnt_env = lmdb.Environment(jnt_db_fn, map_size=1099511627776)
jnt_txn = jnt_env.begin(write=True, buffers=True)
img_fns = glob.glob('imageData/*.jpg')
fileCount = len(img_fns)
print 'A total of ', fileCount, ' images.'
jnt_fns = glob.glob('jointData/*.npy')
jointCount = len(jnt_fns)
if(fileCount != jointCount):
print 'The file counts doesnot match'
exit()
keys = np.arange(fileCount)
np.random.shuffle(keys)
for i, (img_fn, jnt_fn) in enumerate( zip(sorted(img_fns), sorted(jnt_fns)) ):
img_datum = get_img_datum(img_fn)
jnt_datum = get_jnt_datum(jnt_fn)
key = '%010d' % keys[i]
img_txn.put(key, img_datum.SerializeToString())
jnt_txn.put(key, jnt_datum.SerializeToString())
if i % 10000 == 0:
img_txn.commit()
jnt_txn.commit()
jnt_txn = jnt_env.begin(write=True, buffers=True)
img_txn = img_env.begin(write=True, buffers=True)
print '%d'%(i), os.path.basename(img_fn), os.path.basename(jnt_fn)
img_txn.commit()
jnt_txn.commit()
img_env.close()
jnt_env.close()
The above code expects images from a given path, and the labels of each image as .npy file.
Credits: https://github.com/mitmul/deeppose/blob/caffe/scripts/dataset.py
Note: I had seen Shai's answer to a question, which claims that lmdb doesnot support float-type data. But, it does work for me with the latest version of Caffe and LMDB and using this code snippet. As his answer is way too old, its highly likely that older versions may not have supported float-type data.
I am working on recording using pyaudio on windows. I am plotting recorded sound using matplotlib.
Recording length is 60 sec.
buffer size is 1024
What i am getting is, while recording, for first few iterations i am getting junk values. After that it starts recording actual sound.
I also found that, even if the MIC is muted, its giving junk values wjen i plot it.
These junk values are affecting result of my computations.
Any idea, why this junk values/sound is getting recorded?? Any quick solution??
Code:
class record(Thread):
def __init__(self):
#Thread.__init__(self)
super(record, self).__init__()
self.lock=threading.Lock()
self.project=projectutils.getActiveProject()
self.outfile=self.project['name']+'.wav'
self.tw=tool.getToolWindow()
self.duration = 60 #record for 1 second. Pretty long duration don't you think
self.buffer=1024
self.pin = pyaudio.PyAudio()
self.channels=2
ccare.rate=self.rate=8820
self.format=pyaudio.paInt16
self.inStream = self.pin.open(format=self.format, channels=self.channels, rate=self.rate,input=True, frames_per_buffer=self.buffer)
self.flag=1
self.out = []
self.upper_lim = self.rate / self.buffer * self.duration
def run(self):
ccare.val=[]
x=[]
if not self.inStream:
return
self.lock.acquire()
data = self.inStream.read(self.buffer)
self.lock.release()
x=list(struct.unpack("%dh"%(len(data)/2),data))
self.lock.acquire()
ccare.val=ccare.val+list(x)
self.lock.release()
self.out.append(data)
for i in xrange(1, self.upper_lim):
x=[]
if not self.inStream:
break
data = self.inStream.read(self.buffer)
x=list(struct.unpack("%dh"%(len(data)/2),data))
self.lock.acquire()
ccare.val=ccare.val+list(x)
self.lock.release()
self.out.append(data)
if self.inStream:
self.inStream.stop_stream()
self.inStream.close()
self.pin.terminate()
self.save_file()
Simple Code:
import pyaudio
import wave
import struct
val = []
def record(out_file):
duration = 60 #record for 1 second. Pretty long duration don't you think
buffer=1024
pin = pyaudio.PyAudio()
channels=2
rate=8820
format=pyaudio.paInt16
inStream = pin.open(format=format, channels=channels, rate=rate,input=True, frames_per_buffer=buffer)
out = []
upper_lim = rate / buffer * duration
val=[]
x=[]
if not inStream:
return
data = inStream.read(buffer)
x=list(struct.unpack("%dh"%(len(data)/2),data))
val=val+list(x)
out.append(data)
for i in xrange(1, upper_lim):
x=[]
if not inStream:
break
data = inStream.read(buffer)
x=list(struct.unpack("%dh"%(len(data)/2),data))
val=val+list(x)
out.append(data)
if inStream:
inStream.stop_stream()
inStream.close()
pin.terminate()
The values stored in 'val' variable will be plotted in different thread using matplotlib.