2 layer NN weights not updating - python-2.7

I have a fairly simple NN that has 1 hidden layer.
However, the weights don't seem to be updating. Or perhaps they are but the variable values don't change ?
Either way, my accuracy is 0.1 and it doesn't change no matter how I change the learning rate or the activation function. I'm not sure what is wrong. Any ideas?
I've posted the entire code, correctly formatted, so you can copy-paste it directly and run it on your local machine.
from tensorflow.examples.tutorials.mnist import input_data
import math
import numpy as np
import tensorflow as tf
# one hot option returns binarized labels.
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)

# model parameters
# NOTE(review): every tensor in this script is laid out feature-major
# (features on axis 0, batch on axis 1), which is why the mini-batches are
# transposed before being fed. The loss and accuracy ops further down reduce
# along the last axis / axis 1, which in this layout is the batch axis rather
# than the class axis -- this is the layout issue the answer below addresses.
x = tf.placeholder(tf.float32, [784, None], name='x')

# weights
W1 = tf.Variable(tf.truncated_normal([25, 784], stddev=1.0 / math.sqrt(784)), name='W')
W2 = tf.Variable(tf.truncated_normal([25, 25], stddev=1.0 / math.sqrt(25)), name='W')
W3 = tf.Variable(tf.truncated_normal([10, 25], stddev=1.0 / math.sqrt(25)), name='W')

# bias units
b1 = tf.Variable(tf.zeros([25, 1]), name='b1')
b2 = tf.Variable(tf.zeros([25, 1]), name='b2')
b3 = tf.Variable(tf.zeros([10, 1]), name='b3')

# NN architecture
hidden1 = tf.nn.relu(tf.matmul(W1, x, name='hidden1') + b1, name='hidden1_out')
# hidden2 = tf.nn.sigmoid(tf.matmul(W2, hidden1, name='hidden2')+b2, name='hidden2_out')
y = tf.matmul(W3, hidden1, name='y') + b3
y_ = tf.placeholder(tf.float32, [10, None], name='y_')

# Create the model
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, y_))
# NOTE(review): a step size of 2 is very large for plain gradient descent.
train_step = tf.train.GradientDescentOptimizer(2).minimize(cross_entropy)

sess = tf.Session()
summary_writer = tf.train.SummaryWriter('log_simple_graph', sess.graph)
init = tf.global_variables_initializer()
sess.run(init)

# Train
for i in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    # Batches arrive as (batch, features); transpose to match the placeholders.
    summary = sess.run(train_step,
                       feed_dict={x: np.transpose(batch_xs), y_: np.transpose(batch_ys)})
    # train_step is an Operation, so run() yields None for it and nothing is
    # ever written here.
    if summary is not None:
        summary_writer.add_event(summary)

# Test trained model
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: np.transpose(mnist.test.images),
                                    y_: np.transpose(mnist.test.labels)}))

The reason why you are getting 0.1 accuracy consistently is mainly due to the order of dimensions of the input placeholder and the weights following it. Learning rate is another factor. If the learning rate is very high, the gradient would be oscillating and will not reach any minima.
Tensorflow takes the number of instances(batches) as the first index value of placeholder. So the code which declares input x
x = tf.placeholder(tf.float32, [784, None],name='x')
should be declared as
x = tf.placeholder(tf.float32, [None, 784],name='x')
Consequently, W1 should be declared as
W1 = tf.Variable(tf.truncated_normal([784, 25],stddev= 1.0/math.sqrt(784)),name='W')
and so on. Even the bias variables should be declared in the transposed sense (that's how TensorFlow expects it :) ).
For example
b1 = tf.Variable(tf.zeros([25]),name='b1')
b2 = tf.Variable(tf.zeros([25]),name='b2')
b3 = tf.Variable(tf.zeros([10]),name='b3')
I'm putting the corrected full code below for your reference. I achieved an accuracy of 0.9262 with this :D
from tensorflow.examples.tutorials.mnist import input_data
import math
import numpy as np
import tensorflow as tf
# one hot option returns binarized labels.
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)

# model parameters: batch-major layout, one flattened 784-pixel image per row
x = tf.placeholder(tf.float32, [None, 784], name='x')

# weights, shaped (inputs, outputs) so that `x @ W` keeps the batch on axis 0.
# Each variable gets its own name so the graph does not end up with
# auto-renamed duplicates of 'W'.
W1 = tf.Variable(tf.truncated_normal([784, 25], stddev=1.0 / math.sqrt(784)), name='W1')
W2 = tf.Variable(tf.truncated_normal([25, 25], stddev=1.0 / math.sqrt(25)), name='W2')
W3 = tf.Variable(tf.truncated_normal([25, 10], stddev=1.0 / math.sqrt(25)), name='W3')

# bias units: rank-1, broadcast over the batch axis when added
b1 = tf.Variable(tf.zeros([25]), name='b1')
b2 = tf.Variable(tf.zeros([25]), name='b2')
b3 = tf.Variable(tf.zeros([10]), name='b3')

# NN architecture
hidden1 = tf.nn.relu(tf.matmul(x, W1, name='hidden1') + b1, name='hidden1_out')
# Optional second hidden layer (W2/b2 are unused while this stays disabled):
# hidden2 = tf.nn.sigmoid(tf.matmul(hidden1, W2, name='hidden2') + b2, name='hidden2_out')
y = tf.matmul(hidden1, W3, name='y') + b3
y_ = tf.placeholder(tf.float32, [None, 10], name='y_')

# Create the model. Keyword arguments make the logits/labels roles explicit
# and do not depend on the positional order of the API.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(cross_entropy)

sess = tf.Session()
summary_writer = tf.train.SummaryWriter('log_simple_graph', sess.graph)
init = tf.initialize_all_variables()
sess.run(init)

# Train
for i in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    summary = sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
    # train_step is an Operation, so run() yields None for it and nothing is
    # ever written here.
    if summary is not None:
        summary_writer.add_event(summary)

# Test trained model: axis 1 is the class axis in this layout
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))

Related

I want to add TensorBoard to the code from pytorch.org

I'm pretty new to deep learning, but I want to add Tensorboard to the following code to track loss, accuracy, average precision and so on.
Sample code from the TorchVision -2.3 Object Detection Finetuning Tutorial
http://pytorch.org/tutorials/intermediate/torchvision_tutorial.html
import os
import numpy as np
import torch
from PIL import Image
import sys
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from engine import train_one_epoch, evaluate
import utils
import transforms as T
#from torch.utils.tensorboard import SummaryWriter
#writer = SummaryWriter()
class PennFudanDataset(object):
    """Dataset over a directory with ``PNGImages`` and ``PedMasks`` subfolders.

    Each item is an ``(image, target)`` pair, where ``target`` is a dict with
    the keys ``boxes``, ``labels``, ``masks``, ``image_id``, ``area`` and
    ``iscrowd`` built from the instance mask of that image.

    Args:
        root: Directory containing the ``PNGImages`` and ``PedMasks`` folders.
        transforms: Optional callable applied as ``transforms(img, target)``;
            pass ``None`` to return the PIL image and target untouched.
    """

    def __init__(self, root, transforms):
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = sorted(os.listdir(os.path.join(root, "PNGImages")))
        self.masks = sorted(os.listdir(os.path.join(root, "PedMasks")))

    def __getitem__(self, idx):
        """Load image ``idx`` and derive its detection/segmentation target."""
        # load images and masks
        img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
        mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        mask = np.array(Image.open(mask_path))

        # instances are encoded as different colors;
        # the first id is the background, so remove it
        obj_ids = np.unique(mask)[1:]

        # split the color-encoded mask into a set of binary masks,
        # one (H, W) plane per instance id
        masks = mask == obj_ids[:, None, None]

        # get bounding box coordinates for each mask
        num_objs = len(obj_ids)
        boxes = []
        for instance_mask in masks:
            rows, cols = np.where(instance_mask)
            boxes.append([np.min(cols), np.min(rows), np.max(cols), np.max(rows)])

        # reshape keeps the (N, 4) layout even when the mask holds no
        # instance, so the area computation below also works for N == 0
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "masks": masks,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
        }

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Return the number of image files found under ``PNGImages``."""
        return len(self.imgs)
def get_model_instance_segmentation(num_classes):
    """Build a Mask R-CNN whose box and mask heads predict ``num_classes``.

    Args:
        num_classes: Number of output classes for both replaced heads
            (the caller in this script counts the background as a class).

    Returns:
        The ``maskrcnn_resnet50_fpn`` model with new, untrained predictors.
    """
    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(
        in_features_mask, hidden_layer, num_classes)

    return model
def get_transform(train):
    """Return the composed transform pipeline for one dataset split.

    Args:
        train: When truthy, a random horizontal flip (probability 0.5) is
            appended after the tensor conversion.
    """
    steps = [T.ToTensor()]
    if train:
        steps.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(steps)
def main():
    """Fine-tune Mask R-CNN on PennFudanPed and evaluate after every epoch."""
    # train on the GPU or on the CPU, if a GPU is not available
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # our dataset has two classes only - background and person
    num_classes = 2
    # use our dataset and defined transformations
    dataset = PennFudanDataset('PennFudanPed', get_transform(train=True))
    dataset_test = PennFudanDataset('PennFudanPed', get_transform(train=False))

    # split the dataset in train and test set: one shared random permutation,
    # with the last 50 indices held out for testing
    indices = torch.randperm(len(dataset)).tolist()
    dataset = torch.utils.data.Subset(dataset, indices[:-50])
    dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])

    # define training and validation data loaders
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=4, shuffle=True, num_workers=4,
        collate_fn=utils.collate_fn)
    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=2, shuffle=False, num_workers=4,
        collate_fn=utils.collate_fn)

    # get the model using our helper function
    model = get_model_instance_segmentation(num_classes)
    # move model to the right device
    model.to(device)

    # construct an optimizer over the trainable parameters only
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)
    # and a learning rate scheduler: multiply the rate by 0.1 every 3 epochs
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)

    # let's train it for 10 epochs
    num_epochs = 10
    for epoch in range(num_epochs):
        # train for one epoch, printing every 10 iterations
        train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
        # update the learning rate
        lr_scheduler.step()
        # evaluate on the test dataset
        evaluate(model, data_loader_test, device=device)

    print("That's it!")


if __name__ == "__main__":
    main()

My neural network takes too much time to train one epoch

I am training a neural network to classify traffic signs, but a single epoch takes far too long to train (30+ minutes). I have set the batch size to 64 and the learning rate to 0.002, the input is 20x20 pixels with 3 channels, and the model summary shows 173,931 trainable parameters. Is that too many?
Here is the network architecture
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
class Network(nn.Module):
    """Small CNN classifier: two conv/pool stages followed by two linear layers.

    The first linear layer is sized for 32 feature maps of 5x5, i.e. for
    3-channel 20x20 inputs (each of the two 2x2 poolings halves the spatial
    size: 20 -> 10 -> 5). The network emits 43 raw class scores per sample.
    """

    def __init__(self):
        super().__init__()
        # Convolutional layers (3x3 kernels, padding 1 keeps the spatial size)
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        # Max pooling layer, shared by both conv stages
        self.pool = nn.MaxPool2d(2, 2)
        # Fully connected layers
        self.fc1 = nn.Linear(32 * 5 * 5, 200)
        self.fc2 = nn.Linear(200, 43)
        # Dropout, applied before each fully connected layer
        self.dropout = nn.Dropout(p=0.25)

    def forward(self, x):
        """Return unnormalised class scores of shape ``(batch, 43)``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample. Keeping the batch size fixed means an input of
        # the wrong spatial size fails in fc1 instead of being silently
        # folded into a different batch dimension.
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)
Here is the optimizer instance
# Import the module under its own alias: the training code uses the name
# `optim` for the optimizer instance, and binding both to the same name makes
# this snippet fail on a second run (the instance has no `.SGD`).
import torch.optim as torch_optim

criterion = nn.CrossEntropyLoss()
optim = torch_optim.SGD(model.parameters(), lr=0.002)
Here is the training code
epochs = 20
# Best (lowest) average validation loss seen so far.
valid_loss_min = np.inf
print("Training the network")
for epoch in range(1, epochs + 1):
    train_loss = 0
    valid_loss = 0

    ###### Train ############
    model.train()
    for data, target in train_data:
        if gpu_available:
            data, target = data.cuda(), target.cuda()
        optim.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optim.step()
        # criterion averages over the batch; weight by batch size so the
        # division below yields a per-sample mean
        train_loss += loss.item() * data.size(0)

    #########################
    ###### Validate #########
    model.eval()
    # No gradients are needed for validation; skipping autograd bookkeeping
    # saves time and memory.
    with torch.no_grad():
        for data, target in valid_data:
            if gpu_available:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            loss = criterion(output, target)
            valid_loss += loss.item() * data.size(0)

    train_loss = train_loss / len(train_data.dataset)
    valid_loss = valid_loss / len(valid_data.dataset)
    print("Epoch {}.....Train Loss = {:.6f}....Valid Loss = {:.6f}".format(epoch, train_loss, valid_loss))

    if valid_loss < valid_loss_min:
        torch.save(model.state_dict(), 'model_traffic.pt')
        print("Valid Loss min {:.6f} >>> {:.6f}".format(valid_loss_min, valid_loss))
        # Remember the new best so later epochs only overwrite the
        # checkpoint when they actually improve on it.
        valid_loss_min = valid_loss
I am using GPU through google colab

ValueError: Tensor Tensor("Const:0", shape=(), dtype=float32) may not be fed with tf.placeholder

I'm trying to make speech recognition system with tensorflow.
Input data is a NumPy array of size 50000 x 1.
Output data (mapping data) is a NumPy array of size 400 x 1.
Input and mapping data are passed in batches of 2 in a list.
I've used this tutorial to design the neural network. Following is the code snippet:
For RNN:
# placeholders for one batch of inputs and targets; the per-sample
# dimensions come from sound_constants
input_data = tf.placeholder(tf.float32, [batch_size, sound_constants.MAX_ROW_SIZE_IN_DATA, sound_constants.MAX_COLUMN_SIZE_IN_DATA], name="train_input")
target = tf.placeholder(tf.float32, [batch_size, sound_constants.MAX_ROW_SIZE_IN_TXT, sound_constants.MAX_COLUMN_SIZE_IN_TXT], name="train_output")
# creating one forward cell
fwd_cell = tf.nn.rnn_cell.BasicLSTMCell(num_hidden, state_is_tuple=True, forget_bias=1.0)
# creating one backward cell
bkwd_cell = tf.nn.rnn_cell.BasicLSTMCell(num_hidden, state_is_tuple=True, forget_bias=1.0)
# creating bidirectional RNN; tf.unstack splits input_data along axis 0 into
# the list of per-step inputs
# NOTE(review): axis 0 is the batch axis of the placeholder above -- confirm
# that the batch dimension is really meant to act as the time dimension
val, _, _ = tf.nn.static_bidirectional_rnn(fwd_cell, bkwd_cell, tf.unstack(input_data), dtype=tf.float32)
For feeding data:
# Feed one training batch. Every key of the feed dict has to be a feedable
# tensor such as a placeholder; the "may not be fed" error in the traceback
# below is raised for a constant ("Const:0").
feed = {g['input_data'] : trb[0], g['target'] : trb[1], g['dropout'] : 0.6}
# Run the accuracy op and the train step together and accumulate the accuracy.
accuracy_, _ = sess.run([g['accuracy'], g['ts']], feed_dict=feed)
accuracy += accuracy_
When I ran the code, I got this error:
Traceback (most recent call last):
File "/home/wolborg/PycharmProjects/speech-to-text-rnn/src/rnn_train_1.py", line 205, in <module>
tr_losses, te_losses = train_network(g)
File "/home/wolborg/PycharmProjects/speech-to-text-rnn/src/rnn_train_1.py", line 177, in train_network
accuracy_, _ = sess.run([g['accuracy'], g['ts']], feed_dict=feed)
File "/home/wolborg/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 895, in run
run_metadata_ptr)
File "/home/wolborg/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1102, in _run
raise ValueError('Tensor %s may not be fed.' % subfeed_t)
ValueError: Tensor Tensor("Const:0", shape=(), dtype=float32) may not be fed.
Process finished with exit code 1
Earlier, I was facing this issue with tf.sparse_placeholder, then after some browsing, I changed input type to tf.placeholder and made related changes. Now I'm clueless on where I'm making the error.
Please suggest how I should feed the data.
Entire code:
import tensorflow as tf
# for taking MFCC and label input
import numpy as np
import rnn_input_data_1
import sound_constants
# input constants
# Training Parameters
num_input = 10 # presumably the total number of input files (train + test); the test size is derived from it below
training_data_size = 8 # number of samples the training DataGenerator cycles through
testing_data_size = num_input - training_data_size
# Network Parameters
learning_rate = 0.0001 # for large training set, it can be set 0.001; NOTE(review): not referenced in the code shown, the optimizer hard-codes 1e-4
num_hidden = 200 # size passed to each BasicLSTMCell (units per LSTM cell, not a layer count)
num_classes = 28 # total alphabet classes (a-z) + extra symbols (', ' '); not referenced in the code shown
epoch = 1 # number of training epochs (passes over the training data)
batch_size = 2 # number of samples per batch
# Load the MFCC features and their encoded transcripts once, at import time.
mfcc_coeffs, text_data = rnn_input_data_1.mfcc_and_text_encoding()
class DataGenerator:
    """Cycle through the first ``data_size`` samples in fixed-size batches.

    Args:
        data_size: Number of leading samples to iterate over per epoch.
        features: Sliceable sequence of inputs; defaults to the module-level
            ``mfcc_coeffs``.
        labels: Sliceable sequence of targets aligned with ``features``;
            defaults to the module-level ``text_data``.
        batch: Samples per batch; defaults to the module-level ``batch_size``.

    Attributes are ``ptr`` (end index of the batch returned last),
    ``epochs`` (number of completed passes) and ``data_size``.
    """

    def __init__(self, data_size, features=None, labels=None, batch=None):
        self.ptr = 0
        self.epochs = 0
        self.data_size = data_size
        # Fall back to the module-level data so existing
        # ``DataGenerator(n)`` callers keep working unchanged.
        self.features = mfcc_coeffs if features is None else features
        self.labels = text_data if labels is None else labels
        self.batch = batch_size if batch is None else batch

    def next_batch(self):
        """Return the next ``(features, labels)`` batch, wrapping at the end.

        When the pointer runs past ``data_size`` the epoch counter is
        incremented and iteration restarts with the first batch.
        """
        self.ptr += self.batch
        if self.ptr > self.data_size:
            self.epochs += 1
            # Restart at the end of the first batch. Resetting to 0 would
            # produce the slice [-batch:0], which is always empty.
            self.ptr = self.batch
        start = self.ptr - self.batch
        return self.features[start:self.ptr], self.labels[start:self.ptr]
def reset_graph():
    """Close a leftover module-level ``sess`` (if any) and clear the default graph."""
    # A global `sess` only exists if an earlier run in this interpreter left
    # one behind; close it before the graph it belongs to is discarded.
    leftover = globals().get('sess')
    if leftover:
        leftover.close()
    tf.reset_default_graph()
def struct_network():
    """Build the bidirectional-LSTM graph and return its main tensors/ops.

    Returns:
        dict with the keys ``input_data`` and ``target`` (placeholders),
        ``dropout`` (keep-probability tensor, feedable, defaults to 1.0),
        ``loss``, ``ts`` (train step), ``preds`` and ``accuracy``.
    """
    print ('Inside struct network !!')
    reset_graph()

    input_data = tf.placeholder(tf.float32, [batch_size, sound_constants.MAX_ROW_SIZE_IN_DATA, sound_constants.MAX_COLUMN_SIZE_IN_DATA], name="train_input")
    target = tf.placeholder(tf.float32, [batch_size, sound_constants.MAX_ROW_SIZE_IN_TXT, sound_constants.MAX_COLUMN_SIZE_IN_TXT], name="train_output")

    # The training loop feeds a keep probability through this tensor, so it
    # must be feedable: a tf.constant raises "Tensor ... may not be fed".
    # The default of 1.0 (no dropout) applies whenever nothing is fed.
    keep_prob = tf.placeholder_with_default(1.0, shape=[], name="keep_prob")

    # creating one forward cell
    fwd_cell = tf.nn.rnn_cell.BasicLSTMCell(num_hidden, state_is_tuple=True, forget_bias=1.0)
    # creating one backward cell
    bkwd_cell = tf.nn.rnn_cell.BasicLSTMCell(num_hidden, state_is_tuple=True, forget_bias=1.0)

    # creating bidirectional RNN
    # NOTE(review): tf.unstack splits along axis 0, which is the batch axis
    # of input_data -- confirm this is the intended time axis.
    val, _, _ = tf.nn.static_bidirectional_rnn(fwd_cell, bkwd_cell, tf.unstack(input_data), dtype=tf.float32)

    # adding dropouts
    val = tf.nn.dropout(val, keep_prob)

    # swap the first two axes and keep the last slice along the new axis 0
    val = tf.transpose(val, [1, 0, 2])
    last = tf.gather(val, int(val.get_shape()[0]) - 1)

    print ('BiRNN created !!')
    print ('Last Size: ', last.get_shape())

    # forward and backward outputs are concatenated, hence num_hidden * 2
    weight = tf.Variable(tf.truncated_normal([num_hidden * 2, sound_constants.MAX_ROW_SIZE_IN_TXT]))
    bias = tf.Variable(tf.constant(0.1, shape=[sound_constants.MAX_ROW_SIZE_IN_TXT]))

    # mapping to the output positions
    logits = tf.matmul(last, weight) + bias
    prediction = tf.nn.softmax(logits)
    prediction = tf.reshape(prediction, shape = [batch_size, sound_constants.MAX_ROW_SIZE_IN_TXT, sound_constants.MAX_COLUMN_SIZE_IN_TXT])

    # NOTE(review): this compares softmax probabilities with the targets
    # element-wise for exact float equality -- confirm that is the intended
    # accuracy measure.
    correct = tf.equal(prediction, target)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    logits = tf.reshape(logits, shape=[batch_size, sound_constants.MAX_ROW_SIZE_IN_TXT, sound_constants.MAX_COLUMN_SIZE_IN_TXT])
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=target))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)

    # returning components as dictionary elements
    return {'input_data' : input_data,
            'target' : target,
            'dropout': keep_prob,
            'loss': loss,
            'ts': train_step,
            'preds': prediction,
            'accuracy': accuracy
            }
def train_network(graph):
    """Train for ``epoch`` epochs, evaluating on the test generator after each.

    Args:
        graph: Dict as returned by ``struct_network()``; the keys
            ``input_data``, ``target``, ``dropout``, ``accuracy`` and ``ts``
            are used. ``graph['dropout']`` must be a feedable tensor.

    Returns:
        ``(tr_losses, te_losses)``: per-epoch mean of the accuracy op on the
        training and test batches respectively.
    """
    # initialize tensorflow session and all variables
    # tf_gpu_config = tf.ConfigProto(allow_soft_placement = True, log_device_placement = True)
    # tf_gpu_config.gpu_options.allow_growth = True
    # with tf.Session(config = tf_gpu_config) as sess:
    with tf.Session() as sess:
        train_instance = DataGenerator(training_data_size)
        test_instance = DataGenerator(testing_data_size)
        print ('Training data size: ', train_instance.data_size)
        print ('Testing data size: ', test_instance.data_size)

        sess.run(tf.global_variables_initializer())
        print ('Starting session...')

        step, accuracy = 0, 0
        tr_losses, te_losses = [], []
        current_epoch = 0
        while current_epoch < epoch:
            step += 1
            trb = train_instance.next_batch()
            # Use the dict that was passed in rather than the module-level `g`.
            feed = {graph['input_data'] : trb[0], graph['target'] : trb[1], graph['dropout'] : 0.6}
            accuracy_, _ = sess.run([graph['accuracy'], graph['ts']], feed_dict=feed)
            accuracy += accuracy_

            # The generator bumps its epoch counter when it wraps around.
            if train_instance.epochs > current_epoch:
                current_epoch += 1
                tr_losses.append(accuracy / step)
                step, accuracy = 0, 0

                # eval test set: run until the test generator wraps once.
                # No dropout value is fed here, so the graph's own
                # keep-probability applies.
                te_epoch = test_instance.epochs
                while test_instance.epochs == te_epoch:
                    step += 1
                    print ('Testing round ', step)
                    trc = test_instance.next_batch()
                    feed = {graph['input_data']: trc[0], graph['target']: trc[1]}
                    accuracy_ = sess.run([graph['accuracy']], feed_dict=feed)[0]
                    accuracy += accuracy_

                te_losses.append(accuracy / step)
                step, accuracy = 0,0
                print("Accuracy after epoch", current_epoch, " - tr:", tr_losses[-1], "- te:", te_losses[-1])

    return tr_losses, te_losses
# Build the graph once, then train and evaluate on it.
g = struct_network()
tr_losses, te_losses = train_network(g)
You defined keep_prob as a tf.constant, but then try to feed a value into it. Replace keep_prob = tf.constant(1.0) with keep_prob = tf.placeholder(tf.float32,[]) or keep_prob = tf.placeholder_with_default(1.0,[])

Tensorflow Deep Learning - model size and parameters

According to Andrej's blog, a convolutional layer with parameter sharing introduces F x F x D weights per filter, for a total of (F x F x D) x K weights and K biases.
In my tensorflow code, I have an architecture like this (where D=1)
conv1 : F = 3, K = 32, S = 1, P = 1.
pool1 :
conv2
and so on...
According to the formula,
A model generated with F=3 for conv1 should have 9K weights ,i.e. smaller model, and
A model generated with F=5 should have 25K weights i.e. bigger model
Question
In my code, when I write out the model files for both these cases, I see that the .ckpt file is about 380MB (F=3) and 340MB (F=5). Am I missing something?
Code:
Here's the reference code for saving the variables to a model and printing its size.
# Run the session and save the model

# Add a saver here
saver = tf.train.Saver()

# Run session
sess.run(tf.initialize_all_variables())
for i in range(201):
    batch = mnist.train.next_batch(50)
    if i % 100 == 0:
        train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i, train_accuracy))
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})

# Save model
save_path = saver.save(sess, "/Users/voladoddi/Desktop/dropmodel.ckpt")
print("Model saved in file: %s" % save_path)

# Test
print("test accuracy %g" % accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

# Print model size: total element count over every variable in the graph.
# NOTE(review): tf.all_variables() is not limited to the layer weights; any
# variables an optimizer created are counted (and checkpointed) as well.
num_params = sum(int(np.prod(v.get_shape().as_list())) for v in tf.all_variables())
# 4 bytes per element assumes float32 variables. Formatting the value inside
# a single print call works as both a Python 2 statement and a Python 3
# function call, and the float division avoids flooring the result.
print("%.2f MB" % (num_params * 4 / float(1024 ** 2)))

Plot in TensorBoard is always closed, like a circle

I was trying to plot a loss curve, but it always looks abnormal: the line closes back on itself like a circle (I don't know how to describe it properly in English). I have found many topics about similar questions but still can't solve it. My TensorFlow version is 0.10.0.
import tensorflow as tf
from tensorflow.core.util.event_pb2 import SessionLog
import os
# initialize variables/model parameters
# define the training loop operations
def inputs():
    """Return the training data as float tensors ``(X, Y)``.

    ``X`` holds 25 ``[weight, age]`` rows and ``Y`` the 25 matching blood fat
    values.
    """
    # read/generate input training data X and expected outputs Y
    weight_age = [[84,46],[73,20],[65,52],[70,30],[76,57],[69,25],[63,28],[72,36],[79,57],[75,44],[27,24]
                  ,[89,31],[65,52],[57,23],[59,60],[69,48],[60,34],[79,51],[75,50],[82,34],[59,46],[67,23],
                  [85,37],[55,40],[63,30]]
    blood_fat_content = [354,190,405,263,451,302,288,385,402,365,209,290,346,
                         254,395,434,220,374,308,220,311,181,274,303,244]
    return tf.to_float(weight_age), tf.to_float(blood_fat_content)
def inference(X):
    """Return the linear model output ``X @ W + b`` for the rows of ``X``.

    Uses the module-level variables ``W`` and ``b``.
    """
    # compute inference model over data X and return the result
    return tf.matmul(X, W) + b
def loss(X, Y):
    """Return the sum of squared errors between ``Y`` and the model output.

    Args:
        X: Input rows, one sample per row.
        Y: Expected outputs, one value per sample (rank 1).
    """
    # compute loss over training data X and expected outputs Y
    # inference() yields one column, shape (n, 1), while Y has shape (n,).
    # Flatten the prediction first; otherwise the difference broadcasts to an
    # (n, n) matrix and the sum runs over every pairing of sample and target.
    Y_predicted = tf.reshape(inference(X), [-1])
    return tf.reduce_sum(tf.squared_difference(Y, Y_predicted))
def train(total_loss):
    """Return a gradient-descent op that minimises ``total_loss``."""
    # train / adjust model parameters according to computed total loss
    learning_rate = 1e-7
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    return optimizer.minimize(total_loss)
def evaluate(sess, X, Y):
    """Print the model's predictions for two fixed sample inputs.

    ``X`` and ``Y`` are accepted for signature compatibility but not used.
    """
    # evaluate the resulting trained model
    for sample in ([[80., 25.]], [[60., 25.]]):
        print (sess.run(inference(sample)))
g1 = tf.Graph()
with tf.Session(graph=g1) as sess:
    # initialize variables/model parameters
    W = tf.Variable(tf.zeros([2, 1]), name="weights")
    b = tf.Variable(0., name="bias")
    tf.initialize_all_variables().run()

    X, Y = inputs()
    print (sess.run(W))
    total_loss = loss(X, Y)
    train_op = train(total_loss)

    tf.scalar_summary("loss", total_loss)
    summaries = tf.merge_all_summaries()

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # NOTE(review): every run appends its events to the same 'linear'
    # directory. If an earlier run already wrote these step numbers there,
    # TensorBoard presumably joins the old and new points into one line that
    # loops back on itself -- try a fresh log directory per run.
    summary_writer = tf.train.SummaryWriter('linear', g1)
    summary_writer.add_session_log(session_log= SessionLog(status=SessionLog.START), global_step=1)

    # actual training loop
    training_steps = 100
    tolerance = 100
    total_loss_last = 0
    initial_step = 0

    # Create a saver.
    saver = tf.train.Saver()
    # One prefix for both the checkpoint lookup and the saves below, so the
    # two cannot drift apart.
    checkpoint_prefix = 'my-model'

    # verify if we don't have a checkpoint saved already
    ckpt = tf.train.get_checkpoint_state(os.path.dirname(checkpoint_prefix))
    if ckpt and ckpt.model_checkpoint_path:
        # Restores from checkpoint; the step is the suffix after the last '-'
        saver.restore(sess, ckpt.model_checkpoint_path)
        initial_step = int(ckpt.model_checkpoint_path.rsplit('-', 1)[1])
        # summary_writer.add_session_log(SessionLog(status=SessionLog.START), global_step=initial_step)

    for step in range(initial_step, training_steps):
        sess.run([train_op])
        if step % 20 == 0:
            saver.save(sess, checkpoint_prefix, global_step=step)

        # The variables do not change between the train step and the next
        # one, so evaluate the loss and its summary once per step and reuse.
        current_loss, summary = sess.run([total_loss, summaries])
        gap = abs(current_loss - total_loss_last)
        total_loss_last = current_loss
        summary_writer.add_summary(summary, step)

        # for debugging and learning purposes, see how the loss gets decremented thru training steps
        if step % 10 == 0:
            print ("loss: ", [current_loss])
            print("step: ", step)

        # stop early once the loss changes by less than the tolerance
        if gap < tolerance:
            break

    # evaluation...
    evaluate(sess, X, Y)

    coord.request_stop()
    coord.join(threads)
    saver.save(sess, checkpoint_prefix, global_step=training_steps)
    summary_writer.flush()
    # the `with` block closes the session on exit