I want to add TensorBoard to the code from pytorch.org - tensorboard

I'm pretty new to deep learning, but I want to add TensorBoard to the following code to track loss, accuracy, average precision, and so on.
Sample code from the TorchVision Object Detection Finetuning Tutorial:
http://pytorch.org/tutorials/intermediate/torchvision_tutorial.html
import os
import numpy as np
import torch
from PIL import Image
import sys
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from engine import train_one_epoch, evaluate
import utils
import transforms as T
#from torch.utils.tensorboard import SummaryWriter
#writer = SummaryWriter()
class PennFudanDataset(object):
    def __init__(self, root, transforms):
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "PNGImages"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "PedMasks"))))

    def __getitem__(self, idx):
        # load images and masks
        img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
        mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        mask = Image.open(mask_path)
        mask = np.array(mask)
        # instances are encoded as different colors
        obj_ids = np.unique(mask)
        # first id is the background, so remove it
        obj_ids = obj_ids[1:]
        # split the color-encoded mask into a set
        # of binary masks
        masks = mask == obj_ids[:, None, None]
        # get bounding box coordinates for each mask
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        return len(self.imgs)
def get_model_instance_segmentation(num_classes):
    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)
    return model
def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)
def main():
    # train on the GPU or on the CPU, if a GPU is not available
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # our dataset has two classes only - background and person
    num_classes = 2
    # use our dataset and defined transformations
    dataset = PennFudanDataset('PennFudanPed', get_transform(train=True))
    dataset_test = PennFudanDataset('PennFudanPed', get_transform(train=False))
    # split the dataset in train and test set
    indices = torch.randperm(len(dataset)).tolist()
    dataset = torch.utils.data.Subset(dataset, indices[:-50])
    dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])
    # define training and validation data loaders
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=4, shuffle=True, num_workers=4,
        collate_fn=utils.collate_fn)
    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=2, shuffle=False, num_workers=4,
        collate_fn=utils.collate_fn)
    # get the model using our helper function
    model = get_model_instance_segmentation(num_classes)
    # move model to the right device
    model.to(device)
    # construct an optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)
    # and a learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)
    # let's train it for 10 epochs
    num_epochs = 10
    for epoch in range(num_epochs):
        # train for one epoch, printing every 10 iterations
        train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
        # update the learning rate
        lr_scheduler.step()
        # evaluate on the test dataset
        evaluate(model, data_loader_test, device=device)
    print("That's it!")


if __name__ == "__main__":
    main()
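To wire TensorBoard into this script, you can uncomment the two SummaryWriter lines at the top and log the values the tutorial's helpers already compute. Here is a minimal sketch of the training loop, assuming the reference engine.py that ships with the tutorial, where train_one_epoch returns a MetricLogger and evaluate returns a CocoEvaluator (check your local copies if they differ):

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter()  # writes event files to ./runs/ by default

for epoch in range(num_epochs):
    # the returned MetricLogger carries smoothed training metrics
    # (loss, loss_classifier, loss_box_reg, ...) in its .meters dict
    metric_logger = train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    for name, meter in metric_logger.meters.items():
        writer.add_scalar("train/" + name, meter.global_avg, epoch)
    lr_scheduler.step()
    # the CocoEvaluator holds one COCOeval per IoU type ("bbox", "segm");
    # stats[0] is mAP@[0.5:0.95] and stats[1] is AP@0.5
    coco_evaluator = evaluate(model, data_loader_test, device=device)
    for iou_type, coco_eval in coco_evaluator.coco_eval.items():
        writer.add_scalar("val/" + iou_type + "_mAP", coco_eval.stats[0], epoch)

writer.close()

Then run tensorboard --logdir=runs to see the curves.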

Related

My neural network takes too much time to train one epoch

I am training a neural network that classifies traffic signs, but it takes too much time to train even one epoch, 30+ minutes. I have set the batch size to 64 and the learning rate to 0.002; the input is 20x20 pixels with 3 channels, and the model summary shows it is training 173,931 parameters. Is that too many, or is it reasonable?
Here is the network architecture
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        # Max pooling layer
        self.pool = nn.MaxPool2d(2, 2)
        # Linear fully connected layers
        self.fc1 = nn.Linear(32*5*5, 200)
        self.fc2 = nn.Linear(200, 43)
        # Dropout
        self.dropout = nn.Dropout(p=0.25)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 32*5*5)
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x
Here is the optimizer instance
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optim = optim.SGD(model.parameters(),lr = 0.002)
Here is the training code
epochs = 20
valid_loss_min = np.Inf
print("Training the network")
for epoch in range(1, epochs+1):
    train_loss = 0
    valid_loss = 0
    model.train()
    for data, target in train_data:
        if gpu_available:
            data, target = data.cuda(), target.cuda()
        optim.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optim.step()
        train_loss += loss.item()*data.size(0)
    #########################
    ###### Validate #########
    model.eval()
    with torch.no_grad():  # no gradients needed during validation
        for data, target in valid_data:
            if gpu_available:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            loss = criterion(output, target)
            valid_loss += loss.item()*data.size(0)
    train_loss = train_loss/len(train_data.dataset)
    valid_loss = valid_loss/len(valid_data.dataset)
    print("Epoch {}.....Train Loss = {:.6f}....Valid Loss = {:.6f}".format(epoch, train_loss, valid_loss))
    if valid_loss < valid_loss_min:
        torch.save(model.state_dict(), 'model_traffic.pt')
        print("Valid Loss min {:.6f} >>> {:.6f}".format(valid_loss_min, valid_loss))
        valid_loss_min = valid_loss  # update the running minimum
I am using GPU through google colab
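For scale: a 173,931-parameter network on 20x20x3 inputs is tiny, and one epoch should take seconds on a GPU, so 30-minute epochs most likely come from the input pipeline or from the model silently running on the CPU. A quick diagnostic sketch; train_data and model are the names from the question, everything else is a hypothetical check rather than a fix:

import time
import torch

# 1. confirm the model actually lives on the GPU
print(next(model.parameters()).device)  # expect cuda:0; cpu means .cuda() was never called

# 2. time one pass over the loader alone; if this dominates the epoch,
#    the bottleneck is data loading, not the network
start = time.time()
for data, target in train_data:
    pass
print("data loading alone: {:.1f}s".format(time.time() - start))

# 3. a DataLoader configured for throughput (train_dataset is a hypothetical name)
# train_data = torch.utils.data.DataLoader(train_dataset, batch_size=64,
#                                          shuffle=True, num_workers=2,
#                                          pin_memory=True)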

Removing dimension using reshape in keras?

Is it possible to remove a dimension using Reshape or any other function?
I have the following network.
import keras
from keras.layers.merge import Concatenate
from keras.models import Model
from keras.layers import Input, Dense
from keras.layers import Dropout
from keras.layers.core import Dense, Activation, Lambda, Reshape, Flatten
from keras.layers import Conv2D, MaxPooling2D, Reshape, ZeroPadding2D
import numpy as np

# Number_of_splits = ((input_width - win_dim) + 1) / stride_dim
splits = ((40-5)+1)/1
print splits
train_data_1 = np.random.randint(100, size=(100, splits, 45, 5, 3))
test_data_1 = np.random.randint(100, size=(10, splits, 45, 5, 3))
labels_train_data = np.random.randint(145, size=(100, 15))
labels_test_data = np.random.randint(145, size=(10, 15))
list_of_input = [Input(shape=(45, 5, 3)) for i in range(splits)]
list_of_conv_output = []
list_of_max_out = []
for i in range(splits):
    list_of_conv_output.append(Conv2D(filters=145, kernel_size=(15, 3))(list_of_input[i]))  # output dim: 36x(31,3,145)
    list_of_max_out.append(MaxPooling2D(pool_size=(2, 2))(list_of_conv_output[i]))  # output dim: 36x(15,1,145)
merge = keras.layers.concatenate(list_of_max_out)  # output dim: (15,1,5220)
# reshape = Reshape((merge.shape[0], merge.shape[3]))(merge)  # expected output dim: (15,145)
dense1 = Dense(units=1000, activation='relu', name="dense_1")(merge)
dense2 = Dense(units=1000, activation='relu', name="dense_2")(dense1)
dense3 = Dense(units=145, activation='softmax', name="dense_3")(dense2)
model = Model(inputs=list_of_input, outputs=dense3)
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam")
print model.summary()
raw_input("SDasd")
# x and validation_data must be lists with one array per Input;
# train_data_1[:, i] has shape (100, 45, 5, 3)
hist_current = model.fit(x=[train_data_1[:, i] for i in range(splits)],
                         y=labels_train_data,
                         shuffle=False,
                         validation_data=([test_data_1[:, i] for i in range(splits)], labels_test_data),
                         validation_split=0.1,
                         epochs=150000,
                         batch_size=15,
                         verbose=1)
The max pooling layer creates an output with dimension (15,1,36), and I would like to remove the middle axis so that the output dimension ends up being (15,36).
If possible, I would like to avoid specifying the outer dimension, or, as I've tried, to use the prior layer's dimensions to reshape it.
#reshape = Reshape((merge.shape[0],merge.shape[3]))(merge) # expected output dim: (15,145)
I need the output dimension of the entire network to be (15,145), and the middle dimension is causing some problems.
How do I remove the middle dimension?
I wanted to remove all dimensions that are equal to 1, but not specify a specific size with Reshape so that my code does not break if I change the input size or number of kernels in a convolution. This works with the functional keras API on a tensorflow backend.
from keras.layers.core import Reshape
old_layer = Conv2D(#actualArguments) (older_layer)
#old_layer yields, e.g., a (None, 15,1,36) size tensor, where None is the batch size
newdim = tuple([x for x in old_layer.shape.as_list() if x != 1 and x is not None])
#newdim is now (15, 36). Reshape does not take batch size as an input dimension.
reshape_layer = Reshape(newdim) (old_layer)
reshape = Reshape((15,145))(merge) # expected output dim: (15,145)
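For completeness, here is the same trick as a small self-contained sketch. It is written against tf.keras (an assumption on my part; with the standalone Keras used in the question, K.int_shape(old_layer) gives the same dimension list):

import tensorflow as tf
from tensorflow.keras.layers import Input, MaxPooling2D, Reshape
from tensorflow.keras.models import Model

inp = Input(shape=(31, 3, 145))               # shape after the Conv2D in the question
pooled = MaxPooling2D(pool_size=(2, 2))(inp)  # -> (None, 15, 1, 145)
# drop every size-1 axis without hard-coding the remaining sizes
newdim = tuple(d for d in pooled.shape[1:] if d != 1)
out = Reshape(newdim)(pooled)                 # -> (None, 15, 145)
model = Model(inp, out)
print(model.output_shape)                     # (None, 15, 145)

Because newdim is computed from the previous layer's shape, the code keeps working if you change the input size or the number of kernels, which is exactly what the answer above aims for.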

How do i pass my input/output to this network?

I seem to have some problems getting my training started, and I am not sure why.
The network is multi-input (72 1D arrays) and the output is a 1D array of length 24. The output array consists of numbers related to 145 different classes.
So: 72 inputs => 24 outputs
Minimal working example - without the input/output being set.
import keras
from keras.utils import np_utils
from keras import metrics
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Lambda, Reshape, Flatten
from keras.layers import Conv1D, Conv2D, MaxPooling2D, MaxPooling1D, Reshape, ZeroPadding2D
from keras.utils import np_utils
from keras.layers.advanced_activations import LeakyReLU, PReLU
from keras.layers.advanced_activations import ELU
from keras.models import Model
from keras.layers import Input, Dense
from keras.layers import Dropout
from keras import backend as K
from keras.callbacks import ReduceLROnPlateau
from keras.callbacks import CSVLogger
from keras.callbacks import EarlyStopping
from keras.models import load_model
from keras.layers.merge import Concatenate
import numpy as np

def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]

nano_train_input = []
nano_train_output = []
nano_test_input = []
nano_test_output = []

## Creating train input:
for i in range(974):
    nano_train_input.append(np.random.random((78,684,4)))
    nano_train_output.append(np.random.randint(145,size=(228)).tolist())

## Creating test input:
for i in range(104):
    nano_test_input.append(np.random.random((78,684,4)))
    nano_test_output.append(np.random.randint(145,size=(228)).tolist())

def model(train_input, train_output, test_input, test_output, names=0):
    # Paper uses dimension (40 x 45 = (15 * 3))
    # Filter size 5
    # Pooling size
    # I use dimension (78 x 72 = (24 * 3))
    # Filter size 9
    print "In model"
    i = 0
    print_once = True
    data_test_output = []
    data_test_input = []
    for matrix in test_input:
        row, col, channel = matrix.shape
        remove_output = (col/3) % 24
        remove_input = col % 72
        if remove_output > 0:
            test_output[i] = test_output[i][:-(remove_output)]
        for split in chunks(test_output[i], 24):
            data_test_output.append(np.array(split))
        if remove_input > 0:
            out = np.split(matrix[:,:-(remove_input),:-1], matrix[:,:-(remove_input),:-1].shape[1]/72, axis=1)
        else:
            out = np.split(matrix[:,:,:-1], matrix[:,:,:-1].shape[1]/72, axis=1)
        data_test_input.extend(out)
        del out
        i = i + 1  # Increment
    i = 0
    data_train_output = []
    data_train_input = []
    for matrix in train_input:
        row, col, channel = matrix.shape
        remove_output = (col/3) % 24
        remove_input = col % 72
        if remove_output > 0:
            train_output[i] = train_output[i][:-(remove_output)]
        for split in chunks(train_output[i], 24):
            data_train_output.append(np.array(split))
        if remove_input > 0:
            out = np.split(matrix[:,:-(remove_input),:-1], matrix[:,:-(remove_input),:-1].shape[1]/72, axis=1)
        else:
            out = np.split(matrix[:,:,:-1], matrix[:,:,:-1].shape[1]/72, axis=1)
        data_train_input.extend(out)
        del out
        i = i + 1  # Increment
    print
    print "Len:"
    print len(data_train_input)
    print len(data_train_output)
    print len(data_test_input)
    print len(data_test_output)
    print
    print "Type[0]:"
    print type(data_train_input[0])
    print type(data_train_output[0])
    print type(data_test_input[0])
    print type(data_test_output[0])
    print
    print "Type:"
    print type(data_train_input)
    print type(data_train_output)
    print type(data_test_input)
    print type(data_test_output)
    print
    print "shape of [0]:"
    print data_train_input[0].shape
    print data_train_output[0].shape
    print data_test_input[0].shape
    print data_test_output[0].shape
    list_of_input = [Input(shape=(78,3)) for i in range(72)]
    list_of_conv_output = []
    list_of_max_out = []
    for i in range(72):
        list_of_conv_output.append(Conv1D(filters=32, kernel_size=6, padding="same", activation='relu')(list_of_input[i]))
        list_of_max_out.append(MaxPooling1D(pool_size=3)(list_of_conv_output[i]))
    merge = keras.layers.concatenate(list_of_max_out)
    reshape = Flatten()(merge)
    dense1 = Dense(units=500, activation='relu', name="dense_1")(reshape)
    dense2 = Dense(units=250, activation='relu', name="dense_2")(dense1)
    dense3 = Dense(units=24, activation='softmax', name="dense_3")(dense2)
    model = Model(inputs=list_of_input, outputs=dense3)
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=[metrics.sparse_categorical_accuracy])
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, mode='auto', epsilon=0.01, cooldown=0, min_lr=0.000000000000000000001)
    stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=1, mode='auto')
    print "Train!"
    print model.summary()
    hist_current = model.fit(x = ,
                             y = ,
                             shuffle=False,
                             validation_data=(,),
                             validation_split=0.1,
                             epochs=150000,
                             verbose=1,
                             callbacks=[reduce_lr,stop])

model(nano_train_input, nano_train_output, nano_test_input, nano_test_output)
The input and output are stored as lists of numpy.ndarrays.
This is a minimal working example; how am I supposed to pass the input and output?
I would try:
merge = keras.layers.concatenate(list_of_max_out)
merge = Flatten()(merge) # or GlobalMaxPooling1D or GlobalAveragePooling1D
dense1 = Dense(500, activation = 'relu')(merge)
You probably want to apply something that transforms the output of your convolutional layers; to do that you need to squash the time/sequential dimension, e.g. with one of the techniques above.
If you take a look at your code and outputs, you indeed have what you say: 24 outputs (data_train_output[0].shape). However, if you look at the layer output Keras reports, you have this:
dense_3 (Dense) (None, 26, 145) 36395
I would say that this should be an array with shape (None, 24).
I suggest you add a reshape layer to get the output you want!
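As for the literal question of how to call fit: a Keras model built from 72 Input layers expects x as a list of 72 arrays (one per input, samples along the first axis) and y as a single array matching the output shape. A minimal sketch with random stand-in data; the shapes follow the question's architecture, model is the compiled Model from the question's function, and num_samples is a hypothetical placeholder for however many 72-column groups you produced:

import numpy as np

num_samples = 32  # hypothetical sample count
# one array per Input(shape=(78, 3)): a list of 72 arrays, each (num_samples, 78, 3)
x = [np.random.random((num_samples, 78, 3)) for _ in range(72)]
# a Dense(24, activation='softmax') head with categorical_crossentropy expects
# one-hot targets of shape (num_samples, 24)
y = np.eye(24)[np.random.randint(24, size=num_samples)]

hist = model.fit(x=x, y=y, shuffle=False, validation_split=0.1,
                 epochs=10, batch_size=8, verbose=1)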

2 layer NN weights not updating

I have a fairly simple NN that has 1 hidden layer.
However, the weights don't seem to be updating, or perhaps they are but the variable values don't change?
Either way, my accuracy is 0.1 and it doesn't change no matter how I vary the learning rate or the activation function. I'm not sure what is wrong. Any ideas?
I've posted the entire code, correctly formatted, so you can copy-paste it directly and run it on your local machine.
from tensorflow.examples.tutorials.mnist import input_data
import math
import numpy as np
import tensorflow as tf

# one hot option returns binarized labels.
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)
# model parameters
x = tf.placeholder(tf.float32, [784, None], name='x')
# weights
W1 = tf.Variable(tf.truncated_normal([25, 784], stddev=1.0/math.sqrt(784)), name='W')
W2 = tf.Variable(tf.truncated_normal([25, 25], stddev=1.0/math.sqrt(25)), name='W')
W3 = tf.Variable(tf.truncated_normal([10, 25], stddev=1.0/math.sqrt(25)), name='W')
# bias units
b1 = tf.Variable(tf.zeros([25,1]), name='b1')
b2 = tf.Variable(tf.zeros([25,1]), name='b2')
b3 = tf.Variable(tf.zeros([10,1]), name='b3')
# NN architecture
hidden1 = tf.nn.relu(tf.matmul(W1, x, name='hidden1') + b1, name='hidden1_out')
# hidden2 = tf.nn.sigmoid(tf.matmul(W2, hidden1, name='hidden2') + b2, name='hidden2_out')
y = tf.matmul(W3, hidden1, name='y') + b3
y_ = tf.placeholder(tf.float32, [10, None], name='y_')
# Create the model
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, y_))
train_step = tf.train.GradientDescentOptimizer(2).minimize(cross_entropy)
sess = tf.Session()
summary_writer = tf.train.SummaryWriter('log_simple_graph', sess.graph)
init = tf.global_variables_initializer()
sess.run(init)
# Train
for i in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    summary = sess.run(train_step, feed_dict={x: np.transpose(batch_xs), y_: np.transpose(batch_ys)})
    if summary is not None:
        summary_writer.add_event(summary)
# Test trained model
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: np.transpose(mnist.test.images), y_: np.transpose(mnist.test.labels)}))
The reason you are getting 0.1 accuracy consistently is mainly the order of the dimensions of the input placeholder and of the weights that follow it. The learning rate is another factor: if it is very high, the gradient will oscillate and never reach a minimum.
Tensorflow takes the number of instances (the batch size) as the first index of a placeholder. So the code which declares the input x
x = tf.placeholder(tf.float32, [784, None],name='x')
should be declared as
x = tf.placeholder(tf.float32, [None, 784],name='x')
Consequently, W1 should be declared as
W1 = tf.Variable(tf.truncated_normal([784, 25],stddev= 1.0/math.sqrt(784)),name='W')
and so on. Even the bias variables should be declared with the transposed shapes. (That's how tensorflow takes it :) )
For example
b1 = tf.Variable(tf.zeros([25]),name='b1')
b2 = tf.Variable(tf.zeros([25]),name='b2')
b3 = tf.Variable(tf.zeros([10]),name='b3')
I'm putting the corrected full code below for your reference. I achieved an accuracy of 0.9262 with this :D
from tensorflow.examples.tutorials.mnist import input_data
import math
import numpy as np
import tensorflow as tf
# one hot option returns binarized labels.
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)
# model parameters
x = tf.placeholder(tf.float32, [None, 784],name='x')
# weights
W1 = tf.Variable(tf.truncated_normal([784, 25],stddev= 1.0/math.sqrt(784)),name='W')
W2 = tf.Variable(tf.truncated_normal([25, 25],stddev=1.0/math.sqrt(25)),name='W')
W3 = tf.Variable(tf.truncated_normal([25, 10],stddev=1.0/math.sqrt(25)),name='W')
# bias units
b1 = tf.Variable(tf.zeros([25]),name='b1')
b2 = tf.Variable(tf.zeros([25]),name='b2')
b3 = tf.Variable(tf.zeros([10]),name='b3')
# NN architecture
hidden1 = tf.nn.relu(tf.matmul(x, W1,name='hidden1')+b1, name='hidden1_out')
# hidden2 = tf.nn.sigmoid(tf.matmul(W2, hidden1, name='hidden2')+b2, name='hidden2_out')
y = tf.matmul(hidden1, W3,name='y') + b3
y_ = tf.placeholder(tf.float32, [None, 10],name='y_')
# Create the model
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, y_))
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(cross_entropy)
sess = tf.Session()
summary_writer = tf.train.SummaryWriter('log_simple_graph', sess.graph)
init = tf.initialize_all_variables()
sess.run(init)
for i in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    summary = sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
    if summary is not None:
        summary_writer.add_event(summary)
# Test trained model
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))

Plot in tensorboard is always closes and like a circle

I was trying to plot a loss curve, but it always comes out abnormal (it looks like a circle; I really don't know how to describe it properly in English). I have found many topics about problems like this but just can't solve mine. My tensorflow version is 0.10.0.
import tensorflow as tf
from tensorflow.core.util.event_pb2 import SessionLog
import os
# initialize variables/model parameters
# define the training loop operations
def inputs():
    # read/generate input training data X and expected outputs Y
    weight_age = [[84,46],[73,20],[65,52],[70,30],[76,57],[69,25],[63,28],[72,36],[79,57],[75,44],[27,24],
                  [89,31],[65,52],[57,23],[59,60],[69,48],[60,34],[79,51],[75,50],[82,34],[59,46],[67,23],
                  [85,37],[55,40],[63,30]]
    blood_fat_content = [354,190,405,263,451,302,288,385,402,365,209,290,346,
                         254,395,434,220,374,308,220,311,181,274,303,244]
    return tf.to_float(weight_age), tf.to_float(blood_fat_content)

def inference(X):
    # compute inference model over data X and return the result
    return tf.matmul(X, W) + b

def loss(X, Y):
    # compute loss over training data X and expected outputs Y
    Y_predicted = inference(X)
    return tf.reduce_sum(tf.squared_difference(Y, Y_predicted))

def train(total_loss):
    # train / adjust model parameters according to computed total loss
    learning_rate = 1e-7
    return tf.train.GradientDescentOptimizer(learning_rate).minimize(total_loss)

def evaluate(sess, X, Y):
    # evaluate the resulting trained model
    print(sess.run(inference([[80., 25.]])))
    print(sess.run(inference([[60., 25.]])))
g1 = tf.Graph()
with tf.Session(graph=g1) as sess:
    W = tf.Variable(tf.zeros([2,1]), name="weights")
    b = tf.Variable(0., name="bias")
    tf.initialize_all_variables().run()
    X, Y = inputs()
    print(sess.run(W))
    total_loss = loss(X, Y)
    train_op = train(total_loss)
    tf.scalar_summary("loss", total_loss)
    summaries = tf.merge_all_summaries()
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    summary_writer = tf.train.SummaryWriter('linear', g1)
    summary_writer.add_session_log(session_log=SessionLog(status=SessionLog.START), global_step=1)
    # actual training loop
    training_steps = 100
    tolerance = 100
    total_loss_last = 0
    initial_step = 0
    # Create a saver.
    saver = tf.train.Saver()
    # verify if we don't have a checkpoint saved already
    ckpt = tf.train.get_checkpoint_state(os.path.dirname('my_model'))
    if ckpt and ckpt.model_checkpoint_path:
        # Restores from checkpoint
        saver.restore(sess, ckpt.model_checkpoint_path)
        initial_step = int(ckpt.model_checkpoint_path.rsplit('-', 1)[1])
        # summary_writer.add_session_log(SessionLog(status=SessionLog.START), global_step=initial_step)
    for step in range(initial_step, training_steps):
        sess.run([train_op])
        if step % 20 == 0:
            saver.save(sess, 'my-model', global_step=step)
        gap = abs(sess.run(total_loss) - total_loss_last)
        total_loss_last = sess.run(total_loss)
        summary_writer.add_summary(sess.run(summaries), step)
        # for debugging and learning purposes, see how the loss gets decremented thru training steps
        if step % 10 == 0:
            print("loss: ", sess.run([total_loss]))
            print("step: ", step)
        if gap < tolerance:
            break
    # evaluation...
    evaluate(sess, X, Y)
    coord.request_stop()
    coord.join(threads)
    saver.save(sess, 'my-model', global_step=training_steps)
    summary_writer.flush()
    sess.close()