My neural network takes too much time to train one epoch

I am training a neural network to classify traffic signs, but it takes over 30 minutes to train a single epoch. The batch size is 64, the learning rate is 0.002, and the input is 20x20 pixels with 3 channels. The model summary shows 173,931 trainable parameters. Is that too many, or is it reasonable?
Here is the network architecture
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary

class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        # Max pooling layer
        self.pool = nn.MaxPool2d(2, 2)
        # Linear fully connected layers
        self.fc1 = nn.Linear(32 * 5 * 5, 200)
        self.fc2 = nn.Linear(200, 43)
        # Dropout
        self.dropout = nn.Dropout(p=0.25)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 32 * 5 * 5)
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x
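For reference, the 173,931 figure from the model summary matches the layers above:

conv1: (3 * 3 * 3) * 16 + 16 = 448
conv2: (3 * 3 * 16) * 32 + 32 = 4,640
fc1: 800 * 200 + 200 = 160,200
fc2: 200 * 43 + 43 = 8,643
total: 448 + 4,640 + 160,200 + 8,643 = 173,931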
Here is the optimizer instance
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optim = optim.SGD(model.parameters(), lr=0.002)  # note: this rebinds the name `optim` from the module to the optimizer instance
Here is the training code
epochs = 20
valid_loss_min = np.Inf
print("Training the network")
for epoch in range(1, epochs + 1):
    train_loss = 0
    valid_loss = 0
    model.train()
    for data, target in train_data:
        if gpu_available:
            data, target = data.cuda(), target.cuda()
        optim.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optim.step()
        train_loss += loss.item() * data.size(0)

    #########################
    ###### Validate #########
    model.eval()
    for data, target in valid_data:
        if gpu_available:
            data, target = data.cuda(), target.cuda()
        output = model(data)
        loss = criterion(output, target)
        valid_loss += loss.item() * data.size(0)

    train_loss = train_loss / len(train_data.dataset)
    valid_loss = valid_loss / len(valid_data.dataset)
    print("Epoch {}.....Train Loss = {:.6f}....Valid Loss = {:.6f}".format(epoch, train_loss, valid_loss))
    if valid_loss < valid_loss_min:
        torch.save(model.state_dict(), 'model_traffic.pt')
        print("Valid Loss min {:.6f} >>> {:.6f}".format(valid_loss_min, valid_loss))
        valid_loss_min = valid_loss
I am using a GPU through Google Colab.
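As a quick sanity check (a minimal sketch, assuming the objects are named as in the code above), these prints confirm that the model and a batch of data actually end up on the GPU:

print(torch.cuda.is_available())            # True when Colab has assigned a GPU runtime
print(next(model.parameters()).device)      # should print cuda:0 once the model has been moved with .cuda()
data, target = next(iter(train_data))
print(data.device)                          # stays cpu until data.cuda() is called inside the loop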

Related

I want to add TensorBoard to the code from pytorch.org

I'm pretty new to deep learning, but I want to add TensorBoard to the following code to track loss, accuracy, average precision, and so on.
Sample code from the TorchVision 0.3 Object Detection Finetuning Tutorial
http://pytorch.org/tutorials/intermediate/torchvision_tutorial.html
import os
import numpy as np
import torch
from PIL import Image
import sys
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

from engine import train_one_epoch, evaluate
import utils
import transforms as T
#from torch.utils.tensorboard import SummaryWriter
#writer = SummaryWriter()

class PennFudanDataset(object):
    def __init__(self, root, transforms):
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "PNGImages"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "PedMasks"))))

    def __getitem__(self, idx):
        # load images and masks
        img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
        mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        mask = Image.open(mask_path)
        mask = np.array(mask)
        # instances are encoded as different colors
        obj_ids = np.unique(mask)
        # first id is the background, so remove it
        obj_ids = obj_ids[1:]
        # split the color-encoded mask into a set
        # of binary masks
        masks = mask == obj_ids[:, None, None]
        # get bounding box coordinates for each mask
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        return len(self.imgs)
def get_model_instance_segmentation(num_classes):
    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)
    return model

def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)
def main():
    # train on the GPU or on the CPU, if a GPU is not available
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # our dataset has two classes only - background and person
    num_classes = 2
    # use our dataset and defined transformations
    dataset = PennFudanDataset('PennFudanPed', get_transform(train=True))
    dataset_test = PennFudanDataset('PennFudanPed', get_transform(train=False))
    # split the dataset in train and test set
    indices = torch.randperm(len(dataset)).tolist()
    dataset = torch.utils.data.Subset(dataset, indices[:-50])
    dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])
    # define training and validation data loaders
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=4, shuffle=True, num_workers=4,
        collate_fn=utils.collate_fn)
    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=2, shuffle=False, num_workers=4,
        collate_fn=utils.collate_fn)
    # get the model using our helper function
    model = get_model_instance_segmentation(num_classes)
    # move model to the right device
    model.to(device)
    # construct an optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)
    # and a learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)
    # let's train it for 10 epochs
    num_epochs = 10
    for epoch in range(num_epochs):
        # train for one epoch, printing every 10 iterations
        train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
        # update the learning rate
        lr_scheduler.step()
        # evaluate on the test dataset
        evaluate(model, data_loader_test, device=device)
    print("That's it!")

if __name__ == "__main__":
    main()
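A minimal way to start (a sketch, not code from the tutorial itself) is to uncomment the SummaryWriter lines at the top of the script and log one scalar per epoch from inside the loop in main(). Here 'runs/pennfudan' is just an example log directory, and epoch_loss is a hypothetical value you would need to collect yourself, for example by modifying engine.py to expose the training loss:

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('runs/pennfudan')   # event files are written under ./runs/pennfudan

# inside the epoch loop in main(), after train_one_epoch(...):
# epoch_loss = ...  (hypothetical: collect it from engine.py or recompute it)
writer.add_scalar('Loss/train', epoch_loss, global_step=epoch)

writer.flush()
writer.close()

Accuracy and average precision can be logged the same way with additional add_scalar calls once those values are available.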

No GPU usage apparent in Google Cloud VM with PyTorch already installed and CUDA 10

I have been running a network on my own machine; it's nothing special. I wanted to train it faster, so I moved to Google Cloud. But I noticed something odd: my machine with a GTX 1050 Ti was faster than a V100 GPU. That didn't add up, so I checked the GPU utilization. Even though I put the GPU under load by building a big network and pushing a lot of data through it, moving both the model and the data with a simple .cuda(), no usage was shown by the nvidia-smi command, as shown in the image.
You can check my code here:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("The device is:", device, torch.cuda.get_device_name(0), "and how many are they", torch.cuda.device_count())

# We load the training data
Samples, Ocupancy, num_samples, Samples_per_slice = common.load_samples(args.samples_filename)
Samples = Samples * args.scaling_todo
print(Samples_per_slice)

# Divide into slices
Organize_Positions, Orginezed_Ocupancy, batch_size = common.organize_sample_data(Samples, Ocupancy, num_samples, Samples_per_slice, args.num_batches)

phi = common.MLP(3, 1).cuda()

x_test = torch.from_numpy(Organize_Positions.astype(np.float32)).cuda()
y_test = torch.from_numpy(Orginezed_Ocupancy.astype(np.float32)).cuda()

all_data = common.CustomDataset(x_test, y_test)

# Divide the data into slices
Slice_data = DataLoader(dataset=all_data, batch_size=batch_size, shuffle=False)  # only take batch_size = n/b TODO Don't shuffle
#Chunky_data = DataLoader(dataset=Slice_data, batch_size=chunch_size, shuffle=False)

criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(phi.parameters(), lr=0.0001)

epoch = args.num_epochs
fit_start_time = time.time()

phi.train()
for epoch in range(epoch):
    curr_epoch_loss = 0
    batch = 0
    for x_batch, y_batch in Slice_data:
        optimizer.zero_grad()
        x_train = x_batch
        #print(x_train, batch_size)
        y_train = y_batch
        y_pred = phi(x_train)
        #print(y_pred, x_train)
        loss = criterion(y_pred.squeeze(), y_train.squeeze())
        curr_epoch_loss += loss
        print('Batch {}: train loss: {}'.format(batch, loss.item()))
        # Backward pass
        loss.backward()
        optimizer.step()  # Optimizes only phi parameters
        batch += 1
    print('Epoch {}: train loss: {}'.format(epoch, loss.item()))

fit_end_time = time.time()
print("Total time = %f" % (fit_end_time - fit_start_time))

# Save model
torch.save({'state_dict': phi.state_dict()}, args.model_filename)
and the model here:
class MLP(nn.Module):
    def __init__(self, in_dim: int, out_dim: int):
        super().__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.fc1 = nn.Linear(in_dim, 128)
        self.fc1_bn = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 256)
        self.fc2_bn = nn.BatchNorm1d(256)
        self.fc3 = nn.Linear(256, 512)
        self.fc3_bn = nn.BatchNorm1d(512)
        self.fc4 = nn.Linear(512, 512)
        self.fc4_bn = nn.BatchNorm1d(512)
        self.fc5 = nn.Linear(512, out_dim, bias=False)
        self.relu = nn.LeakyReLU()

    def forward(self, x):
        x = self.relu(self.fc1_bn(self.fc1(x)))
        x = self.relu(self.fc2_bn(self.fc2(x)))  # leaky
        x = self.relu(self.fc3_bn(self.fc3(x)))
        x = self.relu(self.fc4_bn(self.fc4(x)))
        x = self.fc5(x)
        return x

class CustomDataset(Dataset):
    def __init__(self, x_tensor, y_tensor):
        self.x = x_tensor
        self.y = y_tensor

    def __getitem__(self, index):
        return (self.x[index], self.y[index])

    def __len__(self):
        return len(self.x)
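A quick way to see where the tensors actually live (a minimal sketch, assuming the names from the code above) is to print their devices and the allocated CUDA memory right after moving everything over:

print(next(phi.parameters()).device)                          # should be cuda:0 after common.MLP(3, 1).cuda()
print(x_test.device, y_test.device)                           # should both be cuda:0
print(torch.cuda.memory_allocated() / 1e6, "MB allocated")    # non-zero if the tensors really sit in GPU memory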

Why do loss values increase after some epochs with sampled_softmax_loss

I'm using TensorFlow to train a word2vec skip-gram model. The computation graph is in the code below:
# training data
self.dataset = tf.data.experimental.make_csv_dataset(file_name, batch_size=self.batch_size, column_names=['input', 'output'], header=False, num_epochs=self.epochs)
self.datum = self.dataset.make_one_shot_iterator().get_next()
self.inputs, self.labels = self.datum['input'], self.datum['output']

# embedding layer
self.embedding_g = tf.Variable(tf.random_uniform((self.n_vocab, self.n_embedding), -1, 1))
self.embed = tf.nn.embedding_lookup(self.embedding_g, self.inputs)

# softmax layer
self.softmax_w_g = tf.Variable(tf.truncated_normal((self.n_context, self.n_embedding)))
self.softmax_b_g = tf.Variable(tf.zeros(self.n_context))

# Calculate the loss using negative sampling
self.labels = tf.reshape(self.labels, [-1, 1])
self.loss = tf.nn.sampled_softmax_loss(
    weights=self.softmax_w_g,
    biases=self.softmax_b_g,
    labels=self.labels,
    inputs=self.embed,
    num_sampled=self.n_sampled,
    num_classes=self.n_context)

self.cost = tf.reduce_mean(self.loss)
self.optimizer = tf.train.AdamOptimizer().minimize(self.cost)
But after 25 epochs, loss values begin to increase. Is there any reason for this?

SciPy/NumPy's pooling/convolution faster than TensorFlow's convolution/pooling?

I am trying to use GPUs to accelerate the convolution and pooling operations in my neural network application (spiking networks). I wrote a small script to see how much speedup I could get by using TensorFlow. Surprisingly, SciPy/NumPy does better. In my application all the inputs (images) are stored on disk, but for this example I created a randomly initialized image of size 27x27 and a weights kernel of size 5x5x30. I made sure that I am not transferring anything from CPU to GPU, and I also increased the input image size to 270x270 and the weights kernel to 7x7x30; still, I don't see any improvement. I made sure that all the TF methods are in fact being executed on my GPUs by setting
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
I have access to 2 GPUs (Tesla K20m) on a cluster.
Here's my code:
import tensorflow as tf
import numpy as np
from scipy import signal
import time

sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

image_size = 27
kernel_size = 5
nofMaps = 30

def convolution(Image, weights):
    in_channels = 1  # 1 because our image has 1 unit in the -z direction.
    out_channels = weights.shape[-1]
    strides_1d = [1, 1, 1, 1]

    #in_2d = tf.constant(Image, dtype=tf.float32)
    in_2d = Image
    #filter_3d = tf.constant(weights, dtype=tf.float32)
    filter_3d = weights

    in_width = int(in_2d.shape[0])
    in_height = int(in_2d.shape[1])
    filter_width = int(filter_3d.shape[0])
    filter_height = int(filter_3d.shape[1])

    input_4d = tf.reshape(in_2d, [1, in_height, in_width, in_channels])
    kernel_4d = tf.reshape(filter_3d, [filter_height, filter_width, in_channels, out_channels])
    inter = tf.nn.conv2d(input_4d, kernel_4d, strides=strides_1d, padding='VALID')
    output_3d = tf.squeeze(inter)
    output_3d = sess.run(output_3d)
    return output_3d

def pooling(Image):
    in_channels = Image.shape[-1]
    Image_3d = tf.constant(Image, dtype=tf.float32)
    in_width = int(Image.shape[0])
    in_height = int(Image.shape[1])
    Image_4d = tf.reshape(Image_3d, [1, in_width, in_height, in_channels])
    pooled_pots4d = tf.layers.max_pooling2d(inputs=Image_4d, pool_size=[2, 2], strides=2)
    pooled_pots3d = tf.squeeze(pooled_pots4d)
    return sess.run(pooled_pots3d)

t1 = time.time()
#with tf.device('/device:GPU:1'):
Image = tf.random_uniform([image_size, image_size], name='Image')
weights = tf.random_uniform([kernel_size, kernel_size, nofMaps], name='Weights')
conv_result = convolution(Image, weights)
pool_result = pooling(conv_result)
print('Time taken:{}'.format(time.time() - t1))
#with tf.device('/device:CPU:0'):
print('Pool_result shape:{}'.format(pool_result.shape))
#print('first map of pool result:\n', pool_result[:,:,0])

def scipy_convolution(Image, weights):
    instant_conv1_pots = np.zeros((image_size - kernel_size + 1, image_size - kernel_size + 1, nofMaps))
    for i in range(weights.shape[-1]):
        instant_conv1_pots[:, :, i] = signal.correlate(Image, weights[:, :, i], mode='valid', method='fft')
    return instant_conv1_pots

def scipy_pooling(conv1_spikes):
    '''
    Reshape splitting each of the two axes into two each such that the
    latter of the split axes is of the same length as the block size.
    This would give us a 4D array. Then, perform maximum finding along those
    latter axes, which would be the second and fourth axes in that 4D array.
    https://stackoverflow.com/questions/41813722/numpy-array-reshaped-but-how-to-change-axis-for-pooling
    '''
    if conv1_spikes.shape[0] % 2 != 0:  # if the array has an odd size, omit the last row and column
        conv1_spikes = conv1_spikes[0:-1, 0:-1, :]
    m, n = conv1_spikes[:, :, 0].shape
    o = conv1_spikes.shape[-1]
    pool1_spikes = np.zeros((m/2, n/2, o))
    for i in range(o):
        pool1_spikes[:, :, i] = conv1_spikes[:, :, i].reshape(m/2, 2, n/2, 2).max(axis=(1, 3))
    return pool1_spikes

t1 = time.time()
Image = np.random.rand(image_size, image_size)
weights = np.random.rand(kernel_size, kernel_size, nofMaps)
conv_result = scipy_convolution(Image, weights)
pool_result = scipy_pooling(conv_result)
print('Time taken:{}'.format(time.time() - t1))
print('Pool_result shape:{}'.format(pool_result.shape))
#print('first map of pool result:\n', pool_result[:,:,0])
Results are as follows:
Time taken:0.746644973755
Pool_result shape:(11, 11, 30)
Time taken:0.0127348899841
Pool_result shape:(11, 11, 30)
Following suggestions from a commenter, I set image_size=270 and enclosed both the convolution and pooling functions in a for loop. Now TF performs better than SciPy. Note that I am using tf.nn.conv2d and NOT tf.layers.conv2d. I also set the parameter use_cudnn_on_gpu=True in tf.nn.conv2d, but that neither hurt nor helped.
Here's the code:
import tensorflow as tf
import numpy as np
from scipy import signal
import time

sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

image_size = 270
kernel_size = 5
nofMaps = 30

def convolution(Image, weights):
    in_channels = 1  # 1 because our image has 1 unit in the -z direction.
    out_channels = weights.shape[-1]
    strides_1d = [1, 1, 1, 1]

    #in_2d = tf.constant(Image, dtype=tf.float32)
    in_2d = Image
    #filter_3d = tf.constant(weights, dtype=tf.float32)
    filter_3d = weights

    in_width = int(in_2d.shape[0])
    in_height = int(in_2d.shape[1])
    filter_width = int(filter_3d.shape[0])
    filter_height = int(filter_3d.shape[1])

    input_4d = tf.reshape(in_2d, [1, in_height, in_width, in_channels])
    kernel_4d = tf.reshape(filter_3d, [filter_height, filter_width, in_channels, out_channels])
    inter = tf.nn.conv2d(input_4d, kernel_4d, strides=strides_1d, padding='VALID', use_cudnn_on_gpu=True)
    output_3d = tf.squeeze(inter)
    #t1 = time.time()
    output_3d = sess.run(output_3d)
    #print('TF Time for Conv:{}'.format(time.time()-t1))
    return output_3d

def pooling(Image):
    in_channels = Image.shape[-1]
    Image_3d = tf.constant(Image, dtype=tf.float32)
    in_width = int(Image.shape[0])
    in_height = int(Image.shape[1])
    Image_4d = tf.reshape(Image_3d, [1, in_width, in_height, in_channels])
    pooled_pots4d = tf.layers.max_pooling2d(inputs=Image_4d, pool_size=[2, 2], strides=2)
    pooled_pots3d = tf.squeeze(pooled_pots4d)
    #t1 = time.time()
    pool_res = sess.run(pooled_pots3d)
    #print('TF Time for Pool:{}'.format(time.time()-t1))
    return pool_res

#with tf.device('/device:GPU:1'):
Image = tf.random_uniform([image_size, image_size], name='Image')
weights = tf.random_uniform([kernel_size, kernel_size, nofMaps], name='Weights')
#init = tf.global_variables_initializer
#sess.run(init)
t1 = time.time()
for i in range(150):
    #t1 = time.time()
    conv_result = convolution(Image, weights)
    pool_result = pooling(conv_result)
    #print('TF Time taken:{}'.format(time.time()-t1))
print('TF Time taken:{}'.format(time.time() - t1))
#with tf.device('/device:CPU:0'):
print('TF Pool_result shape:{}'.format(pool_result.shape))
#print('first map of pool result:\n', pool_result[:,:,0])

def scipy_convolution(Image, weights):
    instant_conv1_pots = np.zeros((image_size - kernel_size + 1, image_size - kernel_size + 1, nofMaps))
    for i in range(weights.shape[-1]):
        instant_conv1_pots[:, :, i] = signal.correlate(Image, weights[:, :, i], mode='valid', method='fft')
    return instant_conv1_pots

def scipy_pooling(conv1_spikes):
    '''
    Reshape splitting each of the two axes into two each such that the
    latter of the split axes is of the same length as the block size.
    This would give us a 4D array. Then, perform maximum finding along those
    latter axes, which would be the second and fourth axes in that 4D array.
    https://stackoverflow.com/questions/41813722/numpy-array-reshaped-but-how-to-change-axis-for-pooling
    '''
    if conv1_spikes.shape[0] % 2 != 0:  # if the array has an odd size, omit the last row and column
        conv1_spikes = conv1_spikes[0:-1, 0:-1, :]
    m, n = conv1_spikes[:, :, 0].shape
    o = conv1_spikes.shape[-1]
    pool1_spikes = np.zeros((m/2, n/2, o))
    for i in range(o):
        pool1_spikes[:, :, i] = conv1_spikes[:, :, i].reshape(m/2, 2, n/2, 2).max(axis=(1, 3))
    return pool1_spikes

Image = np.random.rand(image_size, image_size)
weights = np.random.rand(kernel_size, kernel_size, nofMaps)
t1 = time.time()
for i in range(150):
    conv_result = scipy_convolution(Image, weights)
    pool_result = scipy_pooling(conv_result)
print('Scipy Time taken:{}'.format(time.time() - t1))
print('Scipy Pool_result shape:{}'.format(pool_result.shape))
#print('first map of pool result:\n', pool_result[:,:,0])
Here are results:

image_size = 27x27
kernel_size = 5x5x30
iterations = 150
TF Time taken:11.0800771713
TF Pool_result shape:(11, 11, 30)
Scipy Time taken:1.4141368866
Scipy Pool_result shape:(11, 11, 30)

image_size = 270x270
kernel_size = 5x5x30
iterations = 150
TF Time taken:26.2359631062
TF Pool_result shape:(133, 133, 30)
Scipy Time taken:31.6651778221
Scipy Pool_result shape:(11, 11, 30)

image_size = 500x500
kernel_size = 5x5x30
iterations = 150
TF Time taken:89.7967050076
TF Pool_result shape:(248, 248, 30)
Scipy Time taken:143.391746044
Scipy Pool_result shape:(248, 248, 30)
In the 2nd case you can see that I got about an 18% reduction in time.
In the 3rd case you can see that I got about a 38% reduction in time.

Plot in TensorBoard always closes on itself, like a circle

I was trying to plot a loss curve, but it always looks abnormal (it closes on itself like a circle; I really don't know how to describe it properly in English). I have found many topics about questions like this but still can't solve it. My TensorFlow version is 0.10.0.
import tensorflow as tf
from tensorflow.core.util.event_pb2 import SessionLog
import os

# initialize variables/model parameters
# define the training loop operations

def inputs():
    # read/generate input training data X and expected outputs Y
    weight_age = [[84,46],[73,20],[65,52],[70,30],[76,57],[69,25],[63,28],[72,36],[79,57],[75,44],[27,24],
                  [89,31],[65,52],[57,23],[59,60],[69,48],[60,34],[79,51],[75,50],[82,34],[59,46],[67,23],
                  [85,37],[55,40],[63,30]]
    blodd_fat_content = [354,190,405,263,451,302,288,385,402,365,209,290,346,
                         254,395,434,220,374,308,220,311,181,274,303,244]
    return tf.to_float(weight_age), tf.to_float(blodd_fat_content)

def inference(X):
    # compute inference model over data X and return the result
    return tf.matmul(X, W) + b

def loss(X, Y):
    # compute loss over training data X and expected outputs Y
    Y_predicted = inference(X)
    return tf.reduce_sum(tf.squared_difference(Y, Y_predicted))

def train(total_loss):
    # train / adjust model parameters according to computed total loss
    learning_rate = 1e-7
    return tf.train.GradientDescentOptimizer(learning_rate).minimize(total_loss)

def evaluate(sess, X, Y):
    # evaluate the resulting trained model
    print(sess.run(inference([[80., 25.]])))
    print(sess.run(inference([[60., 25.]])))

g1 = tf.Graph()
with tf.Session(graph=g1) as sess:
    W = tf.Variable(tf.zeros([2,1]), name="weights")
    b = tf.Variable(0., name="bias")
    tf.initialize_all_variables().run()
    X, Y = inputs()
    print(sess.run(W))
    total_loss = loss(X, Y)
    train_op = train(total_loss)

    tf.scalar_summary("loss", total_loss)
    summaries = tf.merge_all_summaries()

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    summary_writer = tf.train.SummaryWriter('linear', g1)
    summary_writer.add_session_log(session_log=SessionLog(status=SessionLog.START), global_step=1)

    # actual training loop
    training_steps = 100
    tolerance = 100
    total_loss_last = 0
    initial_step = 0

    # Create a saver.
    saver = tf.train.Saver()

    # verify if we don't have a checkpoint saved already
    ckpt = tf.train.get_checkpoint_state(os.path.dirname('my_model'))
    if ckpt and ckpt.model_checkpoint_path:
        # Restores from checkpoint
        saver.restore(sess, ckpt.model_checkpoint_path)
        initial_step = int(ckpt.model_checkpoint_path.rsplit('-', 1)[1])
        # summary_writer.add_session_log(SessionLog(status=SessionLog.START), global_step=initial_step)

    for step in range(initial_step, training_steps):
        sess.run([train_op])
        if step % 20 == 0:
            saver.save(sess, 'my-model', global_step=step)

        gap = abs(sess.run(total_loss) - total_loss_last)
        total_loss_last = sess.run(total_loss)
        summary_writer.add_summary(sess.run(summaries), step)
        # for debugging and learning purposes, see how the loss gets decremented thru training steps
        if step % 10 == 0:
            print("loss: ", sess.run([total_loss]))
            print("step: ", step)
        if gap < tolerance:
            break

    # evaluation...
    evaluate(sess, X, Y)
    coord.request_stop()
    coord.join(threads)
    saver.save(sess, 'my-model', global_step=training_steps)
    summary_writer.flush()
    sess.close()