How to access representation of the hidden layers in CNTK 2.2? - hidden

Assume I have a simple model.
def create_model(out_classes):
    """Build a small dense network and return the composed model.

    The network chains two 16-unit ReLU layers (named 'FLayer' and
    'LLayer') followed by a linear output layer 'CLayer' that has
    `out_classes` units.

    :param out_classes: number of units in the final layer
    :returns: the composition FLayer -> LLayer -> CLayer
    """
    first_hidden = Dense(16, activation=C.relu, bias=True, init_bias=0,
                         name='FLayer')
    second_hidden = Dense(16, activation=C.relu, bias=True, init_bias=0,
                          name='LLayer')
    classifier = Dense(out_classes, name='CLayer')
    # Applying a layer to another function chains the two together.
    return classifier(second_hidden(first_hidden))


# Instantiate the network and apply it to the input variable.
model = create_model(nClasses)
z = model(feature)
How do I access the representation of the FLayer or LLayer during the testing of my trained model?

By representation, you mean accessing the functions of the hidden layers, correct? I created some sample code to show how. I tested two approaches: the first builds on what you did, and the second uses the functional API of CNTK, which I prefer.
The easy way to get access to the functions is to return them:
import cntk
import numpy as np
def create_model(output_dimension):
    """Build three chained linear Dense layers and return every stage.

    Every layer has no activation and its weights and biases are
    initialised to 1, which makes the arithmetic easy to follow by hand.

    :param output_dimension: output size of the second and third layer
    :returns: tuple ``(l0, l1, l2)`` where ``l0`` is the first layer,
              ``l1`` is the first two layers chained and ``l2`` is all
              three layers chained
    """
    def unit_dense(width):
        # Linear layer with all-ones weights and bias.
        return cntk.layers.Dense(shape=width, activation=None, init=1,
                                 init_bias=1)

    first = unit_dense(1)
    first_two = unit_dense(output_dimension)(first)
    all_three = unit_dense(output_dimension)(first_two)
    return first, first_two, all_three
input_dim = 1
output_dim = 1

# Unpack every stage of the network so each one can be evaluated directly.
l0, l1, l2 = create_model(output_dim)

# One-dimensional input placeholder shared by all stages.
input = cntk.input_variable(shape=1)
layer0, layer1, layer2 = l0(input), l1(input), l2(input)

print("Non functional API")
# Evaluate each stage on the scalar input 0 and report its output.
for template, stage in (("output of layer 0: {}", layer0),
                        ("output of layer 1: {}", layer1),
                        ("output of model: {}", layer2)):
    feed = {input: np.array([0], dtype=np.float32)}
    print(template.format(stage.eval(feed)))
The above model takes an input of dimension 1 and outputs a vector of dimension 1. I also set the weights and biases to 1 in each layer so that it is easy to follow the computation. The function create_model returns a tuple which contains all the layers, so these can be accessed externally.
The functional API approach is better in my opinion. Below, I create a list containing the different layers and then build the model using cntk.layers.Sequential. Then, create_model2 returns a tuple whose entries are 1) the final model and 2) the list containing all the layers. This is cleaner when you have several layers. It also gives you better control over what you can do with each layer.
def create_model2(output_dimension):
    """Build the three-layer linear model with ``cntk.layers.Sequential``.

    Every layer has no activation and its weights and biases are
    initialised to 1.

    :param output_dimension: output size of the second and third layer
    :returns: tuple ``(model, layers)`` - the Sequential model and the
              list of the individual Dense layers it was built from
    """
    widths = (1, output_dimension, output_dimension)
    layers = [
        cntk.layers.Dense(shape=width, activation=None, init=1, init_bias=1)
        for width in widths
    ]
    return cntk.layers.Sequential(layers), layers
m, layers = create_model2(output_dim)

# Each individual layer applied directly to the input ...
layer0, layer1, layer2 = (layer(input) for layer in layers)
# ... and prefixes of the stack, chained with Sequential.
layer01 = cntk.layers.Sequential(layers[0:2])(input)
layer012 = cntk.layers.Sequential(layers[0:3])(input)
model = m(input)

print("Functional API")
# Evaluate every node on the scalar input 0 and report its output.
for template, node in (("output of layer 0: {}", layer0),
                       ("output of layer 1: {}", layer1),
                       ("output of layer 2: {}", layer2),
                       ("output of model: {}", model),
                       ("output of layer 0 and 1: {}", layer01),
                       ("output of layer 0 and 1 and 2: {}", layer012)):
    feed = {input: np.array([0], dtype=np.float32)}
    print(template.format(node.eval(feed)))

Related

Is this method of calculating the top-5 accuracy in pytorch correct?

I am trying to validate the findings of a paper by testing it on the same model architecture as well as the same dataset reported by the paper. I have been using the imagenet script provided in the official pytorch repository's examples section to do the same.
class AverageMeter(object):
    """Computes and stores the average and current value.

    Imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262

    Attributes:
        val: the most recent value passed to ``update``.
        sum: weighted sum of all values seen so far.
        count: total weight (number of samples) seen so far.
        avg: running weighted average, ``sum / count``.
    """

    def __init__(self):
        # Must be the real constructor (``__init__``); otherwise the
        # statistics are never initialised and ``update`` fails.
        self.reset()

    def reset(self):
        """Reset all statistics to zero."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` observed over ``n`` samples.

        :param val: the value to record (e.g. a per-batch mean)
        :param n: weight of the value (e.g. the batch size)
        """
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k.

    :param output: score tensor of shape (batch, num_classes)
    :param target: tensor of ground-truth class indices, shape (batch,)
    :param topk: iterable of k values to evaluate
    :returns: list with one 0-dim tensor per k, holding the percentage of
              samples whose true class is among the k highest scores
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Indices of the maxk highest scores per sample, best first.
        _, pred = output.topk(maxk, 1, True, True)
        # Transpose to (maxk, batch) so that row i holds the i-th guess.
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # A sample counts as correct if any of its first k guesses hit.
            correct_k = correct[:k].reshape(-1).float().sum(0)
            res.append(correct_k.mul(100.0 / batch_size))
        return res
top1 = AverageMeter()
top5 = AverageMeter()

# switch to evaluate mode
model.eval()

# Gradients are not needed for evaluation. torch.no_grad() replaces the
# deprecated Variable(..., volatile=True) wrapper, so plain tensors are
# passed to the model directly.
with torch.no_grad():
    for batch_idx, (inputs, targets) in enumerate(test_loader):
        # report progress
        print(f"Processing {batch_idx+1}/{len(test_loader)}")
        inputs, targets = inputs.cuda(), targets.cuda()

        # compute output
        outputs = model(inputs)

        # measure top-1 / top-5 accuracy for this batch
        prec1, prec5 = accuracy(outputs, targets, topk=(1, 5))
        print(prec1, prec5)

        # weight each batch by its size so the average is per-sample
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

# Print the running averages rather than the meter objects themselves.
print(top1.avg)
print(top5.avg)
However the top 5 error which I am getting by using this script is not matching with the one in the paper. Can anyone tell me what is wrong in this particular snippet?

TypeError: only length-1 arrays can be converted to Python scalars, python2.7

I searched for this issue and found several questions reporting the same error, but with different code and different causes, so I was hesitant to post my issue here. After reading the majority of the answers, I still have not found a solution for my issue.
The original and full code here
chapter6.py:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
from datasets import gtsrb
from classifiers import MultiClassSVM
def main():
strategies = ['one-vs-one', 'one-vs-all']
features = [None, 'gray', 'rgb', 'hsv', 'surf', 'hog']
accuracy = np.zeros((2, len(features)))
precision = np.zeros((2, len(features)))
recall = np.zeros((2, len(features)))
for f in xrange(len(features)):
print "feature", features[f]
(X_train, y_train), (X_test, y_test) = gtsrb.load_data(
"datasets/gtsrb_training",
feature=features[f],
test_split=0.2,
seed=42)
# convert to numpy
X_train = np.squeeze(np.array(X_train)).astype(np.float32)
y_train = np.array(y_train)
X_test = np.squeeze(np.array(X_test)).astype(np.float32)
y_test = np.array(y_test)
# find all class labels
labels = np.unique(np.hstack((y_train, y_test)))
for s in xrange(len(strategies)):
print " - strategy", strategies[s]
# set up SVMs
MCS = MultiClassSVM(len(labels), strategies[s])
# training phase
print " - train"
MCS.fit(X_train, y_train)
# test phase
print " - test"
acc, prec, rec = MCS.evaluate(X_test, y_test)
accuracy[s, f] = acc
precision[s, f] = np.mean(prec)
recall[s, f] = np.mean(rec)
print " - accuracy: ", acc
print " - mean precision: ", np.mean(prec)
print " - mean recall: ", np.mean(rec)
# plot results as stacked bar plot
f, ax = plt.subplots(2)
for s in xrange(len(strategies)):
x = np.arange(len(features))
ax[s].bar(x - 0.2, accuracy[s, :], width=0.2, color='b',
hatch='/', align='center')
ax[s].bar(x, precision[s, :], width=0.2, color='r', hatch='\\',
align='center')
ax[s].bar(x + 0.2, recall[s, :], width=0.2, color='g', hatch='x',
align='center')
ax[s].axis([-0.5, len(features) + 0.5, 0, 1.5])
ax[s].legend(('Accuracy', 'Precision', 'Recall'), loc=2, ncol=3,
mode='expand')
ax[s].set_xticks(np.arange(len(features)))
ax[s].set_xticklabels(features)
ax[s].set_title(strategies[s])
plt.show()
if __name__ == '__main__':
main()
classifiers.py
import cv2
import numpy as np
from abc import ABCMeta, abstractmethod
from matplotlib import pyplot as plt
__author__ = "Michael Beyeler"
__license__ = "GNU GPL 3.0 or later"
class Classifier:
    """
    Abstract base class for all classifiers

    A classifier needs to implement at least two methods:
    - fit:       A method to train the classifier by fitting the model to
                 the data.
    - evaluate:  A method to test the classifier by predicting labels of
                 some test data based on the trained model.

    A classifier also needs to specify a classification strategy via
    setting self.mode to either "one-vs-all" or "one-vs-one".
    The one-vs-all strategy involves training a single classifier per
    class, with the samples of that class as positive samples and all
    other samples as negatives.
    The one-vs-one strategy involves training a single classifier per
    class pair, with the samples of the first class as positive samples
    and the samples of the second class as negative samples.

    This class also provides methods to calculate accuracy, precision,
    recall, and the confusion matrix. These helpers read
    self.num_classes, which subclasses must set.
    """

    __metaclass__ = ABCMeta

    @abstractmethod
    def fit(self, X_train, y_train):
        pass

    @abstractmethod
    def evaluate(self, X_test, y_test, visualize=False):
        pass

    def _accuracy(self, y_test, Y_vote):
        """Calculates accuracy

        This method calculates the accuracy based on a vector of
        ground-truth labels (y_test) and a 2D voting matrix (Y_vote) of
        size (len(y_test), num_classes).

        :param y_test: vector of ground-truth labels
        :param Y_vote: 2D voting matrix (rows=samples, cols=class votes)
        :returns: accuracy in [0,1]
        """
        # predicted classes
        y_hat = np.argmax(Y_vote, axis=1)

        # all cases where predicted class was correct
        mask = y_hat == y_test
        return np.float32(np.count_nonzero(mask)) / len(y_test)

    def _precision(self, y_test, Y_vote):
        """Calculates precision

        This method calculates precision extended to multi-class
        classification by help of a confusion matrix. For class c:
        precision = TP / (TP + FP), where a false positive is a sample
        that was predicted as c but whose ground-truth label is not c.
        The definition is the same for both classification strategies.

        :param y_test: vector of ground-truth labels
        :param Y_vote: 2D voting matrix (rows=samples, cols=class votes)
        :returns: vector of per-class precision values, each in [0,1];
                  0 for classes that were never predicted
        """
        conf = self._confusion(y_test, Y_vote)

        # true positives sit on the diagonal
        tp = np.diag(conf).astype(np.float64)
        # row c holds every sample predicted as c, i.e. TP + FP
        predicted = np.sum(conf, axis=1)

        prec = np.zeros(self.num_classes)
        seen = predicted != 0
        prec[seen] = tp[seen] / predicted[seen]
        return prec

    def _recall(self, y_test, Y_vote):
        """Calculates recall

        This method calculates recall extended to multi-class
        classification by help of a confusion matrix. For class c:
        recall = TP / (TP + FN), where a false negative is a sample whose
        ground-truth label is c but that was predicted as another class.
        The definition is the same for both classification strategies.

        :param y_test: vector of ground-truth labels
        :param Y_vote: 2D voting matrix (rows=samples, cols=class votes)
        :returns: vector of per-class recall values, each in [0,1];
                  0 for classes without any ground-truth sample
        """
        conf = self._confusion(y_test, Y_vote)

        # true positives sit on the diagonal
        tp = np.diag(conf).astype(np.float64)
        # column c holds every sample whose true label is c, i.e. TP + FN
        actual = np.sum(conf, axis=0)

        recall = np.zeros(self.num_classes)
        seen = actual != 0
        recall[seen] = tp[seen] / actual[seen]
        return recall

    def _confusion(self, y_test, Y_vote):
        """Calculates confusion matrix

        This method calculates the confusion matrix based on a vector of
        ground-truth labels (y_test) and a 2D voting matrix (Y_vote) of
        size (len(y_test), num_classes).
        Matrix element conf[r,c] will contain the number of samples that
        were predicted to have label r but have ground-truth label c.

        :param y_test: vector of ground-truth labels
        :param Y_vote: 2D voting matrix (rows=samples, cols=class votes)
        :returns: confusion matrix
        """
        y_hat = np.argmax(Y_vote, axis=1)
        conf = np.zeros((self.num_classes, self.num_classes)).astype(np.int32)
        for c_true in range(self.num_classes):
            # looking at all samples of a given class, c_true
            # how many were classified as c_true? how many as others?
            for c_pred in range(self.num_classes):
                # Count the boolean mask itself. Counting the non-zero
                # entries of np.where(mask) would count sample *indices*
                # and silently drop a hit at sample index 0.
                hits = (y_test == c_true) * (y_hat == c_pred)
                conf[c_pred, c_true] = np.count_nonzero(hits)
        return conf
class MultiClassSVM(Classifier):
"""
Multi-class classification using Support Vector Machines (SVMs)
This class implements an SVM for multi-class classification. Whereas
some classifiers naturally permit the use of more than two classes
(such as neural networks), SVMs are binary in nature.
However, we can turn SVMs into multinomial classifiers using at least
two different strategies:
* one-vs-all: A single classifier is trained per class, with the
samples of that class as positives (label 1) and all
others as negatives (label 0).
* one-vs-one: For k classes, k*(k-1)/2 classifiers are trained for each
pair of classes, with the samples of the one class as
positives (label 1) and samples of the other class as
negatives (label 0).
Each classifier then votes for a particular class label, and the final
decision (classification) is based on a majority vote.
"""
def __init__(self, num_classes, mode="one-vs-all", params=None):
"""
The constructor makes sure the correct number of classifiers is
initialized, depending on the mode ("one-vs-all" or "one-vs-one").
:param num_classes: The number of classes in the data.
:param mode: Which classification mode to use.
"one-vs-all": single classifier per class
"one-vs-one": single classifier per class pair
Default: "one-vs-all"
:param params: SVM training parameters.
For now, default values are used for all SVMs.
Hyperparameter exploration can be achieved by
embedding the MultiClassSVM process flow in a
for-loop that classifies the data with
different parameter values, then pick the
values that yield the best accuracy.
Default: None
"""
self.num_classes = num_classes
self.mode = mode
self.params = params or dict()
# initialize correct number of classifiers
self.classifiers = []
if mode == "one-vs-one":
# k classes: need k*(k-1)/2 classifiers
for _ in xrange(num_classes*(num_classes - 1) / 2):
self.classifiers.append(cv2.ml.SVM_create())
elif mode == "one-vs-all":
# k classes: need k classifiers
for _ in xrange(num_classes):
self.classifiers.append(cv2.ml.SVM_create())
else:
print "Unknown mode ", mode
def fit(self, X_train, y_train, params=None):
"""Fits the model to training data
This method trains the classifier on data (X_train) using either
the "one-vs-one" or "one-vs-all" strategy.
:param X_train: input data (rows=samples, cols=features)
:param y_train: vector of class labels
:param params: dict to specify training options for cv2.SVM.train
leave blank to use the parameters passed to the
constructor
"""
if params is None:
params = self.params
if self.mode == "one-vs-one":
svm_id = 0
for c1 in xrange(self.num_classes):
for c2 in xrange(c1 + 1, self.num_classes):
# indices where class labels are either `c1` or `c2`
data_id = np.where((y_train == c1) + (y_train == c2))[0]
# set class label to 1 where class is `c1`, else 0
y_train_bin = np.where(y_train[data_id] == c1, 1,
0).flatten()
self.classifiers[svm_id].train(X_train[data_id, :],
y_train_bin,
params=self.params)
svm_id += 1
elif self.mode == "one-vs-all":
for c in xrange(self.num_classes):
# train c-th SVM on class c vs. all other classes
# set class label to 1 where class==c, else 0
y_train_bin = np.where(y_train == c, 1, 0).flatten()
# train SVM
self.classifiers[c].train(X_train, y_train_bin,
params=self.params)
def evaluate(self, X_test, y_test, visualize=False):
"""Evaluates the model on test data
This method evaluates the classifier's performance on test data
(X_test) using either the "one-vs-one" or "one-vs-all" strategy.
:param X_test: input data (rows=samples, cols=features)
:param y_test: vector of class labels
:param visualize: flag whether to plot the results (True) or not
(False)
:returns: accuracy, precision, recall
"""
# prepare Y_vote: for each sample, count how many times we voted
# for each class
Y_vote = np.zeros((len(y_test), self.num_classes))
if self.mode == "one-vs-one":
svm_id = 0
for c1 in xrange(self.num_classes):
for c2 in xrange(c1 + 1, self.num_classes):
data_id = np.where((y_test == c1) + (y_test == c2))[0]
X_test_id = X_test[data_id, :]
y_test_id = y_test[data_id]
# set class label to 1 where class==c1, else 0
# y_test_bin = np.where(y_test_id==c1,1,0).reshape(-1,1)
# predict labels
y_hat = self.classifiers[svm_id].predict_all(X_test_id)
for i in xrange(len(y_hat)):
if y_hat[i] == 1:
Y_vote[data_id[i], c1] += 1
elif y_hat[i] == 0:
Y_vote[data_id[i], c2] += 1
else:
print "y_hat[", i, "] = ", y_hat[i]
# we vote for c1 where y_hat is 1, and for c2 where y_hat
# is 0 np.where serves as the inner index into the data_id
# array, which in turn serves as index into the results
# array
# Y_vote[data_id[np.where(y_hat == 1)[0]], c1] += 1
# Y_vote[data_id[np.where(y_hat == 0)[0]], c2] += 1
svm_id += 1
elif self.mode == "one-vs-all":
for c in xrange(self.num_classes):
# set class label to 1 where class==c, else 0
# predict class labels
# y_test_bin = np.where(y_test==c,1,0).reshape(-1,1)
# predict labels
y_hat = self.classifiers[c].predict_all(X_test)
# we vote for c where y_hat is 1
if np.any(y_hat):
Y_vote[np.where(y_hat == 1)[0], c] += 1
# with this voting scheme it's possible to end up with samples
# that have no label at all...in this case, pick a class at
# random...
no_label = np.where(np.sum(Y_vote, axis=1) == 0)[0]
Y_vote[no_label, np.random.randint(self.num_classes,
size=len(no_label))] = 1
accuracy = self._accuracy(y_test, Y_vote)
precision = self._precision(y_test, Y_vote)
recall = self._recall(y_test, Y_vote)
return accuracy, precision, recall
when running chapter6.py
The output is:
feature None
- strategy one-vs-one
- train
Traceback (most recent call last):
File "/home/redhwan/Downloads/opencv-python-blueprints-master/chapter6/chapter6.py", line 77, in <module>
main()
File "/home/redhwan/Downloads/opencv-python-blueprints-master/chapter6/chapter6.py", line 44, in main
MCS.fit(X_train, y_train)
File "/home/redhwan/Downloads/opencv-python-blueprints-master/chapter6/classifiers.py", line 258, in fit
params=self.params)
TypeError: only length-1 arrays can be converted to Python scalars
Please help me, or share any suggestions you have.
Thank you in advance!

Is it possible to find all integer solutions?

I want to get all integer solutions within a limited time. Is that possible?
This is a linear, integer constraint satisfaction problem, which can be solved efficiently by OR Tools' CP-SAT. I've modified their example to solve your problem in Python:
from ortools.sat.python import cp_model
class VarArraySolutionPrinter(cp_model.CpSolverSolutionCallback):
    """Solution callback that prints each solution and counts them."""

    def __init__(self, variables):
        """Store the variables whose values are printed per solution."""
        super().__init__()
        self.__tracked = variables
        self.__found = 0

    def on_solution_callback(self):
        """Print every tracked variable as name=value on a single line."""
        self.__found += 1
        for var in self.__tracked:
            value = self.Value(var)
            print('%s=%i' % (var, value), end=' ')
        print()

    def solution_count(self):
        """Return how many solutions have been reported so far."""
        return self.__found
def SearchForAllSolutionsSampleSat():
    """Enumerate all integer solutions of a small linear system.

    Finds every assignment of x0..x3 in [0, 100] such that
    1*x0 + 2*x1 + 3*x2 + 4*x3 == 30 and x0 + x1 + x2 + x3 >= 2,
    printing each solution followed by the solver status and the total
    number of solutions.
    """
    model = cp_model.CpModel()

    # Problem data: coefficients, equality target, lower bound on the sum.
    coefficients = [1, 2, 3, 4]
    weighted_total = 30
    min_sum = 2

    # One integer variable per coefficient.
    x = [model.NewIntVar(0, 100, f'x{i}')
         for i in range(len(coefficients))]

    # Constraints.
    weighted_sum = sum(var * coef for var, coef in zip(x, coefficients))
    model.Add(weighted_sum == weighted_total)
    model.Add(sum(x) >= min_sum)

    # Solve, reporting every solution through the callback.
    solver = cp_model.CpSolver()
    printer = VarArraySolutionPrinter(x)
    status = solver.SearchForAllSolutions(model, printer)

    print('Status = %s' % solver.StatusName(status))
    print('Number of solutions found: %i' % printer.solution_count())


SearchForAllSolutionsSampleSat()

Working example of multi-stage model in Pyomo

This paper describes Pyomo's Differential and Algebraic Equations framework. It also mentions multi-stage problems; however, it does not show a complete example of such a problem. Does such an example exist somewhere?
The following demonstrates a complete minimum working example of a multi-stage optimization problem using Pyomo's DAE system:
#!/usr/bin/env python3
#http://www.gpops2.com/Examples/OrbitRaising.html
from pyomo.environ import *
from pyomo.dae import *
from pyomo.opt import SolverStatus, TerminationCondition
import random
import matplotlib.pyplot as plt
T = 10       # Maximum time for each stage of the model
STAGES = 3   # Index of the last stage; stages are numbered 0..STAGES

m = ConcreteModel()
m.t = ContinuousSet(bounds=(0, T))       # Time within a stage
m.stages = RangeSet(0, STAGES)           # Integer-valued stage index
m.a = Var(m.stages, m.t)                 # State, per stage and time
m.da = DerivativeVar(m.a, wrt=m.t)       # d(a)/dt
m.u = Var(m.stages, m.t, bounds=(0, 1))  # Control, bounded to [0,1]


def eq_da(m, stage, t):
    """Dynamics: the state's derivative equals the control variable."""
    return m.da[stage, t] == m.u[stage, t]


# Instantiated for every (stage, time) pair.
m.eq_da = Constraint(m.stages, m.t, rule=eq_da)


def eq_stage_continuity(m, stage):
    """Link the end of each stage to the start of the following stage."""
    # The last stage has no successor, so it contributes no constraint.
    if stage == m.stages.last():
        return Constraint.Skip
    return m.a[stage, T] == m.a[stage + 1, 0]


m.eq_stage_continuity = Constraint(m.stages, rule=eq_stage_continuity)


def _init(m):
    """Boundary conditions: `a` starts at 0 (stage 0, time 0)."""
    yield m.a[0, 0] == 0
    yield ConstraintList.End


m.con_boundary = ConstraintList(rule=_init)

# Objective: maximize `a` at the end of the final stage.
m.obj = Objective(expr=m.a[STAGES, T], sense=maximize)

# Discretize the model by collocation.
# nfe: number of finite elements
# ncp: number of collocation points within each finite element
discretizer = TransformationFactory('dae.collocation')
discretizer.apply_to(m, nfe=30, ncp=6, scheme='LAGRANGE-RADAU')

# Configure the solver.
solver = SolverFactory('ipopt', keepfiles=True, log_file='/z/log',
                       soln_file='/z/sol')
for option_name, option_value in (('max_iter', 100000),
                                  ('print_level', 1),
                                  ('linear_solver', 'ma27'),
                                  ('halt_on_ampl_error', 'yes')):
    solver.options[option_name] = option_value

# Solve the model and report how the solver terminated.
results = solver.solve(m, tee=True)
print(results.solver.status)
print(results.solver.termination_condition)

# Flatten the results stage by stage, each stage ordered by time.
stage_time_grid = [(s, t) for s in sorted(m.stages) for t in sorted(m.t)]
r_t = [t for _, t in stage_time_grid]
r_a = [value(m.a[s, t]) for s, t in stage_time_grid]
r_u = [value(m.u[s, t]) for s, t in stage_time_grid]

plt.plot(r_t, r_a, label="r_a")
plt.plot(r_t, r_u, label="r_u")
plt.legend()
plt.show()

Tensorflow Deep Learning - model size and parameters

According to Andrej's blog -
Where he says that for a Convolutional Layer, with parameter sharing, it introduces F x F x D weights per filter, for a total of (F x F x D) x K weights and K biases.
In my tensorflow code, I have an architecture like this (where D=1)
conv1 : F = 3, K = 32, S = 1, P = 1.
pool1 :
conv2
and so on...
According to the formula,
A model generated with F=3 for conv1 should have 3 x 3 x K = 9 x K weights, i.e. a smaller model, and
A model generated with F=5 should have 5 x 5 x K = 25 x K weights, i.e. a bigger model.
Question
In my code, when I write out the model files for both these cases, I see that the .ckpt file is about 380MB (F=3) and 340MB (F=5). Am I missing something?
Code:
Here's the reference code for saving the variables to a model and printing its size.
# Run the session and save the model.

# Add a saver here
saver = tf.train.Saver()

# Run session
sess.run(tf.initialize_all_variables())
for i in range(201):
    batch = mnist.train.next_batch(50)
    # Report the training accuracy every 100 steps.
    if i % 100 == 0:
        train_accuracy = accuracy.eval(
            feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i, train_accuracy))
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})

# Save model
save_path = saver.save(sess, "/Users/voladoddi/Desktop/dropmodel.ckpt")
print("Model saved in file: %s" % save_path)

# Test
print("test accuracy %g" % accuracy.eval(
    feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

# Print model size: total number of variable elements times 4 bytes
# (assumes every variable is stored as a 4-byte value such as float32).
vars = 0
for v in tf.all_variables():
    vars += np.prod(v.get_shape().as_list())
# A single formatted print call with float division behaves the same on
# Python 2 and Python 3 and does not truncate the size to whole megabytes.
print("%.2f MB" % (vars * 4 / float(1024 ** 2)))