AttributeError: 'tuple' object has no attribute 'lower' while creating a Keras model

While defining a model happyModel() that implements the forward propagation for the binary classification model:
ZEROPAD2D -> CONV2D -> BATCHNORM -> RELU -> MAXPOOL -> FLATTEN -> DENSE
Note that for simplicity and grading purposes, all values such as the stride and kernel (filter) sizes are hard-coded; normally, functions should take these values as parameters.
Returns: model -- a TF Keras model (object containing the information for the entire training process)
def happyModel():
    model = tf.keras.Sequential(
        [
            ## ZeroPadding2D with padding 3, input shape of 64 x 64 x 3
            tfl.ZeroPadding2D(padding=(3, 3), data_format=(64, 64, 3)),
            ## Conv2D with 32 7x7 filters and stride of 1
            tfl.Conv2D(32, (7, 7), strides=(1, 1), name='conv0'),
            ## BatchNormalization for axis 3
            tfl.BatchNormalization(axis=3, name='bn0'),
            ## ReLU
            tfl.Activation('relu'),
            ## Max Pooling 2D with default parameters
            tfl.MaxPooling2D((2, 2), name='max_pool0'),
            ## Flatten layer
            tfl.Flatten(),
            ## Dense layer with 1 unit for output & 'sigmoid' activation
            tfl.Dense(1, activation='sigmoid', name='fc'),
            # YOUR CODE STARTS HERE
            # YOUR CODE ENDS HERE
        ]
    )
    return model
Object creation of the defined model:
happy_model = happyModel()
# Print a summary for each layer
for layer in summary(happy_model):
    print(layer)
output = [['ZeroPadding2D', (None, 70, 70, 3), 0, ((3, 3), (3, 3))],
          ['Conv2D', (None, 64, 64, 32), 4736, 'valid', 'linear', 'GlorotUniform'],
          ['BatchNormalization', (None, 64, 64, 32), 128],
          ['ReLU', (None, 64, 64, 32), 0],
          ['MaxPooling2D', (None, 32, 32, 32), 0, (2, 2), (2, 2), 'valid'],
          ['Flatten', (None, 32768), 0],
          ['Dense', (None, 1), 32769, 'sigmoid']]
comparator(summary(happy_model), output)
The error I am getting is "AttributeError: 'tuple' object has no attribute 'lower'":
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-50-f33284fd82fe> in <module>
----> 1 happy_model = happyModel()
2 # Print a summary for each layer
3 for layer in summary(happy_model):
4 print(layer)
5
<ipython-input-49-b5fc98b1ebba> in happyModel()
21
22 ## ZeroPadding2D with padding 3, input shape of 64 x 64 x 3
---> 23 tfl.ZeroPadding2D(padding=(3,3), data_format=(64,64,3)),
24
25 ## Conv2D with 32 7x7 filters and stride of 1
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/layers/convolutional.py in
__init__(self, padding, data_format, **kwargs)
2800 def __init__(self, padding=(1, 1), data_format=None, **kwargs):
2801 super(ZeroPadding2D, self).__init__(**kwargs)
-> 2802 self.data_format = conv_utils.normalize_data_format(data_format)
2803 if isinstance(padding, int):
2804 self.padding = ((padding, padding), (padding, padding))
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/utils/conv_utils.py in
normalize_data_format(value)
190 if value is None:
191 value = backend.image_data_format()
--> 192 data_format = value.lower()
193 if data_format not in {'channels_first', 'channels_last'}:
194 raise ValueError('The `data_format` argument must be one of '
AttributeError: 'tuple' object has no attribute 'lower'
What is the reason behind it? Can anyone suggest a solution? Thanks in advance.

I think the problem was in the arguments passed to the layer: data_format expects a string ('channels_first' or 'channels_last'), not the input shape tuple, which belongs in the input_shape argument instead. The following code works for me and solves the problem.
def happyModel():
    """
    Implements the forward propagation for the binary classification model:
    ZEROPAD2D -> CONV2D -> BATCHNORM -> RELU -> MAXPOOL -> FLATTEN -> DENSE
    """
    model = tf.keras.Sequential([
        ## ZeroPadding2D with padding 3, input shape of 64 x 64 x 3
        tfl.ZeroPadding2D(padding=(3, 3), input_shape=(64, 64, 3), data_format=None),
        ## Conv2D with 32 7x7 filters and stride of 1
        tfl.Conv2D(filters=32, kernel_size=7, strides=1, padding='valid'),
        ## BatchNormalization for axis 3
        tfl.BatchNormalization(axis=3, name='bn0'),
        ## ReLU
        tfl.ReLU(max_value=None, negative_slope=0, threshold=0),
        ## Max Pooling 2D with default parameters
        tfl.MaxPool2D(pool_size=(2, 2), strides=None, padding='valid', data_format=None),
        ## Flatten layer
        tfl.Flatten(data_format=None),
        ## Dense layer with 1 unit for output & 'sigmoid' activation
        tfl.Dense(1, activation='sigmoid', name='fc'),
    ])
    return model
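To see the failing argument in isolation, here is a minimal sketch (standalone, outside the assignment): Keras normalizes data_format by calling .lower() on it, so it must be a string, and passing the shape tuple there is exactly what triggers the error.
import tensorflow as tf
tfl = tf.keras.layers

# Raises AttributeError: 'tuple' object has no attribute 'lower'
# tfl.ZeroPadding2D(padding=(3, 3), data_format=(64, 64, 3))

# Works: the shape goes in input_shape; data_format stays a string or None
tfl.ZeroPadding2D(padding=(3, 3), input_shape=(64, 64, 3), data_format='channels_last')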

Related

How can I convert the dimension in the model from 2D to 1D?

I am a beginner with PyTorch. I would like to classify a 2D binary array (17 x 20) into 8 classes, and I am using cross entropy as the loss function. I have a batch size of 512: the input is 512 batches of size (17 x 20) and the final output should be 512 batches of size 8, i.e. shape [512, 8]. With the model below I instead get shape [512, 680, 8] (I printed the dimensions I get from the model after the code). How can I get [512, 8] as the final output of this network?
import torch
import torch.nn as nn

class PPS(nn.Module):
    def __init__(self, M=1):
        super(PPS, self).__init__()
        # input layer
        self.layer1 = nn.Sequential(
            nn.Conv2d(17, 680, kernel_size=1, stride=1, padding=0),
            nn.ReLU())
        self.drop1 = nn.Sequential(nn.Dropout())
        self.batch1 = nn.BatchNorm2d(680)
        self.lstm1 = nn.Sequential(nn.LSTM(
            input_size=20,
            hidden_size=16,
            num_layers=1,
            bidirectional=True,
            batch_first=True))
        self.gru = nn.Sequential(nn.GRU(
            input_size=16 * 2,
            hidden_size=16,
            num_layers=2,
            bidirectional=True,
            batch_first=True))
        self.fc1 = nn.Linear(16 * 2, 8)

    def forward(self, x):
        out = self.layer1(x)
        out = self.drop1(out)
        out = self.batch1(out)
        out = out.squeeze()
        out, _ = self.lstm1(out)
        out, _ = self.gru(out)
        out = self.fc1(out)
        return out
cov2d torch.Size([512, 680, 20, 1])
drop torch.Size([512, 680, 20, 1])
batch torch.Size([512, 680, 20])
lstm1 torch.Size([512, 680, 32])
lstm2 torch.Size([512, 680, 32])
linear1 torch.Size([512, 680, 8])
If you want the output to be (512, 8) then you would have to change your last linear layer to something like this:
def __init__(self, M=1):
    ...
    self.gru = nn.Sequential(nn.GRU(
        input_size=16 * 2,
        hidden_size=16,
        num_layers=2,
        bidirectional=True,
        batch_first=True))
    self.fc1 = nn.Linear(680 * 16 * 2, 8)

def forward(self, x):
    ...
    out, _ = self.gru(out)
    out = self.fc1(out.reshape(-1, 680 * 16 * 2))
    return out
The goal is to reduce the number of features from 680 * 16 * 2 down to 8. You can (and probably should) add more final linear layers that do this reduction gradually, as in the sketch below.
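For instance, a minimal sketch of a slightly deeper head (the hidden size of 256 is an arbitrary, untuned choice):
import torch
import torch.nn as nn

# Illustrative head only: reduces the (batch, 680, 32) GRU output to (batch, 8)
head = nn.Sequential(
    nn.Linear(680 * 16 * 2, 256),
    nn.ReLU(),
    nn.Linear(256, 8))

out = torch.randn(512, 680, 32)               # stand-in for the GRU output
logits = head(out.reshape(-1, 680 * 16 * 2))  # -> torch.Size([512, 8])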

Keras2.0 MissingInputError while trying to visualize the trained CNN filters

I'm trying to visualize the filters of a trained convolutional neural network in Keras, following the Keras blog post https://blog.keras.io/how-convolutional-neural-networks-see-the-world.html.
import keras
from keras.layers import Input, Dense, Dropout, Flatten, Activation
from keras.layers import Conv2D, MaxPooling2D
from keras.models import Model
from keras import backend as K
num_classes = 10
input_shape = (32, 32, 1) # 32x32 image, 1 channel
# model
inputs = Input(shape=input_shape)
x = Conv2D(32, (3, 3), activation='relu', name='block1_conv1')(inputs)
x = Conv2D(32, (3, 3), activation='relu', name='block1_conv2')(x)
x = Conv2D(32, (3, 3), activation='relu', name='block1_conv3')(x)
x = Conv2D(32, (3, 3), activation='relu', name='block1_conv4')(x)
x = MaxPooling2D(pool_size=(2, 2), name='block1_pool')(x)
x = Dropout(0.25)(x)
x = Conv2D(64, (3, 3), activation='relu', name='block2_conv1')(x)
x = Conv2D(64, (3, 3), activation='relu', name='block2_conv2')(x)
x = Conv2D(64, (3, 3), activation='relu', name='block2_conv3')(x)
x = Conv2D(64, (3, 3), activation='relu', name='block2_conv4')(x)
x = MaxPooling2D(pool_size=(2, 2), name='block2_pool')(x)
x = Dropout(0.25)(x)
x = Flatten(name='flatten')(x)
x = Dense(512, activation='relu', name='fc1')(x)
x = Dropout(0.5)(x)
x = Dense(num_classes, name='fc2')(x)
predictions = Activation('sigmoid')(x)
model = Model(input=inputs, output=predictions)
# weights are stored in 'best_weights.hdf5'
model.load_weights('best_weights.hdf5')
input_tensor = model.input
layer_dict = dict([(layer.name, layer) for layer in model.layers])
layer_output = layer_dict['fc2'].output
activation = K.mean(layer_output[:, 0])
# compute the gradient of the input picture wrt the activation
grads = K.gradients(activation, input_tensor)[0]
# normalization trick: we normalize the gradient
grads /= (K.sqrt(K.mean(K.square(grads))) + K.epsilon())
# this function returns the activation and grads given the input picture
iterate = K.function([input_tensor], [activation, grads])
However, I received the error:
Traceback (most recent call last):
File "<stdin>", line 2, in <module>
File "C:\Users\mouse008\Anaconda3\envs\python27\lib\site-packages\keras\backend\theano_backend.py", line 1132, in function
return Function(inputs, outputs, updates=updates, **kwargs)
File "C:\Users\mouse008\Anaconda3\envs\python27\lib\site-packages\keras\backend\theano_backend.py", line 1118, in __init__
**kwargs)
File "C:\Users\mouse008\Anaconda3\envs\python27\lib\site-packages\theano\compile\function.py", line 326, in function
output_keys=output_keys)
File "C:\Users\mouse008\Anaconda3\envs\python27\lib\site-packages\theano\compile\pfunc.py", line 486, in pfunc
output_keys=output_keys)
File "C:\Users\mouse008\Anaconda3\envs\python27\lib\site-packages\theano\compile\function_module.py", line 1794, in orig_function
output_keys=output_keys).create(
File "C:\Users\mouse008\Anaconda3\envs\python27\lib\site-packages\theano\compile\function_module.py", line 1446, in __init__
accept_inplace)
File "C:\Users\mouse008\Anaconda3\envs\python27\lib\site-packages\theano\compile\function_module.py", line 177, in std_fgraph
update_mapping=update_mapping)
File "C:\Users\mouse008\Anaconda3\envs\python27\lib\site-packages\theano\gof\fg.py", line 180, in __init__
self.__import_r__(output, reason="init")
File "C:\Users\mouse008\Anaconda3\envs\python27\lib\site-packages\theano\gof\fg.py", line 351, in __import_r__
self.__import__(variable.owner, reason=reason)
File "C:\Users\mouse008\Anaconda3\envs\python27\lib\site-packages\theano\gof\fg.py", line 397, in __import__
raise MissingInputError(error_msg, variable=r)
theano.gof.fg.MissingInputError: Input 0 of the graph (indices start from 0), used to compute if{}(keras_learning_phase, Elemwise{true_div,no_inplace}.0, InplaceDimShuffle{0,2,3,1}.0), was not provided and not given a value. Use the Theano flag exception_verbosity='high', for more information on this error.
Backtrace when that variable is created:
File "<stdin>", line 1, in <module>
File "C:\Users\mouse008\Anaconda3\envs\python27\lib\site-packages\keras\__init__.py", line 3, in <module>
from . import activations
File "C:\Users\mouse008\Anaconda3\envs\python27\lib\site-packages\keras\activations.py", line 3, in <module>
from . import backend as K
File "C:\Users\mouse008\Anaconda3\envs\python27\lib\site-packages\keras\backend\__init__.py", line 70, in <module>
from .theano_backend import *
File "C:\Users\mouse008\Anaconda3\envs\python27\lib\site-packages\keras\backend\theano_backend.py", line 28, in <module>
_LEARNING_PHASE = T.scalar(dtype='uint8', name='keras_learning_phase') # 0 = test, 1 = train
Could anyone help me?
Thank you.
Usually you need to provide one more input, which tells Keras whether to run the function in inference or training/learning mode. Try:
iterate = K.function([input_tensor, K.learning_phase()], [activation, grads])
And when you call iterate, you need to pass 0 to run your function in inference mode, or 1 otherwise.
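For example (a minimal sketch; input_image is a placeholder array matching the model's input shape, here (1, 32, 32, 1)):
import numpy as np

input_image = np.random.random((1, 32, 32, 1))            # placeholder input
activation_value, grads_value = iterate([input_image, 0])  # 0 = inference mode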

numpy recarray append_fields: can't append numpy array of datetimes

I have a recarray containing various fields and I want to append an array of datetime objects on to it.
However, it seems like the append_fields function in numpy.lib.recfunctions won't let me add an array of objects.
Here's some example code:
import numpy as np
import datetime
import numpy.lib.recfunctions as recfun

dtype = np.dtype([('WIND_WAVE_HGHT', '<f4'), ('WIND_WAVE_PERD', '<f4')])
obs = np.array([(0.1, 10.0), (0.2, 11.0), (0.3, 12.0)], dtype=dtype)
dates = np.array([datetime.datetime(2001,1,1,0),
                  datetime.datetime(2001,1,1,0),
                  datetime.datetime(2001,1,1,0)])

# This doesn't work:
recfun.append_fields(obs, 'obdate', dates, dtypes=np.object)
I keep getting the error TypeError: Cannot change data-type for object array.
It seems to only be an issue with np.object arrays as I can append other fields ok. Am I missing something?
The problem
In [143]: recfun.append_fields(obs,'test',np.array([None,[],1]))
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-143-5c3de23b09f7> in <module>()
----> 1 recfun.append_fields(obs,'test',np.array([None,[],1]))
/usr/local/lib/python3.5/dist-packages/numpy/lib/recfunctions.py in append_fields(base, names, data, dtypes, fill_value, usemask, asrecarray)
615 if dtypes is None:
616 data = [np.array(a, copy=False, subok=True) for a in data]
--> 617 data = [a.view([(name, a.dtype)]) for (name, a) in zip(names, data)]
618 else:
619 if not isinstance(dtypes, (tuple, list)):
/usr/local/lib/python3.5/dist-packages/numpy/lib/recfunctions.py in <listcomp>(.0)
615 if dtypes is None:
616 data = [np.array(a, copy=False, subok=True) for a in data]
--> 617 data = [a.view([(name, a.dtype)]) for (name, a) in zip(names, data)]
618 else:
619 if not isinstance(dtypes, (tuple, list)):
/usr/local/lib/python3.5/dist-packages/numpy/core/_internal.py in _view_is_safe(oldtype, newtype)
363
364 if newtype.hasobject or oldtype.hasobject:
--> 365 raise TypeError("Cannot change data-type for object array.")
366 return
367
TypeError: Cannot change data-type for object array.
So the problem is in this a.view([(name, a.dtype)]) expression. It tries to make a single-field structured array from a. That works with dtypes like int and str, but fails with object. The failure is in the core view handling, so it isn't likely to change.
In [148]: x=np.arange(3)
In [149]: x.view([('test', x.dtype)])
Out[149]:
array([(0,), (1,), (2,)],
dtype=[('test', '<i4')])
In [150]: x=np.array(['one','two'])
In [151]: x.view([('test', x.dtype)])
Out[151]:
array([('one',), ('two',)],
dtype=[('test', '<U3')])
In [152]: x=np.array([[1],[1,2]])
In [153]: x
Out[153]: array([[1], [1, 2]], dtype=object)
In [154]: x.view([('test', x.dtype)])
...
TypeError: Cannot change data-type for object array.
The fact that recfunctions requires a separate import indicates that it is somewhat of a backwater: it isn't used a lot and is not under active development. I haven't examined the code in detail, but I suspect a fix would be a kludge.
A fix
Here's a way of adding a new field from scratch. It performs the same basic actions as append_fields:
Define a new dtype, using the obs and the new field name and dtype:
In [158]: obs.dtype.descr
Out[158]: [('WIND_WAVE_HGHT', '<f4'), ('WIND_WAVE_PERD', '<f4')]
In [159]: obs.dtype.descr+[('TEST',object)]
Out[159]: [('WIND_WAVE_HGHT', '<f4'), ('WIND_WAVE_PERD', '<f4'), ('TEST', object)]
In [160]: dt1 =np.dtype(obs.dtype.descr+[('TEST',object)])
Make an empty target array, and fill it by copying data by field name:
In [161]: newobs = np.empty(obs.shape, dtype=dt1)
In [162]: for n in obs.dtype.names:
...: newobs[n]=obs[n]
In [167]: dates
Out[167]:
array([datetime.datetime(2001, 1, 1, 0, 0),
datetime.datetime(2001, 1, 1, 0, 0),
datetime.datetime(2001, 1, 1, 0, 0)], dtype=object)
In [168]: newobs['TEST']=dates
In [169]: newobs
Out[169]:
array([( 0.1 , 10., datetime.datetime(2001, 1, 1, 0, 0)),
( 0.2 , 11., datetime.datetime(2001, 1, 1, 0, 0)),
( 0.30000001, 12., datetime.datetime(2001, 1, 1, 0, 0))],
dtype=[('WIND_WAVE_HGHT', '<f4'), ('WIND_WAVE_PERD', '<f4'), ('TEST', 'O')])
datetime64 alternative
With the native numpy datetimes, append works
In [179]: dates64 = dates.astype('datetime64[D]')
In [180]: recfun.append_fields(obs,'test',dates64,usemask=False)
Out[180]:
array([( 0.1 , 10., '2001-01-01'),
( 0.2 , 11., '2001-01-01'), ( 0.30000001, 12., '2001-01-01')],
dtype=[('WIND_WAVE_HGHT', '<f4'), ('WIND_WAVE_PERD', '<f4'), ('test', '<M8[D]')])
append_fields has some bells-n-whistles that my version doesn't - fill values, masked arrays, recarray, etc.
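Wrapped up as a reusable helper (the same field-by-field copy as above; the function name is my own):
def append_object_field(base, name, data):
    # Build a new dtype with the extra object field, then copy field by field
    dt = np.dtype(base.dtype.descr + [(name, object)])
    out = np.empty(base.shape, dtype=dt)
    for n in base.dtype.names:
        out[n] = base[n]
    out[name] = data
    return out

newobs = append_object_field(obs, 'obdate', dates)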
structured dates array
I could create a structured array with the dates
In [197]: sdates = np.array([(i,) for i in dates],dtype=[('test',object)])
In [198]: sdates
Out[198]:
array([(datetime.datetime(2001, 1, 1, 0, 0),),
(datetime.datetime(2001, 1, 1, 0, 0),),
(datetime.datetime(2001, 1, 1, 0, 0),)],
dtype=[('test', 'O')])
There must be a function that merges fields of existing arrays, but I'm not finding it.
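(For what it's worth, recfunctions does ship a merge_arrays function; whether it sidesteps the object-dtype view problem is something I haven't verified.)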
previous work
This felt familiar:
https://github.com/numpy/numpy/issues/2346
TypeError when appending fields to a structured array of size ONE
Adding datetime field to recarray

Why am I getting the following AttributeError in Python?

I am using sklearn's GradientBoostingRegressor. After fitting it with 2000 estimators, I wanted to add more estimators to it. Since it takes too long to rerun the entire fitting process, I used the set_params() method. Note that it is a multi-target problem, meaning I have 3 targets to fit. So I am using the following code to add more estimators.
def addMoreEstimators(train_X, train_y, test, models, n_estimators=500, warm_start=True):
    '''parameters: models (list of length 3 in our case)
       train_X, train_y [n_samples x 3], test
       n_estimators : previous + 500 (default) [additional estimators]
       warm_start : True (default)
    '''
    params = {'n_estimators': n_estimators, 'warm_start': warm_start}
    gbm_pred = pd.DataFrame()
    for (i, stars), clf in zip(enumerate(['*', '**', '***']), models):
        clf.set_params(**params)
        %time clf.fit(train_X.todense(), train_y[stars])
        %time gbm_pred[stars] = clf.predict(test.todense())
    gbm_pred = gbm_pred.as_matrix()
    gbm_dict = {'model': gbm, 'prediction': gbm_pred}
    return gbm_dict
Note: the models parameter is a list of 3 fitted models for the 3 targets.
When I ran it for the first time with 2500 estimators (originally I had 2000), it ran fine and gave me an output. For reference, I believe the warm_start pattern I am following matches the documented one (a minimal sketch with toy data):
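from sklearn.ensemble import GradientBoostingRegressor
import numpy as np

X, y = np.random.rand(100, 5), np.random.rand(100)
est = GradientBoostingRegressor(n_estimators=100, warm_start=True)
est.fit(X, y)
est.set_params(n_estimators=200)
est.fit(X, y)  # fits only the 100 additional trees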
When I run the same function with 3000 estimators, however, I get an AttributeError. Here models contained the 3 fitted models. Below is the traceback of the error (it's kinda long):
AttributeError Traceback (most recent call last)
<ipython-input-104-9418ada3b36f> in <module>()
7 test = val_X_tfidf[:,shortened_col_index],
8 models = models,
----> 9 n_estimators = 3000)
10
11 reduced_features_gbm_pred_3000_2_lr_1_msp_2 = reduced_features_gbm_model_3000_2_lr_1_msp_2['prediction']
<ipython-input-103-e15a4fb70b50> in addMoreEstimators(train_X, train_y, test, models, n_estimators, warm_start)
15
16 clf.set_params(**params)
---> 17 get_ipython().magic(u'time clf.fit(train_X.todense(),train_y[stars])')
18 print 'starting prediction'
19
//anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in magic(self, arg_s)
2305 magic_name, _, magic_arg_s = arg_s.partition(' ')
2306 magic_name = magic_name.lstrip(prefilter.ESC_MAGIC)
-> 2307 return self.run_line_magic(magic_name, magic_arg_s)
2308
2309 #-------------------------------------------------------------------------
//anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in run_line_magic(self, magic_name, line)
2226 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals
2227 with self.builtin_trap:
-> 2228 result = fn(*args,**kwargs)
2229 return result
2230
//anaconda/lib/python2.7/site-packages/IPython/core/magics/execution.pyc in time(self, line, cell, local_ns)
//anaconda/lib/python2.7/site-packages/IPython/core/magic.pyc in <lambda>(f, *a, **k)
191 # but it's overkill for just that one bit of state.
192 def magic_deco(arg):
--> 193 call = lambda f, *a, **k: f(*a, **k)
194
195 if callable(arg):
//anaconda/lib/python2.7/site-packages/IPython/core/magics/execution.pyc in time(self, line, cell, local_ns)
1160 if mode=='eval':
1161 st = clock2()
-> 1162 out = eval(code, glob, local_ns)
1163 end = clock2()
1164 else:
<timed eval> in <module>()
//anaconda/lib/python2.7/site-packages/sklearn/ensemble/gradient_boosting.pyc in fit(self, X, y, sample_weight, monitor)
973 self.estimators_.shape[0]))
974 begin_at_stage = self.estimators_.shape[0]
--> 975 y_pred = self._decision_function(X)
976 self._resize_state()
977
//anaconda/lib/python2.7/site-packages/sklearn/ensemble/gradient_boosting.pyc in _decision_function(self, X)
1080 # not doing input validation.
1081 score = self._init_decision_function(X)
-> 1082 predict_stages(self.estimators_, X, self.learning_rate, score)
1083 return score
1084
sklearn/ensemble/_gradient_boosting.pyx in sklearn.ensemble._gradient_boosting.predict_stages (sklearn/ensemble/_gradient_boosting.c:2502)()
AttributeError: 'int' object has no attribute 'tree_'
Sorry for the long traceback, but I think without it, it wouldn't be possible to give me meaningful feedback.
Again, why am I getting this error?
Any help would be greatly appreciated.
Thanks
Edit
Below is the code that generates the models that were passed as the models input to the function above.
from sklearn import ensemble

def updated_runGBM(train_X, train_y, test,
                   n_estimators=100,
                   max_depth=1,
                   min_samples_split=1,
                   learning_rate=0.01,
                   loss='ls',
                   warm_start=True):
    '''train_X : n_samples x m_features
       train_y : n_samples x k_targets (multiple targets allowed)
       test : n_samples x m_features
       warm_start : True (originally the default is False, but I want to add trees)
    '''
    params = {'n_estimators': n_estimators, 'max_depth': max_depth,
              'min_samples_split': min_samples_split,
              'learning_rate': learning_rate, 'loss': loss, 'warm_start': warm_start}
    gbm1 = ensemble.GradientBoostingRegressor(**params)
    gbm2 = ensemble.GradientBoostingRegressor(**params)
    gbm3 = ensemble.GradientBoostingRegressor(**params)
    gbm = [gbm1, gbm2, gbm3]
    gbm_pred = pd.DataFrame()
    for (i, stars), clf in zip(enumerate(['*', '**', '***']), gbm):
        %time clf.fit(train_X.todense(), train_y[stars])
        %time gbm_pred[stars] = clf.predict(test.todense())
    gbm_pred = gbm_pred.as_matrix()
    gbm_pred = np.clip(gbm_pred, 0, np.inf)
    gbm_dict = {'model': gbm, 'prediction': gbm_pred}
    return gbm_dict
NOTE In the code above, I have removed some of the print statements to reduce clutter.
These are the two functions I am using, nothing else (apart from the code to split up the data).

number of parameters in Caffe LENET or Imagenet models

How do I calculate the number of parameters in a model, e.g. LeNet for MNIST, or a ConvNet for an ImageNet model, etc.?
Is there any specific function in Caffe that returns or saves the number of parameters in a model?
regards
Here is a python snippet to compute the number of parameters in a Caffe model:
import caffe
caffe.set_mode_cpu()
import numpy as np
from numpy import prod, sum
from pprint import pprint

def print_net_parameters(deploy_file):
    print "Net: " + deploy_file
    net = caffe.Net(deploy_file, caffe.TEST)
    print "Layer-wise parameters: "
    pprint([(k, v[0].data.shape) for k, v in net.params.items()])
    print "Total number of parameters: " + str(sum([prod(v[0].data.shape) for k, v in net.params.items()]))

deploy_file = "/home/ubuntu/deploy.prototxt"
print_net_parameters(deploy_file)
# Sample output:
# Net: /home/ubuntu/deploy.prototxt
# Layer-wise parameters:
#[('conv1', (96, 3, 11, 11)),
# ('conv2', (256, 48, 5, 5)),
# ('conv3', (384, 256, 3, 3)),
# ('conv4', (384, 192, 3, 3)),
# ('conv5', (256, 192, 3, 3)),
# ('fc6', (4096, 9216)),
# ('fc7', (4096, 4096)),
# ('fc8', (819, 4096))]
# Total number of parameters: 60213280
https://gist.github.com/kaushikpavani/a6a32bd87fdfe5529f0e908ed743f779
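Note that this sums only each layer's first blob (v[0], the weights). To also count biases, a hedged one-line tweak inside print_net_parameters, after net is created, that iterates over every parameter blob:
# Counts weights and biases (every parameter blob in each layer)
print "Total incl. biases: " + str(sum([prod(b.data.shape) for v in net.params.values() for b in v]))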
I can offer an explicit way to do this via the Matlab interface (make sure matcaffe is installed first).
Basically, you extract the set of parameters from each network layer and count them.
In Matlab:
% load the network
net_model = <path to your *deploy.prototxt file>
net_weights = <path to your *.caffemodel file>
phase = 'test';
test_net = caffe.Net(net_model, net_weights, phase);
% get the list of layers
layers_list = test_net.layer_names;
% for those layers which have parameters, count them
counter = 0;
for j = 1:length(layers_list),
    if ~isempty(test_net.layers(layers_list{j}).params)
        feat = test_net.layers(layers_list{j}).params(1).get_data();
        counter = counter + numel(feat)
    end
end
In the end, 'counter' contains the number of parameters.
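Note that params(1) is only the first parameter blob of each layer (the weights); if you want to include biases as well, loop over all entries of params rather than just the first.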