How to join/merge two data frames from Quandl? - python-2.7

Specifically, I use Python 2.7. I can read and print the two data frames from Quandl, 'FMAC/HPI_AK' and 'FMAC/HPI_CA', individually with no problem. But when I try to merge them with merged = pd.merge(df1, df2, on = 'Date', how = 'outer'), I get a traceback ending in KeyError: 'Date', where 'Date' is the label of the first/index column in both data frames.
import quandl
import pandas as pd
api_key = open('quandlapikey.txt', 'r').read()
df1 = quandl.get('FMAC/HPI_ak', authtoken=api_key)
df2 = quandl.get('FMAC/HPI_ca', authtoken=api_key)
print(df1.head())
print(df2.head())
merged = pd.merge(df1, df2, on = 'Date', how = 'outer')
merged.set_index('Date', inplace = True)
print(merged)
Date Value
1975-01-31 15.671711
1975-02-28 15.726897
1975-03-31 15.919058
1975-04-30 16.233030
1975-05-31 16.494823
Date Value
1975-01-31 34.447924
1975-02-28 34.958144
1975-03-31 35.480144
1975-04-30 36.024334
1975-05-31 36.617578
Traceback (most recent call last):
File "", line 1, in
runfile('/Users/hans/Desktop/sentdex/buildingdataset.py', wdir='/Users/hans/Desktop/sentdex')
File "/Users/hans/anaconda2/lib/python2.7/site-packages/spyder/utils/site/sitecustomize.py", line 866, in runfile
execfile(filename, namespace)
File "/Users/hans/anaconda2/lib/python2.7/site-packages/spyder/utils/site/sitecustomize.py", line 94, in execfile
builtins.execfile(filename, *where)
File "/Users/hans/Desktop/sentdex/buildingdataset.py", line 22, in
merged = pd.merge(df1, df2, on = 'Date', how = 'outer')
File "/Users/hans/anaconda2/lib/python2.7/site-packages/pandas/tools/merge.py", line 61, in merge
copy=copy, indicator=indicator)
File "/Users/hans/anaconda2/lib/python2.7/site-packages/pandas/tools/merge.py", line 543, in init
self.join_names) = self._get_merge_keys()
File "/Users/hans/anaconda2/lib/python2.7/site-packages/pandas/tools/merge.py", line 810, in _get_merge_keys
right_keys.append(right[rk]._values)
File "/Users/hans/anaconda2/lib/python2.7/site-packages/pandas/core/frame.py", line 2059, in getitem
return self._getitem_column(key)
File "/Users/hans/anaconda2/lib/python2.7/site-packages/pandas/core/frame.py", line 2066, in _getitem_column
return self._get_item_cache(key)
File "/Users/hans/anaconda2/lib/python2.7/site-packages/pandas/core/generic.py", line 1386, in _get_item_cache
values = self._data.get(item)
File "/Users/hans/anaconda2/lib/python2.7/site-packages/pandas/core/internals.py", line 3543, in get
loc = self.items.get_loc(item)
File "/Users/hans/anaconda2/lib/python2.7/site-packages/pandas/indexes/base.py", line 2136, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas/index.pyx", line 132, in pandas.index.IndexEngine.get_loc (pandas/index.c:4433)
File "pandas/index.pyx", line 154, in pandas.index.IndexEngine.get_loc (pandas/index.c:4279)
File "pandas/src/hashtable_class_helper.pxi", line 732, in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13742)
File "pandas/src/hashtable_class_helper.pxi", line 740, in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13696)
KeyError: 'Date'

You're getting that error because Date is the index in those DataFrames, not a column.
You can instead do (tested):
merged = pd.merge(df1, df2, how='outer', left_index=True, right_index=True)
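Note that both frames come back with a single column named Value (see the head() output above), so an index-based merge will label the results Value_x and Value_y. If you want clearer names, one possible sketch is to rename the columns before joining (the HPI_AK / HPI_CA labels are just names chosen here, not anything required by the API):
df1 = df1.rename(columns={'Value': 'HPI_AK'})
df2 = df2.rename(columns={'Value': 'HPI_CA'})
# DataFrame.join aligns on the index (the Date DatetimeIndex here)
merged = df1.join(df2, how='outer')
print(merged.head())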

Related

How to create multiple Conv3DLSTMCell in tensorflow

I'm trying to build my model with several Conv3DLSTMCell layers. I run the following code:
conv1, state1 = conv3d('conv1', _X, [8,112,112,1], [3,3,3], 64)
pool1 = max_pool('pool1', conv1, k=1)
conv2, state2 = conv3d('conv2', pool1, [8, 56, 56, 64], [3, 3, 3], 128)
pool2 = max_pool('pool2', conv2, k=2)
The conv3d function:
def conv3d(myname, l_input, shape, kernel, outchan):
    cell = contrib_rnn_cell.Conv3DLSTMCell(input_shape=shape, output_channels=outchan,
                                           kernel_shape=kernel)
    hidden = cell.zero_state(array_ops.shape(l_input)[0], dtypes.float32)
    output, state = cell(l_input, hidden)
    print(output.shape)
    return output, state
My code runs OK for conv1 and pool1, but for the conv2 layer it shows me an error:
Traceback (most recent call last):
File "conv3dlstm.py", line 272, in <module>
run(16)
File "conv3dlstm.py", line 199, in run
biases)
File "/home/user/projects/model_conv3dlstm.py", line 47, in inference_c3d
conv2, state2 = conv3d('conv2', pool1, [8, 56, 56, 64], [3, 3, 3], 128)
File "/home/user/projects/model_conv3dlstm.py", line 32, in conv3d
output, state = cell(l_input, hidden)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn_cell_impl.py", line 190, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/base.py", line 696, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/rnn/python/ops/rnn_cell.py", line 2110, in call
4 * self._output_channels, self._use_bias)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/rnn/python/ops/rnn_cell.py", line 2200, in _conv
"kernel", filter_size + [total_arg_size_depth, num_features], dtype=dtype)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 1297, in get_variable
constraint=constraint)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 1093, in get_variable
constraint=constraint)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 431, in get_variable
return custom_getter(**custom_getter_kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn_cell_impl.py", line 193, in _rnn_get_variable
variable = getter(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 408, in _true_getter
use_resource=use_resource, constraint=constraint)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 747, in _get_single_variable
name, "".join(traceback.format_list(tb))))
ValueError: Variable conv_lstm_cell/kernel already exists, disallowed. Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope? Originally defined at:
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/rnn/python/ops/rnn_cell.py", line 2200, in _conv
"kernel", filter_size + [total_arg_size_depth, num_features], dtype=dtype)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/rnn/python/ops/rnn_cell.py", line 2110, in call
4 * self._output_channels, self._use_bias)
File "/home/user/projects/model_conv3dlstm.py", line 32, in conv3d
output, state = cell(l_input, hidden)
I looked at the code in rnn_cell.py at line 2200, which is:
kernel = vs.get_variable(
    "kernel", filter_size + [total_arg_size_depth, num_features], dtype=dtype)
It gets a variable with the fixed name "kernel", which, if I understand correctly, is supposed to be unique. But does that mean I can't create more than one Conv3DLSTMCell? Is it a bug, or am I using it incorrectly?
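For what it's worth, the default variable-scope name of Conv3DLSTMCell is "conv_lstm_cell", which matches the "conv_lstm_cell/kernel already exists" message, so building each cell inside its own variable scope is one way to avoid the collision. A minimal sketch under that assumption (TensorFlow 1.x contrib API, reusing the conv3d signature from above, not a verified fix for this exact model):
import tensorflow as tf
from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell

def conv3d(myname, l_input, shape, kernel, outchan):
    # Give every layer its own variable scope so each cell creates
    # its own conv_lstm_cell/kernel variable instead of clashing.
    with tf.variable_scope(myname):
        cell = contrib_rnn_cell.Conv3DLSTMCell(input_shape=shape,
                                               output_channels=outchan,
                                               kernel_shape=kernel)
        hidden = cell.zero_state(tf.shape(l_input)[0], tf.float32)
        output, state = cell(l_input, hidden)
    return output, state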

Insert using bulk_create fails with an ValueError of a field name

It is definitely different from this question.
In my question the problem is that it is showing a ValueError for a field that doesn't even exist.
This is the model into which I am doing the bulk insert:
class Seats(models.Model):
    seat_no = models.ManyToManyField(Snumber)
    movie_name = models.ForeignKey(Movies)
    multiplex_name = models.ForeignKey(Multiplex)
    date = models.ForeignKey(Date)
    time = models.ForeignKey(Time)

    def __str__(self):
        b = str(self.date)
        c = str(self.time)
        d = str(self.multiplex_name)
        return d+" "+b+" "+c

    class Meta:
        unique_together = ('movie_name', 'multiplex_name', 'date', 'time')
        verbose_name_plural = "Seats"
I have a single Movies object in mov and a single Multiplex object in mul,
and multiple Date objects in dt, Time objects in tm and Snumber objects in st.
I want to add all the objects of st for each time in tm, on each date in dt, with the multiplex mul and the movie mov.
This is my failed attempt to do so:
Seats.objects.bulk_create([
    Seats(movie_name = mov,
          multiplex_name = mul,
          seat_no = set,
          date = dat,
          time = tim
          ) for dat in dt for tim in tm for set in st])
It is showing a ValueError:
Traceback (most recent call last):
File "<console>", line 2, in <module>
File "<console>", line 2, in <listcomp>
File "/usr/local/lib/python3.5/dist-packages/django/db/models/base.py", line 550, in __init__
setattr(self, prop, kwargs[prop])
File "/usr/local/lib/python3.5/dist-packages/django/db/models/fields/related_descriptors.py", line 499, in __set__
manager = self.__get__(instance)
File "/usr/local/lib/python3.5/dist-packages/django/db/models/fields/related_descriptors.py", line 476, in __get__
return self.related_manager_cls(instance)
File "/usr/local/lib/python3.5/dist-packages/django/db/models/fields/related_descriptors.py", line 783, in __init__
(instance, self.source_field_name))
ValueError: "<Seats: Badrinath Ki Dulhania 2017-05-07 09:00:00>" needs to have a value for field "seats" before this many-to-many relationship can be used.
It is telling me to set a value for the field seats, but I don't have any field named seats in any of my models.
What am I missing? Help!!
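One thing worth noting (a sketch of a possible fix, not necessarily the accepted answer): a ManyToManyField can't be assigned through the model constructor, and bulk_create doesn't populate many-to-many relations at all; the rows have to exist (have a primary key) before the relation can be used. The "seats" in the error message refers to the through table's reference back to the Seats model, not a field you declared. Something along these lines avoids the constructor assignment:
# Create the rows first, then attach the seat numbers via the M2M manager.
seats = [
    Seats.objects.create(movie_name=mov, multiplex_name=mul, date=dat, time=tim)
    for dat in dt for tim in tm
]
for seat in seats:
    seat.seat_no.add(*st)
This gives up the single-query insert of bulk_create, but many-to-many rows can't be created through bulk_create in any case.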

Unable to load custom initializer from the saved model, passing custom_objects is not working

I saved a model and its weights in Keras and then tried to load them, but it shows "Invalid initialization: my_init". How can I fix the problem?
model = Sequential()

def my_init(shape, name=None):
    return initializations.normal(shape, scale=0.1, name=name)

def m6_1():
    model.add(Convolution2D(32, 3, 3, init=my_init))
    model.add(Activation('relu'))
    model.add(Convolution2D(32, 3, 3, init=my_init))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(256, init=my_init))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))
Save the model and weights:
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
model.save_weights("model.h5")
Load the model and weights:
json_file = open('model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json,custom_objects={'my_init':my_init})
loaded_model.load_weights("model.h5")
Error message:
Traceback (most recent call last):
File "revised_learn_ETL6_load_model.py", line 73, in <module>
loaded_model = model_from_json(loaded_model_json,custom_objects={"my_init": my_init})
File "/home/ubuntu/.env/local/lib/python2.7/site-packages/keras/models.py", line 197, in model_from_json
return layer_from_config(config, custom_objects=custom_objects)
File "/home/ubuntu/.env/local/lib/python2.7/site-packages/keras/utils/layer_utils.py", line 36, in layer_from_config
return layer_class.from_config(config['config'])
File "/home/ubuntu/.env/local/lib/python2.7/site-packages/keras/models.py", line 1019, in from_config
layer = get_or_create_layer(first_layer)
File "/home/ubuntu/.env/local/lib/python2.7/site-packages/keras/models.py", line 1003, in get_or_create_layer
layer = layer_from_config(layer_data)
File "/home/ubuntu/.env/local/lib/python2.7/site-packages/keras/utils/layer_utils.py", line 36, in layer_from_config
return layer_class.from_config(config['config'])
File "/home/ubuntu/.env/local/lib/python2.7/site-packages/keras/engine/topology.py", line 929, in from_config
return cls(**config)
File "/home/ubuntu/.env/local/lib/python2.7/site-packages/keras/layers/convolutional.py", line 381, in __init__
self.init = initializations.get(init, dim_ordering=dim_ordering)
File "/home/ubuntu/.env/local/lib/python2.7/site-packages/keras/initializations.py", line 107, in get
'initialization', kwargs=kwargs)
File "/home/ubuntu/.env/local/lib/python2.7/site-packages/keras/utils/generic_utils.py", line 16, in get_from_module
str(identifier))
Exception: Invalid initialization: my_init
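A workaround sometimes used with Keras 1.x (a sketch, assuming the old initializations API shown above; not verified against every release): custom_objects isn't forwarded down to initializations.get when the layer config is rebuilt, so you can register the function on the keras.initializations module itself before calling model_from_json, which lets the lookup by name succeed:
from keras import initializations
from keras.models import model_from_json

def my_init(shape, name=None):
    return initializations.normal(shape, scale=0.1, name=name)

# Make "my_init" resolvable by name when the saved config is deserialized.
initializations.my_init = my_init

with open('model.json', 'r') as json_file:
    loaded_model = model_from_json(json_file.read())
loaded_model.load_weights('model.h5')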

Python Petl - _csv.Error: line contains NULL byte

I'm running this:
table = petl.fromcsv('file.txt', delimiter = ',', encoding = 'utf-8')
petl.todb(table, connection, 'table')
And I am getting this error at the insert step:
File "/usr/local/lib/python2.7/dist-packages/petl/util/base.py", line 28, in __len__
return sum(1 for _ in self)
File "/usr/local/lib/python2.7/dist-packages/petl/util/base.py", line 28, in <genexpr>
return sum(1 for _ in self)
File "/usr/local/lib/python2.7/dist-packages/petl/io/csv_py2.py", line 45, in __iter__
for row in reader:
File "/usr/local/lib/python2.7/dist-packages/petl/io/csv_py2.py", line 149, in next
row = self.reader.next()
_csv.Error: line contains NULL byte
What can I do to solve this problem?
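The Python 2 csv module refuses input that contains NUL bytes, so a common workaround is to strip them out before petl reads the file. A minimal sketch under that assumption (file_clean.txt is just a name chosen here for the cleaned copy):
import petl

# Write a cleaned copy of the file with NUL bytes removed.
with open('file.txt', 'rb') as src, open('file_clean.txt', 'wb') as dst:
    for chunk in iter(lambda: src.read(1 << 20), b''):
        dst.write(chunk.replace(b'\x00', b''))

table = petl.fromcsv('file_clean.txt', delimiter=',', encoding='utf-8')
petl.todb(table, connection, 'table')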

python - Error with Mariana/Theano neural network

I am facing a problem when I start my trainer and I can't figure out the cause.
My input data is of dimension 42 and my output should be one value out of 4.
These are the shapes of my training and test sets:
Training set: input = (1152, 42), target = (1152,)
Test set: input = (384, 42), target = (384,)
This is the construction of my network:
ls = MS.GradientDescent(lr=0.01)
cost = MC.CrossEntropy()
i = ML.Input(42, name='inp')
h = ML.Hidden(23, activation=MA.Sigmoid(), initializations=[MI.GlorotTanhInit()], name="hid")
o = ML.SoftmaxClassifier(4, learningScenario=ls, costObject=cost, name="out")
mlp = i > h > o
And this is the construction of the datasets, trainers and recorders:
trainData = MDM.RandomSeries(distances = train_set[0], next_state = train_set[1])
trainMaps = MDM.DatasetMapper()
trainMaps.mapInput(i, trainData.distances)
trainMaps.mapOutput(o, trainData.next_state)
testData = MDM.RandomSeries(distances = test_set[0], next_state = test_set[1])
testMaps = MDM.DatasetMapper()
testMaps.mapInput(i, testData.distances)
testMaps.mapOutput(o, testData.next_state)
earlyStop = MSTOP.GeometricEarlyStopping(testMaps, patience=100, patienceIncreaseFactor=1.1, significantImprovement=0.00001, outputFunction="score", outputLayer=o)
epochWall = MSTOP.EpochWall(1000)
trainer = MT.DefaultTrainer(
    trainMaps=trainMaps,
    testMaps=testMaps,
    validationMaps=None,
    stopCriteria=[earlyStop, epochWall],
    testFunctionName="testAndAccuracy",
    trainMiniBatchSize=MT.DefaultTrainer.ALL_SET,
    saveIfMurdered=False
)
recorder = MREC.GGPlot2("MLP", whenToSave = [MREC.SaveMin("test", o.name, "score")], printRate=1, writeRate=1)
trainer.start("MLP", mlp, recorder = recorder)
But the following error is being produced:
Traceback (most recent call last):
File "nn-mariana.py", line 82, in <module>
trainer.start("MLP", mlp, recorder = recorder)
File "SUPRESSED/Mariana/Mariana/training/trainers.py", line 226, in start
Trainer_ABC.start( self, runName, model, recorder, trainingOrder, moreHyperParameters )
File "SUPRESSED/Mariana/Mariana/training/trainers.py", line 110, in start
return self.run(runName, model, recorder, *args, **kwargs)
File "SUPRESSED/Mariana/Mariana/training/trainers.py", line 410, in run
outputLayers
File "SUPRESSED/Mariana/Mariana/training/trainers.py", line 269, in _trainTest
res = modelFct(output, **kwargs)
File "SUPRESSED/Mariana/Mariana/network.py", line 47, in __call__
return self.callTheanoFct(outputLayer, **kwargs)
File "SUPRESSED/Mariana/Mariana/network.py", line 44, in callTheanoFct
return self.outputFcts[ol](**kwargs)
File "SUPRESSED/Mariana/Mariana/wrappers.py", line 110, in __call__
return self.run(**kwargs)
File "SUPRESSED/Mariana/Mariana/wrappers.py", line 102, in run
fres = iter(self.theano_fct(*self.fctInputs.values()))
File "SUPRESSED/Theano/theano/compile/function_module.py", line 871, in __call__
storage_map=getattr(self.fn, 'storage_map', None))
File "SUPRESSED/Theano/theano/gof/link.py", line 314, in raise_with_op
reraise(exc_type, exc_value, exc_trace)
File "SUPRESSED/Theano/theano/compile/function_module.py", line 859, in __call__
outputs = self.fn()
ValueError: Input dimension mis-match. (input[0].shape[1] = 1152, input[1].shape[1] = 4)
Apply node that caused the error: Elemwise{Composite{((i0 * i1) + (i2 * log(i3)))}}[(0, 1)](InplaceDimShuffle{x,0}.0, LogSoftmax.0, Elemwise{sub,no_inplace}.0, Elemwise{sub,no_inplace}.0)
Toposort index: 18
Inputs types: [TensorType(int32, row), TensorType(float64, matrix), TensorType(int32, row), TensorType(float64, matrix)]
Inputs shapes: [(1, 1152), (1152, 4), (1, 1152), (1152, 4)]
Inputs strides: [(4608, 4), (32, 8), (4608, 4), (32, 8)]
Inputs values: ['not shown', 'not shown', 'not shown', 'not shown']
Outputs clients: [[Sum{axis=[1], acc_dtype=float64}(Elemwise{Composite{((i0 * i1) + (i2 * log(i3)))}}[(0, 1)].0)]]
Versions:
Mariana (1.0.1rc1, /media/guilhermevrs/Data/Documentos/Academico/TCC-code/Mariana)
Theano (0.8.0.dev0, SUPRESSED/Theano)
This code was based on the tutorial code from the mnist example.
Could you please help me to figure out what's going on?
Thank you in advance
I talked directly to the authors of Mariana, and the cause and solution are explained in this issue.