Raster plot not showing symbols using plt.scatter function - python-2.7

I need to make a raster plot of one neuron, with 10 trials of data, each trial being 4500 ms long.
import numpy as np
import matplotlib.pyplot as plt
# Raster plot for one neuron: load stimulus files 01..13, then draw the
# trials of the stimulus stored at index 8.
maatriks = []
for stimulus in range(1, 14):
    # File numbers below 10 carry a leading zero in the file name.
    prefix = ('C:\\Users\\latel\\Desktop\\kool\\Neuro\\prax3\\data\\lgn\\plain\\neuron_01_stimulus_0'
              if stimulus < 10
              else 'C:\\Users\\latel\\Desktop\\kool\\Neuro\\prax3\\data\\lgn\\plain\\neuron_01_stimulus_')
    csv_path = prefix + str(stimulus) + '.csv'
    # NOTE: the expression `',' or '\n'` evaluates to ',' (first truthy operand).
    maatriks.append(np.genfromtxt(csv_path, dtype='int', delimiter=',' or '\n'))
data = np.array(maatriks)
print(data.shape)

spikes = np.array(data[8])
print(spikes.shape)

# One scatter call per trial; the non-zero samples of all trials are tallied
# along the way.
nonzeros = 0
for trial, samples in enumerate(spikes):
    nonzeros += np.count_nonzero(samples)
    # The raw sample values are used as x coordinates, the trial number as y.
    plt.scatter(samples, trial * np.ones(samples.shape), marker='|')
print(nonzeros)

# Axis limits: one unit of margin below the first trial, x spans one trial's length.
plt.ylim(-1, len(spikes))
plt.xlim(0, len(spikes[0]))
plt.xlabel("Time is seconds")
plt.ylabel("Trial number")
plt.tight_layout()
plt.show()
This prints the following output:
(13L, 10L, 4501L)
(10L, 4501L)
55
But the plot is empty, and I cannot understand why. In my opinion there should be 55 lines in it.
Edit: Got it working. Added this code.
# Replace every entry of `spikes` that equals 1 with its own column index,
# in place, so that each marked sample carries its position along the trial
# and can be used directly as an x coordinate. This is the vectorised form
# of visiting every element of every row in a Python loop.
spike_rows, spike_cols = np.nonzero(spikes == 1)
spikes[spike_rows, spike_cols] = spike_cols
This was needed because the data contained only 0s and 1s.
Does anyone know how to do this in a shorter way?

Related

Removing dimension using reshape in keras?

Is it possible to remove a dimension using Reshape or any other function?
I have the following network.
import keras
from keras.layers.merge import Concatenate
from keras.models import Model
from keras.layers import Input, Dense
from keras.layers import Dropout
from keras.layers.core import Dense, Activation, Lambda, Reshape,Flatten
from keras.layers import Conv2D, MaxPooling2D, Reshape, ZeroPadding2D
import numpy as np
#Number_of_splits = ((input_width-win_dim)+1)/stride_dim
# Here: input_width = 40, win_dim = 5, stride_dim = 1, which gives 36.
splits = ((40-5)+1)/1
print splits
# Random stand-in data: 100 training and 10 test samples, each made of
# `splits` windows of shape (45, 5, 3).
train_data_1 = np.random.randint(100,size=(100,splits,45,5,3))
test_data_1 = np.random.randint(100,size=(10,splits,45,5,3))
# Random integer labels in [0, 145), 15 per sample.
labels_train_data =np.random.randint(145,size=(100,15))
labels_test_data =np.random.randint(145,size=(10,15))
# One Input per window; each window gets its own Conv2D + MaxPooling2D branch.
list_of_input = [Input(shape = (45,5,3)) for i in range(splits)]
list_of_conv_output = []
list_of_max_out = []
for i in range(splits):
list_of_conv_output.append(Conv2D(filters = 145 , kernel_size = (15,3))(list_of_input[i])) #output dim: 36x(31,3,145)
list_of_max_out.append((MaxPooling2D(pool_size=(2,2))(list_of_conv_output[i]))) #output dim: 36x(15,1,145)
# Join the outputs of all branches into one tensor.
merge = keras.layers.concatenate(list_of_max_out) #Output dim: (15,1,5220)
#reshape = Reshape((merge.shape[0],merge.shape[3]))(merge) # expected output dim: (15,145)
# The dense stack is applied directly to the concatenated tensor (no reshape yet).
dense1 = Dense(units = 1000, activation = 'relu', name = "dense_1")(merge)
dense2 = Dense(units = 1000, activation = 'relu', name = "dense_2")(dense1)
dense3 = Dense(units = 145 , activation = 'softmax', name = "dense_3")(dense2)
model = Model(inputs = list_of_input , outputs = dense3)
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam")
print model.summary()
# Pause until the user presses Enter, so the summary can be read.
raw_input("SDasd")
# NOTE(review): train_input / test_input are not defined in this snippet;
# presumably train_data_1 / test_data_1 were meant - confirm.
# NOTE(review): both validation_data and validation_split are passed -
# confirm which one is intended.
hist_current = model.fit(x = [train_input[i] for i in range(100)],
y = labels_train_data,
shuffle=False,
validation_data=([test_input[i] for i in range(10)], labels_test_data),
validation_split=0.1,
epochs=150000,
batch_size = 15,
verbose=1)
The max-pooling layer creates an output with dimensions (15,1,36). I would like to remove the middle axis, so that the output dimensions end up being (15,36).
If possible, I would like to avoid hard-coding the outer dimensions and instead, as I have tried below, use the dimensions of the previous layer to reshape it.
#reshape = Reshape((merge.shape[0],merge.shape[3]))(merge) # expected output dim: (15,145)
I need the output dimensions of the entire network to be (15,145), and the middle dimension is causing problems.
How do I remove the middle dimension?
I wanted to remove all dimensions that are equal to 1, but not specify a specific size with Reshape so that my code does not break if I change the input size or number of kernels in a convolution. This works with the functional keras API on a tensorflow backend.
from keras.layers.core import Reshape
# Placeholder for the real layer call: fill in the actual Conv2D arguments.
old_layer = Conv2D(#actualArguments) (older_layer)
# old_layer yields, e.g., a tensor of shape (None, 15, 1, 36), where None is the batch size.
newdim = tuple([x for x in old_layer.shape.as_list() if x != 1 and x is not None])
# Dropping the None (batch) entry and every size-1 axis leaves newdim == (15, 36)
# for the example shape above; Reshape takes the target shape without the batch size.
reshape_layer = Reshape(newdim) (old_layer)
reshape = Reshape((15,145))(merge) # expected output dim: (15,145)

How do i pass my input/output to this network?

I seem to have some problems starting the training, and I am not sure why.
The network has multiple inputs (72 1-D arrays) and its output is a 1-D array of length 24. The output array consists of numbers referring to 145 different classes.
So: 72 inputs => 24 outputs.
Below is a minimal working example, without the input/output being set.
import keras
from keras.utils import np_utils
from keras import metrics
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Lambda, Reshape,Flatten
from keras.layers import Conv1D,Conv2D, MaxPooling2D, MaxPooling1D, Reshape, ZeroPadding2D
from keras.utils import np_utils
from keras.layers.advanced_activations import LeakyReLU, PReLU
from keras.layers.advanced_activations import ELU
from keras.models import Model
from keras.layers import Input, Dense
from keras.layers import Dropout
from keras import backend as K
from keras.callbacks import ReduceLROnPlateau
from keras.callbacks import CSVLogger
from keras.callbacks import EarlyStopping
from keras.models import load_model
from keras.layers.merge import Concatenate
import numpy as np
def chunks(l, n):
    """Yield successive n-sized chunks from l.

    The last chunk is shorter when ``len(l)`` is not a multiple of ``n``.

    Args:
        l: A sliceable sequence (list, string, ...).
        n: Chunk size; must be a positive integer.

    Raises:
        ValueError: If ``n`` is smaller than 1. Because this is a generator,
            the error surfaces when iteration starts.
    """
    if n < 1:
        # A negative step would make range() empty and silently drop all
        # data; a zero step would fail inside range() with an unrelated
        # message.
        raise ValueError("chunk size n must be a positive integer, got %r" % (n,))
    for start in range(0, len(l), n):
        yield l[start:start + n]
# Synthetic stand-in data. Every sample pairs a random float array of shape
# (78, 684, 4) with a list of 228 random integer labels drawn from [0, 145).
nano_train_input = []
nano_train_output = []
nano_test_input = []
nano_test_output = []
## Creating train input:
# 974 training samples.
for i in range(974):
nano_train_input.append(np.random.random((78,684,4)))
nano_train_output.append(np.random.randint(145,size=(228)).tolist())
## Creating test input:
# 104 test samples.
for i in range(104):
nano_test_input.append(np.random.random((78,684,4)))
nano_test_output.append(np.random.randint(145,size=(228)).tolist())
# Cut every input matrix into 72-column windows and every label list into
# chunks of 24 labels, print diagnostics about the resulting lists, then build
# and compile a 72-input Conv1D network and start (an incomplete) fit call.
# Parameters: train_input / test_input are sequences of 3-D arrays
# (row, col, channel); train_output / test_output are the matching label
# lists, which are truncated in place.
# NOTE(review): `names` and `print_once` are not used in the visible body.
def model(train_input, train_output, test_input, test_output, names=0):
# Paper uses dimension (40 x 45 =(15 * 3))
# Filter size 5
# Pooling size
# I use dimension (78 x 72 = (24 * 3)
# Filter size 9
print "In model"
i = 0
print_once = True
data_test_output = []
data_test_input = []
# Test set: trim the tails so that the label count is a multiple of 24 and
# the column count a multiple of 72, then split both into equal pieces.
for matrix in test_input:
row,col,channel = matrix.shape
# Number of trailing labels / columns that do not fill a whole chunk.
# presumably one label per 3 columns - confirm against the data source.
remove_output = (col/3)%24
remove_input = col%72
if remove_output > 0 :
test_output[i] = test_output[i][:-(remove_output)]
for split in chunks(test_output[i],24):
data_test_output.append(np.array(split))
# The trailing :-1 drops the last channel; np.split then cuts the remaining
# columns into equal sections of 72 columns each.
if remove_input > 0:
out = np.split(matrix[:,:-(remove_input),:-1],matrix[:,:-(remove_input),:-1].shape[1]/72,axis=1)
else:
out = np.split(matrix[:,:,:-1],matrix[:,:,:-1].shape[1]/72,axis=1)
data_test_input.extend(out)
del out
i=i+1 # Increment
i=0
data_train_output = []
data_train_input = []
# Training set: same trimming and splitting as for the test set.
for matrix in train_input:
row,col,channel = matrix.shape
remove_output = (col/3)%24
remove_input = col%72
if remove_output > 0 :
train_output[i] = train_output[i][:-(remove_output)]
for split in chunks(train_output[i],24):
data_train_output.append(np.array(split))
if remove_input > 0:
out = np.split(matrix[:,:-(remove_input),:-1],matrix[:,:-(remove_input),:-1].shape[1]/72,axis=1)
else:
out = np.split(matrix[:,:,:-1],matrix[:,:,:-1].shape[1]/72,axis=1)
data_train_input.extend(out)
del out
i=i+1 # Increment
# Diagnostics: lengths, element types, container types and element shapes
# of the four prepared lists.
print
print "Len:"
print len(data_train_input)
print len(data_train_output)
print len(data_test_input)
print len(data_test_output)
print
print "Type[0]:"
print type(data_train_input[0])
print type(data_train_output[0])
print type(data_test_input[0])
print type(data_test_output[0])
print
print "Type:"
print type(data_train_input)
print type(data_train_output)
print type(data_test_input)
print type(data_test_output)
print
print "shape of [0]:"
print data_train_input[0].shape
print data_train_output[0].shape
print data_test_input[0].shape
print data_test_output[0].shape
# Network: 72 separate inputs of shape (78, 3), each with its own
# Conv1D + MaxPooling1D branch.
# NOTE(review): how the 72-column windows prepared above map onto these 72
# inputs is not shown here - confirm.
list_of_input = [Input(shape = (78,3)) for i in range(72)]
list_of_conv_output = []
list_of_max_out = []
for i in range(72):
list_of_conv_output.append(Conv1D(filters = 32 , kernel_size = 6 , padding = "same", activation = 'relu')(list_of_input[i]))
list_of_max_out.append(MaxPooling1D(pool_size=3)(list_of_conv_output[i]))
# Join all branches and flatten before the dense layers.
merge = keras.layers.concatenate(list_of_max_out)
reshape = Flatten()(merge)
dense1 = Dense(units = 500, activation = 'relu', name = "dense_1")(reshape)
dense2 = Dense(units = 250, activation = 'relu', name = "dense_2")(dense1)
dense3 = Dense(units = 24 , activation = 'softmax', name = "dense_3")(dense2)
# NOTE: this assignment rebinds the name `model` inside the function body.
model = Model(inputs = list_of_input , outputs = dense3)
# NOTE(review): the loss is categorical_crossentropy while the metric is
# sparse_categorical_accuracy - confirm the intended label encoding.
model.compile(loss="categorical_crossentropy", optimizer="adam" , metrics = [metrics.sparse_categorical_accuracy])
reduce_lr=ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, mode='auto', epsilon=0.01, cooldown=0, min_lr=0.000000000000000000001)
stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=1, mode='auto')
print "Train!"
print model.summary()
# The x, y and validation_data arguments are left blank: this is the part the
# question asks about, so the call is not valid Python as written.
hist_current = model.fit(x = ,
y = ,
shuffle=False,
validation_data=(,),
validation_split=0.1,
epochs=150000,
verbose=1,
callbacks=[reduce_lr,stop])
# Run the whole pipeline on the synthetic data.
model(nano_train_input,nano_train_output,nano_test_input, nano_test_output)
The input and output are each stored as a list of numpy.ndarrays.
This is a minimal working example. How am I supposed to pass the input and output?
I would try:
# Join the pooled outputs of all input branches.
merge = keras.layers.concatenate(list_of_max_out)
# Collapse the sequence axis before the dense layers.
merge = Flatten()(merge) # or GlobalMaxPooling1D or GlobalAveragePooling1D
dense1 = Dense(500, activation = 'relu')(merge)
You probably want to apply something to transform the output of your convolutional layers. To do that, you need to squash the time / sequential dimension; try one of the techniques shown above (Flatten, GlobalMaxPooling1D or GlobalAveragePooling1D).
If you take a look at your code and outputs you indeed have what you say: 24 outputs (data_train_outputs[0].shape). However, if you look at your layer output of Keras, you have this as output:
dense_3 (Dense) (None, 26, 145) 36395
I would say that this should be an array with shape (None, 24)....
I suggest you add a reshape layer to get the output you want to have!

printing output as a table in python terminal and saving output as a .txt with proper headings

I have written code to approximate an exponential function by a partial sum of its series. It should iterate over the first N terms (n = 0 to N-1) and report, for each iteration step, the iteration number, the sum, the absolute error and the relative error.
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt
import math
# Read the truncation point and the argument of e^x from the user.
N = input ("Please enter an integer at which term you want to turncate your summation")
x = input ("please enter a number for which you want to run the exponential summation e^{x}")
# Reference value that the partial sums are compared against.
function = math.exp(x)
exp_sum = abs_err = rel_err = 0.0
for n in range(N):
    # Add the n-th series term x**n / n! to the running sum.
    exp_sum += x**n / math.factorial(n)
    abs_err = abs(function - exp_sum)
    rel_err = abs_err / abs(function)
    # Report the state after every term.
    print("The exponential function which has %d-term expansion, returns the approximated sum to be %.16f." % (n, exp_sum))
    print("This approximated sum has an absolute error to be %.25f" % abs_err)
    print("and a relative error to be %.25f" % rel_err)
Right now it looks silly to print these values at every iteration, and the output is only readable for the first few iterations. My plan is to get the output as a table with proper column headings (iteration, sum, abs err, rel err) in the terminal after I execute the .py file.
I would also like to save the output to a .txt file. If anyone has an idea how to do that in Python, I would very much appreciate the help. Thanks.
You might use a pretty_table() function in order to pretty print tabular data, like this:
def pretty_table(rows, column_count, column_spacing=4):
    """Yield the lines of a left-aligned text table, one string per row.

    Every column is padded to the width of its widest cell plus
    ``column_spacing`` spaces. The last column is padded as well, so the
    lines end in trailing spaces.

    Args:
        rows: Sequence of rows; each row is an indexable sequence holding at
            least ``column_count`` string cells.
        column_count: Number of leading cells of each row to print.
        column_spacing: Extra spaces added after the widest cell of a column.

    Yields:
        One formatted line per row, in input order. Nothing is yielded when
        ``rows`` is empty.
    """
    if not rows:
        # max() below would raise ValueError on an empty sequence.
        return
    widths = [
        max(len(row[column]) for row in rows) + column_spacing
        for column in range(column_count)
    ]
    for row in rows:
        # zip() stops at the shorter sequence, so cells beyond column_count
        # are ignored.
        yield ''.join(cell.ljust(width) for cell, width in zip(row, widths))
This little function, given a list of rows and the number of columns, will yield pretty-formatted table data, line by line. You can even adjust the spacing between columns if you wish.
In your particular code, you may do the following:
# The table starts out with the header row only; one data row is appended
# for every term of the series.
rows = [['Term', 'Exponential sum', 'Absolute error', 'Relative error']]
for n in range(N):
    # Same per-term computation as the original loop, without the temporaries.
    exp_sum += x**n / math.factorial(n)
    abs_err = abs(function - exp_sum)
    rel_err = abs_err / abs(function)
    rows.append(tuple(str(value) for value in (n, exp_sum, abs_err, rel_err)))
# Print the aligned table line by line.
for line in pretty_table(rows, 4):
    print(line)
For an input of N = 10, X = 5, this code outputs:
Term Exponential sum Absolute error Relative error
0 1.0 147.413159103 0.993262053001
1 6.0 142.413159103 0.959572318005
2 18.5 129.913159103 0.875347980517
3 39.3333333333 109.079825769 0.734974084703
4 65.375 83.0381591026 0.559506714935
5 91.4166666667 56.9964924359 0.384039345167
6 113.118055556 35.295103547 0.237816537027
7 128.619047619 19.7941114835 0.13337167407
8 138.307167659 10.1059914438 0.0680936347218
9 143.68945657 4.72370253291 0.0318280573062
If you want to redirect it into a file, do this instead of the last for loop:
# Write the table to a text file, one line per row. file.write() is used
# instead of the Python 2-only `print >> output, line` form, so the snippet
# runs unchanged on both Python 2 and Python 3.
with open('my_file.txt', 'w') as output:
    for line in pretty_table(rows, 4):
        output.write(line + '\n')

Filling Value of a Pandas Data Frame From a Large DB Query (Python)

I am running a snippet of code that queries a database and then fills in a pandas DataFrame with a value of 1 wherever a (userid, day) tuple is present in the query result. It does this by running the query, then iterating over the tuples and filling in the DataFrame one cell at a time. However, the query returns almost 8 million rows of data.
My question is whether anyone knows how to speed up a process like this. Here is the code:
# Ages per user, indexed by userid. The builtin int is used instead of
# np.int, which was only an alias for it and has been removed from NumPy.
user_age = pd.read_sql_query(sql_age, datastore, index_col=['userid']).age.astype(int, copy=False)
# One row per user and one column per day number 0..365, initialised to 0.
x = pd.DataFrame(0, index=user_age.index, columns=range(366), dtype=np.int8)
# Stream the activity query in chunks of 50000 rows and flag every
# (userid, day) pair that occurs.
for r in pd.read_sql_query(sql_active, datastore, chunksize=50000):
    for userid, day in r.itertuples(index=False):
        x.at[userid, day] = 1
Thank you in advance!
You could save some time by replacing the Python loop
for userid, day in r.itertuples(index=False):
x.at[userid, day] = 1
with a NumPy array assignment using "advanced integer indexing":
x[npidx[r['userid']], r['day']] = 1
On a 80000-row DataFrame, using_numpy (below) is about 6x faster:
In [7]: %timeit orig()
1 loop, best of 3: 984 ms per loop
In [8]: %timeit using_numpy()
10 loops, best of 3: 162 ms per loop
import numpy as np
import pandas as pd
def mock_read_sql_query():
    """Yield mock query chunks as DataFrames with columns userid and day.

    The module-level ``index`` array is cut into ``N // M`` pieces; every
    piece becomes one DataFrame whose ``day`` values are random integers in
    [0, 366). The random generator is re-seeded on every call, so the same
    days are produced each time.
    """
    np.random.seed(2016)
    chunk_count = N // M
    for userids in np.array_split(index, chunk_count):
        days = np.random.randint(366, size=len(userids))
        frame = pd.DataFrame({'userid': userids, 'day': days})
        # Fix the column order explicitly: userid first, then day.
        yield frame[['userid', 'day']]
# Benchmark size: N user ids in total, handed out in chunks of M rows.
N = 8 * 10**4
M = 5 * 10**2
# User ids 0..N-1 in random order (shuffled in place).
index = np.arange(N)
np.random.shuffle(index)
# One column per day number 0..365.
columns = range(366)
def using_numpy():
    """Build the userid x day flag table with NumPy integer-array indexing.

    Returns:
        A DataFrame indexed by ``index`` with one int8 column per entry of
        ``columns``; a cell is 1 when its (userid, day) pair occurred in the
        mock query and 0 otherwise.
    """
    row_count = len(index)
    # Inverse permutation: npidx[userid] is the row position of that userid
    # within `index`.
    npidx = np.empty_like(index)
    npidx[index] = np.arange(row_count)
    flags = np.zeros((row_count, len(columns)), dtype=np.int8)
    for chunk in mock_read_sql_query():
        # One vectorised assignment per chunk instead of one per row.
        row_positions = npidx[chunk['userid']]
        flags[row_positions, chunk['day']] = 1
    return pd.DataFrame(flags, columns=columns, index=index)
# Baseline for the timing comparison: sets one DataFrame cell per
# (userid, day) pair with a Python-level loop over every row of every chunk.
# Returns the filled DataFrame (int8, indexed by `index`, one column per
# entry of `columns`).
def orig():
x = pd.DataFrame(0, index=index, columns=columns, dtype=np.int8)
for r in mock_read_sql_query():
for userid, day in r.itertuples(index=False):
# .at is the label-based scalar accessor: row label userid, column label day.
x.at[userid, day] = 1
return x
# Sanity check: both implementations must flag exactly the same cells.
expected = orig()
result = using_numpy()
# Row and column positions of all non-zero cells in each table.
expected_index, expected_col = np.where(expected)
result_index, result_col = np.where(result)
for wanted, got in ((expected_index, result_index), (expected_col, result_col)):
    assert np.equal(wanted, got).all()

python plotting moving average python error

I wrote some code that needs to plot my data after smoothing it with a moving average.
import numpy as np
import csv
import datetime
import matplotlib.pyplot as plt
# Read the weekly counts from the CSV file. The first column holds the week
# (header 'week'); the second and third hold the integer iphone and android
# counts.
iphone_data = []
android_data = []
stringdates = []
# `with` closes the file even when a row fails to parse.
with open('iphonevsandroid.csv', 'r') as data:
    for row in csv.reader(data, delimiter=','):
        if not row or row[0] == 'week':
            # Skip blank lines and the header row.
            continue
        iphone_data.append(int(row[1]))
        android_data.append(int(row[2]))
        # Cut the last 13 characters off the week field; the remainder is
        # parsed as %Y-%m-%d below.
        stringdates.append(row[0][:-13])
# Date strings converted to datetime objects (used as x values for plotting).
dateTime = [datetime.datetime.strptime(item, '%Y-%m-%d') for item in stringdates]
def movingaverage(values, window):
    """Return the simple moving average of ``values`` as a NumPy array.

    The data is convolved with a uniform kernel of length ``window``. The
    convolution runs in 'valid' mode, so only positions where the whole
    window overlaps the data are returned: for ``window <= len(values)`` the
    result has ``len(values) - window + 1`` entries and no zero-padded
    averages at the edges.
    """
    kernel = np.full(window, 1.0 / window)
    return np.convolve(values, kernel, mode='valid')
# Smooth both series with a 3-point moving average, shift them up by 2 and
# plot each against the parsed dates.
iphone_smooth = movingaverage(iphone_data, 3) + 2
android_smooth = movingaverage(android_data, 3) + 2
plt.ylabel('Indsæt y label')
plt.xlabel('Indsæt x label')
for smoothed in (iphone_smooth, android_smooth):
    plt.plot(dateTime, smoothed)
plt.show()
My problem is that I get this error: ValueError: x and y must have same first dimension.
I know it is because the lengths of the values differ.
If I print the lengths:
print len(dateTime)
print len(movingaverage(iphone_data,3))
print len(movingaverage(android_data,3))
i get:
528
526
526
How do I get dateTime down to 526 entries?
smas = np.convolve(values, weigths, 'valid')
should be
smas = np.convolve(values, weigths, 'same')
and if you don't want the border values, then you will have to remove them yourself, that is for odd window lengths:
# Trim the (window-1)//2 border samples on each side of the 'same' result.
# The 'valid' mode has already dropped those samples, so slicing its result
# again would remove twice as many. The explicit end index also keeps
# window == 1 working, where a [0:-0] slice would be empty.
# Assumes window <= len(values), so that the 'same' result has len(values) entries.
half = (window - 1) // 2
smas = np.convolve(values, weigths, 'same')[half:len(values) - half]
Note that you would also have to remove these values from android_data and iphone_data.