sys.argv: index out of range - python-2.7

I'm trying to run this code:
import sys
import numpy as np

filename = sys.argv[1]
X = []
y = []
with open(filename, 'r') as f:
    for line in f.readlines():
        xt, yt = [float(i) for i in line.split(',')]
        X.append(xt)
        y.append(yt)
and I get this error:
4 filename = sys.argv[1]
5 X = []
6 y = []
IndexError: list index out of range
How can I fix it?
I have a .txt file that I want to read my data from:
4.94,4.37
-1.58,1.7
-4.45,1.88
-6.06,0.56
-1.22,2.23
-3.55,1.53
0.36,2.99
-3.24,0.48
1.31,2.76
2.17,3.99
2.94,3.25
-0.92,2.27
-0.91,2.0
1.24,4.75
1.56,3.52
-4.14,1.39
3.75,4.9
4.15,4.44
0.33,2.72
3.41,4.59
2.27,5.3
2.6,3.43
1.06,2.53
1.04,3.69
2.74,3.1
-0.71,2.72
-2.75,2.82
0.55,3.53
-3.45,1.77
1.09,4.61
2.47,4.24
-6.35,1.0
1.83,3.84
-0.68,2.42
-3.83,0.67
-2.03,1.07
3.13,3.19
0.92,4.21
4.02,5.24
3.89,3.94
-1.81,2.85
3.94,4.86
-2.0,1.31
0.54,3.99
0.78,2.92
2.15,4.72
2.55,3.83
-0.63,2.58
1.06,2.89
-0.36,1.99

Make sure you pass the filename on the command line, as @hpaulj suggested. You could also check the length of sys.argv:
import sys

print(sys.argv, len(sys.argv))
if len(sys.argv) < 2:
    sys.exit('Usage: %s input_file' % sys.argv[0])
You may also want to check out this helper module:
https://docs.python.org/2/library/fileinput.html#module-fileinput
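For example, a minimal sketch of the original loop using fileinput (it reads the files named on the command line, falling back to standard input when none are given):
import fileinput

X = []
y = []
# fileinput.input() iterates over the lines of every file listed in
# sys.argv[1:], or over stdin if no filenames were passed.
for line in fileinput.input():
    xt, yt = [float(i) for i in line.split(',')]
    X.append(xt)
    y.append(yt)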

Related

How do I pass my input/output to this network?

I seem to have some problems getting my training started, and I am not sure why.
The network is multi-input (72 1D arrays) and the output is a 1D array of length 24. The output array consists of numbers belonging to 145 different classes.
So: 72 inputs => 24 outputs.
Here is a minimal working example, without the input/output being set.
import keras
from keras.utils import np_utils
from keras import metrics
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Lambda, Reshape, Flatten
from keras.layers import Conv1D, Conv2D, MaxPooling2D, MaxPooling1D, Reshape, ZeroPadding2D
from keras.utils import np_utils
from keras.layers.advanced_activations import LeakyReLU, PReLU
from keras.layers.advanced_activations import ELU
from keras.models import Model
from keras.layers import Input, Dense
from keras.layers import Dropout
from keras import backend as K
from keras.callbacks import ReduceLROnPlateau
from keras.callbacks import CSVLogger
from keras.callbacks import EarlyStopping
from keras.models import load_model
from keras.layers.merge import Concatenate
import numpy as np

def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]

nano_train_input = []
nano_train_output = []
nano_test_input = []
nano_test_output = []

## Creating train input:
for i in range(974):
    nano_train_input.append(np.random.random((78,684,4)))
    nano_train_output.append(np.random.randint(145,size=(228)).tolist())

## Creating test input:
for i in range(104):
    nano_test_input.append(np.random.random((78,684,4)))
    nano_test_output.append(np.random.randint(145,size=(228)).tolist())

def model(train_input, train_output, test_input, test_output, names=0):
    # Paper uses dimension (40 x 45 = (15 * 3))
    # Filter size 5
    # Pooling size
    # I use dimension (78 x 72 = (24 * 3))
    # Filter size 9
    print "In model"
    i = 0
    print_once = True
    data_test_output = []
    data_test_input = []
    for matrix in test_input:
        row, col, channel = matrix.shape
        remove_output = (col/3) % 24
        remove_input = col % 72
        if remove_output > 0:
            test_output[i] = test_output[i][:-(remove_output)]
        for split in chunks(test_output[i], 24):
            data_test_output.append(np.array(split))
        if remove_input > 0:
            out = np.split(matrix[:,:-(remove_input),:-1], matrix[:,:-(remove_input),:-1].shape[1]/72, axis=1)
        else:
            out = np.split(matrix[:,:,:-1], matrix[:,:,:-1].shape[1]/72, axis=1)
        data_test_input.extend(out)
        del out
        i = i + 1  # Increment

    i = 0
    data_train_output = []
    data_train_input = []
    for matrix in train_input:
        row, col, channel = matrix.shape
        remove_output = (col/3) % 24
        remove_input = col % 72
        if remove_output > 0:
            train_output[i] = train_output[i][:-(remove_output)]
        for split in chunks(train_output[i], 24):
            data_train_output.append(np.array(split))
        if remove_input > 0:
            out = np.split(matrix[:,:-(remove_input),:-1], matrix[:,:-(remove_input),:-1].shape[1]/72, axis=1)
        else:
            out = np.split(matrix[:,:,:-1], matrix[:,:,:-1].shape[1]/72, axis=1)
        data_train_input.extend(out)
        del out
        i = i + 1  # Increment

    print
    print "Len:"
    print len(data_train_input)
    print len(data_train_output)
    print len(data_test_input)
    print len(data_test_output)
    print
    print "Type[0]:"
    print type(data_train_input[0])
    print type(data_train_output[0])
    print type(data_test_input[0])
    print type(data_test_output[0])
    print
    print "Type:"
    print type(data_train_input)
    print type(data_train_output)
    print type(data_test_input)
    print type(data_test_output)
    print
    print "shape of [0]:"
    print data_train_input[0].shape
    print data_train_output[0].shape
    print data_test_input[0].shape
    print data_test_output[0].shape

    list_of_input = [Input(shape = (78,3)) for i in range(72)]
    list_of_conv_output = []
    list_of_max_out = []
    for i in range(72):
        list_of_conv_output.append(Conv1D(filters = 32, kernel_size = 6, padding = "same", activation = 'relu')(list_of_input[i]))
        list_of_max_out.append(MaxPooling1D(pool_size=3)(list_of_conv_output[i]))

    merge = keras.layers.concatenate(list_of_max_out)
    reshape = Flatten()(merge)

    dense1 = Dense(units = 500, activation = 'relu', name = "dense_1")(reshape)
    dense2 = Dense(units = 250, activation = 'relu', name = "dense_2")(dense1)
    dense3 = Dense(units = 24, activation = 'softmax', name = "dense_3")(dense2)

    model = Model(inputs = list_of_input, outputs = dense3)
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics = [metrics.sparse_categorical_accuracy])

    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, mode='auto', epsilon=0.01, cooldown=0, min_lr=0.000000000000000000001)
    stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=1, mode='auto')

    print "Train!"
    print model.summary()
    hist_current = model.fit(x = ,
                             y = ,
                             shuffle=False,
                             validation_data=(,),
                             validation_split=0.1,
                             epochs=150000,
                             verbose=1,
                             callbacks=[reduce_lr, stop])

model(nano_train_input, nano_train_output, nano_test_input, nano_test_output)
The input and output are stored as lists of numpy.ndarrays.
This is a minimal working example; how am I supposed to pass the input and output?
I would try:
merge = keras.layers.concatenate(list_of_max_out)
merge = Flatten()(merge) # or GlobalMaxPooling1D or GlobalAveragePooling1D
dense1 = Dense(500, activation = 'relu')(merge)
You probably want to apply something that transforms the output of your convolutional layers. To do that, you need to squash the time/sequential dimension, e.g. with one of the techniques shown above.
If you take a look at your code and its printed output, you indeed have what you say: 24 outputs (data_train_output[0].shape). However, if you look at the layer output reported by Keras, you have this:
dense_3 (Dense) (None, 26, 145) 36395
whereas it should be an array with shape (None, 24).
I suggest you add a Reshape layer to get the output you want!
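To answer the original question of how to pass the input and output: a multi-input Model expects x to be a list with one array per Input layer. Below is a minimal, hedged sketch for the code inside model() (assuming, as the prints above suggest, that each element of data_train_input has shape (78, 72, 3) and each element of data_train_output has length 24; the output-shape/loss mismatch discussed above still needs fixing separately):
import numpy as np

# Hypothetical glue code, not the author's exact pipeline.
train_x = np.array(data_train_input)               # (num_samples, 78, 72, 3)
x_list = [train_x[:, :, j, :] for j in range(72)]  # one (num_samples, 78, 3) array per Input
y_arr = np.array(data_train_output)                # (num_samples, 24)

hist_current = model.fit(x=x_list,
                         y=y_arr,
                         shuffle=False,
                         validation_split=0.1,  # use either this or validation_data, not both
                         epochs=10,
                         verbose=1,
                         callbacks=[reduce_lr, stop])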

Convert a text file to a numpy array

I am new to Python. I have a .txt file
SET = 20,21,23,21,23
45,46,42,23,55
with many rows. How would I convert this .txt file into an array, ignoring spaces and commas? Any help would be really appreciated.
l1 = []
file = open('list-num')
for l in file:
    l2 = map(int, l.split(','))
    l1 = l1 + l2
print l1
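Note that, as posted, this will raise a ValueError on the first line, because 'SET = 20,...' starts with non-numeric tokens. A hedged variant that strips everything up to an '=' first (a sketch assuming that layout):
l1 = []
with open('list-num') as f:
    for l in f:
        l = l.split('=')[-1]  # drop a leading 'SET =' prefix when present
        l1 += [int(v) for v in l.split(',') if v.strip()]
print l1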
Your data looks like:
SET 1 = 13900100,13900141,13900306,13900442,13900453,13900461, 13900524,13900537,13900619,13900632,13900638,13900661, 13900665,13900758,13900766,13900825,13900964,13901123, 13901131,13901136,13901141,13901143,13901195,13901218,
You can use the numpy function np.genfromtxt():
import numpy as np
import matplotlib.pyplot as plt
data = np.genfromtxt("text.txt", delimiter=",")
data = data[np.logical_not(np.isnan(data))] #Remove nan value
print data
I get:
[ 13900141. 13900306. 13900442. 13900453. 13900461. 13900524.
13900537. 13900619. 13900632. 13900638. 13900661. 13900665.
13900758. 13900766. 13900825. 13900964. 13901123. 13901131.
13901136. 13901141. 13901143. 13901195. 13901218.]
It should work ;)
------------------------------------
Another way:
import numpy as np
f = open("text.txt", "r") #Open data file
data = f.read() #Read data file
cut = data.split() #Split data file
value = cut[2] #Pick the value part
array = np.array(value) #Value becomes an array
print array
I get:
13900100,13900141,13900306,13900442,13900453,13900461,13900524,13900537,13900619,13900632,13900638,13900661,13900665,13900758,13900766,13900825,13900964,13901123,13901131,13901136,13901141,13901143,13901195,13901218
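This second version gives you a single comma-separated string inside a 0-d array rather than numbers. If you want a numeric array instead, a hedged extra step is to split that string on commas (a sketch that also allows for a trailing comma):
import numpy as np

f = open("text.txt", "r")
data = f.read()
value = data.split()[2]  # the comma-separated value part, as above
# Split on commas and cast to int; 'if v' skips the empty token a trailing comma leaves behind.
array = np.array([int(v) for v in value.split(',') if v])
print array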

Python KeyError: 1.0

I'm trying to run this code
from math import sqrt
import numpy as np
import warnings
from collections import Counter
import pandas as pd
import random

def k_nearest_neighbors(data, predict, k=3):
    if len(data) >= k:
        warnings.warn('K is set to a value less than total voting groups')
    distances = []
    for group in data:
        for features in data[group]:
            eucliden_distance = np.linalg.norm(np.array(features) - np.array(predict))
            distances.append([eucliden_distance, group])
    votes = [i[1] for i in sorted(distances)[:k]]
    print(Counter(votes).most_common(1))
    vote_result = Counter(votes).most_common(1)[0][0]
    return vote_result

df = pd.read_csv('bc2.txt')
df.replace('?', -99999, inplace=True)
df.drop(['id'], 1, inplace=True)
full_data = df.astype(float).values.tolist()
random.shuffle(full_data)

test_size = 0.2
train_set = {2: [], 4: []}
test_set = {2: [], 4: []}
train_data = full_data[:-int(test_size*len(full_data))]
test_data = full_data[-int(test_size*len(full_data)):]

for i in train_data:
    train_set[i[-1]].append(i[:-1])
for i in train_data:
    test_set[i[-1]].append(i[:-1])

correct = 0
total = 0
for group in test_set:
    for data in test_set[group]:
        vote = k_nearest_neighbors(train_set, data, k=5)
        if group == vote:
            correct += 1
        total += 1
print('Accuracy:', correct/total)
It comes out with this error message:
File "ml8.py", line 38, in <module>
train_set[i[-1]].append(i[:-1])
KeyError: 1.0
The file ml8.py contains the code above.
Below is a sample of the .txt file:
id,clump_thickness,unif_cell_size,unif_cell_shape,marg_adhesion,single_epith_cell_size,bare_nuclei,bland_chrom,norm_nucleoli,mitoses,class
1000025,2,5,1,1,1,2,1,3,1,1
1002945,2,5,4,4,5,7,10,3,2,1
1015425,2,3,1,1,1,2,2,3,1,1
1016277,2,6,8,8,1,3,4,3,7,1
1017023,2,4,1,1,3,2,1,3,1,1
1017122,4,8,10,10,8,7,10,9,7,1
1018099,2,1,1,1,1,2,10,3,1,1
1018561,2,2,1,2,1,2,1,3,1,1
1033078,2,2,1,1,1,2,1,1,1,5
1033078,2,4,2,1,1,2,1,2,1,1
1035283,2,1,1,1,1,1,1,3,1,1
1036172,2,2,1,1,1,2,1,2,1,1
1041801,4,5,3,3,3,2,3,4,4,1
I'm using Python 2.7.11.
Your train_set only contains keys 2 and 4, whereas your classes in that sample are 1 and 5.
Instead of using
train_set = {2:[],4:[]}
you might have better luck with defaultdict:
from collections import defaultdict
train_set = defaultdict(list)
This way a non-existent key will be initialized to a new empty list on first access.
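A quick sketch of the difference, using a couple of hypothetical rows whose last column is the class label:
from collections import defaultdict

train_set = defaultdict(list)
for row in [[5.0, 1.0, 1.0], [3.0, 2.0, 5.0]]:  # toy data; the last value is the class
    train_set[row[-1]].append(row[:-1])          # unseen classes no longer raise KeyError
print(dict(train_set))  # {1.0: [[5.0, 1.0]], 5.0: [[3.0, 2.0]]}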

For loop runs one extra time

I am writing code that opens a file, skips the headers (6 lines), and then prints a time with a 0.005 interval to an output file. The number of rows should be the same as in the file that I opened.
The file that I have has 16 rows, but the output file has 17. How can I make the for loop run 16 times?
I tried to use the length of the opened file, but it was not working.
Thanks in advance.
Code:
import time
from tkFileDialog import *
import Tkinter as T
import easygui as Ea
import pandas as pd
import numpy as N
import math as M

name0 = 'file.txt'
with open(name0, "r") as f:
    f.next()
    f.next()
    f.next()
    f.next()
    f.next()
    f.next()
    fintime = fintime - 0.005
    for line in f:
        fintime = fintime + 0.005
        numbers_str = line.split()
        numbers_float = [float(x) for x in numbers_str]
        fName = name0 + 'new.txt'
        w1 = open(fName, "a")
        w1.write("%s\n" % format(fintime, '.3f'))
        w1.close()
You can just change the for loop and manually drop the extra iteration, for example by reading all the lines and slicing off the last one:
for line in f.readlines()[:-1]:  # skip the extra final line
    fintime = fintime + 0.005
    numbers_str = line.split()
    numbers_float = [float(x) for x in numbers_str]
    fName = name0 + 'new.txt'
    w1 = open(fName, "a")
    w1.write("%s\n" % format(fintime, '.3f'))
    w1.close()
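An extra output row like this is often caused by a trailing blank line in the input file. An alternative, hedged sketch (assuming fintime should start counting from 0.005 on the first data row and that there are exactly 6 header lines) that skips blank lines instead of dropping a fixed count:
fintime = 0.0  # assumed starting value; the original snippet never initializes it
with open(name0, "r") as f, open(name0 + 'new.txt', "w") as w1:
    for _ in range(6):        # skip the 6 header lines
        f.next()
    for line in f:
        if not line.strip():  # ignore blank lines, e.g. a trailing newline
            continue
        fintime += 0.005
        numbers_float = [float(x) for x in line.split()]
        w1.write("%s\n" % format(fintime, '.3f'))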

Python plotting moving average error

I made some code where I need to make a plot of my data passed through a moving average.
import numpy as np
import csv
import datetime
import matplotlib.pyplot as plt

# Open data file
data = open('iphonevsandroid.csv', 'r')
reader = csv.reader(data, delimiter=',')

# Define lists
iphone_data = []
android_data = []
dateTime = []
stringdates = []
#iphone_data_average = []
#android_data_average = []

for row in reader:
    first_date_row = row[0]
    first_date = row[0][:-13]
    if row[1] != 'iphone':
        iphone_data.append(int(row[1]))
    if row[2] != 'android':
        android_data.append(int(row[2]))
    if row[0] != 'week':
        stringdates.append(row[0][:-13])

for item in stringdates:
    dateTime.append(datetime.datetime.strptime(item, '%Y-%m-%d'))

def movingaverage(values, window):
    weigths = np.repeat(1.0, window)/window
    # Using 'valid' REQUIRES there to be enough datapoints.
    # For example, if you take out 'valid', it will start at point one,
    # not having any prior points, so it'll be 1+0+0 = 1 / 3 = .3333
    smas = np.convolve(values, weigths, 'valid')
    return smas  # as a numpy array

movingaverage(iphone_data, 3)
movingaverage(android_data, 3)

plt.ylabel('Insert y label')
plt.xlabel('Insert x label')
plt.plot(dateTime, movingaverage(iphone_data, 3)+2)
plt.plot(dateTime, movingaverage(android_data, 3)+2)
plt.show()
My problem is that I get this error: ValueError: x and y must have same first dimension.
I know it's because of the lengths of the values. If I print the lengths:
print len(dateTime)
print len(movingaverage(iphone_data,3))
print len(movingaverage(android_data,3))
I get:
528
526
526
How do I get dateTime down to 526?
smas = np.convolve(values, weigths, 'valid')
should be
smas = np.convolve(values, weigths, 'same')
and if you don't want the border values, you will have to remove them yourself; that is, for odd window lengths:
smas = np.convolve(values, weigths, 'same')[(window-1)/2:-(window-1)/2]
Note that you would then also have to remove the corresponding entries from dateTime so the lengths still match.
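Alternatively, to answer the question as literally asked (getting dateTime down to 526), you can keep 'valid' mode and trim the date axis instead; a minimal sketch for window = 3:
window = 3
trim = (window - 1) / 2                  # 1 for window = 3 (Python 2 integer division)
dateTime_trimmed = dateTime[trim:-trim]  # 528 -> 526 entries
plt.plot(dateTime_trimmed, movingaverage(iphone_data, window))
plt.plot(dateTime_trimmed, movingaverage(android_data, window))
plt.show()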