Python KeyError: 1.0 - python-2.7

I'm trying to run this code:
from math import sqrt
import numpy as np
import warnings
from collections import Counter
import pandas as pd
import random

def k_nearest_neighbors(data, predict, k=3):
    if len(data) >= k:
        warnings.warn('K is set to a value less than total voting groups')
    distances = []
    for group in data:
        for features in data[group]:
            eucliden_distance = np.linalg.norm(np.array(features) - np.array(predict))
            distances.append([eucliden_distance, group])
    votes = [i[1] for i in sorted(distances)[:k]]
    print(Counter(votes).most_common(1))
    vote_result = Counter(votes).most_common(1)[0][0]
    return vote_result

df = pd.read_csv('bc2.txt')
df.replace('?', -99999, inplace=True)
df.drop(['id'], 1, inplace=True)
full_data = df.astype(float).values.tolist()
random.shuffle(full_data)

test_size = 0.2
train_set = {2: [], 4: []}
test_set = {2: [], 4: []}
train_data = full_data[:-int(test_size*len(full_data))]
test_data = full_data[-int(test_size*len(full_data)):]

for i in train_data:
    train_set[i[-1]].append(i[:-1])
for i in train_data:
    test_set[i[-1]].append(i[:-1])

correct = 0
total = 0
for group in test_set:
    for data in test_set[group]:
        vote = k_nearest_neighbors(train_set, data, k=5)
        if group == vote:
            correct += 1
        total += 1
print('Accuracy:', correct/total)
It fails with this error message:
File "ml8.py", line 38, in <module>
    train_set[i[-1]].append(i[:-1])
KeyError: 1.0
The file ml8.py contains the code above. Below is a sample of the txt file:
id,clump_thickness,unif_cell_size,unif_cell_shape,marg_adhesion,single_epith_cell_size,bare_nuclei,bland_chrom,norm_nucleoli,mitoses,class
1000025,2,5,1,1,1,2,1,3,1,1
1002945,2,5,4,4,5,7,10,3,2,1
1015425,2,3,1,1,1,2,2,3,1,1
1016277,2,6,8,8,1,3,4,3,7,1
1017023,2,4,1,1,3,2,1,3,1,1
1017122,4,8,10,10,8,7,10,9,7,1
1018099,2,1,1,1,1,2,10,3,1,1
1018561,2,2,1,2,1,2,1,3,1,1
1033078,2,2,1,1,1,2,1,1,1,5
1033078,2,4,2,1,1,2,1,2,1,1
1035283,2,1,1,1,1,1,1,3,1,1
1036172,2,2,1,1,1,2,1,2,1,1
1041801,4,5,3,3,3,2,3,4,4,1
I'm using Python 2.7.11.

Your train_set only contains keys 2 and 4, whereas your classes in that sample are 1 and 5.
Instead of using
train_set = {2:[],4:[]}
you might have better luck with defaultdict:
from collections import defaultdict
train_set = defaultdict(list)
This way a non-existent key will be initialized to a new empty list on first access.
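For example, a minimal sketch reusing the question's variable names (note the second loop reads from test_data, which appears to be the intent):
from collections import defaultdict

train_set = defaultdict(list)
test_set = defaultdict(list)

# Any class value in the last column gets its own list automatically,
# so labels 1 and 5 no longer raise a KeyError.
for i in train_data:
    train_set[i[-1]].append(i[:-1])
for i in test_data:
    test_set[i[-1]].append(i[:-1])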

Related

Same Key is appearing twice in the output of a MapReduce program when Combiner is used

I have written a MapReduce program to calculate the mean of a set of values for each key. It works fine with just a mapper and a reducer, but when I introduce a combiner in between to reduce the load on the reducer, the keys are repeated in the result. The code is given below. TIA.
mapper.py:
#!/usr/bin/env python
import sys
from datetime import datetime

for line in sys.stdin:
    data = line.strip().split("\t")
    if len(data) < 5:
        continue
    date, time, store, item, cost, payment = data
    print("{0}\t{1}".format(datetime.strptime(date, "%Y-%m-%d").weekday(), cost))
combiner.py:
#!/usr/bin/env python
import sys
from datetime import datetime

oldKey = None
salesList = ""

for line in sys.stdin:
    data = line.rstrip().split('\t')
    thisKey, thisSale = data
    if oldKey and oldKey != thisKey:
        print("{0}\t{1}".format(oldKey, salesList))
        salesList = ""
    else:
        if oldKey == thisKey:
            if salesList != "":
                salesList = salesList + ',' + thisSale
            else:
                salesList = thisSale
    oldKey = thisKey

if oldKey:
    salesList = salesList + ',' + thisSale
    print("{0}\t{1}".format(oldKey, salesList))
    salesList = ""
reducer.py:
#!/usr/bin/env python
import sys
from datetime import datetime

oldKey = None
meanSales = None
salesTotal = 0
count = 0

for line in sys.stdin:
    data = line.rstrip().split('\t')
    thisKey, thisSale = data
    thisSaleList = thisSale.split(',')
    thisSaleListFloat = list(map(float, thisSaleList))
    meanSales = sum(thisSaleListFloat) / len(thisSaleListFloat)
    print("{0}\t{1}".format(thisKey, meanSales))
Output
0 249.91917747419384
0 250.09807318775844
1 249.87984898663836
1 249.59593170284487
2 249.95321425419965
2 249.75339205149234
3 249.54634982922747
3 250.19731461573994
4 250.3129656082639
4 250.13323419720658
5 250.13367036331366
5 250.03468060131152
6 250.207532163134
6 249.67593639719652

How do I pass my input/output to this network?

I seem to have some problems getting my training started, and I am not sure why.
The network has multiple inputs (72 1-d arrays) and the output is a 1-d array of length 24. The output array consists of numbers related to 145 different classes.
So: 72 inputs => 24 outputs
Here is a minimal working example, without the input/output being set:
import keras
from keras.utils import np_utils
from keras import metrics
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Lambda, Reshape, Flatten
from keras.layers import Conv1D, Conv2D, MaxPooling2D, MaxPooling1D, Reshape, ZeroPadding2D
from keras.utils import np_utils
from keras.layers.advanced_activations import LeakyReLU, PReLU
from keras.layers.advanced_activations import ELU
from keras.models import Model
from keras.layers import Input, Dense
from keras.layers import Dropout
from keras import backend as K
from keras.callbacks import ReduceLROnPlateau
from keras.callbacks import CSVLogger
from keras.callbacks import EarlyStopping
from keras.models import load_model
from keras.layers.merge import Concatenate
import numpy as np

def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]

nano_train_input = []
nano_train_output = []
nano_test_input = []
nano_test_output = []

## Creating train input:
for i in range(974):
    nano_train_input.append(np.random.random((78, 684, 4)))
    nano_train_output.append(np.random.randint(145, size=(228)).tolist())

## Creating test input:
for i in range(104):
    nano_test_input.append(np.random.random((78, 684, 4)))
    nano_test_output.append(np.random.randint(145, size=(228)).tolist())

def model(train_input, train_output, test_input, test_output, names=0):
    # Paper uses dimension (40 x 45 = (15 * 3))
    # Filter size 5
    # Pooling size
    # I use dimension (78 x 72 = (24 * 3))
    # Filter size 9
    print "In model"
    i = 0
    print_once = True
    data_test_output = []
    data_test_input = []
    for matrix in test_input:
        row, col, channel = matrix.shape
        remove_output = (col/3) % 24
        remove_input = col % 72
        if remove_output > 0:
            test_output[i] = test_output[i][:-(remove_output)]
        for split in chunks(test_output[i], 24):
            data_test_output.append(np.array(split))
        if remove_input > 0:
            out = np.split(matrix[:, :-(remove_input), :-1], matrix[:, :-(remove_input), :-1].shape[1]/72, axis=1)
        else:
            out = np.split(matrix[:, :, :-1], matrix[:, :, :-1].shape[1]/72, axis=1)
        data_test_input.extend(out)
        del out
        i = i + 1  # Increment
    i = 0
    data_train_output = []
    data_train_input = []
    for matrix in train_input:
        row, col, channel = matrix.shape
        remove_output = (col/3) % 24
        remove_input = col % 72
        if remove_output > 0:
            train_output[i] = train_output[i][:-(remove_output)]
        for split in chunks(train_output[i], 24):
            data_train_output.append(np.array(split))
        if remove_input > 0:
            out = np.split(matrix[:, :-(remove_input), :-1], matrix[:, :-(remove_input), :-1].shape[1]/72, axis=1)
        else:
            out = np.split(matrix[:, :, :-1], matrix[:, :, :-1].shape[1]/72, axis=1)
        data_train_input.extend(out)
        del out
        i = i + 1  # Increment

    print
    print "Len:"
    print len(data_train_input)
    print len(data_train_output)
    print len(data_test_input)
    print len(data_test_output)
    print
    print "Type[0]:"
    print type(data_train_input[0])
    print type(data_train_output[0])
    print type(data_test_input[0])
    print type(data_test_output[0])
    print
    print "Type:"
    print type(data_train_input)
    print type(data_train_output)
    print type(data_test_input)
    print type(data_test_output)
    print
    print "shape of [0]:"
    print data_train_input[0].shape
    print data_train_output[0].shape
    print data_test_input[0].shape
    print data_test_output[0].shape

    list_of_input = [Input(shape=(78, 3)) for i in range(72)]
    list_of_conv_output = []
    list_of_max_out = []
    for i in range(72):
        list_of_conv_output.append(Conv1D(filters=32, kernel_size=6, padding="same", activation='relu')(list_of_input[i]))
        list_of_max_out.append(MaxPooling1D(pool_size=3)(list_of_conv_output[i]))

    merge = keras.layers.concatenate(list_of_max_out)
    reshape = Flatten()(merge)

    dense1 = Dense(units=500, activation='relu', name="dense_1")(reshape)
    dense2 = Dense(units=250, activation='relu', name="dense_2")(dense1)
    dense3 = Dense(units=24, activation='softmax', name="dense_3")(dense2)

    model = Model(inputs=list_of_input, outputs=dense3)
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=[metrics.sparse_categorical_accuracy])

    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, mode='auto', epsilon=0.01, cooldown=0, min_lr=0.000000000000000000001)
    stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=1, mode='auto')

    print "Train!"
    print model.summary()

    hist_current = model.fit(x = ,
                             y = ,
                             shuffle=False,
                             validation_data=(,),
                             validation_split=0.1,
                             epochs=150000,
                             verbose=1,
                             callbacks=[reduce_lr, stop])

model(nano_train_input, nano_train_output, nano_test_input, nano_test_output)
The input and output are stored as lists of numpy.ndarrays.
This is a minimal working example; how am I supposed to pass the input and output?
I would try:
merge = keras.layers.concatenate(list_of_max_out)
merge = Flatten()(merge) # or GlobalMaxPooling1D or GlobalAveragePooling1D
dense1 = Dense(500, activation = 'relu')(merge)
You probably want to apply something to transform the output of your convolutional layers. To do that you need to squash the time/sequential dimension, and the techniques above are ways to do that.
If you take a look at your code and outputs, you indeed have what you say: 24 outputs (data_train_output[0].shape). However, if you look at the Keras layer summary, the last layer's output is:
dense_3 (Dense) (None, 26, 145) 36395
I would say this should be an array with shape (None, 24), so I suggest you add a reshape layer to get the output you want.
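For illustration, a minimal sketch of that change, reusing the merge tensor and imports from the code above (the layer sizes are just placeholders):
# Squash the time/sequential dimension so the last Dense layer emits a single
# vector of length 24, i.e. an output of shape (None, 24) matching each target.
flat = Flatten()(merge)
dense1 = Dense(500, activation='relu')(flat)
dense2 = Dense(250, activation='relu')(dense1)
output = Dense(24, activation='softmax')(dense2)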

Feature selection in scikit learn for multiple variables and thousands+ features

I am trying to perform feature selection for a logistic regression classifier. Originally there are 4 variables: name, location, gender, and the label = ethnicity. The three non-label variables, the name in particular, give rise to tens of thousands more "features": for example, the name "John Snow" produces 2-letter substrings like 'jo', 'oh', 'hn', and so on. The feature set then undergoes DictVectorization.
I am trying to follow this tutorial (http://scikit-learn.org/stable/auto_examples/feature_selection/plot_feature_selection.html), but I am not sure I am doing it right, since the tutorial uses a small number of features while mine has tens of thousands after vectorization. Also, plt.show() shows a blank figure.
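For reference, the 2-letter substring expansion described above can be generated with a small helper along these lines (a sketch; only the list_to_dict part appears in the code below):
def name_to_substrings(name, n=2):
    # 'John Snow' -> ['jo', 'oh', 'hn', 'ns', 'sn', 'no', 'ow']
    s = name.lower().replace(' ', '')
    return [s[i:i+n] for i in range(len(s) - n + 1)]

# The substrings are fed to list_to_dict before DictVectorizer:
# list_to_dict(name_to_substrings('John Snow'))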
# coding=utf-8
import pandas as pd
from pandas import DataFrame, Series
import numpy as np
import re
import random
import time
from random import randint
import csv
import sys
reload(sys)
sys.setdefaultencoding('utf-8')

from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.metrics import confusion_matrix as sk_confusion_matrix
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve

# Assign X and y variables
X = df.raw_name.values
X2 = df.name.values
X3 = df.gender.values
X4 = df.location.values
y = df.ethnicity_scan.values

# Feature extraction functions
def feature_full_name(nameString):
    try:
        full_name = nameString
        if len(full_name) > 1:  # not accept name with only 1 character
            return full_name
        else:
            return '?'
    except:
        return '?'

def feature_avg_wordLength(nameString):
    try:
        space = 0
        for i in nameString:
            if i == ' ':
                space += 1
        length = float(len(nameString) - space)
        name_entity = float(space + 1)
        avg = round(float(length/name_entity), 0)
        return avg
    except:
        return 0

def feature_name_entity(nameString2):
    space = 0
    try:
        for i in nameString2:
            if i == ' ':
                space += 1
        return space + 1
    except:
        return 0

def feature_gender(genString):
    try:
        gender = genString
        if len(gender) >= 1:
            return gender
        else:
            return '?'
    except:
        return '?'

def feature_noNeighborLoc(locString):
    try:
        x = re.sub(r'^[^, ]*', '', locString)  # remove everything before and include first ','
        y = x[2:]  # remove subsequent ',' and ' '
        return y
    except:
        return '?'

def list_to_dict(substring_list):
    try:
        substring_dict = {}
        for i in substring_list:
            substring_dict['substring=' + str(i)] = True
        return substring_dict
    except:
        return '?'

# Transform format of X variables, and spit out a numpy array for all features
my_dict13 = [{'name-entity': feature_name_entity(feature_full_name(i))} for i in X2]
my_dict14 = [{'avg-length': feature_avg_wordLength(feature_full_name(i))} for i in X]
my_dict15 = [{'gender': feature_full_name(i)} for i in X3]
my_dict16 = [{'location': feature_noNeighborLoc(feature_full_name(i))} for i in X4]
my_dict17 = [{'dummy1': 1} for i in X]
my_dict18 = [{'dummy2': random.randint(0, 2)} for i in X]

all_dict = []
for i in range(0, len(my_dict)):
    temp_dict = dict(my_dict13[i].items() + my_dict14[i].items()
                     + my_dict15[i].items() + my_dict16[i].items() + my_dict17[i].items() + my_dict18[i].items())
    all_dict.append(temp_dict)

newX = dv.fit_transform(all_dict)

# Separate the training and testing data sets
half_cut = int(len(df)/2.0)*-1
X_train = newX[:half_cut]
X_test = newX[half_cut:]
y_train = y[:half_cut]
y_test = y[half_cut:]

# Fitting X and y into model, using training data
lr = LogisticRegression()
lr.fit(X_train, y_train)
dv = DictVectorizer()

# Feature selection
plt.figure(1)
plt.clf()
X_indices = np.arange(X_train.shape[-1])
selector = SelectPercentile(f_classif, percentile=10)
selector.fit(X_train, y_train)
scores = -np.log10(selector.pvalues_)
scores /= scores.max()
plt.bar(X_indices - .45, scores, width=.2,
        label=r'Univariate score ($-Log(p_{value})$)', color='g')
plt.show()
Warning:
E:\Program Files Extra\Python27\lib\site-packages\sklearn\feature_selection\univariate_selection.py:111: UserWarning: Features [[0 0 0 ..., 0 0 0]] are constant.
It looks like the way you split your data into training and testing sets is not working:
# Separate the training and testing data sets
X_train = newX[:half_cut]
X_test = newX[half_cut:]
If you already use sklearn, it is much more convenient to use the builtin splitting routine for this:
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.5, random_state=0)
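For completeness, a sketch of how that call could be wired up with the question's variables (assuming the older sklearn API used above; in recent versions the same function lives in sklearn.model_selection):
from sklearn import cross_validation  # sklearn.model_selection.train_test_split in newer releases

# Shuffle and split the vectorized features and labels 50/50 with a fixed seed
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    newX, y, test_size=0.5, random_state=0)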

How to get a graph for stock market analysis?

I updated the code and it now produces the graph; however, after showing the graph it prints the following warning messages.
Warning (from warnings module):
  File "C:\Python27\lib\site-packages\matplotlib\collections.py", line 590
    if self._edgecolors == str('face'):
FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
import urllib2
import time
import datetime
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import matplotlib.dates as mdates
from matplotlib.finance import candlestick_ochl
import matplotlib
import pylab

matplotlib.rcParams.update({'font.size': 9})

def rsiFunc(prices, n=14):
    deltas = np.diff(prices)
    seed = deltas[:n+1]
    up = seed[seed >= 0].sum()/n
    down = -seed[seed < 0].sum()/n
    rs = up/down
    rsi = np.zeros_like(prices)
    rsi[:n] = 100. - 100./(1.+rs)
    for i in range(n, len(prices)):
        delta = deltas[i-1]  # cause the diff is 1 shorter
        if delta > 0:
            upval = delta
            downval = 0.
        else:
            upval = 0.
            downval = -delta
        up = (up*(n-1) + upval)/n
        down = (down*(n-1) + downval)/n
        rs = up/down
        rsi[i] = 100. - 100./(1.+rs)
    return rsi

def movingaverage(values, window):
    weigths = np.repeat(1.0, window)/window
    smas = np.convolve(values, weigths, 'valid')
    return smas  # as a numpy array

def ExpMovingAverage(values, window):
    weights = np.exp(np.linspace(-1., 0., window))
    weights /= weights.sum()
    a = np.convolve(values, weights, mode='full')[:len(values)]
    a[:window] = a[window]
    return a

def computeMACD(x, slow=26, fast=12):
    """
    compute the MACD (Moving Average Convergence/Divergence) using a fast and slow exponential moving avg'
    return value is emaslow, emafast, macd which are len(x) arrays
    """
    emaslow = ExpMovingAverage(x, slow)
    emafast = ExpMovingAverage(x, fast)
    return emaslow, emafast, emafast - emaslow

def graphData(stock, MA1, MA2):
    '''
    Use this to dynamically pull a stock:
    '''
    try:
        print 'Currently Pulling', stock
        print str(datetime.datetime.fromtimestamp(int(time.time())).strftime('%Y-%m-%d %H:%M:%S'))
        # Keep in mind this is close high low open data from Yahoo
        urlToVisit = 'http://chartapi.finance.yahoo.com/instrument/1.0/'+stock+'/chartdata;type=quote;range=10y/csv'
        stockFile = []
        try:
            sourceCode = urllib2.urlopen(urlToVisit).read()
            splitSource = sourceCode.split('\n')
            for eachLine in splitSource:
                splitLine = eachLine.split(',')
                if len(splitLine) == 6:
                    if 'values' not in eachLine:
                        stockFile.append(eachLine)
        except Exception, e:
            print str(e), 'failed to organize pulled data.'
    except Exception, e:
        print str(e), 'failed to pull pricing data'

    try:
        date, closep, highp, lowp, openp, volume = np.loadtxt(stockFile, delimiter=',', unpack=True,
                                                              converters={0: mdates.strpdate2num('%Y%m%d')})
        x = 0
        y = len(date)
        newAr = []
        while x < y:
            appendLine = date[x], openp[x], closep[x], highp[x], lowp[x], volume[x]
            newAr.append(appendLine)
            x += 1

        Av1 = movingaverage(closep, MA1)
        Av2 = movingaverage(closep, MA2)
        SP = len(date[MA2-1:])

        fig = plt.figure(facecolor='#07000d')
        ax1 = plt.subplot2grid((6,4), (1,0), rowspan=4, colspan=4, axisbg='#07000d')
        candlestick_ochl(ax1, newAr[-SP:], width=.6, colorup='#53c156', colordown='#ff1717')  # width=.6, plot_day_summary_ohlc
        Label1 = str(MA1)+' SMA'
        Label2 = str(MA2)+' SMA'
        ax1.plot(date[-SP:], Av1[-SP:], '#e1edf9', label=Label1, linewidth=1.5)
        ax1.plot(date[-SP:], Av2[-SP:], '#4ee6fd', label=Label2, linewidth=1.5)
        ax1.grid(True, color='w')
        ax1.xaxis.set_major_locator(mticker.MaxNLocator(10))
        ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
        ax1.yaxis.label.set_color("w")
        ax1.spines['bottom'].set_color("#5998ff")
        ax1.spines['top'].set_color("#5998ff")
        ax1.spines['left'].set_color("#5998ff")
        ax1.spines['right'].set_color("#5998ff")
        ax1.tick_params(axis='y', colors='w')
        plt.gca().yaxis.set_major_locator(mticker.MaxNLocator(prune='upper'))  # gca()
        ax1.tick_params(axis='x', colors='w')
        plt.ylabel('Stock price and Volume')

        maLeg = plt.legend(loc=9, ncol=2, prop={'size': 7},
                           fancybox=True, borderaxespad=0.)
        maLeg.get_frame().set_alpha(0.4)
        textEd = plt.gca().get_legend().get_texts()  # pylab.gca() changed to plt.gca()
        plt.setp(textEd[0:5], color='w')  # changed pylab.setp to plt.setp

        volumeMin = 0

        ax0 = plt.subplot2grid((6,4), (0,0), sharex=ax1, rowspan=1, colspan=4, axisbg='#07000d')
        rsi = rsiFunc(closep)
        rsiCol = '#c1f9f7'
        posCol = '#386d13'
        negCol = '#8f2020'
        ax0.plot(date[-SP:], rsi[-SP:], rsiCol, linewidth=1.5)
        ax0.axhline(70, color=negCol)
        ax0.axhline(30, color=posCol)
        ax0.fill_between(date[-SP:], rsi[-SP:], 70, where=(rsi[-SP:] >= 70), facecolor=negCol, edgecolor=negCol, alpha=0.5)
        ax0.fill_between(date[-SP:], rsi[-SP:], 30, where=(rsi[-SP:] <= 30), facecolor=posCol, edgecolor=posCol, alpha=0.5)
        ax0.set_yticks([30, 70])
        ax0.yaxis.label.set_color("w")
        ax0.spines['bottom'].set_color("#5998ff")
        ax0.spines['top'].set_color("#5998ff")
        ax0.spines['left'].set_color("#5998ff")
        ax0.spines['right'].set_color("#5998ff")
        ax0.tick_params(axis='y', colors='w')
        ax0.tick_params(axis='x', colors='w')
        plt.ylabel('RSI')

        ax1v = ax1.twinx()
        ax1v.fill_between(date[-SP:], volumeMin, volume[-SP:], facecolor='#00ffe8', alpha=.4)
        ax1v.axes.yaxis.set_ticklabels([])
        ax1v.grid(False)
        ax1v.set_ylim(0, 3*volume.max())
        ax1v.spines['bottom'].set_color("#5998ff")
        ax1v.spines['top'].set_color("#5998ff")
        ax1v.spines['left'].set_color("#5998ff")
        ax1v.spines['right'].set_color("#5998ff")
        ax1v.tick_params(axis='x', colors='w')
        ax1v.tick_params(axis='y', colors='w')

        ax2 = plt.subplot2grid((6,4), (5,0), sharex=ax1, rowspan=1, colspan=4, axisbg='#07000d')
        # START NEW INDICATOR CODE #
        # END NEW INDICATOR CODE #
        plt.gca().yaxis.set_major_locator(mticker.MaxNLocator(prune='upper'))
        ax2.spines['bottom'].set_color("#5998ff")
        ax2.spines['top'].set_color("#5998ff")
        ax2.spines['left'].set_color("#5998ff")
        ax2.spines['right'].set_color("#5998ff")
        ax2.tick_params(axis='x', colors='w')
        ax2.tick_params(axis='y', colors='w')
        ax2.yaxis.set_major_locator(mticker.MaxNLocator(nbins=5, prune='upper'))
        for label in ax2.xaxis.get_ticklabels():
            label.set_rotation(45)

        plt.suptitle(stock.upper(), color='w')
        plt.setp(ax0.get_xticklabels(), visible=False)
        plt.setp(ax1.get_xticklabels(), visible=False)

        '''ax1.annotate('Big news!',(date[510],Av1[510]),
            xytext=(0.8, 0.9), textcoords='axes fraction',
            arrowprops=dict(facecolor='white', shrink=0.05),
            fontsize=14, color = 'w',
            horizontalalignment='right', verticalalignment='bottom')'''

        plt.subplots_adjust(left=.09, bottom=.14, right=.94, top=.95, wspace=.20, hspace=0)
        plt.show()
        fig.savefig('example.png', facecolor=fig.get_facecolor())
    except Exception, e:
        print 'main loop', str(e)

while True:
    stock = raw_input('Stock to plot: ')
    graphData(stock, 10, 50)
Please look at the thread Violin plot: warning with matplotlib 1.4.3 and pyplot fill_between warning since upgrade of numpy to 1.10.10
It seems there is a bug in matplotlib 1.4.3 (which has only started causing that error since the upgrade to numpy 1.10). This is reportedly corrected in 1.5.0 (which should be released soon). Hope this helps.

How do I separate out unique rows in a list that has both a datetime and float column?

I'm relatively new to Python, and I am having trouble separating out unique rows from a data set that I recently converted into lists. I separated out the data's unixtime recordings and converted them into datetime. Then, when I recombined the data into a list and tried to separate out the unique rows, I got the error below instead.
[[[datetime.datetime(2014, 6, 20, 0, 0) -16.0]
  [datetime.datetime(2014, 6, 20, 0, 0) -16.0]........
Traceback (most recent call last):
  File "C:\Users\lenovo\Favorites\Microsoft 网站\Downloads\OTdataparser.py", line 33, in <module>
    indicies = np.unique(okdat, return_index = True) #<-- NOT WORKING
  File "C:\Python27\lib\site-packages\numpy\lib\arraysetops.py", line 180, in unique
    perm = ar.argsort(kind='mergesort')
TypeError: can't compare datetime.datetime to float
My script is below.
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import math

ds5 = np.genfromtxt("gpsdata.dat.140620", delimiter='',
                    usecols=(2, 4, 5), dtype=object)
print ds5
ds = np.array([x for x in ds5 if x[0] == "06/20/2014"])
dot = ds[:, 2].astype(float)
print ds
rndsht = np.genfromtxt(ds[:, 1], delimiter=".", dtype=float)  # Rm decimal
print rndsht
dutc = np.array([datetime.utcfromtimestamp(x) for x in rndsht[:, 0]])
print dutc
#dutc = np.array([datetime.utcfromtimestamp(x) for x in ds[:,1].astype(float)])
okdat = np.dstack((dutc, dot))
#okdat.astype(object)
print okdat
#indicies = np.unique(dutc, return_index=True) #<-- WORKS! BUT okdat??
#print indicies
indicies = np.unique(okdat, return_index=True)  # <-- NOT WORKING
print indicies
#Can't figure out how to use indicies to limit dot
You could write your own unique function.
Here is a quick example (you can probably do better). Note that it doesn't preserve order, but you could use insert instead of append to do that.
def unique(data):
    x = 0
    while x < len(data):
        i = data[x]
        c = 0
        while i in data:
            c += 1
            data.remove(i)
        data.append(i)
        if c <= 1:
            x += 1
    return data
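A quick usage sketch with made-up values (note that the function mutates and reorders the list it is given):
>>> unique([1.0, 1.0, 2.0, 3.0, 2.0])
[1.0, 3.0, 2.0]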