List format error using matlotlib linecollection - python-2.7

I have a list (coordpairs) that I am trying to use as the basis for plotting using LineCollection. The list is derived from a Pandas data frame. I am having trouble getting the list in the right format, despite what is admittedly a clear error code. Trimmed data frame contents, code, and error are below. Thank you for any help.
Part of the Data Frame
RUP_ID Vert_ID Longitude Latitude
1 1 -116.316961 34.750178
1 2 -116.316819 34.750006
2 1 -116.316752 34.749938
2 2 -116.31662 34.749787
10 1 -116.317165 34.754078
10 2 -116.317277 34.751492
10 3 -116.317206 34.751273
10 4 -116.317009 34.75074
10 5 -116.316799 34.750489
11 1 -116.316044 34.760377
11 2 -116.317105 34.755674
11 3 -116.317165 34.754078
Code
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
fig = plt.figure()
ax1 = plt.subplot2grid((2, 2), (0, 0), rowspan=2, colspan=1)
for ii in range(1,len(mydf)):
temp = mydf.loc[mydf.RUP_ID == ii]
df_line = temp.sort_values(by='Vert_ID', ascending=True)
del temp
lat = df_line.Latitude
lon = df_line.Longitude
lat = lat.tolist()
long = long.tolist()
coordpairs = zip(lat, long)
lc = LineCollection(coordpairs, colors='r') # this is line 112 in the error
ax1.add_collection(lc)
# note I also tried:
# import numpy as np
# coordpairs2 = np.vstack([np.array(u) for u in set([tuple(p) for p in coordpairs])])
# lc = LineCollection(coordpairs2, colors='r')
# and received the same plotting error
Error/Outputs
C:\apath\python.exe C:/mypath/myscript.py
Traceback (most recent call last):
File "C:/mypath/myscript.py", line 112, in <module>
lc = LineCollection(coordpairs, colors='r') # this is line 112 in the error
File "C:\apath\lib\site-packages\matplotlib\collections.py", line 1149, in __init__
self.set_segments(segments)
File "C:\apath\lib\site-packages\matplotlib\collections.py", line 1164, in set_segments
self._paths = [mpath.Path(_seg) for _seg in _segments]
File "C:\apath\lib\site-packages\matplotlib\path.py", line 141, in __init__
raise ValueError(msg)
ValueError: 'vertices' must be a 2D list or array with shape Nx2
Process finished with exit code 1

You would want to create one single LineCollection, with several lines, one per RUP_ID value from the first dataframe column. That means you best loop over the unique values of that column (not over every row!) and append the coordinates to a list. Use that list as the input to LineCollection.
u = """RUP_ID Vert_ID Longitude Latitude
1 1 -116.316961 34.750178
1 2 -116.316819 34.750006
2 1 -116.316752 34.749938
2 2 -116.31662 34.749787
10 1 -116.317165 34.754078
10 2 -116.317277 34.751492
10 3 -116.317206 34.751273
10 4 -116.317009 34.75074
10 5 -116.316799 34.750489
11 1 -116.316044 34.760377
11 2 -116.317105 34.755674
11 3 -116.317165 34.754078"""
import io
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
df = pd.read_csv(io.StringIO(u), sep="\s+")
verts = []
for (RUP_ID, grp) in df.groupby("RUP_ID"):
df_line = grp.sort_values(by='Vert_ID', ascending=True)
lat = df_line.Latitude
lon = df_line.Longitude
verts.append(list(zip(lon, lat)))
lc = LineCollection(verts, color='r')
fig, ax = plt.subplots()
ax.add_collection(lc)
ax.autoscale()
plt.show()

Related

Region growing with the watershed transform

I am trying out the code by adfoucart for Region growing with the watershed transform but I ran into some errors when identifying the markers for the image.
from skimage.filters import rank,gaussian
from skimage.morphology import disk
from skimage.feature import peak_local_max
def get_markers(img2, indices=False):
im_ = gaussian(img2, sigma=4)
gradr = rank.gradient(im_[:,:,0],disk(5)).astype('int')
gradg = rank.gradient(im_[:,:,1],disk(5)).astype('int')
gradb = rank.gradient(im_[:,:,2],disk(5)).astype('int')
grad = gradr+gradg+gradb
return peak_local_max(grad.max()-grad,threshold_rel=0.5, min_distance=60,indices=indices),grad
markers,grad = get_markers(img2, True)
plt.figure()
plt.imshow(grad, cmap=plt.cm.gray)
plt.plot(markers[:,1],markers[:,0],'b+')
plt.show()
and I am receiving this error.
IndexError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_17316/2204442073.py in <module>
12 return peak_local_max(grad.max()-grad,threshold_rel=0.5, min_distance=60,indices=indices),grad
13
---> 14 markers,grad = get_markers(img2, True)
15 plt.figure()
16 plt.imshow(grad, cmap=plt.cm.gray)
~\AppData\Local\Temp/ipykernel_17316/2204442073.py in get_markers(img2, indices)
5 def get_markers(img2, indices=False):
6 im_ = gaussian(img2, sigma=4)
----> 7 gradr = rank.gradient(im_[:,:,0],disk(5)).astype('int')
8 gradg = rank.gradient(im_[:,:,1],disk(5)).astype('int')
9 gradb = rank.gradient(im_[:,:,2],disk(5)).astype('int')
IndexError: too many indices for array: array is 2-dimensional, but 3 were indexed
Any help will be appreciated thanj you!
You are probably trying to run the code on a grayscale image, which will only have 2 dimensions (height and width), while the code was written expecting an RGB image with 3 dimensions (height, width and color channel).
On a grayscale image, the lines:
gradr = rank.gradient(im_[:,:,0],disk(5)).astype('int')
gradg = rank.gradient(im_[:,:,1],disk(5)).astype('int')
gradb = rank.gradient(im_[:,:,2],disk(5)).astype('int')
grad = gradr+gradg+gradb
Could be simply replaced by:
grad = rank.gradient(im_, disk(5))

Invalid literal for float in k nearest neighbor

I am having the hardest time figuring out why i am getting this error. I have searched a lot but unable to fine any solution
import numpy as np
import warnings
from collections import Counter
import pandas as pd
def k_nearest_neighbors(data, predict, k=3):
if len(data) >= k:
warnings.warn('K is set to a value less than total voting groups!')
distances = []
for group in data:
for features in data[group]:
euclidean_distance = np.linalg.norm(np.array(features)-
np.array(predict))
distances.append([euclidean_distance,group])
votes = [i[1] for i in sorted(distances)[:k]]
vote_result = Counter(votes).most_common(1)[0][0]
return vote_result
df = pd.read_csv("data.txt")
df.replace('?',-99999, inplace=True)
df.drop(['id'], 1, inplace=True)
full_data = df.astype(float).values.tolist()
print(full_data)
After running. it gives error
Traceback (most recent call last):
File "E:\Jazab\Machine Learning\Lec18(Testing K Neatest Nerighbors
Classifier)\Lec18(Testing K Neatest Nerighbors
Classifier)\Lec18_Testing_K_Neatest_Nerighbors_Classifier_.py", line 25, in
<module>
full_data = df.astype(float).values.tolist()
File "C:\Python27\lib\site-packages\pandas\util\_decorators.py", line 91, in
wrapper
return func(*args, **kwargs)
File "C:\Python27\lib\site-packages\pandas\core\generic.py", line 3299, in
astype
**kwargs)
File "C:\Python27\lib\site-packages\pandas\core\internals.py", line 3224, in
astype
return self.apply('astype', dtype=dtype, **kwargs)
File "C:\Python27\lib\site-packages\pandas\core\internals.py", line 3091, in
apply
applied = getattr(b, f)(**kwargs)
File "C:\Python27\lib\site-packages\pandas\core\internals.py", line 471, in
astype
**kwargs)
File "C:\Python27\lib\site-packages\pandas\core\internals.py", line 521, in
_astype
values = astype_nansafe(values.ravel(), dtype, copy=True)
File "C:\Python27\lib\site-packages\pandas\core\dtypes\cast.py", line 636,
in astype_nansafe
return arr.astype(dtype)
ValueError: invalid literal for float(): 3) <-----Reappears in Group 8 as:
Press any key to continue . . .
if i remove astype(float) program run fine
What should i need to do ?
There are bad data (3)), so need to_numeric with apply because need processes all columns.
Non numeric are converted to NaNs, which are replaced by fillna to some scalar, e.g. 0:
full_data = df.apply(pd.to_numeric, errors='coerce').fillna(0).values.tolist()
Sample:
df = pd.DataFrame({'A':[1,2,7], 'B':['3)',4,5]})
print (df)
A B
0 1 3)
1 2 4
2 7 5
full_data = df.apply(pd.to_numeric, errors='coerce').fillna(0).values.tolist()
print (full_data)
[[1.0, 0.0], [2.0, 4.0], [7.0, 5.0]]
It looks like you have 3) as an entry in your CSV file, and Pandas is complaining because it can't cast it to a float because of the ).

Python KeyError: 1.0

I'm trying to run this code
from math import sqrt
import numpy as np
import warnings
from collections import Counter
import pandas as pd
import random
def k_nearest_neighbors(data,predict, k =3):
if len(data) >= k:
warnings.warn('K is set to a value less than total voting groups')
distances = []
for group in data:
for features in data[group]:
eucliden_distance = np.linalg.norm(np.array(features)-np.array(predict))
distances.append([eucliden_distance,group])
votes = [i[1] for i in sorted(distances)[:k]]
print(Counter(votes).most_common(1))
vote_result = Counter(votes).most_common(1)[0][0]
return vote_result
df = pd.read_csv('bc2.txt')
df.replace('?',-99999,inplace=True)
df.drop(['id'],1,inplace = True)
full_data = df.astype(float).values.tolist()
random.shuffle(full_data)
test_size = 0.2
train_set = {2:[],4:[]}
test_set = {2:[],4:[]}
train_data = full_data[:-int(test_size*len(full_data))]
test_data = full_data[-int(test_size*len(full_data)):]
for i in train_data:
train_set[i[-1]].append(i[:-1])
for i in train_data:
test_set[i[-1]].append(i[:-1])
correct = 0
total = 0
for group in test_set:
for data in test_set[group]:
vote = k_nearest_neighbors(train_set,data, k=5)
if group == vote:
correct += 1
total += 1
print ('Accuracy:',correct/total)
it comes out with this error msg
File "ml8.py", line 38, in <module>
train_set[i[-1]].append(i[:-1])
KeyError: 1.0
file m18.py is this above code file
below is the sample of txt file
id,clump_thickness,unif_cell_size,unif_cell_shape,marg_adhesion,single_epith_cell_size,bare_nuclei,bland_chrom,norm_nucleoli,mitoses,class
1000025,2,5,1,1,1,2,1,3,1,1
1002945,2,5,4,4,5,7,10,3,2,1
1015425,2,3,1,1,1,2,2,3,1,1
1016277,2,6,8,8,1,3,4,3,7,1
1017023,2,4,1,1,3,2,1,3,1,1
1017122,4,8,10,10,8,7,10,9,7,1
1018099,2,1,1,1,1,2,10,3,1,1
1018561,2,2,1,2,1,2,1,3,1,1
1033078,2,2,1,1,1,2,1,1,1,5
1033078,2,4,2,1,1,2,1,2,1,1
1035283,2,1,1,1,1,1,1,3,1,1
1036172,2,2,1,1,1,2,1,2,1,1
1041801,4,5,3,3,3,2,3,4,4,1
I'm using 2.7.11 version
Your train_set only contains keys 2 and 4, whereas your classes in that sample are 1 and 5.
Instead of using
train_set = {2:[],4:[]}
you might have better luck with defaultdict:
from collections import defaultdict
train_set = defaultdict(list)
This way a non-existent key will be initialized to a new empty list on first access.

Python Side-by-side box plots on same figure

I am trying to generate a box plot in Python 2.7 for each categorical value in column E from the Pandas dataframe below
A B C D E
0 0.647366 0.317832 0.875353 0.993592 1
1 0.504790 0.041806 0.113889 0.445370 2
2 0.769335 0.120647 0.749565 0.935732 3
3 0.215003 0.497402 0.795033 0.246890 1
4 0.841577 0.211128 0.248779 0.250432 1
5 0.045797 0.710889 0.257784 0.207661 4
6 0.229536 0.094308 0.464018 0.402725 3
7 0.067887 0.591637 0.949509 0.858394 2
8 0.827660 0.348025 0.507488 0.343006 3
9 0.559795 0.820231 0.461300 0.921024 1
I would be willing to do this with Matplotlib or any other plotting library. So far the above code can plot all the categories combined on one plot. Here is the code to generate the above data and produce the plot:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
# Data
df = pd.DataFrame(np.random.rand(10,4),columns=list('ABCD'))
df['E'] = [1,2,3,1,1,4,3,2,3,1]
# Boxplot
bp = ax.boxplot(df.iloc[:,:-1].values, widths=0.2)
plt.show()
In this example, the categories are 1,2,3,4. I would like to plot separate boxplots side-by-side on the same figure, for only categories 1 and 2 and show the category names in the legend.
Is there a way to do this?
Additional Information:
The output should look similar to the 3rd figure from here - replace "Yes","No" by "1","2".
Starting with this:
import numpy
import pandas
from matplotlib import pyplot
import seaborn
seaborn.set(style="ticks")
# Data
df = pandas.DataFrame(numpy.random.rand(10,4), columns=list('ABCD'))
df['E'] = [1, 2, 3, 1, 1, 4, 3, 2, 3, 1]
You've got a couple of options. If separate axes are ok,
fig, axes = pyplot.subplots(ncols=4, figsize=(12, 5), sharey=True)
df.query("E in [1, 2]").boxplot(by='E', return_type='axes', ax=axes)
If you want 1 axes, I think seaborn will be easier. You just need to clean up your data.
ax = (
df.set_index('E', append=True) # set E as part of the index
.stack() # pull A - D into rows
.to_frame() # convert to a dataframe
.reset_index() # make the index into reg. columns
.rename(columns={'level_2': 'quantity', 0: 'value'}) # rename columns
.drop('level_0', axis='columns') # drop junk columns
.pipe((seaborn.boxplot, 'data'), x='E', y='value', hue='quantity', order=[1, 2])
)
seaborn.despine(trim=True)
The cool thing about seaborn is that tweaking the parameters slightly can achieve a lot in terms of the plot's layout. If we switch our hue and x variables, we get:
ax = (
df.set_index('E', append=True) # set E as part of the index
.stack() # pull A - D into rows
.to_frame() # convert to a dataframe
.reset_index() # make the index into reg. columns
.rename(columns={'level_2': 'quantity', 0: 'value'}) # rename columns
.drop('level_0', axis='columns') # drop junk columns
.pipe((seaborn.boxplot, 'data'), x='quantity', y='value', hue='E', hue_order=[1, 2])
)
seaborn.despine(trim=True)
If you're curious, the resulting dataframe looks something like this:
E quantity value
0 1 A 0.935433
1 1 B 0.862290
2 1 C 0.197243
3 1 D 0.977969
4 2 A 0.675037
5 2 B 0.494440
6 2 C 0.492762
7 2 D 0.531296
8 3 A 0.119273
9 3 B 0.303639
10 3 C 0.911700
11 3 D 0.807861
An addition to #Paul_H answer.
Side-by-side boxplots on the single matplotlib.axes.Axes, no seaborn:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df = pd.DataFrame(np.random.rand(10,4), columns=list('ABCD'))
df['E'] = [1, 2, 1, 1, 1, 2, 1, 2, 2, 1]
mask_e = df['E'] == 1
# prepare data
data_to_plot = [df[mask_e]['A'], df[~mask_e]['A'],
df[mask_e]['B'], df[~mask_e]['B'],
df[mask_e]['C'], df[~mask_e]['C'],
df[mask_e]['D'], df[~mask_e]['D']]
# Positions defaults to range(1, N+1) where N is the number of boxplot to be drawn.
# we will move them a little, to visually group them
plt.figure(figsize=(10, 6))
box = plt.boxplot(data_to_plot,
positions=[1, 1.6, 2.5, 3.1, 4, 4.6, 5.5, 6.1],
labels=['A1','A0','B1','B0','C1','C0','D1','D0'])

Raster plot not showing symbols using plt.scatter function

I have to do raster plot, of 1 neuron, with 10 trials of data and time is 4500 ms.
import numpy as np
import matplotlib.pyplot as plt
#Plotting 1 neuron with 12 trials of info
maatriks = []
for i in range(1,14):
if i<10:
string = 'C:\\Users\\latel\\Desktop\\kool\\Neuro\\prax3\\data\\lgn\\plain\\neuron_01_stimulus_0'+str(i)+'.csv'
else:
string = 'C:\\Users\\latel\\Desktop\\kool\\Neuro\\prax3\\data\\lgn\\plain\\neuron_01_stimulus_'+str(i)+'.csv'
data_in = np.genfromtxt(string,dtype = 'int', delimiter = ',' or '\n')
maatriks.append(data_in)
data = np.array(maatriks)
print data.shape
spikes = np.array(data[8])
print spikes.shape
nonzeros = 0
for i,item in enumerate(spikes):
nonzeros += np.count_nonzero(item)
plt.scatter(item, i*np.ones(item.shape), marker = '|')
print nonzeros
plt.ylim(-1,len(spikes))
plt.xlim(0,len(spikes[0]))
plt.xlabel("Time is seconds")
plt.ylabel("Trial number")
plt.tight_layout()
plt.show()
This outputs me(the prints) :
(13L, 10L, 4501L)
(10L, 4501L)
55
But the plot is empty , i cannot understand why the plot is empty. There should be 55 lines in my opinion ...
Edit: Got it working. Added this code.
for row in spikes:
for i in range(len(row)):
if (row[i] == 1):
row[i] = i
Because the data was only 0 or 1.
Anyone know how to do it shorter ?