Python Numpy : operands could not be broadcast together with shapes - python-2.7

I am getting this error "operands could not be broadcast together with shapes" for this code
import numpy as np
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
beantown = load_boston()
x=beantown.data
y=beantown.target
model = LinearRegression()
model = model.fit(x,y)
def mse(truth, predictions):
return ((truth - predictions) ** 2).mean(None)
print model.score(x,y)
print mse(x,y)
The error is on the line print mse(x,y)
Error is ValueError:
operands could not be broadcast together with shapes (506,13) (506,)

Reshape y:
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
beantown = load_boston()
x = beantown.data
y = beantown.target
y = y.reshape(y.size, 1)
model = LinearRegression()
model = model.fit(x, y)
def mse(truth, predictions):
return ((truth - predictions) ** 2).mean(None)
print model.score(x, y)
print mse(x, y)

Related

Retrun event.xdata from a function

I have made a small gui to select points on a given image, plot the points, interpolate them and save them. I would like to make it so that the interpolated points are given as output of the function gui_pos(image) retrun, but I couldn't find the way to do it. So far I have fixed it saving the interpolated point in a .pckl file but it is not a good solution.
The code is the following:
from PIL import Image
import numpy as np
import pickle
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d
def interp_mia(x,y,xx):
f= interp1d(x,y,fill_value="extrapolate")
yy= f(xx)
return yy
def onclick(event):
plt.plot(event.xdata, event.ydata, '.')
fig.canvas.draw()
coordsx.append(event.xdata)
coordsy.append(event.ydata)
if (event.button == 3) :
xx = np.arange(np.min(coordsx),np.max(coordsx))
yy = interp_mia(coordsx, coordsy, xx)
print('you pressed', event.button)
plt.plot(xx,yy,'k-')
fig.canvas.draw()
fig.canvas.mpl_disconnect(cid)
dum = np.array((xx,yy))
f = open('gui_pos.pckl', 'wb')
pickle.dump(dum, f)
f.close()
def gui_pos(image):
global coordsx
global coordsy
global fig
global cid
global xx
global yy
global slit
fig = plt.figure()
ax = fig.add_subplot(111)
ax.imshow(image, origin="lower")
coordsx = []
coordsy = []
cid = fig.canvas.mpl_connect('button_press_event', onclick)
return
Any ideas?
Thank you,
Qarolina

PyMC3 Bayesian Inference with NUTS initialization

I'm trying to implement a simple Bayesian Inference using a ODE model. I want to use the NUTS algorithm to sample but it gives me an initialization error. I do not know much about the PyMC3 as I'm new to this. Please take a look and tell me what is wrong.
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint
import seaborn
import pymc3 as pm
import theano.tensor as T
from theano.compile.ops import as_op
#Actual Solution of the Differential Equation(Used to generate data)
def actual(a,b,x):
Y = np.exp(-b*x)*(a*np.exp(b*x)*(b*x-1)+a+b**2)/b**2
return Y
#Method For Solving the ODE
def lv(xdata, a=5.0, b=0.2):
def dy_dx(y, x):
return a*x - b*y
y0 = 1.0
Y, dict = odeint(dy_dx,y0,xdata,full_output=True)
return Y
#Generating Data for Bayesian Inference
a0, b0 = 5, 0.2
xdata = np.linspace(0, 21, 100)
ydata = actual(a0,b0,xdata)
# Adding some error to the ydata points
yerror = 10*np.random.rand(len(xdata))
ydata += np.random.normal(0.0, np.sqrt(yerror))
ydata = np.ravel(ydata)
#as_op(itypes=[T.dscalar, T.dscalar], otypes=[T.dvector])
def func(al,be):
Q = lv(xdata, a=al, b=be)
return np.ravel(Q)
# Number of Samples and Initial Conditions
nsample = 5000
y0 = 1.0
# Model for Bayesian Inference
model = pm.Model()
with model:
# Priors for unknown model parameters
alpha = pm.Uniform('alpha', lower=a0/2, upper=a0+a0/2)
beta = pm.Uniform('beta', lower=b0/2, upper=b0+b0/2)
# Expected value of outcome
mu = func(alpha,beta)
# Likelihood (sampling distribution) of observations
Y_obs = pm.Normal('Y_obs', mu=mu, sd=yerror, observed=ydata)
trace = pm.sample(nsample, nchains=1)
pm.traceplot(trace)
plt.show()
The error that I get is
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Initializing NUTS failed. Falling back to elementwise auto-assignment.
Any help would be really appreciated

Colour schemes used to present data on sphere

Hi I a have a data set which I project onto a sphere such that the magnitude of the data, as a function of theta and phi, is shown using a colour spectrum (which uses "ax.plot_surface", "plt.colorbar" and "facecolors"). My query is that at this stage I am limited to "cm.hot" and "cm.jet". Does anyone know of any other colour schemes which are available for this purpose. Please see my code and the figures below
Code:
from numpy import*
import math
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import matplotlib.cm as cm
#theta inclination angle
#phi azimuthal angle
n_theta = 100 #number of values for theta
n_phi = 100 #number of values for phi
r = 1 #radius of sphere
theta, phi = np.mgrid[0: pi:n_theta*1j,-pi:pi:n_phi*1j ]
x = r*np.sin(theta)*np.cos(phi)
y = r*np.sin(theta)*np.sin(phi)
z = r*np.cos(theta)
inp = []
f = open("data.dat","r")
for line in f:
i = float(line.split()[0])
j = float(line.split()[1])
val = float(line.split()[2])
inp.append([i, j, val])
inp = np.array(inp)
#reshape the input array to the shape of the x,y,z arrays.
c = inp[:,2].reshape((n_phi,n_theta))
#Set colours and render
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')
#use facecolors argument, provide array of same shape as z
# cm.<cmapname>() allows to get rgba color from array.
# array must be normalized between 0 and 1
surf = ax.plot_surface(
x,y,z, rstride=1, cstride=1, facecolors=cm.jet(c), alpha=0.9, linewidth=1, shade=False)
ax.set_xlim([-2.0,2.0])
ax.set_ylim([-2.0,2.0])
ax.set_zlim([-2,2])
ax.set_aspect("equal")
plt.title('Plot with cm.jet')
#Label axis.
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
#Creates array for colorbar from 0 to 1.
a = array( [1.0, 0.5, 0.0])
#Creates colorbar
m = cm.ScalarMappable(cmap=cm.jet)
m.set_array(a)
plt.colorbar(m)
plt.savefig('facecolor plots')
f.close()
plt.show()
The following is a list of colormaps provided directly by matplotlib. It's taken from the Colormap reference example.
cmaps = [('Perceptually Uniform Sequential', [
'viridis', 'plasma', 'inferno', 'magma', 'cividis']),
('Sequential', [
'Greys', 'Purples', 'Blues', 'Greens', 'Oranges', 'Reds',
'YlOrBr', 'YlOrRd', 'OrRd', 'PuRd', 'RdPu', 'BuPu',
'GnBu', 'PuBu', 'YlGnBu', 'PuBuGn', 'BuGn', 'YlGn']),
('Sequential (2)', [
'binary', 'gist_yarg', 'gist_gray', 'gray', 'bone', 'pink',
'spring', 'summer', 'autumn', 'winter', 'cool', 'Wistia',
'hot', 'afmhot', 'gist_heat', 'copper']),
('Diverging', [
'PiYG', 'PRGn', 'BrBG', 'PuOr', 'RdGy', 'RdBu',
'RdYlBu', 'RdYlGn', 'Spectral', 'coolwarm', 'bwr', 'seismic']),
('Qualitative', [
'Pastel1', 'Pastel2', 'Paired', 'Accent',
'Dark2', 'Set1', 'Set2', 'Set3',
'tab10', 'tab20', 'tab20b', 'tab20c']),
('Miscellaneous', [
'flag', 'prism', 'ocean', 'gist_earth', 'terrain', 'gist_stern',
'gnuplot', 'gnuplot2', 'CMRmap', 'cubehelix', 'brg', 'hsv',
'gist_rainbow', 'rainbow', 'jet', 'nipy_spectral', 'gist_ncar'])]
To easily view them all you may e.g. use the following 3D colormap viewer (written in PyQt5).
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from PyQt5 import QtGui, QtCore, QtWidgets
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.figure import Figure
import sys
class MainWindow(QtWidgets.QMainWindow):
def __init__(self):
QtWidgets.QMainWindow.__init__(self)
self.main_widget = QtWidgets.QWidget(self)
self.fig = Figure()
self.canvas = FigureCanvas(self.fig)
self.ax = self.fig.add_subplot(111, projection=Axes3D.name)
u = np.linspace(0, 2 * np.pi, 100)
v = np.linspace(0, np.pi, 100)
x = 10 * np.outer(np.cos(u), np.sin(v))
y = 10 * np.outer(np.sin(u), np.sin(v))
z = 10 * np.outer(np.ones(np.size(u)), np.cos(v))
# Plot the surface
self.surf = self.ax.plot_surface(x, y, z, cmap="YlGnBu")
self.cb = self.fig.colorbar(self.surf)
self.canvas.setSizePolicy(QtWidgets.QSizePolicy.Expanding,
QtWidgets.QSizePolicy.Expanding)
self.canvas.updateGeometry()
self.dropdown1 = QtWidgets.QComboBox()
items = []
for cats in cmaps:
items.extend(cats[1])
self.dropdown1.addItems(items)
self.dropdown1.currentIndexChanged.connect(self.update)
self.label = QtWidgets.QLabel("A plot:")
self.layout = QtWidgets.QGridLayout(self.main_widget)
self.layout.addWidget(QtWidgets.QLabel("Select Colormap"))
self.layout.addWidget(self.dropdown1)
self.layout.addWidget(self.canvas)
self.setCentralWidget(self.main_widget)
self.show()
self.update()
def update(self):
self.surf.set_cmap(self.dropdown1.currentText())
self.fig.canvas.draw_idle()
if __name__ == '__main__':
app = QtWidgets.QApplication(sys.argv)
win = MainWindow()
sys.exit(app.exec_())

How to reshape a numpy array from (#dim1,#dim2,#channel) to (#channel, #dim1,#dim2)

I have an array with the shape of (#dim1,#dim2,#channel). I want to reshape it to (#channel, #dim1,#dim2).
The plt.reshape(x, (#channel, #dim1,#dim2)) shows me a wrong image.
If you are using the Cifar10 dataset you could use the following code:
import numpy as np
import matplotlib.pyplot as plt
import cPickle
def unpickle(file):
with open(file, 'rb') as fo:
dict = cPickle.load(fo)
return dict
# Read the data
imageDict = unpickle('cifar-10-batches-py/data_batch_2')
imageArray = imageDict['data']
# Now we reshape
imageArray = np.swapaxes(imageArray.reshape(10000,32,32,3,order='F'), 1, 2)
# Get the labels
labels = ['airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck']
imageLabels = [labels[i] for i in imageDict['labels']]
# Plot some images
fig, ax = plt.subplots(4,4, figsize=(8,8))
for axIndex in [(i,j) for i in range(4) for j in range(4)]:
index = np.random.randint(0,10000)
ax[axIndex].imshow(imageArray[index], origin='upper')
ax[axIndex].set_title(imageLabels[index])
ax[axIndex].axis('off')
fig.show()
Which gives you:

Feature selection in scikit learn for multiple variables and thousands+ features

I am trying to perform feature selection for logistic regression classifier. Originally there are 4 variables: name, location, gender, and label = ethnicity. The three variables, namely the name, give rise to tens of thousands of more "features", for example, name "John Snow" will give rise to 2-letter substrings like 'jo', 'oh', 'hn'... etc. The feature set undergoes DictVectorization.
I am trying to follow this tutorial (http://scikit-learn.org/stable/auto_examples/feature_selection/plot_feature_selection.html) but I am not sure if I am doing it right since the tutorial is using a small number of features while mine has tens of thousands after vectorization. And also the plt.show() shows a blank figure.
# coding=utf-8
import pandas as pd
from pandas import DataFrame, Series
import numpy as np
import re
import random
import time
from random import randint
import csv
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.metrics import confusion_matrix as sk_confusion_matrix
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve
# Assign X and y variables
X = df.raw_name.values
X2 = df.name.values
X3 = df.gender.values
X4 = df.location.values
y = df.ethnicity_scan.values
# Feature extraction functions
def feature_full_name(nameString):
try:
full_name = nameString
if len(full_name) > 1: # not accept name with only 1 character
return full_name
else: return '?'
except: return '?'
def feature_avg_wordLength(nameString):
try:
space = 0
for i in nameString:
if i == ' ':
space += 1
length = float(len(nameString) - space)
name_entity = float(space + 1)
avg = round(float(length/name_entity), 0)
return avg
except:
return 0
def feature_name_entity(nameString2):
space = 0
try:
for i in nameString2:
if i == ' ':
space += 1
return space+1
except: return 0
def feature_gender(genString):
try:
gender = genString
if len(gender) >= 1:
return gender
else: return '?'
except: return '?'
def feature_noNeighborLoc(locString):
try:
x = re.sub(r'^[^, ]*', '', locString) # remove everything before and include first ','
y = x[2:] # remove subsequent ',' and ' '
return y
except: return '?'
def list_to_dict(substring_list):
try:
substring_dict = {}
for i in substring_list:
substring_dict['substring='+str(i)] = True
return substring_dict
except: return '?'
# Transform format of X variables, and spit out a numpy array for all features
my_dict13 = [{'name-entity': feature_name_entity(feature_full_name(i))} for i in X2]
my_dict14 = [{'avg-length': feature_avg_wordLength(feature_full_name(i))} for i in X]
my_dict15 = [{'gender': feature_full_name(i)} for i in X3]
my_dict16 = [{'location': feature_noNeighborLoc(feature_full_name(i))} for i in X4]
my_dict17 = [{'dummy1': 1} for i in X]
my_dict18 = [{'dummy2': random.randint(0,2)} for i in X]
all_dict = []
for i in range(0, len(my_dict)):
temp_dict = dict(my_dict13[i].items() + my_dict14[i].items()
+ my_dict15[i].items() + my_dict16[i].items() + my_dict17[i].items() + my_dict18[i].items()
)
all_dict.append(temp_dict)
newX = dv.fit_transform(all_dict)
# Separate the training and testing data sets
half_cut = int(len(df)/2.0)*-1
X_train = newX[:half_cut]
X_test = newX[half_cut:]
y_train = y[:half_cut]
y_test = y[half_cut:]
# Fitting X and y into model, using training data
lr = LogisticRegression()
lr.fit(X_train, y_train)
dv = DictVectorizer()
# Feature selection
plt.figure(1)
plt.clf()
X_indices = np.arange(X_train.shape[-1])
selector = SelectPercentile(f_classif, percentile=10)
selector.fit(X_train, y_train)
scores = -np.log10(selector.pvalues_)
scores /= scores.max()
plt.bar(X_indices - .45, scores, width=.2,
label=r'Univariate score ($-Log(p_{value})$)', color='g')
plt.show()
Warning:
E:\Program Files Extra\Python27\lib\site-packages\sklearn\feature_selection\univariate_selection.py:111: UserWarning: Features [[0 0 0 ..., 0 0 0]] are constant.
It looks like the way you split your data into training and testing sets is not working:
# Separate the training and testing data sets
X_train = newX[:half_cut]
X_test = newX[half_cut:]
If you already use sklearn, it is much more convenient to use the builtin splitting routine for this:
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.5, random_state=0)