How to change latex names of sympy vector? - sympy

When creating a sympy vector, first I create the coordinate system, for example for cylindrical coordinates:
from sympy.vector import CoordSys3D
from IPython.display import display
R = CoordSys3D('', transformation='cylindrical', variable_names=("r", "theta", "z"),vector_names=('i', 'j', 'k'))
I want to change the latex names of variables and vectors. As I searched, the CoordSys3D has those inputs:
class sympy.vector.coordsysrect.CoordSys3D(name, transformation=None, parent=None, location=None, rotation_matrix=None, vector_names=None, variable_names=None)[source]
if I print R.theta with:
display(R.theta)
I get $theta$, but i would like to have $\theta$
However, inside its __init__, there are the latex_scalars and latex_vects
__init__(name, location=None, rotation_matrix=None, parent=None, vector_names=None, variable_names=None, latex_vects=None, pretty_vects=None, latex_scalars=None, pretty_scalars=None, transformation=None)
Can I give a value to it?

I was having the same problem, and I found an answer.
R = CoordSys3D('R', transformation='cylindrical', variable_names=("r", "theta", "z"),vector_names=('i', 'j', 'k'))
R.theta._latex_form = '\\theta'
R.j._latex_form = '\\vectorunit{\\theta}'
The ._latex_form attribute also works the same way when you define a sympy function:
E_phi = smp.Function('E_phi')()
E_phi._latex_form = 'E_{\\phi}'
However, there is no ._latex_form attribute for symbols. But I found another way to do it:
E_phi = Symbol('E_{\\phi}')
Remember to use double backslash when entering latex code in python.
Here is some example code I wrote. The output is formatted for MathJax. The output looks like this:
$$ \require{physics} $$
$$ \vb{E} = (E_{r})\vectorunit{r} + (7 e^{i \omega t})\vectorunit{\phi} + (E_{z})\vectorunit{z} $$
$$ \curl{\vb{E}} = (\frac{7 e^{i \omega t}}{r})\vectorunit{z} $$
$$ \vb{B} = (\begin{cases} \frac{7 i e^{i \omega t}}{r \omega} & \text{for}\: r \omega \neq 0 \\- \frac{7 t}{r} & \text{otherwise} \end{cases})\vectorunit{z} $$
import numpy as np
import sympy as smp
import matplotlib.pyplot as plt
from sympy import *
from sympy import symbols
from sympy.vector import divergence
from sympy.vector import curl
from sympy.vector import gradient
from sympy import diff
from sympy import exp
from sympy import integrate
from sympy import I, pi
from sympy.vector import CoordSys3D
N = CoordSys3D('N', transformation='cylindrical',
vector_names=("r", "phi", "z"),
variable_names=("R", "PHI", "Z"))
# Variables
R = N.R
PHI = N.PHI
Z = N.Z
R._latex_form = 'r'
PHI._latex_form = '\\phi'
Z._latex_form = 'z'
# Basis Unit Vectors
rhat = N.r
phihat = N.phi
zhat = N.z
rhat._latex_form = '\\vectorunit{r}'
phihat._latex_form = '\\vectorunit{\\phi}'
zhat._latex_form = '\\vectorunit{z}'
# The r, phi, and z components of the Electric Field
E_r = Symbol('E_{r}')
E_phi = Symbol('E_{\\phi}')
E_z = Symbol('E_{z}')
# Symbols
x, y, z, t = symbols('x y z t')
wavenumber = symbols('k')
E_0 = symbols('E_0') # Amplitude of E field
w = symbols('omega' , real=True, positive=True)
# Define E_phi(r,phi,z,t)
# Note: I = sqrt(-1)
# E_phi = (1/R) * smp.exp( I * ( w*t ))
E_phi = 7 * smp.exp( I * w * t)
# The Electric Field
E = (E_r * rhat) + (E_phi * phihat) + (E_z * zhat)
# Compute B by integrating the curl of E wrt time
# curl E = - dB/dt
# integrate( (curl E) , t ) = - B
jimmy = curl( E )
B = -integrate( jimmy, t )
# Display the answers as LATEX
# init_printing(use_unicode=True, wrap_line=False)
init_printing( use_latex='mathjax' )
print("$$ \\require{physics} $$")
print("$$ \\vb{E} = " + latex(E) + " $$")
print("$$ \\curl{\\vb{E}} = " + latex(jimmy) + " $$")
print("$$ \\vb{B} = " + latex(B.simplify()) + " $$")
# Alternative Way of defining the components of
# the Electric Field:
# The r, phi, and z components of the Electric Field
# E_r = smp.Function('E_r')()
# E_phi = smp.Function('E_phi')()
# E_z = smp.Function('E_z')()
# E_r._latex_form = 'E_{r}'
# E_phi._latex_form = 'E_{\\phi}'
# E_z._latex_form = 'E_{z}'
# Note that if we define these variables as symbols()
# Then we have to give the latex form this way:
# E_phi = symbol('E_{\\phi}')
# symbols have no attribute ._latex_form
# Whereas Functions do.

Related

Improve curve fitting log

I try to make a fit of my curve. My raw data is in an xlsx file. I extract them using pandas. I want to do two different fit because there is a change in behavior from Ra = 1e6. We know that Ra is proportional to Nu**a. a = 0.25 for Ra <1e6 and if not a = 0.33.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from math import log10
from scipy.optimize import curve_fit
import lmfit
data=pd.read_excel('data.xlsx',sheet_name='Sheet2',index=False,dtype={'Ra': float})
print(data)
plt.xscale('log')
plt.yscale('log')
plt.scatter(data['Ra'].values, data['Nu_top'].values, label='Nu_top')
plt.scatter(data['Ra'].values, data['Nu_bottom'].values, label='Nu_bottom')
plt.errorbar(data['Ra'].values, data['Nu_top'].values , yerr=data['Ecart type top'].values, linestyle="None")
plt.errorbar(data['Ra'].values, data['Nu_bottom'].values , yerr=data['Ecart type bot'].values, linestyle="None")
def func(x,a):
return 10**(np.log10(x)/a)
"""maxX = max(data['Ra'].values)
minX = min(data['Ra'].values)
maxY = max(data['Nu_top'].values)
minY = min(data['Nu_top'].values)
maxXY = max(maxX, maxY)
parameterBounds = [-maxXY, maxXY]"""
from lmfit import Model
mod = Model(func)
params = mod.make_params(a=0.25)
ret = mod.fit(data['Nu_top'].head(10).values, params, x=data['Ra'].head(10).values)
print(ret.fit_report())
popt, pcov = curve_fit(func, data['Ra'].head(10).values,
data['Nu_top'].head(10).values, sigma=data['Ecart type top'].head(10).values,
absolute_sigma=True, p0=[0.25])
plt.plot(data['Ra'].head(10).values, func(data['Ra'].head(10).values, *popt),
'r-', label='fit: a=%5.3f' % tuple(popt))
popt, pcov = curve_fit(func, data['Ra'].tail(4).values, data['Nu_top'].tail(4).values,
sigma=data['Ecart type top'].tail(4).values,
absolute_sigma=True, p0=[0.33])
plt.plot(data['Ra'].tail(4).values, func(data['Ra'].tail(4).values, *popt),
'b-', label='fit: a=%5.3f' % tuple(popt))
print(pcov)
plt.grid
plt.title("Nusselt en fonction de Ra")
plt.xlabel('Ra')
plt.ylabel('Nu')
plt.legend()
plt.show()
So I use the log: logRa = a * logNu.
Ra = x axis
Nu = y axis
That's why I defined my function func in this way.
my two fit are not all correct as you can see. I have a covariance equal to [0.00010971]. So I had to do something wrong but I don't see it. I need help please.
Here the data file:
data.xlsx
I noticed that the data values for Ra are large, and after scaling them I performed an equation search - here is my result with code. I use the standard scipy genetic algorithm module differential_evolution to determine initial parameter values for curve_fit(), and that module uses the Latin Hypercube algorithm to ensure a thorough search of parameter space which requires bounds within which to search. It is much easier to give ranges for the initial parameter estimates than to find specific values. This equation works well for both nu_top and nu_bottom, note that the plots are not log scaled as it is unnecessary in this example.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import differential_evolution
import pandas
import warnings
filename = 'data.xlsx'
data=pandas.read_excel(filename,sheet_name='Sheet2',index=False,dtype={'Ra': float})
# notice the Ra scaling by 10000.0
xData = data['Ra'].values / 10000.0
yData = data['Nu_bottom']
def func(x, a, b, c): # "Combined Power And Exponential" from zunzun.com
return a * numpy.power(x, b) * numpy.exp(c * x)
# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
warnings.filterwarnings("ignore") # do not print warnings by genetic algorithm
val = func(xData, *parameterTuple)
return numpy.sum((yData - val) ** 2.0)
def generate_Initial_Parameters():
# min and max used for bounds
maxX = max(xData)
minX = min(xData)
maxY = max(yData)
minY = min(yData)
parameterBounds = []
parameterBounds.append([0.0, 10.0]) # search bounds for a
parameterBounds.append([0.0, 10.0]) # search bounds for b
parameterBounds.append([0.0, 10.0]) # search bounds for c
# "seed" the numpy random number generator for repeatable results
result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
return result.x
# by default, differential_evolution completes by calling curve_fit() using parameter bounds
geneticParameters = generate_Initial_Parameters()
# now call curve_fit without passing bounds from the genetic algorithm,
# just in case the best fit parameters are aoutside those bounds
fittedParameters, pcov = curve_fit(func, xData, yData, geneticParameters)
print('Fitted parameters:', fittedParameters)
print()
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print()
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
# first the raw data as a scatter plot
axes.plot(xData, yData, 'D')
# create data for the fitted equation plot
xModel = numpy.linspace(min(xData), max(xData))
yModel = func(xModel, *fittedParameters)
# now the model as a line plot
axes.plot(xModel, yModel)
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
plt.show()
plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
Here I put my data x and y in log10 (). The graph is in log scale. So normally I should have two affine functions with a coefficient of 0.25 and 0.33. I change the function func in your program James and bounds for b and c but I have no good result.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from math import log10, log
from scipy.optimize import curve_fit
import lmfit
data=pd.read_excel('data.xlsx',sheet_name='Sheet2',index=False,dtype={'Ra': float})
print(data)
plt.xscale('log')
plt.yscale('log')
plt.scatter(np.log10(data['Ra'].values), np.log10(data['Nu_top'].values), label='Nu_top')
plt.scatter(np.log10(data['Ra'].values), np.log10(data['Nu_bottom'].values), label='Nu_bottom')
plt.errorbar(np.log10(data['Ra'].values), np.log10(data['Nu_top'].values) , yerr=data['Ecart type top'].values, linestyle="None")
plt.errorbar(np.log10(data['Ra'].values), np.log10(data['Nu_bottom'].values) , yerr=data['Ecart type bot'].values, linestyle="None")
def func(x,a):
return a*x
maxX = max(data['Ra'].values)
minX = min(data['Ra'].values)
maxY = max(data['Nu_top'].values)
minY = min(data['Nu_top'].values)
maxXY = max(maxX, maxY)
parameterBounds = [-maxXY, maxXY]
from lmfit import Model
mod = Model(func)
params = mod.make_params(a=0.25)
ret = mod.fit(np.log10(data['Nu_top'].head(10).values), params, x=np.log10(data['Ra'].head(10).values))
print(ret.fit_report())
popt, pcov = curve_fit(func, np.log10(data['Ra'].head(10).values), np.log10(data['Nu_top'].head(10).values), sigma=data['Ecart type top'].head(10).values, absolute_sigma=True, p0=[0.25])
plt.plot(np.log10(data['Ra'].head(10).values), func(np.log10(data['Ra'].head(10).values), *popt), 'r-', label='fit: a=%5.3f' % tuple(popt))
popt, pcov = curve_fit(func, np.log10(data['Ra'].tail(4).values), np.log10(data['Nu_top'].tail(4).values), sigma=data['Ecart type top'].tail(4).values, absolute_sigma=True, p0=[0.33])
plt.plot(np.log10(data['Ra'].tail(4).values), func(np.log10(data['Ra'].tail(4).values), *popt), 'b-', label='fit: a=%5.3f' % tuple(popt))
print(pcov)
plt.grid
plt.title("Nusselt en fonction de Ra")
plt.xlabel('log10(Ra)')
plt.ylabel('log10(Nu)')
plt.legend()
plt.show()
With polyfit I have better results.
With my code I open the file and I calculate log (Ra) and log (Nu) then plot (log (Ra), log (Nu)) in log scale.
I'm supposed to have a = 0.25 for Ra <1e6 and if not a = 0.33
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from math import log10
from numpy import polyfit
import numpy.polynomial.polynomial as poly
data=pd.read_excel('data.xlsx',sheet_name='Sheet2',index=False,dtype={'Ra': float})
print(data)
x=np.log10(data['Ra'].values)
y1=np.log10(data['Nu_top'].values)
y2=np.log10(data['Nu_bottom'].values)
x2=np.log10(data['Ra'].head(11).values)
y4=np.log10(data['Nu_top'].head(11).values)
x3=np.log10(data['Ra'].tail(4).values)
y5=np.log10(data['Nu_top'].tail(4).values)
plt.xscale('log')
plt.yscale('log')
plt.scatter(x, y1, label='Nu_top')
plt.scatter(x, y2, label='Nu_bottom')
plt.errorbar(x, y1 , yerr=data['Ecart type top'].values, linestyle="None")
plt.errorbar(x, y2 , yerr=data['Ecart type bot'].values, linestyle="None")
"""a=np.ones(10, dtype=np.float)
weights = np.insert(a,0,1E10)"""
coefs = poly.polyfit(x2, y4, 1)
print(coefs)
ffit = poly.polyval(x2, coefs)
plt.plot(x2, ffit, label='fit: b=%5.3f, a=%5.3f' % tuple(coefs))
absError = ffit - x2
SE = np.square(absError) # squared errors
MSE = np.mean(SE) # mean squared errors
RMSE = np.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (np.var(absError) / np.var(x2))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
print('Predicted value at x=0:', ffit[0])
print()
coefs = poly.polyfit(x3, y5, 1)
ffit = poly.polyval(x3, coefs)
plt.plot(x3, ffit, label='fit: b=%5.3f, a=%5.3f' % tuple(coefs))
plt.grid
plt.title("Nusselt en fonction de Ra")
plt.xlabel('log10(Ra)')
plt.ylabel('log10(Nu)')
plt.legend()
plt.show()
My problem is solved, I managed to fit my curves with more or less correct results

Odeint function from scipy.integrate gives wrong result

I use odeint function to solve a coupled differential equations system and plot one of the variables (theta_i) after the system is solved. My variable (theta_i) comes from the equation:
theta_i = np.arctan2(g1,g2)
where g1 ang g2 are variables calculated in the same function. The results have to be between -pi and pi and they are supposed to look like this (plot from matlab simulation):
However, when I try to plot theta_i after odeint has finished I get this(plot from my python code):
which is really weird. When I print the values of theta_i right after its calcumation (still inside the function) they look correct (between -0.2 and 0.5), so it has to be something with the result's storing and my implementation of odeint. All the other variables that come from the odeint solution are correct. I searched similar posts but nobody had the same problem with me. What might be the problem here? I am new to python and I use python 2.7.12. Thank you in advance.
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
added_mass_x = 0.03 # kg
added_mass_y = 0.04
mb = 0.3 # kg
m1 = mb-added_mass_x
m2 = mb-added_mass_y
l1 = 0.07 # m
l2 = 0.05 # m
J = 0.00050797 # kgm^2
Sa = 0.0110 # m^2
Cd = 2.44
Cl = 3.41
Kd = 0.000655 # kgm^2
r = 1000 # kg/m^3
c1 = 0.5*r*Sa*Cd
c2 = 0.5*r*Sa*Cl
c3 = 0.5*mb*(l1**2)
c4 = Kd/J
c5 = (1/(2*J))*(l1**2)*mb*l2
c6 = (1/(3*J))*(l1**3)*mb
theta_0 = 10*(np.pi/180) # rad
theta_A = 20*(np.pi/180) # rad
f = 2 # Hz
t = np.linspace(0,100,8000) # s
def direct(u,t):
vcx = u[0]
vcy = u[1]
wz = u[2]
psi = u[3]
x = u[4]
y = u[5]
vcx_i = u[6]
vcy_i = u[7]
psi_i = u[8]
wz_i = u[9]
theta_i = u[10]
theta_deg_i = u[11]
# Subsystem 1
omega = 2*np.pi*f # rad/s
theta = theta_0 + theta_A*np.sin(omega*t) # rad
theta_deg = (theta*180)/np.pi # deg
thetadotdot = -(omega**2)*theta_A*np.sin(omega*t) # rad/s^2
# Subsystem 2
vcxdot = (m2/m1)*vcy*wz-(c1/m1)*vcx*np.sqrt((vcx**2)+(vcy**2))+(c2/m1)*vcy*np.sqrt((vcx**2)+(vcy**2))*np.arctan2(vcy,vcx)-(c3/m1)*thetadotdot*np.sin(theta)
vcydot = -(m1/m2)*vcx*wz-(c1/m2)*vcy*np.sqrt((vcx**2)+(vcy**2))-(c2/m2)*vcx*np.sqrt((vcx**2)+(vcy**2))*np.arctan2(vcy,vcx)+(c3/m2)*thetadotdot*np.cos(theta)
wzdot = ((m1-m2)/J)*vcx*vcy-c4*wz*wz*np.sign(wz)-c5*thetadotdot*np.cos(theta)-c6*thetadotdot
psidot = wz
# Subsystem 3
xdotdot = vcxdot*np.cos(psi)-vcx*np.sin(psi)*wz+vcydot*np.sin(psi)+vcy*np.cos(psi)*wz # m/s^2
ydotdot = -vcxdot*np.sin(psi)-vcx*np.cos(psi)*wz+vcydot*np.cos(psi)-vcy*np.sin(psi)*wz # m/s^2
xdot = vcx*np.cos(psi)+vcy*np.sin(psi) # m/s
ydot = -vcx*np.sin(psi)+vcy*np.cos(psi) # m/s
# Subsystem 4
vcx_i = xdot*np.cos(psi_i)-ydot*np.sin(psi_i)
vcy_i = ydot*np.cos(psi_i)+xdot*np.sin(psi_i)
psidot_i = wz_i
vcxdot_i = xdotdot*np.cos(psi_i)-xdot*np.sin(psi_i)*psidot_i-ydotdot*np.sin(psi_i)-ydot*np.cos(psi_i)*psidot_i
vcydot_i = ydotdot*np.cos(psi_i)-ydot*np.sin(psi_i)*psidot_i+xdotdot*np.sin(psi_i)+xdot*np.cos(psi_i)*psidot_i
g1 = -(m1/c3)*vcxdot_i+(m2/c3)*vcy_i*wz_i-(c1/c3)*vcx_i*np.sqrt((vcx_i**2)+(vcy_i**2))+(c2/c3)*vcy_i*np.sqrt((vcx_i**2)+(vcy_i**2))*np.arctan2(vcy_i,vcx_i)
g2 = (m2/c3)*vcydot_i+(m1/c3)*vcx_i*wz_i+(c1/c3)*vcy_i*np.sqrt((vcx_i**2)+(vcy_i**2))+(c2/c3)*vcx_i*np.sqrt((vcx_i**2)+(vcy_i**2))*np.arctan2(vcy_i,vcx_i)
A = 12*np.sin(2*np.pi*f*t+np.pi) # eksiswsi tail_frequency apo simulink
if A>=0.1:
wzdot_i = ((m1-m2)/J)*vcx_i*vcy_i-c4*wz_i**2*np.sign(wz_i)-c5*g2-c6*np.sqrt((g1**2)+(g2**2))
elif A<-0.1:
wzdot_i = ((m1-m2)/J)*vcx_i*vcy_i-c4*wz_i**2*np.sign(wz_i)-c5*g2+c6*np.sqrt((g1**2)+(g2**2))
else:
wzdot_i = ((m1-m2)/J)*vcx_i*vcy_i-c4*wz_i**2*np.sign(wz)-c5*g2
if g2>0:
theta_i = np.arctan2(g1,g2)
elif g2<0 and g1>=0:
theta_i = np.arctan2(g1,g2)-np.pi
elif g2<0 and g1<0:
theta_i = np.arctan2(g1,g2)+np.pi
elif g2==0 and g1>0:
theta_i = -np.pi/2
elif g2==0 and g1<0:
theta_i = np.pi/2
elif g1==0 and g2==0:
theta_i = 0
theta_deg_i = (theta_i*180)/np.pi
#print theta_deg_i
return [vcxdot, vcydot, wzdot, psidot, xdot, ydot, vcxdot_i, vcydot_i, psidot_i, wzdot_i, theta_i, theta_deg_i]
# arxikes synthikes
vcx_0 = 0.1257
vcy_0 = 0
wz_0 = 0
psi_0 = 0
x_0 = 0
y_0 = 0
vcx_i_0 = 0.1257
vcy_i_0 = 0
psi_i_0 = 0
wz_i_0 = 0
theta_i_0 = 0.1745
theta_deg_i_0 = 9.866
u0 = [vcx_0, vcy_0, wz_0, psi_0, x_0, y_0, vcx_i_0, vcy_i_0, psi_i_0, wz_i_0, theta_i_0, theta_deg_i_0]
u = odeint(direct, u0, t, tfirst=False)
vcx = u[:,0]
vcy = u[:,1]
wz = u[:,2]
psi = u[:,3]
x = u[:,4]
y = u[:,5]
vcx_i = u[:,6]
vcy_i = u[:,7]
psi_i = u[:,8]
wz_i = u[:,9]
theta_i = u[:,10]
theta_deg_i = u[:,11]
print theta_i
plt.figure(17)
plt.plot(t,theta_i,'r-',linewidth=1,label='theta_i')
plt.xlabel('t [s]')
plt.title('theta_i [rad] (Main body CF)')
plt.legend()
plt.show()
The problem as you stated is that theta_i is not part of the gradient. When you formulate your direct, it should be of the form:
def direct(vector, t):
return vector_dot
The quickest and dirtiest solution (without cleaning the code) is to use the function you already defined:
theta_i = [direct(u_i, t_i)[10] for t_i, u_i in zip(t, u)]
I used a a shorter interval: t = np.linspace(0,10,8000). It yielded this:
EDIT: How to remove your theta from the integrator:
def direct(u, t):
# your original function as it is
def direct2(u,t):
return direct(u,t)[:9]
#now integrate the second function
u = odeint(direct2, u0, t)

Indent Error when there appears to be none

You I have been trying to run this script but I keep getting an indentation error at the end
of the backprop(x,y) function. I would really appreciate ANY help!!
import cPickle
import gzip
def load_data():
f = gzip.open('mnist.pkl.gz', 'rb')
training_data, validation_data, test_data = cPickle.load(f)
f.close()
return (training_data, validation_data, test_data)
import numpy as np
class Network(object):
def __init__(self, layers):
self.layers = layers
self.biases = [np.random.randn(y,1) for y
in layers[1:]]
self.weights = [np.transpose(np.random.randn(x,y))
for x,y
in zip(layers[:-1],layers[1:])]
self.num_layers = len(layers)
def backprop(self, x, y):
nabla_b = [np.zeros(b.shape) for b in self.biases]
nabla_w = [np.zeros(w.shape) for w in self.weights]
# feedforward
activation = x
activations = [x] # list to store all the activations, layer by layer
zs = [] # list to store all the z vectors, layer by layer
for b, w in zip(self.biases, self.weights):
z = np.dot(w, activation)+b
zs.append(z)
activation = sigmoid(z)
activations.append(activation)
# backward pass
delta = self.cost_derivative(activations[-1], y) * \
sigmoid_prime(zs[-1])#set first delta
nabla_b[-1] = delta#set last dC/db to delta vector
nabla_w[-1] = np.dot(delta, activations[-2].transpose())
#calculate nabla_b, nabla_w for the rest of the layers
for l in xrange(2, self.num_layers):
z = zs[-l]
sp = sigmoid_prime(z)
delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
nabla_b[-l] = delta
nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
#this is where python says there is an indent error!
return (nabla_b, nabla_w)
The problem was fixed by selecting the "edit" drop-down menu of Notepad++, choosing Blank Operations, and finally, clicking 'TAB to spaces'; obviously, this should be done after selecting the portion of code that's triggering the error.

Feature selection in scikit learn for multiple variables and thousands+ features

I am trying to perform feature selection for logistic regression classifier. Originally there are 4 variables: name, location, gender, and label = ethnicity. The three variables, namely the name, give rise to tens of thousands of more "features", for example, name "John Snow" will give rise to 2-letter substrings like 'jo', 'oh', 'hn'... etc. The feature set undergoes DictVectorization.
I am trying to follow this tutorial (http://scikit-learn.org/stable/auto_examples/feature_selection/plot_feature_selection.html) but I am not sure if I am doing it right since the tutorial is using a small number of features while mine has tens of thousands after vectorization. And also the plt.show() shows a blank figure.
# coding=utf-8
import pandas as pd
from pandas import DataFrame, Series
import numpy as np
import re
import random
import time
from random import randint
import csv
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.metrics import confusion_matrix as sk_confusion_matrix
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve
# Assign X and y variables
X = df.raw_name.values
X2 = df.name.values
X3 = df.gender.values
X4 = df.location.values
y = df.ethnicity_scan.values
# Feature extraction functions
def feature_full_name(nameString):
try:
full_name = nameString
if len(full_name) > 1: # not accept name with only 1 character
return full_name
else: return '?'
except: return '?'
def feature_avg_wordLength(nameString):
try:
space = 0
for i in nameString:
if i == ' ':
space += 1
length = float(len(nameString) - space)
name_entity = float(space + 1)
avg = round(float(length/name_entity), 0)
return avg
except:
return 0
def feature_name_entity(nameString2):
space = 0
try:
for i in nameString2:
if i == ' ':
space += 1
return space+1
except: return 0
def feature_gender(genString):
try:
gender = genString
if len(gender) >= 1:
return gender
else: return '?'
except: return '?'
def feature_noNeighborLoc(locString):
try:
x = re.sub(r'^[^, ]*', '', locString) # remove everything before and include first ','
y = x[2:] # remove subsequent ',' and ' '
return y
except: return '?'
def list_to_dict(substring_list):
try:
substring_dict = {}
for i in substring_list:
substring_dict['substring='+str(i)] = True
return substring_dict
except: return '?'
# Transform format of X variables, and spit out a numpy array for all features
my_dict13 = [{'name-entity': feature_name_entity(feature_full_name(i))} for i in X2]
my_dict14 = [{'avg-length': feature_avg_wordLength(feature_full_name(i))} for i in X]
my_dict15 = [{'gender': feature_full_name(i)} for i in X3]
my_dict16 = [{'location': feature_noNeighborLoc(feature_full_name(i))} for i in X4]
my_dict17 = [{'dummy1': 1} for i in X]
my_dict18 = [{'dummy2': random.randint(0,2)} for i in X]
all_dict = []
for i in range(0, len(my_dict)):
temp_dict = dict(my_dict13[i].items() + my_dict14[i].items()
+ my_dict15[i].items() + my_dict16[i].items() + my_dict17[i].items() + my_dict18[i].items()
)
all_dict.append(temp_dict)
newX = dv.fit_transform(all_dict)
# Separate the training and testing data sets
half_cut = int(len(df)/2.0)*-1
X_train = newX[:half_cut]
X_test = newX[half_cut:]
y_train = y[:half_cut]
y_test = y[half_cut:]
# Fitting X and y into model, using training data
lr = LogisticRegression()
lr.fit(X_train, y_train)
dv = DictVectorizer()
# Feature selection
plt.figure(1)
plt.clf()
X_indices = np.arange(X_train.shape[-1])
selector = SelectPercentile(f_classif, percentile=10)
selector.fit(X_train, y_train)
scores = -np.log10(selector.pvalues_)
scores /= scores.max()
plt.bar(X_indices - .45, scores, width=.2,
label=r'Univariate score ($-Log(p_{value})$)', color='g')
plt.show()
Warning:
E:\Program Files Extra\Python27\lib\site-packages\sklearn\feature_selection\univariate_selection.py:111: UserWarning: Features [[0 0 0 ..., 0 0 0]] are constant.
It looks like the way you split your data into training and testing sets is not working:
# Separate the training and testing data sets
X_train = newX[:half_cut]
X_test = newX[half_cut:]
If you already use sklearn, it is much more convenient to use the builtin splitting routine for this:
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.5, random_state=0)

Python -- Matplotlib for elliptic curve with sympy solve()

I have an elliptic curve plotted. I want to draw a line along a P,Q,R (where P and Q will be determined independent of this question). The main problem with the P is that sympy solve() returns another equation and it needs to instead return a value so it can be used to plot the x-value for P. As I understood it, solve() should return a value, so I'm clearly doing something wrong here that I'm just totally not seeing. For reference, here's how P+Q=R should look:
I've been going over the docs and other material and this is as far as I've been able to get myself into trouble:
from mpl_toolkits.axes_grid.axislines import SubplotZero
from pylab import *
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.path import Path
import matplotlib.patches as patches
from matplotlib import rc
import random
from sympy.solvers import solve
from sympy import *
def plotGraph():
fig = plt.figure(1)
#ax = SubplotZero(fig, 111)
#fig.add_subplot(ax)
#for direction in ["xzero", "yzero"]:
#ax.axis[direction].set_axisline_style("-|>")
#ax.axis[direction].set_visible(True)
#ax.axis([-10,10,-10,10])
a = -2; b = 1
y, x = np.ogrid[-10:10:100j, -10:10:100j]
xlist = x.ravel(); ylist = y.ravel()
elliptic_curve = pow(y, 2) - pow(x, 3) - x * a - b
plt.contour(xlist, ylist, elliptic_curve, [0])
#rand = random.uniform(-5,5)
randmid = random.randint(30,70)
#y = ylist[randmid]; x = xlist[randmid]
xsym, ysym = symbols('x ylist[randmid]')
x_result = solve(pow(ysym, 2) - pow(xsym, 3) - xsym * a - b, xsym) # 11/5/13 needs to return a value
plt.plot([-1.5,5], [-1,8], color = "c", linewidth=1) # plot([x1,x2,x3,...],[y1,y2,y3,...])
plt.plot([xlist[randmid],5], [ylist[randmid],8], color = "m", linewidth=1)
#rc('text', usetex=True)
text(-9,6,' size of xlist: %s \n size of ylist: %s \n x_coord: %s \n random_y: %s'
%(len(xlist),len(ylist),x_result,ylist[randmid]),
fontsize=10, color = 'blue',bbox=dict(facecolor='tan', alpha=0.5))
plt.annotate('$P+Q=R$', xy=(2, 1), xytext=(3, 1.5),arrowprops=dict(facecolor='black', shrink=0.05))
## verts = [(-5, -10),(5, 10)] # [(x,y)startpoint,(x,y)endpoint] #,(0, 0)]
## codes = [Path.MOVETO,Path.LINETO] # related to verts[] #,Path.STOP]
## path = Path(verts, codes)
## patch = patches.PathPatch(path, facecolor='none', lw=2)
## ax.add_patch(patch)
plt.grid(True)
plt.show()
def main():
plotGraph()
if __name__ == '__main__':
main()
Ultimately, I'd like to draw a line to show P+Q=R, so if someone also has something to add on how to code to get the Q that would be greatly appreciated. I'm teaching myself about Python and elliptic curves so I'm sure that any entry-level programmer can figure out in 2 minutes what I've been on for some time already.
I don't know what are you calculating, but here is the code that can plot the graph:
import numpy as np
import pylab as pl
Y, X = np.mgrid[-10:10:100j, -10:10:100j]
def f(x):
return x**3 -3*x + 5
px = -2.0
py = -np.sqrt(f(px))
qx = 0.5
qy = np.sqrt(f(qx))
k = (qy - py)/(qx - px)
b = -px*k + py
poly = np.poly1d([-1, k**2, 2*k*b+3, b**2-5])
x = np.roots(poly)
y = np.sqrt(f(x))
pl.contour(X, Y, Y**2 - f(X), levels=[0])
pl.plot(x, y, "o")
pl.plot(x, -y, "o")
x = np.linspace(-5, 5)
pl.plot(x, k*x+b)
graph:
based on HYRY's answer, I just update some details to make it better:
import numpy as np
import pylab as pl
Y, X = np.mgrid[-10:10:100j, -10:10:100j]
def f(x, a, b):
return x**3 + a*x + b
a = -2
b = 4
# the 1st point: 0, -2
x1 = 0
y1 = -np.sqrt(f(x1, a, b))
print(x1, y1)
# the second point
x2 = 3
y2 = np.sqrt(f(x2, a, b))
print(x2, y2)
# line: y=kl*x+bl
kl = (y2 - y1)/(x2 - x1)
bl = -x1*kl + y1 # bl = -x2*kl + y2
# y^2=x^3+ax+b , y=kl*x+bl => [-1, kl^2, 2*kl*bl, bl^2-b]
poly = np.poly1d([-1, kl**2, 2*kl*bl-a, bl**2-b])
# the roots of the poly
x = np.roots(poly)
y = np.sqrt(f(x, a, b))
print(x, y)
pl.contour(X, Y, Y**2 - f(X, a, b), levels=[0])
pl.plot(x, y, "o")
pl.plot(x, -y, "o")
x = np.linspace(-5, 5)
pl.plot(x, kl*x+bl)
And we got the roots of this poly:
[3. 2.44444444 0. ] [5. 3.7037037 2. ]