Error in calculating gradient using Python - python-2.7

I use the following formula to calculate the gradient:
gradient = [f(x+h) - f(x-h)] / (2h)
and I test it with a linear function, but something is wrong.
The code is here:
import numpy as np

def evla_numerical_gradient(f, x):
    gradient = np.zeros(x.shape, dtype=np.float64)
    delta_x = 0.00001
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        index = it.multi_index
        x_old = x[index]
        x[index] = x_old + delta_x
        fx_addh = f(x)
        print(fx_addh)
        x[index] = x_old - delta_x
        fx_minush = f(x)
        print(fx_minush)
        x[index] = x_old
        print((fx_addh - fx_minush) / (2 * delta_x))
        gradient[index] = (fx_addh - fx_minush) / (2. * delta_x)
        it.iternext()
    return gradient

def lin(x):
    return x

if __name__ == '__main__':
    x = np.array([0.001])
    grad = evla_numerical_gradient(lin, x)
    print(grad)
The result is here:
[ 0.00101]
[ 0.00099]
[ 0.]
[ 0.]
Why is the gradient at x 0?

The problem with your code is in the following combination of lines (I show the example of fx_addh; the case of fx_minush is similar):
fx_addh = f(x)
x[index] = x_old
You are placing the result of f(x) into fx_addh. But the way you have defined f(x), it is just a handle to your lin(x), which returns its argument directly, so fx_addh ends up bound to the very same array object as x.
In Python, assignment operations do not copy objects; they create a binding between a target (on the left of the =) and the object (on the right of the =). See the Python documentation on assignment statements for more on this.
To convince yourself that this is happening, place another print(fx_addh) after the line in which you set x[index] = x_old; you will see that it now contains the value zero.
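A minimal sketch of the aliasing (the variable names a and b are mine, just for illustration):

import numpy as np

def lin(x):          # the original lin from the question: returns its argument
    return x

a = np.array([0.001])
b = lin(a)           # b is bound to the very same array object as a
a[0] = 42.0
print(b)             # [ 42.] -- b changed because a and b are the same object
print(b is a)        # True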
To fix this you can modify your lin(x) function to return a copy of the object passed in as an argument:
import numpy as np
import copy

def evla_numerical_gradient(f, x):
    gradient = np.zeros(x.shape, dtype=np.float64)
    delta_x = 0.00001
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        index = it.multi_index
        x_old = x[index]
        x[index] = x_old + delta_x
        fx_addh = f(x)
        print(fx_addh)
        x[index] = x_old - delta_x
        fx_minush = f(x)
        print(fx_minush)
        x[index] = x_old
        print((fx_addh - fx_minush) / (2 * delta_x))
        gradient[index] = (fx_addh - fx_minush) / (2. * delta_x)
        it.iternext()
    return gradient

def lin(x):
    return copy.copy(x)

if __name__ == '__main__':
    x = np.array([0.001])
    grad = evla_numerical_gradient(lin, x)
    print(grad)
Which returns:
[ 0.00101]
[ 0.00099]
[ 1.]
[ 1.]
Indicating a gradient of 1 as you would expect.
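As an alternative sketch (not from the original answer), the copy can also be made on the caller's side, so lin itself need not change; copying here is a hypothetical helper name of mine:

import numpy as np

def copying(f):
    # wrap f so its return value is a detached copy of whatever f returns
    def wrapped(x):
        return np.copy(f(x))
    return wrapped

x = np.array([0.001])
grad = evla_numerical_gradient(copying(lin), x)
print(grad)   # [ 1.]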

Because fx_addh and fx_minush are pointing to the same memory (the same array object as x). Change the lin function to this:
def lin(x):
    return x.copy()
result:
[ 0.00101]
[ 0.00099]
[ 1.]
[ 1.]

Related

Newton's method without pre-built functions of Python: Calculation of gradient and Hessian

I am trying to write the basic Newton's method without pre-built solvers.
This is the function:
import numpy as np
import sympy as sym
from sympy import Matrix

## definition of variables
x_1, x_2 = sym.symbols("x_1 x_2")
a_T=np.array([[0.3],[0.6],[0.2]])
b_T=np.array([5,26,3])
c_T=np.array([40,1,10])
u= x_1-0.8
v= x_2-(a_T[0]+a_T[1]*u**2*(1-u)**(1/2)-a_T[2]*u)
alpha= -b_T[0]+(b_T[1]*u**2)*(1+u)**(1/2)+(b_T[2])*u
beta= c_T[0]*v**2*(1-c_T[1]*v)/(1+c_T[2]*u**2)
## function
f = alpha**(-beta)
I calculated the gradient and the Hessian and defined the other parameters:
## gradient
gradient_cal = sym.Matrix(1,2,sym.derive_by_array(f, (x_1, x_2)))
## hessian
hessian_cal = sym.Matrix(2, 2, sym.derive_by_array(gradient_cal, (x_1, x_2)))
# initial guess
x_A= Matrix([[1],[0.5]])
xk = x_A
#tolerance
epsilon= 1e-10
#maximum iterations
max_iter=100
And the function itself:
def newton(gradient_cal,hessian_cal,xk,epsilon,max_iter):
    for k in range(0,max_iter):
        fxk = gradient_cal.evalf(subs={x_1:xk[0], x_2:xk[1]})
        if fxk.norm() < epsilon:
            print('Found solution after',k,'iterations.')
            return xk
        Dfxk = hessian_cal.evalf(subs={x_1: xk[0], x_2: xk[1]})
        if Dfxk == 0:
            print('Zero derivative. No solution found.')
            return None
        A=hessian_cal.evalf(subs={x_1: xk[0], x_2: xk[1]})
        B=gradient_cal.evalf(subs={x_1: xk[0], x_2: xk[1]})
        pk= (A.inv().dot(B))
        xk = np.subtract(xk, pk)
    print('Exceeded maximum iterations. No solution found.')
    return None

approx = newton(gradient_cal,hessian_cal,x_A,epsilon,max_iter)
print(approx)
The following error shows up:
TypeError: Shape should contain integers only.
I checked it and saw that the Hessian contains "I" (imaginary) values, so I am not sure whether the calculations of the gradient and the Hessian are correct.
Does anyone have a better solution to calculate the gradient and the Hessian for such a complex function?
The jacobian-batteries are already included in SymPy:
>>> from sympy.abc import x, y
>>> f = x/y + x*y**2
>>> Matrix([f]).jacobian((x,y))
Matrix([[y**2 + 1/y, 2*x*y - x/y**2]])
>>> _.jacobian((x,y)) # Hessian
Matrix([
[ 0, 2*y - 1/y**2],
[2*y - 1/y**2, 2*x + 2*x/y**3]])
So you could try
x_1, x_2 = sym.symbols("x_1 x_2")
xx = x_1, x_2
a_T=[0.3,0.6,0.2]
b_T=[5,26,3]
c_T=[40,1,10]
u= x_1-0.8
v= x_2-(a_T[0]+a_T[1]*u**2*(1-u)**(1/2)-a_T[2]*u)
alpha= -b_T[0]+(b_T[1]*u**2)*(1+u)**(1/2)+(b_T[2])*u
beta= c_T[0]*v**2*(1-c_T[1]*v)/(1+c_T[2]*u**2)
## function
f = alpha**(-beta)
jac = Matrix([f]).jacobian(xx)
hes = jac.jacobian(xx)
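To see how jac and hes feed into a Newton step, here is a self-contained sketch of mine using the toy function from the doctest above (x/y + x*y**2), since the original f can evaluate to complex values in parts of the plane:

from sympy import symbols, Matrix

x, y = symbols("x y")
f = x/y + x*y**2
jac = Matrix([f]).jacobian((x, y))   # 1x2 gradient (row vector)
hes = jac.jacobian((x, y))           # 2x2 Hessian

# one Newton step from an initial guess
xk = Matrix([1.0, 2.0])
g = jac.T.subs({x: xk[0], y: xk[1]}).evalf()   # 2x1 gradient at xk
H = hes.subs({x: xk[0], y: xk[1]}).evalf()     # 2x2 Hessian at xk
xk_next = xk - H.inv() * g
print(xk_next)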

How to set parameters for lightgbm when using customized objective function for multi-class classification?

I want to test a customized objective function for lightgbm in multi-class classification.
I have specified the parameter "num_class=3".
However, an error is thrown: "Number of classes must be 1 for non-multiclass training".
I am using python 3.6 and lightgbm version 0.2
# iris data
from sklearn import datasets
import lightgbm as lgb
import numpy as np

iris = datasets.load_iris()
X = iris['data']
y = iris['target']

# construct train-test
num_train = int(X.shape[0] / 3 * 2)
idx = np.random.permutation(X.shape[0])
x_train = X[idx[:num_train]]
x_test = X[idx[num_train:]]
y_train = y[idx[:num_train]]
y_test = y[idx[num_train:]]

# softmax function
def softmax(x):
    '''
    input x: an np.array of n_sample * n_class
    return : an np.array of n_sample * n_class (probabilities)
    '''
    x = np.where(x>100, 100, x)
    x = np.exp(x)
    return x / np.reshape(np.sum(x, 1), [x.shape[0], 1])

# objective function
def objective(y_true, y_pred):
    '''
    input:
        y_true: np.array of size (n_sample,)
        y_pred: np.array of size (n_sample, n_class)
    '''
    y_pred = softmax(y_pred)
    temp = np.zeros_like(y_pred)
    temp[range(y_pred.shape[0]), y_true] = 1
    gradient = y_pred - temp
    hessian = y_pred * (1 - y_pred)
    return [gradient, hessian]

# lightgbm model
model = lgb.LGBMClassifier(n_estimators=10000,
                           num_classes = 3,
                           objective = objective,
                           nthread=4)
model.fit(x_train, y_train,
          eval_metric = 'multi_logloss',
          eval_set = [(x_test, y_test), (x_train, y_train)],
          eval_names = ['valid', 'train'],
          early_stopping_rounds = 200, verbose = 100)
Let me answer my own question.
The arguments in the objective function should be:
y_true of size [n_sample, ]
y_pred of size [n_sample * n_class, ] instead of [n_sample, n_class]
To be more specific, y_pred should be like
y_pred = [first_class, first_class,..., second_class, second_class,..., third_class, third_class,...]
Moreover, gradient and hessian should be grouped in the same way.
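To make that layout concrete, here is a tiny sketch of mine (2 samples, 3 classes; the numbers are just labels):

import numpy as np

m = np.array([[10, 20, 30],    # sample 0: class 0, 1, 2
              [11, 21, 31]])   # sample 1: class 0, 1, 2
print(m.ravel(order='F'))      # [10 11 20 21 30 31] -- grouped class by class
print(np.reshape(m.ravel(order='F'), [2, 3], order='F'))  # recovers m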
def objective(y_true, y_pred):
    '''
    input:
        y_true: np.array of size [n_sample,]
        y_pred: np.array of size [n_sample * n_class, ]
    return:
        gradient and hessian should have exactly the same form of y_pred
    '''
    y_pred = np.reshape(y_pred, [num_train, 3], order = 'F')
    y_pred = softmax(y_pred)
    temp = np.zeros_like(y_pred)
    temp[range(y_pred.shape[0]), y_true] = 1
    gradient = y_pred - temp
    hessian = y_pred * (1 - y_pred)
    return [gradient.ravel(order = 'F'), hessian.ravel(order = 'F')]
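As a small follow-up sketch (my addition, not from the original answer): rather than relying on the global num_train, the row count can be derived from y_true inside the objective, assuming 3 classes as above:

import numpy as np

def objective(y_true, y_pred):
    n = y_true.shape[0]                              # derive the row count from the labels
    y_pred = np.reshape(y_pred, [n, 3], order='F')   # back to (n_sample, n_class)
    y_pred = softmax(y_pred)
    temp = np.zeros_like(y_pred)
    temp[range(n), y_true.astype(int)] = 1
    gradient = y_pred - temp
    hessian = y_pred * (1 - y_pred)
    return [gradient.ravel(order='F'), hessian.ravel(order='F')]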

Gradient Descent in Python 2

Part of my assignment is to implement Gradient Descent to find the best approximation of the values c_1, c_2 and r_1 for the function
y = c1*exp(r1*x) + (c1*x)**3 - (c2*x)**2
(the same expression used in the code below).
Given is only a list of 30 y-values corresponding to x from 0 to 30. I am implementing this in Enthought Canopy like this:
First I start with random values:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as pyplt
c1 = -0.1
c2 = 0.1
r1 = 0.1
x = np.linspace(0,29,30) #start,stop,numitems
y = c1*np.exp(r1*x) + (c1*x)**3.0 - (c2*x)**2.0
pyplt.plot(x,y)
values_x = np.linspace(0,29,30)
values_y = np.array([0.2, -0.142682939241718, -0.886680607211679, -2.0095087143494, -3.47583798747496, -5.24396052331554, -7.2690008846359, -9.50451068338581, -11.9032604272567, -14.4176327390446, -16.9998176236069, -19.6019094345634, -22.1759550265352, -24.6739776668383, -27.0479889096801, -29.2499944927101, -31.2319972651608, -32.945998641919, -34.3439993255969, -35.3779996651013, -35.9999998336943, -36.161999917415, -35.8159999589895, -34.9139999796348, -33.4079999898869, -31.249999994978, -28.3919999975061, -24.7859999987616, -20.383999999385, -15.1379999996945])
pyplt.plot(values_x,values_y)
The squared error is quite high:
def Error(y,y0):
    return ( (1.0)*sum((y-y0)**2.0) )

print Error(y,values_y)
Now, to implement the gradient descent, I derived the partial derivative functions for c_1, c_2 and r_1 and implemented the Gradient Descent:
step_size = 0.0000005
accepted_Error = 50
dc1 = c1
dc2 = c2
dr1 = r1
y0 = values_y
previous_Error = 100000
left = True
for _ in range(1000):
    gc1 = (2.0) * sum( ( y - dc1*np.exp(dr1*x) - (dc1*x)**3 + (dc2*x)**2 ) * ( -1*np.exp(dr1*x) - (3*(dc1**2)*(x**3)) ) )
    gc2 = (2.0) * sum( ( y - dc1*np.exp(dr1*x) - (dc1*x)**3 + (dc2*x)**2 ) * ( 2*dc2*(x**2) ) )
    gr1 = (2.0) * sum( ( y - dc1*np.exp(dr1*x) - (dc1*x)**3 + (dc2*x)**2 ) * ( -1*dc1*x*np.exp(dr1*x) ) )
    dc1 = dc1 - step_size*gc1
    dc2 = dc2 - step_size*gc2
    dr1 = dr1 - step_size*gr1
    y1 = dc1*np.exp(dr1*x) + (dc1*x)**3.0 - (dc2*x)**2.0
    current_Error = Error(y0,y1)
    if (current_Error > accepted_Error):
        print current_Error
    else:
        break
    if (current_Error > previous_Error):
        print current_Error
        print "DIVERGING"
        break
    if (current_Error==previous_Error):
        print "CAN'T IMPROVE"
        break
    previous_Error = current_Error
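For reference, a sketch of the partial derivatives that the gc1, gc2 and gr1 lines above implement, writing yhat for the model output c1*exp(r1*x) + (c1*x)**3 - (c2*x)**2 and y for the target values:

dE/dc1 = 2 * sum( (y - yhat) * ( -exp(r1*x) - 3*c1**2*x**3 ) )
dE/dc2 = 2 * sum( (y - yhat) * (  2*c2*x**2 ) )
dE/dr1 = 2 * sum( (y - yhat) * ( -c1*x*exp(r1*x) ) )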
However, the error is not improving at all, even though I tried varying the step size. Is there a mistake in my code?

generate N random numbers from a skew normal distribution using numpy

I need a function in python to return N random numbers from a skew normal distribution. The skew needs to be taken as a parameter.
e.g. my current use is
x = numpy.random.randn(1000)
and the ideal function would be e.g.
x = randn_skew(1000, skew=0.7)
Solution needs to conform with: python version 2.7, numpy v.1.9
A similar answer is here: skew normal distribution in scipy. However, this generates a PDF, not random numbers.
I start by generating the PDF curves for reference:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

NUM_SAMPLES = 100000
SKEW_PARAMS = [-3, 0]

def skew_norm_pdf(x,e=0,w=1,a=0):
    # adapted from:
    # http://stackoverflow.com/questions/5884768/skew-normal-distribution-in-scipy
    t = (x-e) / w
    return 2.0 * w * stats.norm.pdf(t) * stats.norm.cdf(a*t)

# generate the skew normal PDF for reference:
location = 0.0
scale = 1.0
x = np.linspace(-5,5,100)

plt.subplots(figsize=(12,4))
for alpha_skew in SKEW_PARAMS:
    p = skew_norm_pdf(x,location,scale,alpha_skew)
    # n.b. note that alpha is a parameter that controls skew, but the 'skewness'
    # as measured will be different. see the wikipedia page:
    # https://en.wikipedia.org/wiki/Skew_normal_distribution
    plt.plot(x,p)
Next I found a VB implementation of sampling random numbers from the skew normal distribution and converted it to python:
# literal adaptation from:
# http://stackoverflow.com/questions/4643285/how-to-generate-random-numbers-that-follow-skew-normal-distribution-in-matlab
# original at:
# http://www.ozgrid.com/forum/showthread.php?t=108175
def rand_skew_norm(fAlpha, fLocation, fScale):
    sigma = fAlpha / np.sqrt(1.0 + fAlpha**2)
    afRN = np.random.randn(2)
    u0 = afRN[0]
    v = afRN[1]
    u1 = sigma*u0 + np.sqrt(1.0 -sigma**2) * v
    if u0 >= 0:
        return u1*fScale + fLocation
    return (-u1)*fScale + fLocation

def randn_skew(N, skew=0.0):
    return [rand_skew_norm(skew, 0, 1) for x in range(N)]

# lets check they at least visually match the PDF:
import seaborn as sns

plt.subplots(figsize=(12,4))
for alpha_skew in SKEW_PARAMS:
    p = randn_skew(NUM_SAMPLES, alpha_skew)
    sns.distplot(p)
And then wrote a quick version which (without extensive testing) appears to be correct:
def randn_skew_fast(N, alpha=0.0, loc=0.0, scale=1.0):
    sigma = alpha / np.sqrt(1.0 + alpha**2)
    u0 = np.random.randn(N)
    v = np.random.randn(N)
    u1 = (sigma*u0 + np.sqrt(1.0 - sigma**2)*v) * scale
    u1[u0 < 0] *= -1
    u1 = u1 + loc
    return u1

# lets check again
plt.subplots(figsize=(12,4))
for alpha_skew in SKEW_PARAMS:
    p = randn_skew_fast(NUM_SAMPLES, alpha_skew)
    sns.distplot(p)
from scipy.stats import skewnorm
a=10
data= skewnorm.rvs(a, size=1000)
Here, a is the skewness parameter; for details see:
https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.skewnorm.html
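A brief usage sketch (my addition, with illustrative values): skewnorm.rvs also accepts loc and scale, so shifted and rescaled samples can be drawn directly:

from scipy.stats import skewnorm

# 1000 samples with skewness parameter a=10, centered at 2.0 with scale 0.5
data = skewnorm.rvs(a=10, loc=2.0, scale=0.5, size=1000)
print(data.mean(), data.std())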
Adapted from the rsnorm function in the fGarch R package:
import numpy

def random_snorm(n, mean = 0, sd = 1, xi = 1.5):
    def random_snorm_aux(n, xi):
        weight = xi/(xi + 1/xi)
        z = numpy.random.uniform(-weight,1-weight,n)
        xi_ = xi**numpy.sign(z)
        random = -numpy.absolute(numpy.random.normal(0,1,n))/xi_ * numpy.sign(z)
        m1 = 2/numpy.sqrt(2 * numpy.pi)
        mu = m1 * (xi - 1/xi)
        sigma = numpy.sqrt((1 - m1**2) * (xi**2 + 1/xi**2) + 2 * m1**2 - 1)
        return (random - mu)/sigma
    return random_snorm_aux(n, xi) * sd + mean
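A quick usage sketch (the parameter values are illustrative):

samples = random_snorm(1000, mean=0.0, sd=1.0, xi=1.5)
print(len(samples), samples.mean(), samples.std())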

Python -- Matplotlib for elliptic curve with sympy solve()

I have an elliptic curve plotted. I want to draw a line through P, Q and R (where P and Q will be determined independently of this question). The main problem with P is that sympy solve() returns another equation, when it instead needs to return a value so it can be used to plot the x-value for P. As I understood it, solve() should return a value, so I'm clearly doing something wrong here that I'm just totally not seeing. For reference, here's how P+Q=R should look:
I've been going over the docs and other material and this is as far as I've been able to get myself into trouble:
from mpl_toolkits.axes_grid.axislines import SubplotZero
from pylab import *
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.path import Path
import matplotlib.patches as patches
from matplotlib import rc
import random
from sympy.solvers import solve
from sympy import *

def plotGraph():
    fig = plt.figure(1)
    #ax = SubplotZero(fig, 111)
    #fig.add_subplot(ax)
    #for direction in ["xzero", "yzero"]:
        #ax.axis[direction].set_axisline_style("-|>")
        #ax.axis[direction].set_visible(True)
    #ax.axis([-10,10,-10,10])
    a = -2; b = 1
    y, x = np.ogrid[-10:10:100j, -10:10:100j]
    xlist = x.ravel(); ylist = y.ravel()
    elliptic_curve = pow(y, 2) - pow(x, 3) - x * a - b
    plt.contour(xlist, ylist, elliptic_curve, [0])
    #rand = random.uniform(-5,5)
    randmid = random.randint(30,70)
    #y = ylist[randmid]; x = xlist[randmid]
    xsym, ysym = symbols('x ylist[randmid]')
    x_result = solve(pow(ysym, 2) - pow(xsym, 3) - xsym * a - b, xsym) # 11/5/13 needs to return a value
    plt.plot([-1.5,5], [-1,8], color = "c", linewidth=1) # plot([x1,x2,x3,...],[y1,y2,y3,...])
    plt.plot([xlist[randmid],5], [ylist[randmid],8], color = "m", linewidth=1)
    #rc('text', usetex=True)
    text(-9,6,' size of xlist: %s \n size of ylist: %s \n x_coord: %s \n random_y: %s'
         %(len(xlist),len(ylist),x_result,ylist[randmid]),
         fontsize=10, color = 'blue',bbox=dict(facecolor='tan', alpha=0.5))
    plt.annotate('$P+Q=R$', xy=(2, 1), xytext=(3, 1.5),arrowprops=dict(facecolor='black', shrink=0.05))
##    verts = [(-5, -10),(5, 10)] # [(x,y)startpoint,(x,y)endpoint] #,(0, 0)]
##    codes = [Path.MOVETO,Path.LINETO] # related to verts[] #,Path.STOP]
##    path = Path(verts, codes)
##    patch = patches.PathPatch(path, facecolor='none', lw=2)
##    ax.add_patch(patch)
    plt.grid(True)
    plt.show()

def main():
    plotGraph()

if __name__ == '__main__':
    main()
Ultimately, I'd like to draw a line to show P+Q=R, so if someone also has something to add on how to get the Q in code, that would be greatly appreciated. I'm teaching myself about Python and elliptic curves, so I'm sure any entry-level programmer can figure out in 2 minutes what I've been stuck on for some time already.
I don't know what you are calculating, but here is code that can plot the graph:
import numpy as np
import pylab as pl

Y, X = np.mgrid[-10:10:100j, -10:10:100j]

def f(x):
    return x**3 - 3*x + 5

px = -2.0
py = -np.sqrt(f(px))
qx = 0.5
qy = np.sqrt(f(qx))

k = (qy - py)/(qx - px)
b = -px*k + py

poly = np.poly1d([-1, k**2, 2*k*b+3, b**2-5])
x = np.roots(poly)
y = np.sqrt(f(x))

pl.contour(X, Y, Y**2 - f(X), levels=[0])
pl.plot(x, y, "o")
pl.plot(x, -y, "o")

x = np.linspace(-5, 5)
pl.plot(x, k*x+b)
(resulting graph omitted)
Based on HYRY's answer, I just updated some details to make it better:
import numpy as np
import pylab as pl

Y, X = np.mgrid[-10:10:100j, -10:10:100j]

def f(x, a, b):
    return x**3 + a*x + b

a = -2
b = 4

# the 1st point: 0, -2
x1 = 0
y1 = -np.sqrt(f(x1, a, b))
print(x1, y1)

# the second point
x2 = 3
y2 = np.sqrt(f(x2, a, b))
print(x2, y2)

# line: y = kl*x + bl
kl = (y2 - y1)/(x2 - x1)
bl = -x1*kl + y1 # bl = -x2*kl + y2

# y^2 = x^3 + a*x + b and y = kl*x + bl  =>  coefficients [-1, kl^2, 2*kl*bl - a, bl^2 - b]
poly = np.poly1d([-1, kl**2, 2*kl*bl-a, bl**2-b])

# the roots of the poly
x = np.roots(poly)
y = np.sqrt(f(x, a, b))
print(x, y)

pl.contour(X, Y, Y**2 - f(X, a, b), levels=[0])
pl.plot(x, y, "o")
pl.plot(x, -y, "o")

x = np.linspace(-5, 5)
pl.plot(x, kl*x+bl)
And we got the roots of this poly:
[3. 2.44444444 0. ] [5. 3.7037037 2. ]
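If the goal is the actual group-law sum R = P + Q, one more step is needed (a sketch of mine, continuing the code above): the third intersection of the line with the curve is reflected across the x-axis.

# P = (x1, y1), Q = (x2, y2); the remaining root of poly is the x-coordinate of -R
roots = np.roots(poly)
x3 = [r for r in roots if not np.isclose(r, x1) and not np.isclose(r, x2)][0]
y3 = kl*x3 + bl            # third intersection of the line with the curve
pl.plot([x3], [-y3], "s")  # R = (x3, -y3): reflect across the x-axis
print(x3, -y3)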