I am trying to fit my curve. My raw data is in an xlsx file, which I extract using pandas. I want to do two different fits because there is a change in behavior at Ra = 1e6. We know that Ra is proportional to Nu**a, with a = 0.25 for Ra < 1e6 and a = 0.33 otherwise.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from lmfit import Model

data = pd.read_excel('data.xlsx', sheet_name='Sheet2', dtype={'Ra': float})
print(data)

plt.xscale('log')
plt.yscale('log')
plt.scatter(data['Ra'].values, data['Nu_top'].values, label='Nu_top')
plt.scatter(data['Ra'].values, data['Nu_bottom'].values, label='Nu_bottom')
plt.errorbar(data['Ra'].values, data['Nu_top'].values, yerr=data['Ecart type top'].values, linestyle="None")
plt.errorbar(data['Ra'].values, data['Nu_bottom'].values, yerr=data['Ecart type bot'].values, linestyle="None")

def func(x, a):
    return 10**(np.log10(x)/a)

"""maxX = max(data['Ra'].values)
minX = min(data['Ra'].values)
maxY = max(data['Nu_top'].values)
minY = min(data['Nu_top'].values)
maxXY = max(maxX, maxY)
parameterBounds = [-maxXY, maxXY]"""

mod = Model(func)
params = mod.make_params(a=0.25)
ret = mod.fit(data['Nu_top'].head(10).values, params, x=data['Ra'].head(10).values)
print(ret.fit_report())

popt, pcov = curve_fit(func, data['Ra'].head(10).values,
                       data['Nu_top'].head(10).values,
                       sigma=data['Ecart type top'].head(10).values,
                       absolute_sigma=True, p0=[0.25])
plt.plot(data['Ra'].head(10).values, func(data['Ra'].head(10).values, *popt),
         'r-', label='fit: a=%5.3f' % tuple(popt))

popt, pcov = curve_fit(func, data['Ra'].tail(4).values, data['Nu_top'].tail(4).values,
                       sigma=data['Ecart type top'].tail(4).values,
                       absolute_sigma=True, p0=[0.33])
plt.plot(data['Ra'].tail(4).values, func(data['Ra'].tail(4).values, *popt),
         'b-', label='fit: a=%5.3f' % tuple(popt))
print(pcov)

plt.grid()
plt.title("Nusselt as a function of Ra")
plt.xlabel('Ra')
plt.ylabel('Nu')
plt.legend()
plt.show()
So I use the log relation log10(Ra) = a * log10(Nu), with Ra on the x axis and Nu on the y axis. That's why I defined my function func this way.
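As a quick sanity check, func is algebraically Nu = Ra**(1/a), i.e. a straight line of slope 1/a in log-log space; verifying the identity with made-up Ra values:

import numpy as np

x = np.array([1e4, 1e5, 1e6])  # hypothetical Ra values
a = 0.25
print(np.allclose(10**(np.log10(x) / a), x**(1 / a)))  # True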
As you can see, my two fits are not right. I get a covariance equal to [0.00010971], so I must have done something wrong, but I don't see it. I need help, please.
Here is the data file:
data.xlsx
I noticed that the Ra values are large, so I scaled them and then performed an equation search; here is my result, with code. I use scipy's genetic algorithm module, differential_evolution, to determine initial parameter values for curve_fit(). That module uses the Latin Hypercube algorithm to ensure a thorough search of parameter space, which requires bounds within which to search; it is much easier to give ranges for the initial parameter estimates than to find specific values. This equation works well for both Nu_top and Nu_bottom. Note that the plots are not log-scaled, as that is unnecessary in this example.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import differential_evolution
import pandas
import warnings

filename = 'data.xlsx'
data = pandas.read_excel(filename, sheet_name='Sheet2', dtype={'Ra': float})

# notice the Ra scaling by 10000.0
xData = data['Ra'].values / 10000.0
yData = data['Nu_bottom']

def func(x, a, b, c):  # "Combined Power And Exponential" from zunzun.com
    return a * numpy.power(x, b) * numpy.exp(c * x)

# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
    warnings.filterwarnings("ignore")  # do not print warnings by genetic algorithm
    val = func(xData, *parameterTuple)
    return numpy.sum((yData - val) ** 2.0)

def generate_Initial_Parameters():
    # data min and max (not actually needed for the fixed bounds used below)
    maxX = max(xData)
    minX = min(xData)
    maxY = max(yData)
    minY = min(yData)

    parameterBounds = []
    parameterBounds.append([0.0, 10.0])  # search bounds for a
    parameterBounds.append([0.0, 10.0])  # search bounds for b
    parameterBounds.append([0.0, 10.0])  # search bounds for c

    # "seed" the random number generator for repeatable results
    result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
    return result.x

# by default, differential_evolution polishes its result with a bounded local
# minimizer (L-BFGS-B) before returning
geneticParameters = generate_Initial_Parameters()

# now call curve_fit without passing bounds, using the genetic algorithm result
# as the starting point, in case the best-fit parameters lie outside those bounds
fittedParameters, pcov = curve_fit(func, xData, yData, geneticParameters)
print('Fitted parameters:', fittedParameters)
print()

modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData

SE = numpy.square(absError)  # squared errors
MSE = numpy.mean(SE)  # mean squared errors
RMSE = numpy.sqrt(MSE)  # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))

print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()

##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    f = plt.figure(figsize=(graphWidth / 100.0, graphHeight / 100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')

    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_xlabel('X Data')  # X axis data label
    axes.set_ylabel('Y Data')  # Y axis data label

    plt.show()
    plt.close('all')  # clean up after using pyplot

graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
Here I put my x and y data through log10(). The graph is in log scale, so normally I should get two affine functions with slopes of 0.25 and 0.33. I changed the function func in your program, James, and the bounds for b and c, but I get no good result.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from lmfit import Model

data = pd.read_excel('data.xlsx', sheet_name='Sheet2', dtype={'Ra': float})
print(data)

# note: the data below are already log10-transformed, and the axes are
# additionally log-scaled on top of that
plt.xscale('log')
plt.yscale('log')
plt.scatter(np.log10(data['Ra'].values), np.log10(data['Nu_top'].values), label='Nu_top')
plt.scatter(np.log10(data['Ra'].values), np.log10(data['Nu_bottom'].values), label='Nu_bottom')
plt.errorbar(np.log10(data['Ra'].values), np.log10(data['Nu_top'].values), yerr=data['Ecart type top'].values, linestyle="None")
plt.errorbar(np.log10(data['Ra'].values), np.log10(data['Nu_bottom'].values), yerr=data['Ecart type bot'].values, linestyle="None")

def func(x, a):
    return a * x

maxX = max(data['Ra'].values)
minX = min(data['Ra'].values)
maxY = max(data['Nu_top'].values)
minY = min(data['Nu_top'].values)
maxXY = max(maxX, maxY)
parameterBounds = [-maxXY, maxXY]

mod = Model(func)
params = mod.make_params(a=0.25)
ret = mod.fit(np.log10(data['Nu_top'].head(10).values), params, x=np.log10(data['Ra'].head(10).values))
print(ret.fit_report())

popt, pcov = curve_fit(func, np.log10(data['Ra'].head(10).values), np.log10(data['Nu_top'].head(10).values),
                       sigma=data['Ecart type top'].head(10).values, absolute_sigma=True, p0=[0.25])
plt.plot(np.log10(data['Ra'].head(10).values), func(np.log10(data['Ra'].head(10).values), *popt),
         'r-', label='fit: a=%5.3f' % tuple(popt))

popt, pcov = curve_fit(func, np.log10(data['Ra'].tail(4).values), np.log10(data['Nu_top'].tail(4).values),
                       sigma=data['Ecart type top'].tail(4).values, absolute_sigma=True, p0=[0.33])
plt.plot(np.log10(data['Ra'].tail(4).values), func(np.log10(data['Ra'].tail(4).values), *popt),
         'b-', label='fit: a=%5.3f' % tuple(popt))
print(pcov)

plt.grid()
plt.title("Nusselt as a function of Ra")
plt.xlabel('log10(Ra)')
plt.ylabel('log10(Nu)')
plt.legend()
plt.show()
With polyfit I get better results.
With my code I open the file, compute log10(Ra) and log10(Nu), then plot (log10(Ra), log10(Nu)) in log scale.
I am supposed to get a = 0.25 for Ra < 1e6 and a = 0.33 otherwise.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import numpy.polynomial.polynomial as poly

data = pd.read_excel('data.xlsx', sheet_name='Sheet2', dtype={'Ra': float})
print(data)

x = np.log10(data['Ra'].values)
y1 = np.log10(data['Nu_top'].values)
y2 = np.log10(data['Nu_bottom'].values)
x2 = np.log10(data['Ra'].head(11).values)
y4 = np.log10(data['Nu_top'].head(11).values)
x3 = np.log10(data['Ra'].tail(4).values)
y5 = np.log10(data['Nu_top'].tail(4).values)

plt.xscale('log')
plt.yscale('log')
plt.scatter(x, y1, label='Nu_top')
plt.scatter(x, y2, label='Nu_bottom')
plt.errorbar(x, y1, yerr=data['Ecart type top'].values, linestyle="None")
plt.errorbar(x, y2, yerr=data['Ecart type bot'].values, linestyle="None")

"""a = np.ones(10, dtype=np.float)
weights = np.insert(a, 0, 1E10)"""

coefs = poly.polyfit(x2, y4, 1)
print(coefs)
ffit = poly.polyval(x2, coefs)
plt.plot(x2, ffit, label='fit: b=%5.3f, a=%5.3f' % tuple(coefs))

absError = ffit - y4  # residuals against the fitted y values, not the x values
SE = np.square(absError)  # squared errors
MSE = np.mean(SE)  # mean squared errors
RMSE = np.sqrt(MSE)  # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (np.var(absError) / np.var(y4))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
print('Fitted value at the first data point:', ffit[0])
print()

coefs = poly.polyfit(x3, y5, 1)
ffit = poly.polyval(x3, coefs)
plt.plot(x3, ffit, label='fit: b=%5.3f, a=%5.3f' % tuple(coefs))

plt.grid()
plt.title("Nusselt as a function of Ra")
plt.xlabel('log10(Ra)')
plt.ylabel('log10(Nu)')
plt.legend()
plt.show()
My problem is solved: I managed to fit my curves with more or less correct results.
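For completeness, here is a compact sketch of the same split fit that selects the two regimes by the Ra = 1e6 threshold instead of head()/tail() counts; it assumes the same 'data.xlsx' layout as above:

import numpy as np
import pandas as pd
import numpy.polynomial.polynomial as poly

data = pd.read_excel('data.xlsx', sheet_name='Sheet2', dtype={'Ra': float})
logRa = np.log10(data['Ra'].values)
logNu = np.log10(data['Nu_top'].values)
low = data['Ra'].values < 1e6  # regime where a = 0.25 is expected
for mask, label in [(low, 'Ra < 1e6'), (~low, 'Ra >= 1e6')]:
    b, a = poly.polyfit(logRa[mask], logNu[mask], 1)  # log10(Nu) = b + a*log10(Ra)
    print('%s: a = %.3f, b = %.3f' % (label, a, b))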
I have some x and y data with which I would like to generate a 3D histogram, with a color gradient (bwr or whatever).
I have written a script which plots the interesting values, between -2 and 2 on both the x and y axes:
import numpy as np
import matplotlib.pyplot as plt

# To generate some test data
x = np.random.randn(500)
y = np.random.randn(500)
XY = np.stack((x, y), axis=-1)

def selection(XY, limitXY=[[-2, +2], [-2, +2]]):
    XY_select = []
    for elt in XY:
        if elt[0] > limitXY[0][0] and elt[0] < limitXY[0][1] and elt[1] > limitXY[1][0] and elt[1] < limitXY[1][1]:
            XY_select.append(elt)
    return np.array(XY_select)

XY_select = selection(XY, limitXY=[[-2, +2], [-2, +2]])

heatmap, xedges, yedges = np.histogram2d(XY_select[:, 0], XY_select[:, 1], bins=7, range=[[-2, 2], [-2, 2]])
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]

plt.figure("Histogram")
#plt.clf()
plt.imshow(heatmap.T, extent=extent, origin='lower')
plt.show()
And it gives this correct result:
Now I would like to turn this into a 3D histogram. Unfortunately I don't manage to plot it correctly with bar3d, because by default it takes the lengths of x and y for the abscissa.
I am quite sure there is a very easy way to plot this in 3D with imshow, like an unknown option...
I finally succeeded in doing it. I am almost sure there is a better way to do it, but at least it works:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm  # needed for cm.get_cmap below
from mpl_toolkits.mplot3d import Axes3D  # enables the '3d' projection

# To generate some test data
x = np.random.randn(500)
y = np.random.randn(500)
XY = np.stack((x, y), axis=-1)

def selection(XY, limitXY=[[-2, +2], [-2, +2]]):
    XY_select = []
    for elt in XY:
        if elt[0] > limitXY[0][0] and elt[0] < limitXY[0][1] and elt[1] > limitXY[1][0] and elt[1] < limitXY[1][1]:
            XY_select.append(elt)
    return np.array(XY_select)

XY_select = selection(XY, limitXY=[[-2, +2], [-2, +2]])

xAmplitudes = np.array(XY_select)[:, 0]  # your data here
yAmplitudes = np.array(XY_select)[:, 1]  # your other data here

fig = plt.figure()  # create a canvas, tell matplotlib it's 3d
ax = fig.add_subplot(111, projection='3d')

hist, xedges, yedges = np.histogram2d(x, y, bins=(7, 7), range=[[-2, +2], [-2, +2]])  # you can change your bins, and the range on which to take data
# hist is a 7x7 matrix, with the populations for each of the subspace parts.
xpos, ypos = np.meshgrid(xedges[:-1] + xedges[1:], yedges[:-1] + yedges[1:]) - (xedges[1] - xedges[0])
xpos = xpos.flatten() * 1. / 2
ypos = ypos.flatten() * 1. / 2
zpos = np.zeros_like(xpos)

dx = xedges[1] - xedges[0]
dy = yedges[1] - yedges[0]
dz = hist.flatten()

cmap = cm.get_cmap('jet')  # Get desired colormap - you can change this!
max_height = np.max(dz)  # get range of colorbars so we can normalize
min_height = np.min(dz)
# scale each z to [0,1], and get their rgb values
rgba = [cmap((k - min_height) / max_height) for k in dz]

ax.bar3d(xpos, ypos, zpos, dx, dy, dz, color=rgba, zsort='average')
plt.title("X vs. Y Amplitudes for ____ Data")
plt.xlabel("My X data source")
plt.ylabel("My Y data source")
plt.savefig("Your_title_goes_here")
plt.show()
I used this example, but modified it, because it introduced an offset. The result is this:
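For clarity, the offset correction in the code above amounts to anchoring each bar at its bin center minus half the bar width; an equivalent spelling using the xedges and dx defined there:

centers_x = (xedges[:-1] + xedges[1:]) / 2.0  # bin centers
left_x = centers_x - dx / 2.0  # same anchors as the xpos values computed above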
You can generate the same result using something as simple as the following:
import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(-2, 2, 7)
y = np.linspace(-2, 2, 7)
xx, yy = np.meshgrid(x, y)
z = xx * 0 + yy * 0 + np.random.random(size=[7, 7])

plt.imshow(z, interpolation='nearest', cmap=plt.cm.viridis, extent=[-2, 2, -2, 2])
plt.show()

from mpl_toolkits.mplot3d import Axes3D
ax = Axes3D(plt.figure())
ax.plot_surface(xx, yy, z, cmap=plt.cm.viridis, cstride=1, rstride=1)
plt.show()
The results are given below:
I'm trying to implement a simple Bayesian inference using an ODE model. I want to use the NUTS algorithm to sample, but it gives me an initialization error. I don't know much about PyMC3, as I'm new to it. Please take a look and tell me what is wrong.
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint
import seaborn
import pymc3 as pm
import theano.tensor as T
from theano.compile.ops import as_op

# Actual solution of the differential equation (used to generate data)
def actual(a, b, x):
    Y = np.exp(-b*x)*(a*np.exp(b*x)*(b*x-1)+a+b**2)/b**2
    return Y

# Method for solving the ODE
def lv(xdata, a=5.0, b=0.2):
    def dy_dx(y, x):
        return a*x - b*y
    y0 = 1.0
    Y, info = odeint(dy_dx, y0, xdata, full_output=True)
    return Y

# Generating data for Bayesian inference
a0, b0 = 5, 0.2
xdata = np.linspace(0, 21, 100)
ydata = actual(a0, b0, xdata)

# Adding some error to the ydata points
yerror = 10 * np.random.rand(len(xdata))
ydata += np.random.normal(0.0, np.sqrt(yerror))
ydata = np.ravel(ydata)

@as_op(itypes=[T.dscalar, T.dscalar], otypes=[T.dvector])
def func(al, be):
    Q = lv(xdata, a=al, b=be)
    return np.ravel(Q)

# Number of samples and initial conditions
nsample = 5000
y0 = 1.0

# Model for Bayesian inference
model = pm.Model()
with model:
    # Priors for unknown model parameters
    alpha = pm.Uniform('alpha', lower=a0/2, upper=a0+a0/2)
    beta = pm.Uniform('beta', lower=b0/2, upper=b0+b0/2)

    # Expected value of outcome
    mu = func(alpha, beta)

    # Likelihood (sampling distribution) of observations
    Y_obs = pm.Normal('Y_obs', mu=mu, sd=yerror, observed=ydata)

    trace = pm.sample(nsample, nchains=1)

pm.traceplot(trace)
plt.show()
The error that I get is
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Initializing NUTS failed. Falling back to elementwise auto-assignment.
Any help would be really appreciated
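One note on the error itself: an as_op-wrapped function exposes no gradient, so the gradient-based NUTS sampler cannot initialize on it, and PyMC3 falls back to elementwise auto-assignment exactly as the log shows. A gradient-free sampler can be requested explicitly instead; a sketch of that (my suggestion, not a confirmed fix), reusing the model and nsample defined above:

with model:
    step = pm.Metropolis()  # Metropolis does not need gradients, unlike NUTS
    trace = pm.sample(nsample, step=step, nchains=1)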
I am using odeint in Python to solve something (the Friedmann equation for a matter-only universe) and it gives me the values of a that I want. However, how do I get it to return/plot (da/dt)/a? I.e., how can I divide the values of the derivative function by the corresponding values of the solution?
This is my attempted code (ignore the earlier bits, i.e. the figure 1 plot; it's the part with H I'm concerned about):
import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
from scipy.integrate import odeint

t_0 = 0.0004
a_0 = 0.001
omega_m = 1.0  # for EdS
H_0 = 1. / 13.7

# the function for an EdS universe
def Friedmann(a, t):
    dadt = H_0 * (omega_m)**(1./2.) * a**(-1./2.)
    return dadt

t = np.linspace(t_0, 13.7, 101)
a = odeint(Friedmann, a_0, t)
a = np.array(a).flatten()

plt.figure(1)
plt.subplot(211)
plt.plot(t, a)
plt.title("Einstein-de Sitter Universe")
plt.xlabel("t")
plt.ylabel("a")

# comparing to analytic solution
an = (((3. / 2.) * (H_0 * omega_m**(1./2.)) * (t - t_0)) + a_0**(3. / 2.))**(2. / 3.)
an = np.array(an).flatten()

plt.figure(1)
plt.subplot(212)
plt.plot(t, a, t, an, "+")

H = [x/y for x, y in zip(Friedmann(a, t), a)]

plt.figure(2)
plt.plot(t, H)
plt.show()
Any help is much appreciated.
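For what it's worth, Friedmann already works elementwise on NumPy arrays, so the list comprehension above can be written as a single vectorized division; a sketch reusing the a and t from the script:

H = Friedmann(a, t) / a  # elementwise (da/dt)/a, same values as the zip version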
I'm trying to fit a curve with scipy.optimize.curve_fit and it works pretty well so far, except in the case where a value in my sigma array is zero. I understand that the algorithm can't handle this, as I divide by zero in that case. From the scipy documentation:
sigma : None or M-length sequence, optional
    If not None, the uncertainties in the ydata array. These are used as weights in the least-squares problem, i.e. minimising np.sum(((f(xdata, *popt) - ydata) / sigma)**2). If None, the uncertainties are assumed to be 1.
Here's what my code looks like:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

x = [0.125, 0.375, 0.625, 0.875, 1.125, 1.375, 1.625, 1.875, 2.125, 2.375, 2.625, 2.875, 3.125, 3.375, 3.625, 3.875, 4.125, 4.375]
y_para = [0, 0, 0.0414, 0.2164, 0.2616, 0.4254, 0.5698, 0.5921, 0.6286, 0.6452, 0.5879, 0.6032, 0.6667, 0.6325, 0.7629, 0.7164, 0.7091, 0.7887]
err = [0, 0, 0.0391, 0.0331, 0.0943, 0.0631, 0.1219, 0.1063, 0.0912, 0.0516, 0.0365, 0.0327, 0.0227, 0.103, 0.1344, 0.0697, 0.0114, 0.0465]

def logistic_growth(x, A1, A2, x_0, p):
    return A2 + (A1 - A2) / (1 + (x / x_0)**p)

x_plot = np.linspace(0, 4.5, 100)
bounds_para = ([0., 0, -np.inf, -np.inf], [0.0000000001, 1, np.inf, np.inf])

paras, paras_cov = curve_fit(logistic_growth, x, y_para, bounds=bounds_para, sigma=err, absolute_sigma=True)
para_curve = logistic_growth(x_plot, *paras)

plt.figure()
plt.errorbar(x, y_para, err, color='b', fmt='o', label="Data")
plt.plot(x_plot, para_curve, color='b', label="Fit")
plt.show()
Executing this without the sigma option in curve_fit works fine, but including it raises:
ValueError: Residuals are not finite in the initial point.
which results from the zeros in the err array.
Does anyone know a way to work around this?
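(To see why the zeros are fatal: each residual is divided by its sigma, so a zero sigma produces an infinite weighted residual, and the underlying least-squares routine rejects non-finite residuals at the initial point. A two-line illustration:)

import numpy as np
with np.errstate(divide='ignore'):
    print((np.array([0.0414]) - np.array([0.05])) / np.array([0.0]))  # [-inf]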
Why not just drop the variable? If it has zero variance it cannot contribute in any meaningful way to your analysis.
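A minimal sketch of that suggestion: mask out the zero-uncertainty points before fitting, reusing x, y_para, err, bounds_para, and logistic_growth from the question:

import numpy as np
from scipy.optimize import curve_fit

x_arr, y_arr, err_arr = np.array(x), np.array(y_para), np.array(err)
keep = err_arr > 0  # drop the points whose reported uncertainty is zero
paras, paras_cov = curve_fit(logistic_growth, x_arr[keep], y_arr[keep],
                             bounds=bounds_para, sigma=err_arr[keep],
                             absolute_sigma=True)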
This is what the scipy doc says about the curve_fit sigma parameter: 'These are used as weights in the least-squares problem ...' So, in my opinion, they should be the inverse of the errors. Here is what I suggest:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

x = [0.125, 0.375, 0.625, 0.875, 1.125, 1.375, 1.625, 1.875, 2.125, 2.375, 2.625, 2.875, 3.125, 3.375, 3.625, 3.875, 4.125, 4.375]
y_para = [0, 0, 0.0414, 0.2164, 0.2616, 0.4254, 0.5698, 0.5921, 0.6286, 0.6452, 0.5879, 0.6032, 0.6667, 0.6325, 0.7629, 0.7164, 0.7091, 0.7887]
err = [0, 0, 0.0391, 0.0331, 0.0943, 0.0631, 0.1219, 0.1063, 0.0912, 0.0516, 0.0365, 0.0327, 0.0227, 0.103, 0.1344, 0.0697, 0.0114, 0.0465]

# clip each error to a small positive floor before inverting
weights = [1 / max(e, 0.001) for e in err]
print(weights)

def logistic_growth(x, A1, A2, x_0, p):
    return A2 + (A1 - A2) / (1 + (x / x_0)**p)

x_plot = np.linspace(0, 4.5, 100)
bounds_para = ([0., 0, -np.inf, -np.inf], [0.0000000001, 1, np.inf, np.inf])

paras, paras_cov = curve_fit(logistic_growth, x, y_para, bounds=bounds_para,
                             absolute_sigma=True,
                             sigma=weights)
para_curve = logistic_growth(x_plot, *paras)

plt.figure()
plt.errorbar(x, y_para, err, color='b', fmt='o', label="Data")
plt.plot(x_plot, para_curve, color='b', label="Fit")
plt.show()
This results in the following plot, where those initial data points are made to lie very close to the fitted line.
I have the following code to create a 3D plot of a Bivariate Gaussian Distribution:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

class Data(object):
    data = None
    columns = 0
    rows = 0

    def __init__(self, path='file.txt'):
        self.data = np.loadtxt(path, delimiter=' ', dtype='float32')
        self.rows, self.columns = self.data.shape

    def _pdf(self, x, mu, cov):
        part1 = 1 / (((2 * np.pi)**(len(mu) / 2)) * (np.linalg.det(cov)**(1 / 2)))
        part2 = (-1 / 2) * ((x - mu).T.dot(np.linalg.inv(cov))).dot((x - mu))
        return float(part1 * np.exp(part2))

    def compute_Z(self):
        mu = np.array([[2.99413181], [3.05209659]], dtype="float")
        cov = np.array([[1.01023423, 0.02719138], [0.02719138, 2.93782296]], dtype="float")
        Z = []
        for i, j in zip(X, Y):
            x = np.array([i, j]).reshape(2, 1)
            Z.append(self._pdf(x, mu, cov))
        return np.array(Z)

if __name__ == "__main__":
    data = Data()
    X = data.data[:, 0]
    Y = data.data[:, 1]
    Z = data.compute_Z()
    X, Y = np.meshgrid(X, Y)

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(X, Y, Z, rstride=1, cstride=1, color='0.9', alpha=0.9, linewidth=1)
    plt.show()
But this is taking a lot of time and also using a lot of RAM. Is there some way to reduce them? Or is there a better method to create this plot?
Thanks!
matplotlib's 3D plotting isn't very good for large amounts of data.
You can use mayavi, which has a very similar interface through mlab (X, Y, and Z below are the same arrays as in the question):
from mayavi import mlab
mlab.figure()
mlab.surf(X, Y, Z)
mlab.show()