I'm very new to curve/peak fitting, but I am trying to fit a data set with multiple separate independent peaks. I've tried something similar to the example provided by lmfit, and here's my code:
import matplotlib.pyplot as plt
from lmfit.models import GaussianModel
from numpy import loadtxt
# Fit the sum of three Gaussian peaks to two-column data with lmfit.
# Column 0 of the file is used as x, column 1 as y.
data = loadtxt('079-55.freq')
x = data[:, 0]
y = data[:, 1]
# First peak: parameters are created with the 'g1_' prefix, then the centre
# is bounded to [2000, 4500] and the amplitude is kept at or above 10.
gauss1 = GaussianModel(prefix='g1_')
pars = gauss1.make_params()
pars['g1_center'].set(4100, min=2000, max=4500)
pars['g1_amplitude'].set(170, min=10)
# Second peak: its 'g2_' parameters are merged into the same parameter set.
gauss2 = GaussianModel(prefix='g2_')
pars.update(gauss2.make_params())
pars['g2_center'].set(4900, min=4500, max=5500)
pars['g2_amplitude'].set(30, min=10)
# Third peak.
gauss3 = GaussianModel(prefix='g3_')
pars.update(gauss3.make_params())
# NOTE(review): the initial value 600 lies below min=5500, i.e. outside the
# bounds given on the same line -- presumably a typo for a value inside
# [5500, 10000]; confirm against the data.
pars['g3_center'].set(600, min=5500, max=10000)
pars['g3_amplitude'].set(13, min=10)
# Composite model: the three Gaussians are summed.
mod = gauss1 + gauss2 + gauss3
# Evaluate the model at the initial parameter values and plot it (dashed black).
init = mod.eval(pars, x=x)
plt.plot(x, init, 'k--')
# Run the fit, print the report, then plot the best fit (red) over the data.
out = mod.fit(y, pars, x=x)
print(out.fit_report())
plt.plot(x, out.best_fit, 'r-')
plt.plot(x, y)
plt.show()
However, the result becomes something like this:
I am very confused as to how to proceed to fit three separate peaks as shown below. I think the parameter update is for fitting multiple models to the same data set, not for separate independent peaks. I could be wrong though. Are there any suggestions?
pars['g3_center'].set(600, min=5500, max=10000)
This probably confuses the parameter or model class, as the initial value 600 is not within the bounds given by min and max.
Related
Hi I need to speed up this code
import numpy as np
def func(matrix, x, y):
    """Return ``matrix * x + y`` (scale by x, then shift by y)."""
    return matrix * x + y


# func is defined above the loop: the loop runs at module level, so the name
# must already exist when the first slice is processed.
matrix3d = np.empty([10, 10, 1000])
# Fill the whole array with one random integer drawn from [0, 10).
matrix3d[:] = np.random.randint(10)
matrix3d_1 = np.empty([10, 10, 1000])
x = 10
y = 1
# Serial baseline: transform one 10x10 z-slice at a time.
for z in range(matrix3d.shape[2]):
    matrix3d_1[:, :, z] = func(matrix3d[:, :, z], x, y)
I have tried using multiprocessing with Pool.map(), but it did not work.
from functools import partial
import multiprocessing as mp
# Attempt to run func over the array with a pool of two worker processes.
pool=mp.Pool(processes=2)
# NOTE(review): partial() fills positional parameters from the left, so with
# func(matrix, x, y) this binds matrix=x and x=y; the item supplied by map()
# ends up in the y slot.
args=partial(func,x,y)
matrix3d_2=np.empty([10,10,1000])
# NOTE(review): the array allocated above is replaced by whatever pool.map
# returns, and iterating matrix3d directly walks its first axis rather than
# the z-slices used by the serial loop.
matrix3d_2=pool.map(args,matrix3d)
pool.close()
If I compare the two matrices with matrix3d_1==matrix3d_2, the result is False.
How can this be fixed?
Parallel processing of a 3d matrix
The Python map function as well as the pool.map method can only take one input object. See for example https://stackoverflow.com/a/10973817/4045774
To reduce the inputs to a single one we can use, for example, functools.partial. The input that remains has to be in the last position.
from functools import partial
import numpy as np
import multiprocessing as mp
def main():
    """Apply ``func`` to every z-slice of a 3-D array using a process pool.

    Builds a (10, 10, 1000) array filled with one random integer, maps
    ``func(x, y, slice)`` over its 1000 z-slices in four worker processes
    and copies the results into a new array.

    Returns:
        The (10, 10, 1000) array holding the transformed slices.
    """
    matrix3d = np.empty([10, 10, 1000])
    matrix3d[:] = np.random.randint(10)
    matrix3d_1 = np.empty([10, 10, 1000])
    x = 10
    y = 1

    # func takes the slice as its LAST parameter, so partial() can pre-bind
    # x and y and leave the single argument that map() supplies.
    func_p = partial(func, x, y)
    slices = (matrix3d[:, :, z] for z in range(matrix3d.shape[2]))

    pool = mp.Pool(processes=4)
    try:
        # parallel map returns a list, one entry per slice, in input order
        res = pool.map(func_p, slices)
    finally:
        # Always release the worker processes, even if the map fails.
        pool.close()
        pool.join()

    # copy the data to array
    for i, plane in enumerate(res):
        matrix3d_1[:, :, i] = plane
    return matrix3d_1
def func(x, y, matrix):
    """Return ``matrix * x + y``.

    ``matrix`` is deliberately the last parameter so that
    ``functools.partial(func, x, y)`` yields a one-argument callable.
    """
    return matrix * x + y
Parallel version using numba
This version will scale well over all cores and is at least 200 times faster than simple multiprocessing shown above. I have modified the code you linked to a bit, to get rid of any other dependencies than numpy.
import numpy
from numba import njit, prange
def meanInterp(data, m, n):
    """Resample a 2-D array onto an (m, n) grid by overlap-area weighting.

    Source and target grids both tile the unit square. Each source cell
    contributes its value to every target cell it overlaps, weighted by the
    overlap area divided by the area of one target cell.

    Args:
        data: 2-D input array.
        m: number of rows in the result.
        n: number of columns in the result.

    Returns:
        A float32 array of shape (m, n).
    """
    newData = numpy.zeros((m, n), dtype=numpy.float32)
    mOrig, nOrig = data.shape

    # Cell edges of the source grid and of the target grid on [0, 1].
    hBoundariesOrig = numpy.linspace(0, 1, mOrig + 1)
    vBoundariesOrig = numpy.linspace(0, 1, nOrig + 1)
    hBoundaries = numpy.linspace(0, 1, m + 1)
    vBoundaries = numpy.linspace(0, 1, n + 1)

    for iOrig in range(mOrig):
        for jOrig in range(nOrig):
            for i in range(m):
                # Skip target rows that end before this source row starts;
                # stop once target rows start after it ends.
                if hBoundaries[i+1] <= hBoundariesOrig[iOrig]:
                    continue
                if hBoundaries[i] >= hBoundariesOrig[iOrig+1]:
                    break
                for j in range(n):
                    if vBoundaries[j+1] <= vBoundariesOrig[jOrig]:
                        continue
                    if vBoundaries[j] >= vBoundariesOrig[jOrig+1]:
                        break
                    # Overlap of two intervals = sum of their lengths minus
                    # the length of the span covering both. This stands in
                    # for the original overlap(boxCoords, origBoxCoords)
                    # helper; the author marked the equivalence as untested.
                    T_x = (hBoundaries[i], hBoundaries[i+1],
                           hBoundariesOrig[iOrig], hBoundariesOrig[iOrig+1])
                    T_y = (vBoundaries[j], vBoundaries[j+1],
                           vBoundariesOrig[jOrig], vBoundariesOrig[jOrig+1])
                    tx = (T_x[1]-T_x[0]+T_x[3]-T_x[2])-(max(T_x)-min(T_x))
                    ty = (T_y[1]-T_y[0]+T_y[3]-T_y[2])-(max(T_y)-min(T_y))
                    area = tx*ty
                    # hBoundaries[1] * vBoundaries[1] is the area of one
                    # target cell (the grid starts at 0).
                    newData[i][j] += area * data[iOrig][jOrig] / (hBoundaries[1] * vBoundaries[1])
    return newData


# The compiled wrapper has to be created after meanInterp exists, and before
# resample_3d is compiled, because resample_3d calls it.
nb_meanInterp = njit("float32[:,:](float32[:,:],int64,int64)")(meanInterp)


def resample_3d(matrix_3d, x, y):
    """Resample every z-slice of ``matrix_3d`` to (x, y) with nb_meanInterp.

    Returns a float32 array of shape (x, y, matrix_3d.shape[2]). The loop
    over z uses ``prange`` so the compiled version below can run the slices
    in parallel.
    """
    matrix3d_res = numpy.empty((x, y, matrix_3d.shape[2]), dtype=numpy.float32)
    for z in prange(0, matrix_3d.shape[2]):
        matrix3d_res[:, :, z] = nb_meanInterp(matrix_3d[:, :, z], x, y)
    return matrix3d_res


resample_3d_nb = njit("float32[:,:,:](float32[:,:,:],int64,int64)", parallel=True)(resample_3d)
I am trying to extract data from an Excel spreadsheet, then find the percent change between adjacent rows. The columns that I would like to manipulate are columns 1 and 4. I would then like to graph these percent changes in two different bar charts using subplots, with column 0 as the x axis.
I am able to do everything except extract the data and formulate a percent change between adjacent rows. The formula for the percent change is Current/previous-1 or (r,0)/(r-1,0)-1. Below is my current script:
import xlrd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as tkr
import matplotlib.dates as mdates
import datetime
from matplotlib import rc
# Read three columns from an .xls sheet and draw two stacked bar charts
# that share an x axis, then save the figure as a PNG and show it.
# Set the matplotlib rc parameter mathtext.default to 'regular'.
rc('mathtext', default='regular')
file_location = "/Users/adampatel/Desktop/psw01.xls"
workbook = xlrd.open_workbook(file_location, on_demand = False)
worksheet = workbook.sheet_by_name('Data 1')
# Columns 0, 1 and 4, from row index 1699 through the last row of the sheet.
x = [worksheet.cell_value(i+1699, 0) for i in range(worksheet.nrows-1699)]
y1 = [worksheet.cell_value(i+1699, 1) for i in range(worksheet.nrows-1699)]
y2 = [worksheet.cell_value(i+1699, 4) for i in range(worksheet.nrows-1699)]
fig = plt.figure()
# Two rows, one column; the lower axes shares its x axis with the upper one.
ax1 = fig.add_subplot(211)
ax2 = fig.add_subplot(212, sharex = ax1)
# Each x value is added, as a number of days, to 1899-12-30 -- presumably the
# epoch of Excel serial dates; confirm for this workbook.
start_date = datetime.date(1899, 12, 30)
dates=[start_date + datetime.timedelta(xval) for xval in x]
# Major ticks every second month, minor ticks every month, labels like Jan'15.
ax1.xaxis.set_major_locator(mdates.MonthLocator((), bymonthday=1, interval=2))
ax1.xaxis.set_minor_locator(mdates.MonthLocator((), bymonthday=1, interval=1))
ax1.xaxis.set_major_formatter(mdates.DateFormatter("%b'%y"))
# NOTE(review): the bars show the raw column values; the percent change
# between adjacent rows described in the question is not computed here.
ly1 = ax1.bar(dates, y1, 0.9)
ly2 = ax2.bar(dates, y2, 0.9)
ax1.grid()
ax2.grid()
ax1.set_ylim(-3,3)
ax2.set_ylim(-3,3)
# Figure-level labels: one centred below the axes, one rotated at the left.
fig.text(0.5, 0.04, 'Inventory Weekly Percent Change', ha='center', va='center', size = '14')
fig.text(0.06, 0.5, 'Weekly Percent Change', ha='center', va='center', size = '14', rotation='vertical')
ax1.set_title('Oil', size = '12')
ax2.set_title('Gasoline', size = '12')
plt.savefig('Gasoline Inventories Weekly Percent Change.png', bbox_inches='tight', dpi=300)
plt.show()
Given list of values:
y1 = [1000,1010,950,1050,1100,1030]
Pure python solution:
Use the zip function to create tuples of the numerator and denominator. Then use list comprehension to get a list of the percent changes.
# zip() stops at the shorter input, so every value y1[k+1] is paired with its
# predecessor y1[k]; the 1.0* factor forces float division for integer input.
pct_chg = [1.0*num / den - 1 for num, den in zip(y1[1:], y1)]
Numpy solution:
Convert list to numpy array, then perform computation using array slices.
# Build a float array so the division below is always a true division; with
# an integer array np.divide performs floor division under Python 2.
a1 = np.array(y1, dtype=float)
# current / previous - 1 for every adjacent pair.
pct_chg = a1[1:] / a1[:-1] - 1
Pandas package solution:
Convert list to Pandas series and use the built-in percent change function
# Requires `import pandas as pd`.
s1 = pd.Series(y1)
# Change of each element relative to the previous one (the first element has
# no predecessor, so its result is presumably NaN -- confirm).
pct_chg = s1.pct_change()
Now, pct_chg is a series too. You can get its values in a numpy array via pct_chg.values. Matplotlib should accept numpy arrays as containers in most cases.
I have the following code:
import matplotlib.pyplot as plt
horas = [1, 2, 3, 4]
diccionario = {
    (1, 1, 2, 1): [2, 3, 4, 5],
    (1, 2, 2, 2): [2, 5, 1, 5],
    (1, 3, 2, 3): [2, 5, 5, 5],
    (1, 4, 2, 4): [2, 6, 8, 5],
    (1, 5, 2, 5): [2, 7, 5, 5],
    (1, 6, 2, 6): [2, 8, 2, 5],
    (1, 7, 2, 7): [2, 9, 6, 5],
    (1, 8, 2, 8): [2, 4, 9, 5],
}
plt.figure()
i = 1
maximo = 0
# list() makes the keys indexable on Python 3 as well as Python 2.
keys = list(diccionario.keys())
# Walk the keys two at a time: one red and one green line per panel.
for n in range(0, len(keys) - 1, 2):
    # NOTE: kept as posted. The two lines are drawn BEFORE subplot() selects
    # panel i, so each pair lands on the axes that were current beforehand
    # and the last panel stays empty -- the behaviour asked about below.
    gn, = plt.plot(horas, diccionario[keys[n]], 'ro-')
    gn1, = plt.plot(horas, diccionario[keys[n+1]], 'g*-')
    plt.subplot(len(keys)//2, 1, i)
    plt.legend([gn, gn1], [keys[n], keys[n+1]])
    i += 1
plt.show()
I expect to have 4 subplots with two lines each. I have them, but the last one is empty.
Could anyone explain why? I have tried many different ways without succeeding.
Put your subplot() before you plot gn and gn1. That will solve your problem.
for n in range(0, len(keys) - 1, 2):
    # Select panel i first, so that both plot() calls below draw into it.
    plt.subplot(len(keys)//2, 1, i)
    gn, = plt.plot(horas, diccionario[keys[n]], 'ro-')
    gn1, = plt.plot(horas, diccionario[keys[n+1]], 'g*-')
    plt.legend([gn, gn1], [keys[n], keys[n+1]])
    i += 1
By the way, I recommend to use tuple instead of dict. You may notice that the sequence of results is quite different from what you want.
I have a pandas dataframe that resembles one generated as follows.
import numpy as np
import pandas as pd
# Example data: ten rows, four columns of standard-normal draws.
x0 = pd.DataFrame(np.random.normal(size=(10, 4)))
# Grouping column 'x' holding the values 1 to 4.
x1 = pd.DataFrame({'x': [1,1,2,3,2,3,4,1,2,3]})
# Join the two frames side by side (column-wise).
df = pd.concat((x0, x1), axis=1)
and a function:
def fun(df, n=100):
    """Combine columns 0-3 of ``df`` with ``n`` shared random draws.

    Draws ``z`` (``n`` standard-normal values) once and returns, for every
    row of ``df``, ``0.5*c0*z - c1*z + 0.3*c2*z + 1.2*c3*z`` where ``c0..c3``
    are the row's values in columns 0 to 3.

    Args:
        df: DataFrame with columns labelled 0, 1, 2 and 3.
        n: number of random draws.

    Returns:
        Array of shape (len(df), n).
    """
    z = np.random.normal(size=n)
    # (rows, 4) dot (4, n) -> (rows, n); every row uses the same draws z.
    return np.dot(df[[0, 1, 2, 3]], [0.5*z, -1*z, 0.3*z, 1.2*z])
I would like to:
use identical draws z for each unique value in x,
take the product of the output in the above step over items of unique x
Any suggestion?
Explanation:
Generate n=100 draws to get z such that len(z)=100
For each elem in z, evaluate the function fun,
For i in df.x.unique(), compute the product of the output in step (2) element-wise. I am expecting to get a DataFrame or array of dimension (len(df.x.unique()), n=100).
4.
It sounds like you want to group by 'x', taking one of its instances (let's assume we take the first one observed).
just call your function as follows:
f = fun(df.groupby('x').first())
>>> f.shape
Out[25]: (4, 100)
>>> len(df.x.unique())
Out[26]: 4
In a scatter plot matrix, I would like to draw a region in every subplot and print the points that are included in the region. I found the LassoSelector widget, which does exactly that. I am trying to extend its functionality for more than one subplots. I am getting the following error: self.xys = collection.get_offsets(),
AttributeError: 'numpy.flatiter' object has no attribute 'get_offsets'.
when the line selector = SelectFromCollection(axes, ax.flat) is in the for loop, and I am getting the error: self.canvas = ax.figure.canvas,AttributeError: 'numpy.ndarray' object has no attribute 'figure' when the line selector = SelectFromCollection(ax, ax.flat) is outside of the loop. Why does this happen?
Here is my code:
from __future__ import print_function
import numpy as np
from matplotlib.widgets import LassoSelector
from matplotlib.path import Path
class SelectFromCollection(object):
    """Select indices from a matplotlib collection using `LassoSelector`.

    Selected indices are saved in the `ind` attribute. This tool highlights
    selected points by fading them out (i.e., reducing their alpha values).
    If your collection has alpha < 1, this tool will permanently alter them.

    Note that this tool selects collection objects based on their *origins*
    (i.e., `offsets`).

    Parameters
    ----------
    ax : :class:`~matplotlib.axes.Axes`
        Axes to interact with.
    collection : :class:`matplotlib.collections.Collection` subclass
        Collection you want to select from.
    alpha_other : 0 <= float <= 1
        To highlight a selection, this tool sets all selected points to an
        alpha value of 1 and non-selected points to `alpha_other`.
    """

    def __init__(self, ax, collection, alpha_other=0.3):
        self.canvas = ax.figure.canvas
        self.collection = collection
        self.alpha_other = alpha_other

        self.xys = collection.get_offsets()
        self.Npts = len(self.xys)

        # Ensure that we have separate colors for each object
        self.fc = collection.get_facecolors()
        if len(self.fc) == 0:
            raise ValueError('Collection must have a facecolor')
        elif len(self.fc) == 1:
            # A single shared colour is repeated once per point.
            self.fc = np.tile(self.fc, self.Npts).reshape(self.Npts, -1)

        self.lasso = LassoSelector(ax, onselect=self.onselect)
        self.ind = []

    def onselect(self, verts):
        """Record the points inside the lasso path and fade out the others."""
        path = Path(verts)
        self.ind = np.nonzero([path.contains_point(xy) for xy in self.xys])[0]
        # The last colour component is alpha: dim everything, then restore
        # the selected points to fully opaque.
        self.fc[:, -1] = self.alpha_other
        self.fc[self.ind, -1] = 1
        self.collection.set_facecolors(self.fc)
        self.canvas.draw_idle()
        # NOTE(review): kept as posted. This reads the module-level name
        # `selector` instead of `self`, so it only works while exactly one
        # instance is bound to that name.
        print(selector.xys[selector.ind])
        #selector.disconnect()

    def disconnect(self):
        """Detach the lasso and restore every point to full opacity."""
        self.lasso.disconnect_events()
        self.fc[:, -1] = 1
        self.collection.set_facecolors(self.fc)
        self.canvas.draw_idle()
if __name__ == '__main__':
    import matplotlib.pyplot as plt

    plt.ion()
    data = np.loadtxt(r"data.txt")
    x = data[:, 3]
    x1 = data[:, 4]
    y = data[:, 5]
    y1 = data[:, 6]

    fig, ax = plt.subplots(nrows=2, ncols=2, squeeze=True)
    # NOTE: kept as posted. The loop variable `axes` is not used in the
    # body; each pass plots into all four subplots.
    for axes, marker in zip(ax.flat, ['o', 'o']):
        ax.flat[0].plot(x, y, 'r', ls='', marker=marker)
        ax.flat[1].plot(x, x1, 'r', ls='', marker=marker)
        ax.flat[2].plot(x, y1, 'r', ls='', marker=marker)
        ax.flat[3].plot(y, x1, 'r', ls='', marker=marker)

    # NOTE: kept as posted -- this is the call the question is about. `ax`
    # is the array of Axes returned by plt.subplots, and `ax.flat` is an
    # iterator over it rather than a Collection.
    selector = SelectFromCollection(ax, ax.flat)
    plt.show(block=True)
    plt.draw()
Ok, I found a few problems that are causing your code not to work properly. There we go:
First of all, you modified the SelectFromCollection class that you got from the LassoSelector example to print every selected point, but forgot a detail:
class SelectFromCollection(object):
    def __init__(self, ax, collection, alpha_other=0.3):
        # ...
        # No changes here...
        # ...
        pass  # placeholder so the excerpt parses; keep the original body

    def onselect(self, verts):
        path = Path(verts)
        self.ind = np.nonzero([path.contains_point(xy) for xy in self.xys])[0]
        self.fc[:, -1] = self.alpha_other
        self.fc[self.ind, -1] = 1
        self.collection.set_facecolors(self.fc)
        self.canvas.draw_idle()
        # Print this instance's own selection instead of reading the
        # module-level name `selector`.
        print(self.xys[self.ind])  # <- THIS LINE HAS CHANGED!!!
        #selector.disconnect()

    def disconnect(self):
        # ...
        # No changes here...
        # ...
        pass  # placeholder so the excerpt parses; keep the original body
Now you can use multiple instances of SelectFromCollection.
Then, you are also creating only one instance of SelectFromCollection (so only one subplot would react). Furthermore, according to the docstring, the second argument the __init__ method expects is a matplotlib.collections.Collection instance.
Instead you are passing it a numpy array (in fact a numpy.flatiter) that contains two Axes instances. If you look at the example, there it gets a Collection instance returned by the scatter command (they use scatter instead of plot).
All in all, and restyling the loop, this is my version:
if __name__ == '__main__':
    import matplotlib.pyplot as plt

    data = np.random.rand(3, 100)
    xdata = data[:-1]  # all rows but last
    y = data[-1]  # last row

    fig, axes = plt.subplots(nrows=1, ncols=2, squeeze=True)
    markers = ['o', '^']
    # One selector per subplot, each tied to the collection that scatter()
    # returned for that subplot.
    selectors = []
    for i in range(xdata.shape[0]):
        pts = axes[i].scatter(xdata[i], y, c='r', marker=markers[i])
        selectors.append(SelectFromCollection(axes[i], pts))
    plt.show()
EDIT
If you want to do more plots, it is not hard. You can try to write more synthetic code with a for loop and so on, but an easier solution is to write directly the repetitions of the code:
if __name__ == '__main__':
    import matplotlib.pyplot as plt

    data = np.loadtxt(r"data.txt")
    x = data[:, 3]
    x1 = data[:, 4]
    y = data[:, 5]
    y1 = data[:, 6]

    fig, axes = plt.subplots(nrows=2, ncols=2)

    # One scatter collection and one selector per subplot, written out
    # explicitly instead of in a loop.
    pts1 = axes[0, 0].scatter(x, y, c='r', marker='o')
    select1 = SelectFromCollection(axes[0, 0], pts1)

    pts2 = axes[1, 0].scatter(x, x1, c='r', marker='o')
    select2 = SelectFromCollection(axes[1, 0], pts2)

    pts3 = axes[0, 1].scatter(x, y1, c='r', marker='o')
    select3 = SelectFromCollection(axes[0, 1], pts3)

    pts4 = axes[1, 1].scatter(y, x1, c='r', marker='o')
    select4 = SelectFromCollection(axes[1, 1], pts4)

    plt.show()
Still, it is necessary that you change the definition of the SelectFromCollection class as I said above.