How to fit and plot a linear regression line in python? - python-2.7

I have a file contains x, y, and y-err and I simply want to fit a straight line to these data.
This is my original code which I'm plotting the data. based n this I want to fit the straight line:
import numpy as np
import matplotlib.pyplot as plt
#read the data file
Data = np.loadtxt('dmvals.dat')
MJD = Data[:,0]
DM = Data[:,1]
DM_err = Data[:,2]
font = {'family': 'serif',
'color': 'blue',
'weight': 'normal',
'size': 14,
}
plt.figure()
plt.xlabel('time[MJD]', fontdict=font)
plt.ylabel('DM[pc/cm^3]', fontdict=font)
plt.title('DM values', fontdict=font)
plt.errorbar(MJD, DM, DM_err,color='magenta')
plt.subplots_adjust(left=0.15 , hspace = 0.5)
plt.savefig('dm_variations_plot.png')

The easiest way is to use numpy.polyfit to fit a 1st degree polinomial:
p = numpy.polyfit(MJD, DM, deg=1)
p will be a list containing the intercept and the slope of the fit line
You can then plot the line on your data using
x = MJD
y = p[1] + p[0] * MJD
plt.plot(x, y, '--')

Related

How to represent the data in x and y axis using matplotlib

Here in my program i want to create the month wise dates on x axis label and and another rs data i want to represent on the y axis.can you please help me how to mention my data in matplotlib.
Given below is my sample program:
import matplotlib.pyplot as plt
from matplotlib import style
# line 1 points
x1 = [1,2,3]
y1 = [2,4,1]
# plotting the line 1 points
plt.plot(x1, y1, 'g', label = "line 1",linewidth=10)
plt.title('Two lines on same graph!')
plt.xlabel('x - axis')
plt.ylabel('y - axis')
plt.legend()
plt.grid(True,color="k")
plt.show()
# xticks(np.arange(12), calendar.month_name[1:13], rotation=20)
i don't want to mention in between the values it is tacking the x and y values i want to mention like in given diagram.
After few edits and your comments. Is this more closer what you are looking for?
import matplotlib.pyplot as plt
import datetime
# line 1 points
val = [1,2,3,2,6]
cust = [2,4,1,6,2]
orders = [3,5,2,7,3]
col = [1,3,4,2,6]
# plotting the line 1 points
fig, ax = plt.subplots()
start_date = datetime.datetime(2019, 07, 01)
dates = []
# Dates based on the measurement count
# See: https://stackoverflow.com/questions/1060279/iterating-through-a-range-of-dates-in-python
for single_date in (start_date + datetime.timedelta(n) for n in range(len(val))):
dates.append(single_date.strftime('%Y-%m-%d'))
# Values
plt.plot(dates, val, '.',color='g', markersize=12)
plt.plot(dates, val, label='Values', color='g')
# Customers
plt.plot(dates, cust, '.',color='b', markersize=12)
plt.plot(dates, cust, label='Customers',color='b')
# Orders
plt.plot(dates, orders, '.',color='r', markersize=12)
plt.plot(dates, orders, label='Orders',color='r')
# Collection
plt.plot(dates, col, '.',color='black', markersize=12)
plt.plot(dates, col, label='Collection',color='black')
plt.title('Four lines on same graph!')
plt.tick_params(axis='x', rotation=20)
plt.xlabel('x - axis')
plt.ylabel('y - axis')
plt.grid(True,color="k")
plt.legend()
plt.show()

Plot a 3D bar histogram with python

I have some x and y data, with which I would like to generate a 3D histogram, with a color gradient (bwr or whatever).
I have written a script which plot the interesting values, in between -2 and 2 for both x and y abscesses:
import numpy as np
import numpy.random
import matplotlib.pyplot as plt
# To generate some test data
x = np.random.randn(500)
y = np.random.randn(500)
XY = np.stack((x,y),axis=-1)
def selection(XY, limitXY=[[-2,+2],[-2,+2]]):
XY_select = []
for elt in XY:
if elt[0] > limitXY[0][0] and elt[0] < limitXY[0][1] and elt[1] > limitXY[1][0] and elt[1] < limitXY[1][1]:
XY_select.append(elt)
return np.array(XY_select)
XY_select = selection(XY, limitXY=[[-2,+2],[-2,+2]])
heatmap, xedges, yedges = np.histogram2d(XY_select[:,0], XY_select[:,1], bins = 7, range = [[-2,2],[-2,2]])
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
plt.figure("Histogram")
#plt.clf()
plt.imshow(heatmap.T, extent=extent, origin='lower')
plt.show()
And give this correct result:
Now, I would like to turn this into a 3D histogram. Unfortunatly I don't success to plot it correctly with bar3d because it takes by default the length of x and y for abscisse.
I am quite sure that there is a very easy way to plot this in 3D with imshow. Like an unknow option...
I finaly succeded in doing it. I am almost sure there is a better way to do it, but at leat it works:
import numpy as np
import numpy.random
import matplotlib.pyplot as plt
# To generate some test data
x = np.random.randn(500)
y = np.random.randn(500)
XY = np.stack((x,y),axis=-1)
def selection(XY, limitXY=[[-2,+2],[-2,+2]]):
XY_select = []
for elt in XY:
if elt[0] > limitXY[0][0] and elt[0] < limitXY[0][1] and elt[1] > limitXY[1][0] and elt[1] < limitXY[1][1]:
XY_select.append(elt)
return np.array(XY_select)
XY_select = selection(XY, limitXY=[[-2,+2],[-2,+2]])
xAmplitudes = np.array(XY_select)[:,0]#your data here
yAmplitudes = np.array(XY_select)[:,1]#your other data here
fig = plt.figure() #create a canvas, tell matplotlib it's 3d
ax = fig.add_subplot(111, projection='3d')
hist, xedges, yedges = np.histogram2d(x, y, bins=(7,7), range = [[-2,+2],[-2,+2]]) # you can change your bins, and the range on which to take data
# hist is a 7X7 matrix, with the populations for each of the subspace parts.
xpos, ypos = np.meshgrid(xedges[:-1]+xedges[1:], yedges[:-1]+yedges[1:]) -(xedges[1]-xedges[0])
xpos = xpos.flatten()*1./2
ypos = ypos.flatten()*1./2
zpos = np.zeros_like (xpos)
dx = xedges [1] - xedges [0]
dy = yedges [1] - yedges [0]
dz = hist.flatten()
cmap = cm.get_cmap('jet') # Get desired colormap - you can change this!
max_height = np.max(dz) # get range of colorbars so we can normalize
min_height = np.min(dz)
# scale each z to [0,1], and get their rgb values
rgba = [cmap((k-min_height)/max_height) for k in dz]
ax.bar3d(xpos, ypos, zpos, dx, dy, dz, color=rgba, zsort='average')
plt.title("X vs. Y Amplitudes for ____ Data")
plt.xlabel("My X data source")
plt.ylabel("My Y data source")
plt.savefig("Your_title_goes_here")
plt.show()
I use this example, but I modified it, because it introduced an offset. The result is this:
You can generate the same result using something as simple as the following:
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(-2, 2, 7)
y = np.linspace(-2, 2, 7)
xx, yy = np.meshgrid(x, y)
z = xx*0+yy*0+ np.random.random(size=[7,7])
plt.imshow(z, interpolation='nearest', cmap=plt.cm.viridis, extent=[-2,2,2,2])
plt.show()
from mpl_toolkits.mplot3d import Axes3D
ax = Axes3D(plt.figure())
ax.plot_surface(xx, yy, z, cmap=plt.cm.viridis, cstride=1, rstride=1)
plt.show()
The results are given below:

extracting and plotting atomic coordinates from pdb file python

I am trying to extract just the alpha carbon coordinates and plot them in a 3D representation. The top half of the following code works fine, but I can't seem to plot my results.
import re
import glob
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
coord = []
pattern = re.compile('ATOM\s{5,}\d+\s{2}CA\s{2,}\w{3}\s\w\s{2,}\d+\s{6}\d+\.\d+\s\d+\.\d+\s{2}\d+\.\d+', flags=re.S)
for file in glob.glob('file_rank_1.pdb'):
with open(file) as fp:
for result in pattern.findall(fp.read()):
output = result[-22:]
coord = " ".join(output.split())
coord = coord.replace(" ",",")
c = coord.split(',')
print(c)
X,Y,Z = (c)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_wireframe(X,Y,Z)
ax.set_xlabel('x axis')
ax.set_ylabel('y axis')
ax.set_zlabel('z axis')
plt.show()
My results from running the above looks like...
['72.438', '109.283', '43.980']
['75.664', '110.907', '45.079']
['74.354', '111.094', '48.594']
['73.380', '107.449', '48.722']
['76.614', '106.603', '46.958']
['79.740', '105.625', '48.895']
['82.425', '107.703', '47.318']
['80.088', '110.405', '46.265']
['78.710', '110.389', '49.818']
['82.235', '110.471', '51.200']
['82.841', '113.550', '49.133']
['79.233', '114.754', '49.675']
['78.633', '113.745', '53.295']
['77.041', '117.182', '53.503']
['73.963', '116.530', '51.505']
['73.696', '113.058', '52.933']
TypeError: Cannot cast array data from dtype('float64') to dtype('<U32') according to the rule 'safe'
The above code opens the graph interface, but it remains blank. There is also a full screen of red file messages from the interactive shell that I left off to try to save space in this question.
How can I plot the numbers found in c? Thanks
There are a few things to point out:
1) In the following block, c is a list of strings not floats.
with open(file) as fp:
for result in pattern.findall(fp.read()):
output = result[-22:]
coord = " ".join(output.split())
coord = coord.replace(" ",",")
c = coord.split(',')
print(c)
You can change them using:
[float(i) for i in c]
2) When you set X,Y,Z = (c), that c is only the last item in the loop. So you should append each c within the loop to collect all coordinates.
3) You might want to use numpy for array manipulations.
So hopefully the following will work:
import re
import numpy as np
import glob
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
points = []
pattern = re.compile('ATOM\s{5,}\d+\s{2}CA\s{2,}\w{3}\s\w\s{2,}\d+\s{6}\d+\.\d+\s\d+\.\d+\s{2}\d+\.\d+', flags=re.S)
for file in glob.glob('file_rank_1.pdb'):
with open(file) as fp:
for result in pattern.findall(fp.read()):
output = result[-22:]
coord = " ".join(output.split())
coord = coord.replace(" ",",")
c = coord.split(',')
c = [float(i) for i in c] # change them to float
points.append(c)
print(c)
X,Y,Z=np.array(points).T
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_wireframe(X,Y,Z)
ax.set_xlabel('x axis')
ax.set_ylabel('y axis')
ax.set_zlabel('z axis')
plt.show()

python, xlrd: Maniplulate spreadsheet data with xlrd function then graph the manipulated data

I am trying to extract data from an excel spreadsheet, then find a percent change between adjacent rows. The columns that I would like to do this manipulation on is column 1 and 4. I would like to then graph these percent changes in two different bar charts using subplots using column 0 as the x axis.
I am able to do everything except extract the data and formulate a percent change between adjacent rows. The formula for the percent change is Current/previous-1 or (r,0)/(r-1,0)-1. Below is my current script:
import xlrd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as tkr
import matplotlib.dates as mdates
import datetime
from matplotlib import rc
rc('mathtext', default='regular')
file_location = "/Users/adampatel/Desktop/psw01.xls"
workbook = xlrd.open_workbook(file_location, on_demand = False)
worksheet = workbook.sheet_by_name('Data 1')
x = [worksheet.cell_value(i+1699, 0) for i in range(worksheet.nrows-1699)]
y1 = [worksheet.cell_value(i+1699, 1) for i in range(worksheet.nrows-1699)]
y2 = [worksheet.cell_value(i+1699, 4) for i in range(worksheet.nrows-1699)]
fig = plt.figure()
ax1 = fig.add_subplot(211)
ax2 = fig.add_subplot(212, sharex = ax1)
start_date = datetime.date(1899, 12, 30)
dates=[start_date + datetime.timedelta(xval) for xval in x]
ax1.xaxis.set_major_locator(mdates.MonthLocator((), bymonthday=1, interval=2))
ax1.xaxis.set_minor_locator(mdates.MonthLocator((), bymonthday=1, interval=1))
ax1.xaxis.set_major_formatter(mdates.DateFormatter("%b'%y"))
ly1 = ax1.bar(dates, y1, 0.9)
ly2 = ax2.bar(dates, y2, 0.9)
ax1.grid()
ax2.grid()
ax1.set_ylim(-3,3)
ax2.set_ylim(-3,3)
fig.text(0.5, 0.04, 'Inventory Weekly Percent Change', ha='center', va='center', size = '14')
fig.text(0.06, 0.5, 'Weekly Percent Change', ha='center', va='center', size = '14', rotation='vertical')
ax1.set_title('Oil', size = '12')
ax2.set_title('Gasoline', size = '12')
plt.savefig('Gasoline Inventories Weekly Percent Change.png', bbox_inches='tight', dpi=300)
plt.show()
Given list of values:
y1 = [1000,1010,950,1050,1100,1030]
Pure python solution:
Use the zip function to create tuples of the numerator and denominator. Then use list comprehension to get a list of the percent changes.
pct_chg = [1.0*num / den - 1 for num, den in zip(y1[1:], y1)]
Numpy solution:
Convert list to numpy array, then perform computation using array slices.
a1 = np.array(y1)
pct_chg = np.divide(a1[1:],a1[:-1])-1
Pandas package solution:
Convert list to Pandas series and use the built-in percent change function
s1 = pd.Series(y1)
pct_chg = s1.pct_change()
Now, pct_chg is a series too. You can get its values in a numpy array via pct_chg.values. Matplotlib should accept numpy arrays as containers in most cases.

Interpolating 3d data at a single point in space (Python 2.7)

I have a point cloud in 4 dimensions, where each point in the cloud has a location and a value (x,y,z,Value). In addition, I have a 'special' point, S0, within the 3d point cloud; I've used this example to find the closest 10 points in the cloud, relative to S0. Now, I have a numpy array for each of the 10 closest points and their values. How can I interpolate these 10 points, to find the interpolated value at point S0? Example code is shown below:
import numpy as np
import matplotlib.pyplot as plt
numpoints = 20
linexs = 320
lineys = 40
linezs = 60
linexe = 20
lineye = 20
lineze = 0
# Create vectors of points
xpts = np.linspace(linexs, linexe, numpoints)
ypts = np.linspace(lineys, lineye, numpoints)
zpts = np.linspace(linezs, lineze, numpoints)
lin = np.dstack((xpts,ypts,zpts))
# Image line of points
fig = plt.figure()
ax = fig.add_subplot(211, projection='3d')
ax.set_xlim(0,365); ax.set_ylim(-85, 85); ax.set_zlim(0, 100)
ax.plot_wireframe(xpts, ypts, zpts)
ax.view_init(elev=12, azim=78)
def randrange(n, vmin, vmax):
return (vmax - vmin)*np.random.rand(n) + vmin
n = 10
for n in range(21):
xs = randrange(n, 0, 350)
ys = randrange(n, -75, 75)
zs = randrange(n, 0, 100)
ax.scatter(xs, ys, zs)
dat = np.dstack((xs,ys,zs))
ax.set_xlabel('X Label')
ax.set_xlim(0,350)
ax.set_ylabel('Y Label')
ax.set_ylim(-75,75)
ax.set_zlabel('Z Label')
ax.set_zlim(0,100)
ax = fig.add_subplot(212, projection='3d')
ax.set_xlim(0,365); ax.set_ylim(-85, 85); ax.set_zlim(0, 100)
ax.plot_wireframe(xpts,ypts,zpts)
ax.view_init(elev=12, azim=78)
plt.show()
dist = []
# Calculate distance from first point to all other points in cloud
for l in range(len(xpts)):
aaa = lin[0][0]-dat
dist.append(np.sqrt(aaa[0][l][0]**2+aaa[0][l][1]**2+aaa[0][l][2]**2))
full = np.dstack((dat,dist))
aaa = full[0][full[0][:,3].argsort()]
print(aaa[0:10])
A basic example. Note that the meshgrid is not needed for the interpolation, but only to make a fast ufunc to generate an example function A=f(x,y,z), here A=x+y+z.
from scipy.interpolate import interpn
import numpy as np
#make up a regular 3d grid
X=np.linspace(-5,5,11)
Y=np.linspace(-5,5,11)
Z=np.linspace(-5,5,11)
xv,yv,zv = np.meshgrid(X,Y,Z)
# make up a function
# see http://docs.scipy.org/doc/numpy/reference/ufuncs.html
A = np.add(xv,np.add(yv,zv))
#this one is easy enough for us to know what to expect at (.5,.5,.5)
# usage : interpn(points, values, xi, method='linear', bounds_error=True, fill_value=nan)
interpn((X,Y,Z),A,[0.5,0.5,0.5])
Output:
array([ 1.5])
If you pass in an array of points of interest, it will give you multiple answers.