extracting and plotting atomic coordinates from pdb file python - regex

I am trying to extract just the alpha carbon coordinates and plot them in a 3D representation. The top half of the following code works fine, but I can't seem to plot my results.
import re
import glob
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
coord = []
pattern = re.compile('ATOM\s{5,}\d+\s{2}CA\s{2,}\w{3}\s\w\s{2,}\d+\s{6}\d+\.\d+\s\d+\.\d+\s{2}\d+\.\d+', flags=re.S)
for file in glob.glob('file_rank_1.pdb'):
with open(file) as fp:
for result in pattern.findall(fp.read()):
output = result[-22:]
coord = " ".join(output.split())
coord = coord.replace(" ",",")
c = coord.split(',')
print(c)
X,Y,Z = (c)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_wireframe(X,Y,Z)
ax.set_xlabel('x axis')
ax.set_ylabel('y axis')
ax.set_zlabel('z axis')
plt.show()
My results from running the above looks like...
['72.438', '109.283', '43.980']
['75.664', '110.907', '45.079']
['74.354', '111.094', '48.594']
['73.380', '107.449', '48.722']
['76.614', '106.603', '46.958']
['79.740', '105.625', '48.895']
['82.425', '107.703', '47.318']
['80.088', '110.405', '46.265']
['78.710', '110.389', '49.818']
['82.235', '110.471', '51.200']
['82.841', '113.550', '49.133']
['79.233', '114.754', '49.675']
['78.633', '113.745', '53.295']
['77.041', '117.182', '53.503']
['73.963', '116.530', '51.505']
['73.696', '113.058', '52.933']
TypeError: Cannot cast array data from dtype('float64') to dtype('<U32') according to the rule 'safe'
The above code opens the graph interface, but it remains blank. There is also a full screen of red file messages from the interactive shell that I left off to try to save space in this question.
How can I plot the numbers found in c? Thanks

There are a few things to point out:
1) In the following block, c is a list of strings not floats.
with open(file) as fp:
for result in pattern.findall(fp.read()):
output = result[-22:]
coord = " ".join(output.split())
coord = coord.replace(" ",",")
c = coord.split(',')
print(c)
You can change them using:
[float(i) for i in c]
2) When you set X,Y,Z = (c), that c is only the last item in the loop. So you should append each c within the loop to collect all coordinates.
3) You might want to use numpy for array manipulations.
So hopefully the following will work:
import re
import numpy as np
import glob
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
points = []
pattern = re.compile('ATOM\s{5,}\d+\s{2}CA\s{2,}\w{3}\s\w\s{2,}\d+\s{6}\d+\.\d+\s\d+\.\d+\s{2}\d+\.\d+', flags=re.S)
for file in glob.glob('file_rank_1.pdb'):
with open(file) as fp:
for result in pattern.findall(fp.read()):
output = result[-22:]
coord = " ".join(output.split())
coord = coord.replace(" ",",")
c = coord.split(',')
c = [float(i) for i in c] # change them to float
points.append(c)
print(c)
X,Y,Z=np.array(points).T
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_wireframe(X,Y,Z)
ax.set_xlabel('x axis')
ax.set_ylabel('y axis')
ax.set_zlabel('z axis')
plt.show()

Related

Retrun event.xdata from a function

I have made a small gui to select points on a given image, plot the points, interpolate them and save them. I would like to make it so that the interpolated points are given as output of the function gui_pos(image) retrun, but I couldn't find the way to do it. So far I have fixed it saving the interpolated point in a .pckl file but it is not a good solution.
The code is the following:
from PIL import Image
import numpy as np
import pickle
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d
def interp_mia(x,y,xx):
f= interp1d(x,y,fill_value="extrapolate")
yy= f(xx)
return yy
def onclick(event):
plt.plot(event.xdata, event.ydata, '.')
fig.canvas.draw()
coordsx.append(event.xdata)
coordsy.append(event.ydata)
if (event.button == 3) :
xx = np.arange(np.min(coordsx),np.max(coordsx))
yy = interp_mia(coordsx, coordsy, xx)
print('you pressed', event.button)
plt.plot(xx,yy,'k-')
fig.canvas.draw()
fig.canvas.mpl_disconnect(cid)
dum = np.array((xx,yy))
f = open('gui_pos.pckl', 'wb')
pickle.dump(dum, f)
f.close()
def gui_pos(image):
global coordsx
global coordsy
global fig
global cid
global xx
global yy
global slit
fig = plt.figure()
ax = fig.add_subplot(111)
ax.imshow(image, origin="lower")
coordsx = []
coordsy = []
cid = fig.canvas.mpl_connect('button_press_event', onclick)
return
Any ideas?
Thank you,
Qarolina

CSV reading in python 2.7

I wrote that code for csv reading, but right now I have this problem:
ValueError: invalid literal for float():
4.000E+00;3.125E-07;-7.854E-13
Here is my code:
import numpy as np
import matplotlib.pyplot as plt
def read_datafile(file_name):
data = np.loadtxt(file_name, delimiter=' ')
return data
for r in range(0,25,1):
data = read_datafile("S:\Dok\Python\Data\Codes\Model2\Mod{}.csv".format(r))
x = data[:,0]
y = data[:,1]
z = data[:,2]
degree = u"\u00b0"
fig = plt.figure(1)
plt.title("Model {}".format(r) + degree)
plt.plot(x, abs(y + 1j * z), color='k')
plt.show()
My files look like this:
You have np.loadtxt(file_name, delimiter=' '), but according to your error:
ValueError: invalid literal for float(): 4.000E+00;3.125E-07;-7.854E-13,
... it is semicolon-delimited.
Because of this, you are getting the whole line in at once, and to the conversion fails. If you change to delimiter=';', it should work.

add multiple colorbars to a subplot of polar contourf [duplicate]

I would like to add a separate colorbar to each subplot in a 2x2 plot.
fig , ( (ax1,ax2) , (ax3,ax4)) = plt.subplots(2, 2,sharex = True,sharey=True)
z1_plot = ax1.scatter(x,y,c = z1,vmin=0.0,vmax=0.4)
plt.colorbar(z1_plot,cax=ax1)
z2_plot = ax2.scatter(x,y,c = z2,vmin=0.0,vmax=40)
plt.colorbar(z1_plot,cax=ax2)
z3_plot = ax3.scatter(x,y,c = z3,vmin=0.0,vmax=894)
plt.colorbar(z1_plot,cax=ax3)
z4_plot = ax4.scatter(x,y,c = z4,vmin=0.0,vmax=234324)
plt.colorbar(z1_plot,cax=ax4)
plt.show()
I thought that this is how you do it, but the resulting plot is really messed up; it just has an all grey background and ignores the set_xlim , set_ylim commands I have (not shown here for simplicity). + it shows no color bars. Is this the right way to do it?
I also tried getting rid of the "cax = ...", but then the colorbar all goes on the bottom right plot and not to each separate plot!
This can be easily solved with the the utility make_axes_locatable. I provide a minimal example that shows how this works and should be readily adaptable:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import numpy as np
m1 = np.random.rand(3, 3)
m2 = np.arange(0, 3*3, 1).reshape((3, 3))
fig = plt.figure(figsize=(16, 12))
ax1 = fig.add_subplot(121)
im1 = ax1.imshow(m1, interpolation='None')
divider = make_axes_locatable(ax1)
cax = divider.append_axes('right', size='5%', pad=0.05)
fig.colorbar(im1, cax=cax, orientation='vertical')
ax2 = fig.add_subplot(122)
im2 = ax2.imshow(m2, interpolation='None')
divider = make_axes_locatable(ax2)
cax = divider.append_axes('right', size='5%', pad=0.05)
fig.colorbar(im2, cax=cax, orientation='vertical');
In plt.colorbar(z1_plot,cax=ax1), use ax= instead of cax=, i.e. plt.colorbar(z1_plot,ax=ax1)
Specify the ax argument to matplotlib.pyplot.colorbar(), e.g.
import numpy as np
import matplotlib.pyplot as plt
fig, ax = plt.subplots(2, 2)
for i in range(2):
for j in range(2):
data = np.array([[i, j], [i+0.5, j+0.5]])
im = ax[i, j].imshow(data)
plt.colorbar(im, ax=ax[i, j])
plt.show()
Please have a look at this matplotlib example page. There it is shown how to get the following plot with four individual colorbars for each subplot:
I hope this helps.
You can further have a look here, where you can find a lot of what you can do with matplotlib.
Try to use the func below to add colorbar:
def add_colorbar(mappable):
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib.pyplot as plt
last_axes = plt.gca()
ax = mappable.axes
fig = ax.figure
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.05)
cbar = fig.colorbar(mappable, cax=cax)
plt.sca(last_axes)
return cbar
Then you codes need to be modified as:
fig , ( (ax1,ax2) , (ax3,ax4)) = plt.subplots(2, 2,sharex = True,sharey=True)
z1_plot = ax1.scatter(x,y,c = z1,vmin=0.0,vmax=0.4)
add_colorbar(z1_plot)

How to reshape a numpy array from (#dim1,#dim2,#channel) to (#channel, #dim1,#dim2)

I have an array with the shape of (#dim1,#dim2,#channel). I want to reshape it to (#channel, #dim1,#dim2).
The plt.reshape(x, (#channel, #dim1,#dim2)) shows me a wrong image.
If you are using the Cifar10 dataset you could use the following code:
import numpy as np
import matplotlib.pyplot as plt
import cPickle
def unpickle(file):
with open(file, 'rb') as fo:
dict = cPickle.load(fo)
return dict
# Read the data
imageDict = unpickle('cifar-10-batches-py/data_batch_2')
imageArray = imageDict['data']
# Now we reshape
imageArray = np.swapaxes(imageArray.reshape(10000,32,32,3,order='F'), 1, 2)
# Get the labels
labels = ['airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck']
imageLabels = [labels[i] for i in imageDict['labels']]
# Plot some images
fig, ax = plt.subplots(4,4, figsize=(8,8))
for axIndex in [(i,j) for i in range(4) for j in range(4)]:
index = np.random.randint(0,10000)
ax[axIndex].imshow(imageArray[index], origin='upper')
ax[axIndex].set_title(imageLabels[index])
ax[axIndex].axis('off')
fig.show()
Which gives you:

How to animate and update the title,xlabel,ylabel?

I am new to Matplotlib. Based on my code in following, I wanted to update the data,title,xlabel,ylabel at same time. However, the title and labels did not been updated, but data did.Someone can give me a solution? That will help me a lot.Thank you.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
def updata(frame_number):
current_index = frame_number % 3
a = [[1,2,3],[4,5,6],[7,8,9]]
idata['position'][:,0] = np.asarray(a[current_index])
idata['position'][:,1] = np.asarray(a[current_index])
scat.set_offsets(idata['position'])
ax.set_xlabel('The Intensity of Image1')
ax.set_ylabel('The Intensity of Image2')
ax.set_title("For Dataset %d" % current_index)
fig = plt.figure(figsize=(5,5))
ax = fig.add_axes([0,0,1,1])
idata = np.zeros(3,dtype=[('position',float,2)])
ax.set_title(label='lets begin',fontdict = {'fontsize':12},loc='center')
scat = ax.scatter(idata['position'][:,0],idata['position'][:,1],s=10,alpha=0.3,edgecolors='none')
animation = FuncAnimation(fig,updata,interval=2000)
plt.show()
Running the code, I see an empty window. The reason is that the axes span the complete figure (fig.add_axes([0,0,1,1])). In order to see the title and labels, you would need to make the axes smaller than the figure, e.g. by
ax = fig.add_subplot(111)
Also, the scale of the axes is not defined, so the animation will happen outside the axes limits. You can use ax.set_xlim and ax.set_ylim to prevent that.
Here is a complete running code:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
def updata(frame_number):
current_index = frame_number % 3
a = [[1,2,3],[4,5,6],[7,8,9]]
idata['position'][:,0] = np.asarray(a[current_index])
idata['position'][:,1] = np.asarray(a[current_index])
scat.set_offsets(idata['position'])
ax.set_xlabel('The Intensity of Image1')
ax.set_ylabel('The Intensity of Image2')
ax.set_title("For Dataset %d" % current_index)
fig = plt.figure(figsize=(5,5))
ax = fig.add_subplot(111)
idata = np.zeros(3,dtype=[('position',float,2)])
ax.set_title(label='lets begin',fontdict = {'fontsize':12},loc='center')
scat = ax.scatter(idata['position'][:,0],idata['position'][:,1],
s=25,alpha=0.9,edgecolors='none')
ax.set_xlim(0,10)
ax.set_ylim(0,10)
animation = FuncAnimation(fig,updata,frames=50,interval=600)
plt.show()