Related
I'm having trouble setting up subplots in matplotlib. Below is a screenshot of the layout I am trying to achieve. I've only gone up through ax4, as I have encountered two issues. Code is also below. (Note that on their own, the functions plot as expected.) I want to continue using "subplot2grid" instead of the other options, and I'm using Python 2.7
Expected behavior:
Plots ax1, ax2, and ax3 should be a World Shade Relief Map, with position as shown in desired layout below.
Plot ax4 should be a scatter plot, with position as shown in desired layout below.
Actual behavior:
Plots ax1, ax2, and ax3 are all blank
Plot ax4 is not a scatter, but it's actually the map; layout is also wrong.
I thought I was missing a "hold on" type feature, but it looks like that's not how matplotlib works.
I also made sure I defined the plot limits in myplotA function.
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
from mpl_toolkits.basemap import Basemap
from random import randint
def myplotA(plotnum, title):
south = 34.0129656032
north = 34.721878622
west = -116.7615176
east = -116.336918412
center = [(east + west) / 2, (north + south) / 2]
m = Basemap(llcrnrlon=west, llcrnrlat=south, urcrnrlon=east, urcrnrlat=north, resolution='c', epsg=4326, lon_0=center[0], lat_0=center[1], suppress_ticks=False)
img = m.arcgisimage(service="World_Shaded_Relief", xpixels=2000)
img.set_alpha(0.5)
plt.xticks(rotation='horizontal')
plotnum.xaxis.set_major_formatter(ScalarFormatter(useOffset=False))
plotnum.axis([west, east, south, north])
for label in (plotnum.get_xticklabels() + plotnum.get_yticklabels()):label.set_fontsize(9)
plt.gca().set_title(title, fontsize=12)
def myplotB(plotnum, title):
x = [randint(0, 10) for i in range(0, 6)]
y = [randint(0, 10) for i in range(0, 6)]
plotnum.scatter(x, y, s=4)
plotnum.xaxis.set_major_formatter(ScalarFormatter(useOffset=False))
plt.xlabel('xlabel', fontsize=8)
plt.ylabel('ylabel', fontsize=8)
plt.gca().set_title(title, fontsize=12)
fig = plt.figure(figsize=(11, 17))
ax1 = plt.subplot2grid((8, 3), (0, 0), rowspan=4, colspan=1)
ax2 = plt.subplot2grid((8, 3), (0, 1), rowspan=4, colspan=1)
ax3 = plt.subplot2grid((8, 3), (0, 2), rowspan=4, colspan=1)
ax4 = plt.subplot2grid((8, 3), (5, 0), rowspan=1, colspan=2)
myplotA(ax1, 'ax1')
myplotA(ax2, 'ax2')
myplotA(ax3, 'ax3')
myplotB(ax4, 'ax4')
plt.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.9, wspace=.1, hspace=.1)
fig.savefig(outpath + '\\' + 'mytest.pdf')
Desired Layout
Desired Layout Image
Actual Result
Actual Result Image
You pass the axes to the plotting functions, but inside of those you need to actually use those passed axes. Else all the plt commands will apply to the currently active axes, which is the last one you create.
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
from mpl_toolkits.basemap import Basemap
from random import randint
def myplotA(ax, title):
south = 34.0129656032
north = 34.721878622
west = -116.7615176
east = -116.336918412
center = [(east + west) / 2, (north + south) / 2]
m = Basemap(llcrnrlon=west, llcrnrlat=south, urcrnrlon=east, urcrnrlat=north,
resolution='c', epsg=4326, lon_0=center[0], lat_0=center[1],
suppress_ticks=False, ax=ax)
img = m.arcgisimage(service="World_Shaded_Relief", xpixels=2000)
img.set_alpha(0.5)
plt.setp(ax.get_xticklabels(), rotation='horizontal')
plt.setp(ax.get_xticklabels() + ax.get_yticklabels(), fontsize=9)
ax.xaxis.set_major_formatter(ScalarFormatter(useOffset=False))
ax.axis([west, east, south, north])
ax.set_title(title, fontsize=12)
def myplotB(ax, title):
x = [randint(0, 10) for i in range(0, 6)]
y = [randint(0, 10) for i in range(0, 6)]
ax.scatter(x, y, s=4)
ax.xaxis.set_major_formatter(ScalarFormatter(useOffset=False))
ax.set_xlabel('xlabel', fontsize=8)
ax.set_ylabel('ylabel', fontsize=8)
ax.set_title(title, fontsize=12)
fig = plt.figure(figsize=(11, 8))
ax1 = plt.subplot2grid((8, 3), (0, 0), rowspan=4, colspan=1)
ax2 = plt.subplot2grid((8, 3), (0, 1), rowspan=4, colspan=1)
ax3 = plt.subplot2grid((8, 3), (0, 2), rowspan=4, colspan=1)
ax4 = plt.subplot2grid((8, 3), (5, 0), rowspan=1, colspan=2)
myplotA(ax1, 'ax1')
myplotA(ax2, 'ax2')
myplotA(ax3, 'ax3')
myplotB(ax4, 'ax4')
plt.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.9, wspace=.1, hspace=.1)
fig.savefig('mytest.pdf')
plt.show()
Thanks #ImportanceOfBeingErnest
I tried your code above, but still same plot as shown in the "actual results" part of the OP. However, your words describe the problem: I'm activating the axis, and then activating the next one before plotting. Below does the trick--perhaps this is what you had anyhow and just mistakenly copy/pasted the original code. Thanks for point me in the right direction.
fig = plt.figure(figsize=(11, 17))
ax1 = plt.subplot2grid((8, 3), (0, 0), rowspan=4, colspan=1)
myplotA(ax1, 'ax1')
ax2 = plt.subplot2grid((8, 3), (0, 1), rowspan=4, colspan=1)
myplotA(ax2, 'ax2')
ax3 = plt.subplot2grid((8, 3), (0, 2), rowspan=4, colspan=1)
myplotA(ax3, 'ax3')
ax4 = plt.subplot2grid((8, 3), (4, 0), rowspan=1, colspan=2)
myplotB(ax4, 'ax4')
plt.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.9, wspace=.1, hspace=.1)
fig.savefig(outpath + '\\' + 'mytest.pdf')
Link to image of output plot; looks as expected
I have two dataframes that I'm trying to make histograms of. I would like to overlay one histogram over the other and show them in the same cell, so I can easily compare the distributions. Can anyone suggest how to do that? I have example code and data below. This will plot the histograms separately one above the other.
Data:
print(df[1:5])
bob
1 1
2 3
3 5
4 1
print(df2[1:5])
bob
1 3
2 3
3 2
4 1
Code:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
df[df[bob]>=1][bob].hist(bins=25, range=[0, 25])
plt.show()
df2[df2[bob]>=1][bob].hist(bins=25, range=[0, 25])
plt.show()
Use ax:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
fig = plt.figure()
ax = fig.add_subplot(111)
df = pd.DataFrame([1, 3, 5, 1], columns=["bob"], index=[1, 2, 3, 4])
df2 = pd.DataFrame([3, 3, 2, 1], columns=["bob"], index=[1, 2, 3, 4])
ax.hist([df, df2], label=("df", "df2"), bins=25, range=[0, 25])
ax.legend()
I am trying to generate a box plot in Python 2.7 for each categorical value in column E from the Pandas dataframe below
A B C D E
0 0.647366 0.317832 0.875353 0.993592 1
1 0.504790 0.041806 0.113889 0.445370 2
2 0.769335 0.120647 0.749565 0.935732 3
3 0.215003 0.497402 0.795033 0.246890 1
4 0.841577 0.211128 0.248779 0.250432 1
5 0.045797 0.710889 0.257784 0.207661 4
6 0.229536 0.094308 0.464018 0.402725 3
7 0.067887 0.591637 0.949509 0.858394 2
8 0.827660 0.348025 0.507488 0.343006 3
9 0.559795 0.820231 0.461300 0.921024 1
I would be willing to do this with Matplotlib or any other plotting library. So far the above code can plot all the categories combined on one plot. Here is the code to generate the above data and produce the plot:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
# Data
df = pd.DataFrame(np.random.rand(10,4),columns=list('ABCD'))
df['E'] = [1,2,3,1,1,4,3,2,3,1]
# Boxplot
bp = ax.boxplot(df.iloc[:,:-1].values, widths=0.2)
plt.show()
In this example, the categories are 1,2,3,4. I would like to plot separate boxplots side-by-side on the same figure, for only categories 1 and 2 and show the category names in the legend.
Is there a way to do this?
Additional Information:
The output should look similar to the 3rd figure from here - replace "Yes","No" by "1","2".
Starting with this:
import numpy
import pandas
from matplotlib import pyplot
import seaborn
seaborn.set(style="ticks")
# Data
df = pandas.DataFrame(numpy.random.rand(10,4), columns=list('ABCD'))
df['E'] = [1, 2, 3, 1, 1, 4, 3, 2, 3, 1]
You've got a couple of options. If separate axes are ok,
fig, axes = pyplot.subplots(ncols=4, figsize=(12, 5), sharey=True)
df.query("E in [1, 2]").boxplot(by='E', return_type='axes', ax=axes)
If you want 1 axes, I think seaborn will be easier. You just need to clean up your data.
ax = (
df.set_index('E', append=True) # set E as part of the index
.stack() # pull A - D into rows
.to_frame() # convert to a dataframe
.reset_index() # make the index into reg. columns
.rename(columns={'level_2': 'quantity', 0: 'value'}) # rename columns
.drop('level_0', axis='columns') # drop junk columns
.pipe((seaborn.boxplot, 'data'), x='E', y='value', hue='quantity', order=[1, 2])
)
seaborn.despine(trim=True)
The cool thing about seaborn is that tweaking the parameters slightly can achieve a lot in terms of the plot's layout. If we switch our hue and x variables, we get:
ax = (
df.set_index('E', append=True) # set E as part of the index
.stack() # pull A - D into rows
.to_frame() # convert to a dataframe
.reset_index() # make the index into reg. columns
.rename(columns={'level_2': 'quantity', 0: 'value'}) # rename columns
.drop('level_0', axis='columns') # drop junk columns
.pipe((seaborn.boxplot, 'data'), x='quantity', y='value', hue='E', hue_order=[1, 2])
)
seaborn.despine(trim=True)
If you're curious, the resulting dataframe looks something like this:
E quantity value
0 1 A 0.935433
1 1 B 0.862290
2 1 C 0.197243
3 1 D 0.977969
4 2 A 0.675037
5 2 B 0.494440
6 2 C 0.492762
7 2 D 0.531296
8 3 A 0.119273
9 3 B 0.303639
10 3 C 0.911700
11 3 D 0.807861
An addition to #Paul_H answer.
Side-by-side boxplots on the single matplotlib.axes.Axes, no seaborn:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df = pd.DataFrame(np.random.rand(10,4), columns=list('ABCD'))
df['E'] = [1, 2, 1, 1, 1, 2, 1, 2, 2, 1]
mask_e = df['E'] == 1
# prepare data
data_to_plot = [df[mask_e]['A'], df[~mask_e]['A'],
df[mask_e]['B'], df[~mask_e]['B'],
df[mask_e]['C'], df[~mask_e]['C'],
df[mask_e]['D'], df[~mask_e]['D']]
# Positions defaults to range(1, N+1) where N is the number of boxplot to be drawn.
# we will move them a little, to visually group them
plt.figure(figsize=(10, 6))
box = plt.boxplot(data_to_plot,
positions=[1, 1.6, 2.5, 3.1, 4, 4.6, 5.5, 6.1],
labels=['A1','A0','B1','B0','C1','C0','D1','D0'])
I have a numpy array of 3 million points in the form of [pt_id, x, y, z]. The goal is to return all pairs of points that have an Euclidean distance two numbers min_d and max_d.
The Euclidean distance is between x and y and not on the z. However, I'd like to preserve the array with pt_id_from, pt_id_to, distance attributes.
I'm using scipy's dist to calculate the distances:
import scipy.spatial.distance
coords_arr = np.array([['pt1', 2452130.000, 7278106.000, 25.000],
['pt2', 2479539.000, 7287455.000, 4.900],
['pt3', 2479626.000, 7287458.000, 10.000],
['pt4', 2484097.000, 7292784.000, 8.800],
['pt5', 2484106.000, 7293079.000, 7.300],
['pt6', 2484095.000, 7292891.000, 11.100]])
dists = scipy.spatial.distance.pdist(coords_arr[:,1:3], 'euclidean')
np.savetxt('test.out', scipy.spatial.distance.squareform(dists), delimiter=',')
What should I do to return an array of form: [pt_id_from, pt_id_to, distance]?
You simply create a new array from the data by looping through all the possible combinations. The itertools module is excellent for this.
n = coords_arr.shape[0] # number of points
D = scipy.spatial.distance.squareform(dists) # distance matrix
data = []
for i, j in itertools.combinations(range(n), 2):
pt_a = coords_arr[i, 0]
pt_b = coords_arr[j, 0]
d_ab = D[i,j]
data.append([pt_a, pt_b, d_ab])
result_arr = np.array(data)
If memory is a problem, you might want to change the distance lookup from using the huge matrix D to looking up the value directly in dists using the i and j index.
Well, ['pt1', 'pt2', distance_as_number] is not exactly possible. The closest you can get with mixed datatypes is a structured array but then you can't do things like result[:2,0]. You'll have to index field names and array indices separately like: result[['a','b']][0].
Here is my solution:
import numpy as np
import scipy.spatial.distance
coords_arr = np.array([['pt1', 2452130.000, 7278106.000, 25.000],
['pt2', 2479539.000, 7287455.000, 4.900],
['pt3', 2479626.000, 7287458.000, 10.000],
['pt4', 2484097.000, 7292784.000, 8.800],
['pt5', 2484106.000, 7293079.000, 7.300],
['pt6', 2484095.000, 7292891.000, 11.100]])
dists = scipy.spatial.distance.pdist(coords_arr[:,1:3], 'euclidean')
# Create a shortcut for `coords_arr.shape[0]` which is basically
# the total amount of points, hence `n`
n = coords_arr.shape[0]
# `a` and `b` contain the indices of the points which were used to compute the
# distances in dists. In this example:
# a = [0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4]
# b = [1, 2, 3, 4, 5, 2, 3, 4, 5, 3, 4, 5, 4, 5, 5]
a = np.arange(n).repeat(np.arange(n-1, -1, -1))
b = np.hstack([range(x, n) for x in xrange(1, n)])
min_d = 1000
max_d = 10000
# Find out which distances are in range.
in_range = np.less_equal(min_d, dists) & np.less_equal(dists, max_d)
# Define the datatype of the structured array which will be the result.
dtype = [('a', '<f8', (3,)), ('b', '<f8', (3,)), ('dist', '<f8')]
# Create an empty array. We fill it later because it makes the code cleaner.
# Its size is given by the sum over `in_range` which is possible
# since True and False are equivalent to 1 and 0.
result = np.empty(np.sum(in_range), dtype=dtype)
# Fill the resulting array.
result['a'] = coords_arr[a[in_range], 1:4]
result['b'] = coords_arr[b[in_range], 1:4]
result['dist'] = dists[in_range]
print(result)
# In caste you don't want a structured array at all, this is what you can do:
result = np.hstack([coords_arr[a[in_range],1:],
coords_arr[b[in_range],1:],
dists[in_range, None]]).astype('<f8')
print(result)
The structured array:
[([2479539.0, 7287455.0, 4.9], [2484097.0, 7292784.0, 8.8], 7012.389393067102)
([2479539.0, 7287455.0, 4.9], [2484106.0, 7293079.0, 7.3], 7244.7819152821985)
([2479539.0, 7287455.0, 4.9], [2484095.0, 7292891.0, 11.1], 7092.75912462844)
([2479626.0, 7287458.0, 10.0], [2484097.0, 7292784.0, 8.8], 6953.856268287403)
([2479626.0, 7287458.0, 10.0], [2484106.0, 7293079.0, 7.3], 7187.909362255481)
([2479626.0, 7287458.0, 10.0], [2484095.0, 7292891.0, 11.1], 7034.873843929257)]
The ndarray:
[[2479539.0, 7287455.0, 4.9, 2484097.0, 7292784.0, 8.8, 7012.3893],
[2479539.0, 7287455.0, 4.9, 2484106.0, 7293079.0, 7.3, 7244.7819],
[2479539.0, 7287455.0, 4.9, 2484095.0, 7292891.0, 11.1, 7092.7591],
[2479626.0, 7287458.0, 10.0, 2484097.0, 7292784.0, 8.8, 6953.8562],
[2479626.0, 7287458.0, 10.0, 2484106.0, 7293079.0, 7.3, 7187.9093],
[2479626.0, 7287458.0, 10.0, 2484095.0, 7292891.0, 11.1, 7034.8738]]
You can use np.where to get a coords of distances within a range, then generate a new list in your format, filtering same pairs. Like this:
>>> import scipy.spatial.distance
>>> import numpy as np
>>> coords_arr = np.array([['pt1', 2452130.000, 7278106.000, 25.000],
... ['pt2', 2479539.000, 7287455.000, 4.900],
... ['pt3', 2479626.000, 7287458.000, 10.000],
... ['pt4', 2484097.000, 7292784.000, 8.800],
... ['pt5', 2484106.000, 7293079.000, 7.300],
... ['pt6', 2484095.000, 7292891.000, 11.100]])
>>>
>>> dists = scipy.spatial.distance.pdist(coords_arr[:,1:3], 'euclidean')
>>> dists = scipy.spatial.distance.squareform(dists)
>>> x, y = np.where((dists >= 8000) & (dists <= 30000))
>>> [(coords_arr[x[i]][0], coords_arr[y[i]][0], dists[y[i]][x[i]]) for i in xrange(len(x)) if x[i] < y[i]]
[('pt1', 'pt2', 28959.576688895162), ('pt1', 'pt3', 29042.897927032005)]
I'm using subplot2grid to display graphs. However, not all subplots are being displayed. Obviously it has to do with the if statement.
However, in my complete code I need those if statements because depending on some conditions plots have diffent formats. I want all 3 subplots to be displayed (one for each i). However, the first one is missing. How to display it correctly?
Here is the simplified code:
import matplotlib.pyplot as plt
fig=plt.figure()
for i in xrange(0,3):
if i==1:
ax=plt.subplot2grid((3,1),(i,0))
ax.plot([1,2],[1,2])
fig.autofmt_xdate()
else:
ax=plt.subplot2grid((3,1),(i,0), rowspan=2)
ax.plot([1,2],[1,2])
fig.autofmt_xdate()
plt.show()
I would just use the gridspec module from matplotlib. Then you can set the width/height ratios directly.
Then you can do something like this:
import numpy
from matplotlib import gridspec
import matplotlib.pyplot as plt
def do_plot_1(ax):
ax.plot([0.25, 0.5, 0.75], [0.25, 0.5, 0.75], 'k-')
def do_plot_2(ax):
ax.plot([0.25, 0.5, 0.75], [0.25, 0.5, 0.75], 'g--')
fig = plt.figure(figsize=(6, 4))
gs = gridspec.GridSpec(nrows=3, ncols=1, height_ratios=[2, 1, 2])
for n in range(3):
ax = fig.add_subplot(gs[n])
if n == 1:
do_plot_1(ax)
else:
do_plot_2(ax)
fig.tight_layout()
To use plt.subplot2grid, you'd need to effectively do something like this:
fig = plt.figure(figsize=(6, 4))
ax1 = plt.subplot2grid((5,1), (0, 0), rowspan=2)
ax2 = plt.subplot2grid((5,1), (2, 0), rowspan=1)
ax3 = plt.subplot2grid((5,1), (3, 0), rowspan=2)
Since you have two axes with a rowspan=2, your grid needs to be 2+1+2 = 5 blocks tall.