How to visualize binary data in multiple axis in Python? - python-2.7

I have a sample Pandas data frame as follows:
Action Comedy Crime Thriller SciFi
1 0 1 1 0
0 1 0 0 1
0 1 0 1 0
0 0 1 0 1
1 1 0 0 0
I would like to plot the data set using Python (preferably with matplotlib) in such a way that each of the columns is a separate axis. Hence, in this case, there will be 5 axes (Action, Comedy, Crime, ...) and 5 data points (since it has 5 rows).
Is it possible to plot this kind of multi-axis data using Python's matplotlib? If it's not possible, what would be the best way to visualize this data?

RadarChart
Having several axes could be accomplished using a RadarChart. You may adapt the Radar Chart example to your needs.
# Sample table from the question: one header line of genre names followed by
# five whitespace-separated rows of 0/1 flags. Kept inline so the example is
# self-contained; it is parsed with pd.read_csv further down.
u = u"""Action Comedy Crime Thriller SciFi
1 0 1 1 0
0 1 0 0 1
0 1 0 1 0
0 0 1 0 1
1 1 0 0 0"""
import io
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.path import Path
from matplotlib.spines import Spine
from matplotlib.projections.polar import PolarAxes
from matplotlib.projections import register_projection
def radar_factory(num_vars, frame='circle'):
    """Register a 'radar' projection with `num_vars` axes and return their angles.

    Parameters
    ----------
    num_vars : int
        Number of variables, i.e. spokes of the radar chart.
    frame : {'circle', 'polygon'}
        Shape of the frame drawn around the axes. Any other value raises
        KeyError when the patch function is looked up.

    Returns
    -------
    numpy.ndarray
        `num_vars` evenly spaced angles in radians, offset by pi/2.
    """
    theta = np.linspace(0, 2*np.pi, num_vars, endpoint=False)
    # Offset every spoke by a quarter turn.
    theta += np.pi/2

    def unit_poly_verts(theta):
        """Return the vertices of a polygon of radius 0.5 centred at (0.5, 0.5)."""
        x0, y0, r = [0.5] * 3
        verts = [(r*np.cos(t) + x0, r*np.sin(t) + y0) for t in theta]
        return verts

    def draw_poly_patch(self):
        verts = unit_poly_verts(theta)
        return plt.Polygon(verts, closed=True, edgecolor='k')

    def draw_circle_patch(self):
        return plt.Circle((0.5, 0.5), 0.5)

    patch_dict = {'polygon': draw_poly_patch, 'circle': draw_circle_patch}

    class RadarAxes(PolarAxes):
        """Polar axes whose lines and fills are closed into polygons."""

        name = 'radar'
        # NOTE(review): presumably keeps PolarAxes from interpolating between
        # data points so edges stay straight -- confirm for the matplotlib
        # version in use.
        RESOLUTION = 1
        # Patch-drawing function selected by the `frame` argument.
        draw_patch = patch_dict[frame]

        def fill(self, *args, **kwargs):
            """Override fill so that line is closed by default"""
            closed = kwargs.pop('closed', True)
            return super(RadarAxes, self).fill(closed=closed, *args, **kwargs)

        def plot(self, *args, **kwargs):
            """Override plot so that line is closed by default"""
            lines = super(RadarAxes, self).plot(*args, **kwargs)
            for line in lines:
                self._close_line(line)
            # Hand the lines back like Axes.plot does; the override used to
            # drop them and return None.
            return lines

        def _close_line(self, line):
            """Append the first point to the end of `line` if it is not closed."""
            x, y = line.get_data()
            if x[0] != x[-1]:
                x = np.concatenate((x, [x[0]]))
                y = np.concatenate((y, [y[0]]))
                line.set_data(x, y)

        def set_varlabels(self, labels):
            """Label each spoke with the corresponding entry of `labels`."""
            self.set_thetagrids(np.degrees(theta), labels)

        def _gen_axes_patch(self):
            return self.draw_patch()

        def _gen_axes_spines(self):
            if frame == 'circle':
                return PolarAxes._gen_axes_spines(self)
            spine_type = 'circle'
            verts = unit_poly_verts(theta)
            # close off polygon by repeating first vertex
            verts.append(verts[0])
            path = Path(verts)
            spine = Spine(self, spine_type, path)
            spine.set_transform(self.transAxes)
            return {'polar': spine}

    register_projection(RadarAxes)
    return theta
# Parse the whitespace-separated sample table. sep=r"\s+" replaces the
# deprecated delim_whitespace=True and means the same thing.
df = pd.read_csv(io.StringIO(u), sep=r"\s+")
# One radar axis per genre column rather than a hard-coded 5.
N = len(df.columns)
theta = radar_factory(N, frame='polygon')
fig, ax = plt.subplots(subplot_kw=dict(projection='radar'))
colors = ['b', 'r', 'g', 'm', 'y']
markers = ["s", "o", "P", "*", "^"]
ax.set_rgrids([1])
# iterrows() yields (row index, row); each row is one book.
for i, (idx, row) in enumerate(df.iterrows()):
    # Cycle through the style lists so frames with more than five rows still plot.
    color = colors[i % len(colors)]
    marker = markers[i % len(markers)]
    # Label the marker artist directly so the legend needs no `labels*2`
    # list; each book then gets one legend entry (its marker).
    ax.scatter(theta, row, c=color, marker=marker,
               label="Book {}".format(i + 1))
    ax.fill(theta, row, facecolor=color, alpha=0.25)
ax.set_varlabels(df.columns)
ax.legend(loc=(0.97, .1), labelspacing=0.1, fontsize='small')
plt.show()
heatmap
An easy and probably more readable way to visualize the data would be a heatmap.
# Sample table from the question: genre flags, one row per book.
u = u"""Action Comedy Crime Thriller SciFi
1 0 1 1 0
0 1 0 0 1
0 1 0 1 0
0 0 1 0 1
1 1 0 0 0"""
import io
import pandas as pd
import matplotlib.pyplot as plt
# sep=r"\s+" replaces the deprecated delim_whitespace=True.
df = pd.read_csv(io.StringIO(u), sep=r"\s+")
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(df)
# Draw the 0/1 table as a grayscale matrix: columns are genres, rows are books.
plt.matshow(df, cmap="gray")
plt.xticks(range(len(df.columns)), df.columns)
# Number the books from 1 on the y axis.
plt.yticks(range(len(df)), range(1, len(df) + 1))
plt.ylabel("Book number")
plt.show()

Here is a nice simple visualization that you can get with a bit of data manipulation and Seaborn.
import pandas as pd
import seaborn as sns
# df is a Pandas DataFrame with the following content:
# Action Comedy Crime Thriller SciFi
# 1 0 1 1 0
# 0 1 0 0 1
# 0 1 0 1 0
# 0 0 1 0 1
# 1 1 0 0 0
# Built explicitly here so the snippet runs as-is; the original left a
# `df = ...` placeholder, on which the attribute assignments below fail.
df = pd.DataFrame(
    [
        [1, 0, 1, 1, 0],
        [0, 1, 0, 0, 1],
        [0, 1, 0, 1, 0],
        [0, 0, 1, 0, 1],
        [1, 1, 0, 0, 0],
    ],
    columns=["Action", "Comedy", "Crime", "Thriller", "SciFi"],
)
# Give name to the indices for convenience
df.index.name = "Index"
df.columns.name = "Genre"
# Get a data frame containing the relevant genres and indices:
# unstack() gives one value per (Genre, Index) pair; keep the pairs whose
# flag is set and turn the index levels back into columns.
df2 = df.unstack()
df2 = df2[df2 > 0].reset_index()
# Plot it
ax = sns.stripplot(x="Genre", y="Index", data=df2)
ax.set_yticks(df.index)
And you get:
For fine tuning you can check the documentation of sns.stripplot.

Related

How to plot a parallel coordinates plot from Hyperparameter Tuning with the HParams Dashboard?

I am trying to replicate the parallel coordinates plot from the Hyperparameter Tuning tutorial in this TensorFlow tutorial, and I have written my own CSV file where I store my results.
My output reading the csv file is like this:
conv_layers filters dropout accuracy
0 4 16 0.5 0.447917
1 4 16 0.6 0.458333
2 4 32 0.5 0.635417
3 4 32 0.6 0.447917
4 4 64 0.5 0.604167
5 4 64 0.6 0.645833
6 8 16 0.5 0.437500
7 8 16 0.6 0.437500
8 8 32 0.5 0.437500
9 8 32 0.6 0.562500
10 8 64 0.5 0.562500
11 8 64 0.6 0.437500
How can I create the same plot like in the tutorial in python?
So I found the answer using plotly:
import os
import sys
import pandas as pd
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objects as go
init_notebook_mode(connected=True)
df = pd.read_csv('path/to/csv')
fig = go.Figure(
    data=go.Parcoords(
        # Colour every line by its accuracy and show the matching colour bar.
        # `colorbar` has to be a dict of colour-bar properties; the original
        # passed an empty list.
        line=dict(
            color=df['accuracy'],
            showscale=True,
            colorbar=dict(title='accuracy'),
            colorscale=[
                [0, '#6C9E12'],
                [0.25, '#0D5F67'],
                [0.5, '#AA1B13'],
                [0.75, '#69178C'],
                [1, '#DE9733'],
            ],
        ),
        # One axis per column of the CSV shown in the question. The original
        # also had a 'dense_num' axis reading df['dense'], but that CSV has
        # no such column, so the lookup fails with KeyError.
        dimensions=[
            dict(range=[0, 12],
                 label='Conv_layers', values=df['conv_layers']),
            dict(range=[8, 64],
                 label='filter_number', values=df['filters']),
            dict(range=[0.2, 0.8],
                 label='dropout_rate', values=df['dropout']),
            dict(range=[0.1, 1.0],
                 label='accuracy', values=df['accuracy']),
        ],
    )
)
fig.update_layout(
    plot_bgcolor='#E5E5E5',
    paper_bgcolor='#E5E5E5',
    title="Parallel Coordinates Plot",
)
# print the plot
fig.show()

Null independent column wise mean calculation in Python

I am trying to calculate the mean of three columns in Python. Here is the catch:
If all 3 row values of my 3 columns are not null then my mean will be (x+y+z)/3.
If one of my row value is null (suppose z), then my mean should be (x+y)/2.
I'm storing these mean values in a separate column which is part of the pandas DataFrame.
I'm looking for the best approach as my dataset has over 2 million rows.
My data is below.
Thanks in advance.
A B C
0 1 2 3 # = (1+2+3)/3 = 2
1 4 NaN 6 # = (4+6)/2 = 5
2 NaN 8 9 # = (8+9)/2 = 8.5
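Just apply the numpy.nanmean function along axis 0 to get one mean per column. This is the default axis, so you get the same result if you omit axis = 0. For the row-wise means that the question asks for, use axis = 1. (pandas' own df.mean(axis=...) also skips NaN by default and is much faster on large frames, because it does not call a Python function once per row.)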
import pandas as pd
import numpy as np
df = pd.DataFrame({
    'a': [2.3, 4.5, 2.1, np.nan, 6.7],
    'b': [2.4, 5.6, np.nan, np.nan, 7.1],
    'c': [np.nan, np.nan, np.nan, np.nan, 0.9]
})
# DataFrame.mean skips NaN by default (skipna=True), so it yields the same
# numbers as df.apply(np.nanmean, ...) but is computed in one vectorised pass
# instead of one Python call per row/column -- which matters for millions of
# rows -- and it does not emit a "Mean of empty slice" RuntimeWarning for
# rows that are entirely NaN.
colmeans = df.mean(axis=0)
# colmeans
# a 3.900000
# b 5.033333
# c 0.900000
# dtype: float64
rowmeans = df.mean(axis=1)
# 0 2.35
# 1 5.05
# 2 2.10
# 3 NaN
# 4 4.90
# dtype: float64

Pyomo - NameError: name 'm' is not defined

I will be grateful for help with the above-stated error message from running my pyomo script file - "pyomo solve Katrina_Model5.py Katrina_paper.dat --solver=gurobi --summary --stream-solver --report-timing" at the command prompt. I am still new to the software. I have included full code and data file to aid with your help.
Below is the entire code for my problem:
from pyomo.environ import *
#--define the model:
model = AbstractModel()

#--declaring parameters:
model.n = Param(within=PositiveIntegers, doc='total no. of depots & afected areas')
model.L = Param(within=PositiveIntegers, doc='Max. no. of nodes a salesman may visit')
model.K = Param(initialize=2, within=PositiveIntegers, doc='Min. no. of nodes a salesman may visit')
#model.m = Param(within=PositiveIntegers, doc='no. of initial salesmen positioned at depot i &j')

#--declare model sets names:
# The descriptive sentences are passed as `doc=` (as the Params above already
# do); `name=` is the component's identifier, not a description.
model.I = RangeSet(model.n, doc='Set of Origin/intermediary nodes')
model.J = RangeSet(model.n, doc='Set of affected areas/destination nodes')
model.A = model.I*model.J
model.D = RangeSet(2, doc='Set of depots comprises first d nodes of Set V')
model.U = RangeSet(3,5, doc='Set of impacted areas/or customers')
model.V = model.D | model.U

#-- define additional parameters with indexed sets:
model.d = Param(model.I, model.J, doc='Represents cost/travel time matrix.')

#--define model Variables:
model.x = Var(model.I, model.J, within=Binary, doc="Var of a salesman traveling.")
# u is indexed by the customer nodes because the constraints below use u[i]
# for i in model.U; the original declared it as a single scalar variable.
model.u = Var(model.U, within=RangeSet(2,5), doc="no. of nodes visited on traveler's path from origin up to node i")
model.m = Var(model.D, doc='no. of initial salesmen positioned at depot i &j')


# model's objective function defined.
def objective_rule(model):
    """Total travel cost over all arcs (i, j) in model.A."""
    return sum(model.d[i,j]*model.x[i,j] for (i,j) in model.A)
model.objective = Objective(rule=objective_rule, sense=minimize, doc="Total distance traveled")


# --Below we define and declare the constraints of the model --
# Inside a rule the only object in scope is the `model` argument, so every
# Param/Var must be reached as `model.<name>`. The bare names m, u, x, L and K
# used originally are undefined Python names; `m` is the first one hit, hence
# "NameError: name 'm' is not defined".

#.....constraint # 2
def constrTWO_rule(model, i):
    """Arcs leaving depot i towards customers equal the salesmen at depot i."""
    return sum(model.x[i,j] for j in model.U) == model.m[i]
model.ConsOutTrvler = Constraint(model.D, rule=constrTWO_rule)

#.....constraint # 3
def constrTHREE_rule(model, j):
    """Arcs entering depot j from customers equal the salesmen at depot j."""
    return sum(model.x[i,j] for i in model.U) == model.m[j]
model.ConsInTrvler = Constraint(model.D, rule=constrTHREE_rule)

#.....constraint # 4
def constrFOUR_rule(model, j):
    """Exactly one arc enters each customer node j."""
    return sum(model.x[i,j] for i in model.V) == 1
model.ConsTrvlerInn = Constraint(model.U, rule=constrFOUR_rule)

#.....constraint # 5
def constrFIVE_rule(model, i):
    """Exactly one arc leaves each customer node i."""
    return sum(model.x[i,j] for j in model.V) == 1
model.ConsTrvlerOut = Constraint(model.U, rule=constrFIVE_rule)

#......constraint # 6
def constrSIX_rule(model, i):
    return model.u[i] + (model.L-2)*sum(model.x[k,i]-model.x[i,k] for k in model.D) - model.L + 1 <= 0
model.consLowBounds = Constraint(model.U, rule=constrSIX_rule)

#.....constraint # 7
def constrSEVEN_rule(model, i):
    return model.u[i] + sum(model.x[k,i] + (2-model.K)*model.x[i,k] for k in model.D) >= 2
model.consUpBounds = Constraint(model.U, rule=constrSEVEN_rule)

#.....constraint # 8 ---DOUBLE-CHECK FORMULATION
def constrEIGHT_rule(model, k, i):
    """An arc between depot k and customer i is used in at most one direction."""
    return model.x[k,i] + model.x[i,k] <= 1
model.consNotOneAffArea = Constraint(model.D, model.U, rule=constrEIGHT_rule)

#.....constraint # 9 ---DOUBLE-CHECK FORMULATION
def constrNINE_rule(model, i, j):
    # NOTE(review): the last term repeats x[i,j]; sub-tour elimination
    # constraints of this shape usually use x[j,i] there -- confirm against
    # the source formulation before relying on this.
    return ( model.u[i] - model.u[j] + model.L*model.x[i,j] + (model.L-2)*model.x[i,j] ) <= model.L - 1
# The rule takes two indices (i, j), so the constraint must be indexed over
# pairs of customer nodes, not over model.U alone.
model.consSubTourElim = Constraint(model.U, model.U, rule=constrNINE_rule)
And, here are the '.dat' data file used:
param n := 5 ;
param L := 5 ;
param K := 2 ;
param d: 1 2 3 4 5 :=
1 0 8 4 9 9
2 8 0 6 7 10
3 4 6 0 5 6
4 9 7 5 0 4
5 9 10 6 4 0 ;

Need help understanding line # 9 of is_prime function; is there a simpler way for the function?

How is line # 9(if n + 1 == x) relevant to checking if the number is prime?
Is there a simpler way to build this function?
def is_prime(x):  # returns True when x is prime, False otherwise
    if x == 2:
        return True
    elif x > 2:
        for n in range(2, x):  # trial divisors 2 .. x - 1
            if x % n == 0:
                return False
            else:
                if n + 1 == x:  # n is the last candidate (x - 1) and nothing divided x, so x is prime
                    return True
    else:  # x < 2: 0, 1 and negative numbers are not prime
        return False
A prime number is an integer greater than 1 whose only divisors are 1 and itself. Here is a similar solution that may be easier to follow. We use a pandas DataFrame and its associated 'apply' function. Suppress the line that prints df and modify the output as desired. Have fun.
"""
Created on Fri Nov 18 13:32:08 2016
#author: Soya
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas import DataFrame, Series
def isprime(x):
    """Print a trial-division table for `x` and report whether it is prime.

    Column 0 of the printed table lists the candidate divisors 2 .. x // 2 and
    column '1' the remainder of `x` divided by each of them. 'PRIME' is
    printed after the table when `x` is prime.

    Parameters
    ----------
    x : int
        Number to test.

    Returns
    -------
    bool
        True if `x` is prime, False otherwise (including every x < 2).
    """
    # The upper bound must include x // 2 itself: with the original
    # range(2, x/2) the divisor 2 was never tried for x == 4. `//` also keeps
    # the bound an int on Python 3.
    vals = list(range(2, x // 2 + 1))
    # Explicit int64 dtype keeps the column numeric when there are no candidates.
    df = DataFrame({0: Series(vals, dtype='int64')})
    df['1'] = df[0].apply(lambda y: x % y)
    print(df)
    print('')
    # x is prime when no candidate leaves a zero remainder. Checking each
    # remainder avoids multiplying them together, which can overflow int64.
    prime = x > 1 and bool((df['1'] != 0).all())
    if prime:
        print('PRIME')
    return prime


isprime(17)
# Output:
#    0  1
# 0  2  1
# 1  3  2
# 2  4  1
# 3  5  2
# 4  6  5
# 5  7  3
# 6  8  1
#
# PRIME

how to make a counter in python

I am trying to make a counter so if I type 'bye' it starts counting how long it has been since I said bye but the problem is that I can't type anything to stop the counter and I don't know how to have it tell you something when you type something to stop it. Here is my code for a counter but I tried to type something and it does not stop:
import time
# Elapsed seconds, minutes and hours.
s = 0
m = 0
h = 0
# Runs until interrupted: s is reset at 60, so the original `s <= 60` test
# never became false either.
while True:
    # str.format gives the same line on Python 2 and Python 3.
    print('{} Hours {} Minutes {} Seconds'.format(h, m, s))
    time.sleep(1)
    s += 1
    if s == 60:
        m += 1
        s = 0
        # Roll the hour over in the same tick as the minute. As an `elif` of
        # the seconds check it only ran one tick later, after
        # "60 Minutes" had already been printed.
        if m == 60:
            h += 1
            m = 0
Consider using threading.Thread:
import time
import threading
class MyTimer(threading.Thread):
    """Stopwatch that prints the elapsed time once per second until stopped."""

    def __init__(self):
        # Initialise the Thread base class too, so the object is a valid
        # Thread even though `count` is normally run via a separate
        # threading.Thread(target=...).
        super(MyTimer, self).__init__()
        self.h = 0
        self.m = 0
        self.s = 0

    def _stamp(self):
        """Return the current reading as 'H Hours M Minutes S Seconds'."""
        return '{} Hours {} Minutes {} Seconds'.format(self.h, self.m, self.s)

    def count(self, t, stop_event):
        """Tick once per second, printing each reading, until `stop_event` is set.

        Parameters
        ----------
        t : any
            Unused; kept so existing callers that pass it keep working.
        stop_event : threading.Event
            Setting it ends the loop after one final reading is printed.
        """
        while True:
            print(self._stamp())
            # Wait up to one second, returning early (with True) as soon as
            # the event is set, so a stop request is not delayed by a sleep.
            stopped = stop_event.wait(1)
            self.s += 1
            if self.s == 60:
                self.m += 1
                self.s = 0
                # Roll the hour over in the same tick as the minute.
                if self.m == 60:
                    self.h += 1
                    self.m = 0
            # Checked on every tick. As the last `elif` of the rollover chain
            # it was skipped whenever a rollover happened.
            if stopped:
                print(self._stamp())
                break
class Asking(threading.Thread):
    """Prompt for words on stdin and signal a stop when 'bye' is entered."""

    def asking(self, t, stop_event):
        """Keep prompting until `stop_event` is set; typing 'bye' sets it.

        Parameters
        ----------
        t : any
            Unused; kept so existing callers that pass it keep working.
        stop_event : threading.Event
            Set when the user types 'bye' or stdin is closed.
        """
        # raw_input only exists on Python 2; fall back to input on Python 3.
        try:
            read_line = raw_input
        except NameError:
            read_line = input
        while not stop_event.is_set():
            try:
                word = read_line('enter a word:\n')
            except EOFError:
                # stdin was closed: nothing more can be typed, so stop
                # instead of leaving the timer running forever.
                word = 'bye'
            if word == 'bye':
                # Signal through the event that was passed in rather than
                # through module-level globals.
                stop_event.set()


timer = MyTimer()
question = Asking()
# Both threads share one event, so a single 'bye' stops the prompt and the
# timer. Both original names stay bound for any code that refers to them.
question_stop = timer_stop = threading.Event()
threading.Thread(target=question.asking, args=(1, question_stop)).start()
threading.Thread(target=timer.count, args=(2, timer_stop)).start()
Running it as an example:
$ python stackoverflow.py
enter a word:
0 Hours 0 Minutes 0 Seconds
0 Hours 0 Minutes 1 Seconds
0 Hours 0 Minutes 2 Seconds
0 Hours 0 Minutes 3 Seconds
hi
enter a word:
0 Hours 0 Minutes 4 Seconds
0 Hours 0 Minutes 5 Seconds
0 Hours 0 Minutes 6 Seconds
bye
0 Hours 0 Minutes 7 Seconds
The code could probably be a bit neater :p. I shocked myself that I was able to produce this :D.
The only way I know is with pygame. It would set up a normal pygame loop, except having it only being 1 by 1, so you can see the background window, and when you enter in a letter it would exit the pygame loop.
maybe better...
.....
.....
while self.s <= 60:
    print('{} Hours {} Minutes {} Seconds'.format(self.h, self.m, self.s))
    time.sleep(1)
    self.s += 1
    if self.s == 60:
        self.m += 1
        self.s = 0
        # Nested so the hour rolls over in the same tick as the minute.
        if self.m == 60:
            self.h += 1
            self.m = 0
    # A separate `if`, not an `elif` of the seconds check, so a stop request
    # is honoured on rollover ticks as well.
    if stop_event.is_set():
        print('{} Hours {} Minutes {} Seconds'.format(self.h, self.m, self.s))
        break
......
......