Python 2.7 Interactive Visualisation - python-2.7

I'm a new programmer who has been trying for a few days to create a dropdown list whose selected value is then used to create a graph.
For my graph, I'm using Bokeh to create an HTML file that plots the per-capita income of a few places as well as their percentage of diabetes. However, I have been trying to get it to work with a dropdown list for 2 weeks now and I simply cannot make it work.
I can create the file, but only when the user enters the input by typing. How can I make this work so that a person selects a place from a dropdown list and the file shows that place's graph as output? Here's my code.
Edit:
I want the selected value from the dropdown list to be sent as the value aaa to the program. I know I should turn my graph creating part of the program into a function. But how do I get the value of a dropdown list as the variable aaa?
import csv
from bokeh.plotting import figure, curdoc
from bokeh.io import output_file, show
from bokeh.layouts import widgetbox
from bokeh.models.widgets import Dropdown
# NOTE(review): indentation was lost in this listing; the bodies of the
# `with` / `for` statements must be re-indented before the script can run.
# The script targets Python 2 (raw_input, print statements).

# Ask the user for the state name; it is the lookup key into both tables.
aaa = raw_input("Write State, not Puerto Rico, Hawaii, or DC: ")
from collections import defaultdict
columns = defaultdict(list) # first CSV: row key -> list of the row's values
columns1 = defaultdict(list) # second CSV: same layout
# First file: fields 1..5 of each row are stored under the row's first field.
with open('my_data.csv') as f:
for row in f:
row = row.strip()# drop surrounding whitespace / the trailing newline
row = row.split(',')
columns[row[0]].append(row[1])
columns[row[0]].append(row[2])
columns[row[0]].append(row[3])
columns[row[0]].append(row[4])
columns[row[0]].append(row[5])
# Values for the chosen state, converted to float and scaled down by 10000;
# this series is plotted below as "Per Capita Income/10000".
xy = (columns[aaa])
xy = [float(i) for i in xy]
myInt = 10000
xy = [x / myInt for x in xy]
print xy
# Second file, parsed exactly like the first one.
with open('my_data1.csv') as f:
for row in f:
row = row.strip()# drop surrounding whitespace / the trailing newline
row = row.split(',')
columns1[row[0]].append(row[1])
columns1[row[0]].append(row[2])
columns1[row[0]].append(row[3])
columns1[row[0]].append(row[4])
columns1[row[0]].append(row[5])
# Values for the chosen state; plotted below under the "Diabetes% " legend.
omega = (columns1[aaa])
omega = [float(i) for i in omega]
print omega
import numpy
# Correlation matrix between the two series (printed only, not plotted).
corr123 = numpy.corrcoef(omega,xy)
print corr123
# X axis: one year per value read from each CSV row.
a = [2004, 2005, 2006, 2007, 2008]
output_file("lines.html")
p = figure(tools="pan,box_zoom,reset,save", title="Diabetes and Stats",
x_axis_label='Years', y_axis_label='percents')
# add some renderers: each series is drawn as a line plus circle markers
per = "Diabetes% " + aaa
p.line(a, omega, legend=per)
p.circle(a, omega, legend=per, fill_color="white",line_color="green", size=8)
p.line(a, xy, legend="Per Capita Income/10000")
p.circle(a, xy, legend="Per Capita Income/10000", fill_color="red", line_color="red", size=8)
p.legend.location="top_left"
show(p)

Related

How to represent the data in x and y axis using matplotlib

In my program I want to show month-wise dates as the x-axis labels and represent my other (Rs) data on the y-axis. Can you please help me with how to specify my data in matplotlib?
Given below is my sample program:
import matplotlib.pyplot as plt
from matplotlib import style
# line 1 points: x and y coordinates of the single plotted series
x1 = [1,2,3]
y1 = [2,4,1]
# plotting the line 1 points as a green ('g') line of width 10
plt.plot(x1, y1, 'g', label = "line 1",linewidth=10)
plt.title('Two lines on same graph!')
plt.xlabel('x - axis')
plt.ylabel('y - axis')
# legend() picks up the label passed to plot() above
plt.legend()
# black ("k") grid lines
plt.grid(True,color="k")
plt.show()
# xticks(np.arange(12), calendar.month_name[1:13], rotation=20)
I don't want the in-between values that it is taking for the x and y axes; I want the axes labelled as in the given diagram.
After a few edits and your comments, is this closer to what you are looking for?
import matplotlib.pyplot as plt
import datetime
# Sample series: one value per day for each metric.
val = [1, 2, 3, 2, 6]
cust = [2, 4, 1, 6, 2]
orders = [3, 5, 2, 7, 3]
col = [1, 3, 4, 2, 6]

fig, ax = plt.subplots()

# Plain 7 and 1: the zero-padded forms 07 / 01 are octal literals in
# Python 2 and a SyntaxError in Python 3.
start_date = datetime.datetime(2019, 7, 1)

# One date label per measurement, starting at start_date, one day apart.
# See: https://stackoverflow.com/questions/1060279/iterating-through-a-range-of-dates-in-python
dates = [
    (start_date + datetime.timedelta(n)).strftime('%Y-%m-%d')
    for n in range(len(val))
]

# Every series is drawn twice: first as dot markers, then as a labelled line.
for series, label, color in (
    (val, 'Values', 'g'),
    (cust, 'Customers', 'b'),
    (orders, 'Orders', 'r'),
    (col, 'Collection', 'black'),
):
    plt.plot(dates, series, '.', color=color, markersize=12)
    plt.plot(dates, series, label=label, color=color)

plt.title('Four lines on same graph!')
# Tilt the date labels so neighbouring ticks do not overlap.
plt.tick_params(axis='x', rotation=20)
plt.xlabel('x - axis')
plt.ylabel('y - axis')
plt.grid(True, color="k")
plt.legend()
plt.show()

Assigning more than one word to a node in a graph using networkx and matplotlib

I want to create a graph for a sentence by assigning words to the nodes based on the tag assigned to each word. If the word is a proper noun, it will be assigned to the subject list, if the word is a noun it will be assigned to the object list and if the word is a verb it will be assigned to the verb list.
I'm using Python 2.7 in Jupyter Notebook.
# NOTE(review): indentation was lost in this listing; the function bodies
# below must be re-indented before the code can run.

# Input sentence(s) and the tag of each known word.
sentence_list=['Arun Mehta drinks milk']
tag_list={'Arun':'NP','Mehta':'NP','drinks':'VF','milk':'NN'}
tag_list_keys = tag_list.keys()
# Result buckets filled by classify().
subject_list=[]
object_list=[]
verb_list=[]
# Files a single word into the bucket matching its tag; words that are not
# in tag_list are ignored.
def classify(item):
if item in tag_list_keys:
if tag_list[item] == 'NP': subject_list.append(item)
if tag_list[item] == 'NN': object_list.append(item)
if tag_list[item] == 'VF': verb_list.append(item)
# Splits a sentence on single spaces and classifies every word separately,
# so 'Arun' and 'Mehta' end up as two distinct subjects.
def extract(item):
item_split = item.split(' ')
# map() is used only for its side effects; this relies on Python 2's eager map.
map(classify, item_split)
map(extract, sentence_list)
print('SUBJECT:',subject_list)
print('OBJECT',object_list)
print('VERB',verb_list)
%matplotlib notebook
import networkx as nx
import matplotlib.pyplot as plt
# NOTE(review): indentation was lost; presumably the add_node / add_edge
# calls form the loop body and draw/show run once after the loop.
G = nx.Graph()
# Indexes all three lists at 0, 1 and 2.
# NOTE(review): for the sample sentence object_list and verb_list appear to
# hold a single entry each, so index 1 looks out of range - confirm.
for i in range(3):
G.add_node(object_list[i])
G.add_node(verb_list[i])
G.add_node(subject_list[i])
# Chain: subject - verb - object
G.add_edge(verb_list[i],object_list[i])
G.add_edge(subject_list[i],verb_list[i])
nx.draw(G, with_labels= True)
plt.show()
Expected output should have three nodes consisting of 'Arun Mehta' as one node, 'drinks' in the second node and 'milk' in the third node. Can somebody please suggest what needs to be done in order to get two or more words in one node?
In your extract method you are splitting at every space. This is the reason why you only have single words inside your graph. You may want to check whether two adjacent words are both subjects and join them again.
To answer your basic question, networkx supports node names that contain spaces:
import networkx as nx
G = nx.Graph()
# A node label may be any string, including one that contains a space.
G.add_node('Arun Mehta')
print(G.nodes)
Output: ['Arun Mehta']
I have modified your code to join two adjacent subjects, and also changed it slightly so that it works with Python 3.
# Sentences to analyse and the part-of-speech tag of every known word.
sentence_list = ['Arun Mehta drinks milk']
tag_list = dict(Arun='NP', Mehta='NP', drinks='VF', milk='NN')
tag_list_keys = tag_list.keys()

# Result buckets; they start empty and are filled during classification.
subject_list, object_list, verb_list = [], [], []

# Tag -> bucket that collects the words (or word groups) carrying that tag.
list_by_tag = dict(NP=subject_list, NN=object_list, VF=verb_list)
def classify(items):
    """Group consecutive words that share a tag and file each group by tag.

    Adjacent words with the same tag are joined with a single space, in
    their original order, and appended to the bucket that ``list_by_tag``
    holds for that tag (e.g. ``['Arun', 'Mehta']`` -> ``'Arun Mehta'``).

    Args:
        items: the words of one sentence, in sentence order.

    Raises:
        KeyError: if a word is missing from ``tag_list``.
    """
    if not items:
        # Nothing to classify; avoids an IndexError on items[0].
        return

    last_tag = tag_list[items[0]]
    group = [items[0]]
    for item in items[1:]:
        current_tag = tag_list[item]
        if current_tag == last_tag:
            # Same tag as the previous word: extend the group, keeping the
            # sentence's word order.
            group.append(item)
        else:
            # Tag changed: flush the finished group, start a new one.
            list_by_tag[last_tag].append(" ".join(group))
            group = [item]
            last_tag = current_tag
    # The final group is never flushed inside the loop.
    list_by_tag[last_tag].append(" ".join(group))
def extract(item):
    """Split one sentence on single spaces and classify its words."""
    classify(item.split(' '))


# A plain loop instead of list(map(...)): extract() is called only for its
# side effects on the result lists, so no throwaway list is needed.
for sentence in sentence_list:
    extract(sentence)

print('SUBJECT:', subject_list)
print('OBJECT', object_list)
print('VERB', verb_list)
%matplotlib notebook
import networkx as nx
import matplotlib.pyplot as plt
G = nx.Graph()
# One subject - verb - object chain per classified sentence. zip() walks the
# three result lists in lockstep, so this handles any number of sentences
# (and none at all) instead of only index 0.
for subject, verb, obj in zip(subject_list, verb_list, object_list):
    G.add_node(obj)
    G.add_node(verb)
    G.add_node(subject)
    G.add_edge(verb, obj)
    G.add_edge(subject, verb)
nx.draw(G, with_labels=True)
plt.show()

Colour table cells - bokeh

I want to add colour to a row only if a row's cell's value is present in another datasource.
I mean, I have the list A, and the table B, so I want to colour the row X in B if a cell of the row contains a value from A...
I don't even know from where to start..
Pretty much you need to do what ChesuCR mentioned in their comment. To take it one step further, see below a small bokeh application.
If you edit values in the first table, a callback will run and check each 'y' value. An additional column is needed to keep track of whether the 'y' values are contained in the separate list/data source. The value of the additional column is then used to color the cell.
from bokeh.layouts import row
from bokeh.models import ColumnDataSource
from bokeh.models.widgets import DataTable, TableColumn, HTMLTemplateFormatter
from bokeh.io import curdoc
def update_included(attr, old, new):
    """Recompute the ``y_in`` flags of ``source_2`` from ``source_1``.

    Registered as an ``on_change`` callback; the three arguments are not
    used. Every ``y`` value of ``source_2`` is flagged 1 when it equals one
    of the ``a`` values of ``source_1`` (converted to float) and 0
    otherwise. The flags are written to the ``y_in`` column.
    """
    # Convert to float first, so entries that are not already numbers
    # still compare equal to the numeric y values.
    list_a = [float(a) for a in source_1.data['a']]
    criteria = set(list_a)  # built once; constant-time membership per row
    ys = source_2.data['y']
    y_in = [1 if y in criteria else 0 for y in ys]
    print(ys, y_in, list_a)
    source_2.data['y_in'] = y_in
# Criteria table: a single editable column 'a' holding the values to look for.
source_1 = ColumnDataSource(data={'a':[1001,1100]})
columns = [
TableColumn(field="a", title="Criteria list")
]
data_table1 = DataTable(source=source_1, columns=columns, width=400, editable=True)
# Data table: 'y' holds the values to check, 'y_in' the 0/1 flag written by
# update_included(); every flag starts at 0.
dict1 = {'x':[0]*6,
'y':[0,10,12,13,200,2001],
'y_in':[0]*6}
source_2 = ColumnDataSource(data=dict1)
# Cell template: blue background when the row's y_in equals 1, red otherwise;
# the cell text is white. The text inside the triple quotes is runtime data.
template="""
<div style="background:<%=
(function colorfromint(){
if(y_in == 1){
return("blue")}
else{return("red")}
}()) %>;
color: white">
<%= value %></div>
"""
formater = HTMLTemplateFormatter(template=template)
# `columns` is rebound here; data_table1 was already built with the first list.
columns = [
TableColumn(field="x", title="x"),
TableColumn(field="y", title="y",formatter=formater)
]
data_table2 = DataTable(source=source_2, columns=columns, width=400)
# Re-evaluate the flags whenever the criteria source's data changes.
source_1.on_change('data', update_included)
# Show both tables side by side.
curdoc().add_root(row(data_table1, data_table2))

Curdoc() keeps adding plots, want to replace

I have written a program that creates a graph based on input from a dropdown list. I am using curdoc().add_root() from Bokeh to show my graphs on a server, as show() does not work. However, whenever I choose a new option, instead of replacing the current graph, it creates one below it. I have tried curdoc().clear(), but it's not working. How do I make it replace the graph without deleting the dropdown list (which is what curdoc().clear() does)? Here's my code:
import csv
import bokeh.plotting
from bokeh.plotting import figure, curdoc
from bokeh.io import output_file, show
from bokeh.layouts import widgetbox
from bokeh.models.widgets import MultiSelect
from bokeh.io import output_file, show, vform
from bokeh.layouts import row
from collections import defaultdict
# NOTE(review): indentation was lost in this listing; the bodies of the
# `with` / `for` statements must be re-indented before the script can run.
columns = defaultdict(list) # my_data.csv: row key -> list of the row's values
columns1 = defaultdict(list) # my_data1.csv: same layout
# Fields 1..5 of each row are stored under the row's first field.
with open('my_data.csv') as f:
for row in f:
row = row.strip()# drop surrounding whitespace / the trailing newline
row = row.split(',')
columns[row[0]].append(row[1])
columns[row[0]].append(row[2])
columns[row[0]].append(row[3])
columns[row[0]].append(row[4])
columns[row[0]].append(row[5])
# Second file, parsed exactly like the first one.
with open('my_data1.csv') as f:
for row in f:
row = row.strip()# drop surrounding whitespace / the trailing newline
row = row.split(',')
columns1[row[0]].append(row[1])
columns1[row[0]].append(row[2])
columns1[row[0]].append(row[3])
columns1[row[0]].append(row[4])
columns1[row[0]].append(row[5])
from bokeh.layouts import widgetbox
from bokeh.models.widgets import Dropdown
from bokeh.plotting import curdoc
# NOTE(review): indentation was lost in this listing; presumably everything
# from `aaa = dropdown.value` through the add_root calls is the body of myfunc.

# Dropdown entries; presumably (label, value) pairs - confirm against the
# Bokeh Dropdown documentation.
menu = [("NY", "New York"), ("California", "California"), ("Ohio", "Ohio")]
dropdown = Dropdown(label="Dropdown button", button_type="warning", menu=menu)
count = 0  # not used anywhere in the code shown
#def function_to_call(attr, old, new):
#print dropdown.value
# Callback for changes of dropdown.value: builds a brand-new figure for the
# selected state and adds it to the document.
def myfunc(attr, old, new):
aaa = dropdown.value
# Series from my_data.csv, as floats scaled down by 10000.
xy = (columns[aaa])
xy = [float(i) for i in xy]
myInt = 10000
xy = [x / myInt for x in xy]
print xy
# Series from my_data1.csv, as floats.
omega = (columns1[aaa])
omega = [float(i) for i in omega]
print omega
import numpy
# Correlation matrix between the two series (printed only).
corr123 = numpy.corrcoef(omega,xy)
print corr123
# X axis: one year per value read from each CSV row.
a = [2004, 2005, 2006, 2007, 2008]
p = figure(tools="pan,box_zoom,reset,save", title="Diabetes and Stats",
x_axis_label='Years', y_axis_label='percents')
# add some renderers: each series is drawn as a line plus circle markers
per = "Diabetes% " + aaa
p.line(a, omega, legend=per)
p.circle(a, omega, legend=per, fill_color="white",line_color="green", size=8)
p.line(a, xy, legend="Per Capita Income/10000")
p.circle(a, xy, legend="Per Capita Income/10000", fill_color="red", line_color="red", size=8)
p.legend.location="top_left"
#bokeh.plotting.reset_output
#curdoc().clear()
# Every call adds the new figure as an additional root and never removes an
# earlier one, so the plots pile up.
curdoc().add_root(p)
curdoc().add_root(dropdown)
#bokeh.plotting.reset_output
# Run myfunc whenever the dropdown's value changes, and show the dropdown.
dropdown.on_change('value', myfunc)
curdoc().add_root(dropdown)

How to convert recurrent vertical columns into rows, then stack them together in Python/Pandas?

I am generating some data vertically at first, but would like to transpose them into row data, then stack them into an array like a Pandas data frame. How do I get a final product of a pandas data frame with 4 columns ('fr', 'en', 'ir', 'ab') and three rows?
# coding=utf-8
import pandas as pd
from pandas import DataFrame, Series
import numpy as np
import nltk
import re
import random
from random import randint
import csv
import sys
# NOTE(review): indentation was lost in this listing; the nested loops and
# the try/except must be re-indented before the code can run.
# Python 2 only: re-expose sys.setdefaultencoding and switch the default
# string encoding to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')
# Get csv file into data frame
data = pd.read_csv("FamilySearchData_All_OCT2015_newEthnicity_filledEthnicity_processedName_trimmedCol.csv", header=0, encoding="utf-8")
df = DataFrame(data)
# Values to count, and the data-frame columns in which to count them.
columns = ['fr', 'en', 'ir', 'ab']
classes = ['ethnicity2', 'Ab_group', 'Ab_tribe']
# Empty frame with the target columns; it is not filled by the code shown.
df_count = DataFrame(columns=columns)
# For each class column, print how often each value occurs, one per line.
for j in classes:
for i in columns:
ethnicity_tar = str(i)
count = 0
try:
count = df[str(j)].value_counts()[ethnicity_tar]
except Exception as e:
# Any failure of the lookup is reported as an empty string, not as 0.
count = ''
print ethnicity_tar, count
Output:
fr 1554455
en 1196932
ir 941852
ab 95131
fr 1554444
en 16000
ir 940850
ab 9371
fr 1554600
en 2196931
ir 940957
ab 9399
What I would like at the end:
fr en ir ab
1554455 1196932 941852 95131
1554444 16000 940850 9371
1554600 2196931 940957 9399
To implement this I would create a dictionary (hash) of the column names each containing an array. Then as I loop through the rows in your file, I'd use the first value to index into the dictionary to get the array and then append the numerical value to that array.
Once this interim data structure is built, you could loop through the arrays pulling the same index value for each row and printing them:
# `hash` is the dictionary of per-column lists described above (the name
# shadows the builtin and is kept only to match the description); `n` is the
# number of rows collected.
for i in range(0, n):
    # One output row: the i-th value of every column, separated by single
    # spaces. join() replaces the hand-written `+` chain, which had dangling
    # line continuations and was missing a `+` before the last term.
    print(" ".join(str(hash[name][i]) for name in ('fr', 'en', 'ir', 'ab')))