Colour table cells - bokeh - python-2.7

I want to add colour to a row only if a row's cell's value is present in another datasource.
I mean, I have the list A, and the table B, so I want to colour the row X in B if a cell of the row contains a value from A...
I don't even know from where to start..

Pretty much you need to do what ChesuCR mentioned in their comment. To take it one step further, see below a small bokeh application.
If you edit values in the first table, a callback will run and check each 'y' value. An additional column is needed to keep track of whether each 'y' value is contained in the separate list/data source. The value of the additional column is then used to color the cell.
from bokeh.layouts import row
from bokeh.models import ColumnDataSource
from bokeh.models.widgets import DataTable, TableColumn, HTMLTemplateFormatter
from bokeh.io import curdoc
def update_included(attr, old, new):
    """Recompute the 'y_in' flag column of ``source_2``.

    For every value in ``source_2.data['y']`` the flag is 1 when the value
    equals one of the numeric entries of ``source_1.data['a']`` and 0
    otherwise. The result is written back to ``source_2.data['y_in']``.

    The three parameters are not used; they are only there so the function
    can be registered as a property-change callback.
    """
    list_a = []
    for a in source_1.data['a']:
        try:
            list_a.append(float(a))
        except (TypeError, ValueError):
            # The criteria table is editable, so a cell can hold text or be
            # empty; such entries can never match a 'y' value, so skip them
            # instead of letting the callback fail.
            continue
    ys = source_2.data['y']
    y_in = [1 if y in list_a else 0 for y in ys]
    print(ys, y_in, list_a)
    source_2.data['y_in'] = y_in
# Criteria values ("list A"), shown in an editable one-column table.
source_1 = ColumnDataSource(data={'a':[1001,1100]})
columns = [
TableColumn(field="a", title="Criteria list")
]
data_table1 = DataTable(source=source_1, columns=columns, width=400, editable=True)
# Data for the second table. 'y_in' starts at 0 for every row and is
# overwritten by update_included with a 0/1 flag per row.
dict1 = {'x':[0]*6,
'y':[0,10,12,13,200,2001],
'y_in':[0]*6}
source_2 = ColumnDataSource(data=dict1)
# Cell template for the 'y' column: the background is "blue" when the row's
# y_in equals 1 and "red" otherwise; the cell value is rendered in white text.
template="""
<div style="background:<%=
(function colorfromint(){
if(y_in == 1){
return("blue")}
else{return("red")}
}()) %>;
color: white">
<%= value %></div>
"""
formater = HTMLTemplateFormatter(template=template)
# 'columns' is rebound here for the second table; data_table1 was already
# built from the first list.
columns = [
TableColumn(field="x", title="x"),
TableColumn(field="y", title="y",formatter=formater)
]
data_table2 = DataTable(source=source_2, columns=columns, width=400)
# Re-run update_included whenever source_1's 'data' property changes.
source_1.on_change('data', update_included)
# Show both tables side by side in the current document.
curdoc().add_root(row(data_table1, data_table2))

Related

Python-docx: cell edges disappear when merging outer cells

I am using python-docx to create a table with borders on all cells. When I merge cells involving outer cells some outer borders disappear. I use a function from other stackoverflow question -link shown as comment in code below- to set cell borders. How to fix that so outer borders are shown in merged cells?
Wrong borders:
Good borders:
Working example:
from docx import Document
from docx.oxml.shared import OxmlElement, qn
# from https://stackoverflow.com/questions/33069697/how-to-setup-cell-borders-with-python-docx
def set_cell_edges(cell, edges, color, style, width):
    """
    Apply the same border settings to the selected edges of a table cell.

    Parameter   Type                 Definition
    =========== ==================== ==========================================================================================
    cell        Cell                 Cell to apply edges
    edges       str, list, None      Cell edges, options are 'top', 'bottom', 'start' and 'end'; a single
                                     edge may be given as a plain string, None selects no edge
    color       str                  Edge color
    style       str                  Edge style, options are 'single', 'dotted', 'dashed', 'dashdotted' and 'double',
    width       int, float           Edge width, written unchanged to the ``w:sz`` attribute
    """
    # NOTE(review): the original docstring said the width is in points, but
    # the value is passed straight to w:sz, which OOXML appears to measure in
    # eighths of a point - confirm before relying on the unit.

    # Normalise `edges`: a bare string would otherwise be iterated character
    # by character (matching no edge), and None would raise TypeError.
    if edges is None:
        edges = []
    elif isinstance(edges, (str, type(u''))):
        edges = [edges]

    kwargs = dict()
    for edge in edges:
        kwargs[edge] = {'sz': width, 'val': style, 'color': color}

    tc = cell._tc
    tcPr = tc.get_or_add_tcPr()

    # check for tag existence, if none found then create one
    tcBorders = tcPr.first_child_found_in("w:tcBorders")
    if tcBorders is None:
        tcBorders = OxmlElement('w:tcBorders')
        tcPr.append(tcBorders)

    # loop over all available tags
    for edge in ('start', 'top', 'end', 'bottom', 'insideH', 'insideV'):
        edge_data = kwargs.get(edge)
        if not edge_data:
            continue
        tag = 'w:{}'.format(edge)

        # check for tag existence, if none found, then create one
        element = tcBorders.find(qn(tag))
        if element is None:
            element = OxmlElement(tag)
            tcBorders.append(element)

        # looks like order of attributes is important
        for key in ["sz", "val", "color", "space", "shadow"]:
            if key in edge_data:
                element.set(qn('w:{}'.format(key)), str(edge_data[key]))
if __name__ == '__main__':
    n_rows, n_cols = 3, 3
    all_edges = ['top', 'bottom', 'start', 'end']

    document = Document()

    # build the table, then merge the block spanning cell (1, 1) to cell (2, 2)
    table = document.add_table(rows=n_rows, cols=n_cols)
    merge_from = table.rows[1].cells[1]
    merge_to = table.rows[2].cells[2]
    merge_from.merge(merge_to)

    # put all four borders on every cell of every row
    for table_row in table.rows:
        for cell in table_row.cells:
            set_cell_edges(cell, all_edges, '#ff0000', 'single', 1)

    document.save('test.docx')
Of course, I can set an extra column and row to set the specific borders. But it would be nice to fix it without that trick. Example with the trick.
Good borders with trick:
if __name__ == '__main__':
    n_rows, n_cols = 3, 3
    border = ('#ff0000', 'single', 1)  # color, style, width for every edge

    document = Document()

    # one extra row and one extra column are created; they only carry the
    # closing borders for the last real row and column
    table = document.add_table(rows=n_rows + 1, cols=n_cols + 1)

    # merge the block spanning cell (1, 1) to cell (2, 2)
    table.rows[1].cells[1].merge(table.rows[2].cells[2])

    # all four borders on every cell except the extra row and column
    for table_row in table.rows[:-1]:
        for cell in table_row.cells[:-1]:
            set_cell_edges(cell, ['top', 'bottom', 'start', 'end'], *border)

    # top border only on the extra (last) row
    for cell in table.rows[-1].cells[:-1]:
        set_cell_edges(cell, ['top'], *border)

    # left border only on the extra (last) column
    for cell in table.columns[-1].cells[:-1]:
        set_cell_edges(cell, ['start'], *border)

    document.save('test.docx')

Web Scraping - Get the 'blahblah' out from <td foo=blahblah>TEXT</td> using bs4?

I'm trying to scrape a few schedule tables from ESPN: http://www.espn.com/nba/schedule/_/date/20171001
import requests
import bs4
# Download the schedule page and parse it with the lxml backend.
response = requests.get('http://www.espn.com/nba/schedule/_/date/20171001')
soup = bs4.BeautifulSoup(response.text, 'lxml')
# Parenthesised single-argument print: identical output on Python 2.7 and,
# unlike the print statement, also valid syntax on Python 3.
print(soup.prettify())

table = soup.find_all('table')
data = []
# One entry per <tr> of every table: the stripped, non-empty cell texts.
# Rows without any text cells contribute an empty list.
for i in table:
    for row in i.find_all('tr'):
        cols = [col.text.strip() for col in row.find_all('td')]
        data.append([col for col in cols if col])
My code works fine except the output is missing the date info:
[
"Phoenix PHX",
"Utah UTAH",
"394 tickets available from $6"
],
[],
[
"Miami MIA",
"Orlando ORL",
"1,582 tickets available from $12"
]
After some investigation, I realized that the date and time information is wrapped within the tags like so:
<td data-behavior="date_time" data-date="2017-10-07T23:00Z"><a data-dateformat="time1" href="/nba/game?gameId=400978807" name="&lpos=nba:schedule:time"></a></td>
I see this on other websites from time to time as well but never really understood why they do it this way. How can I extract text inside an open tag to get the "2017-10-07T23:00Z" in my output?
The attrs property contains a dictionary of attributes, which you can use to fetch values. I have added a length check because some empty rows are present.
Can you try modifying the for loop as below:
# For every table row, collect the non-empty cell texts and, when the third
# cell carries a data-date attribute, append that date to the row's list.
for i in table:
    for row in i.find_all('tr'):
        cols = row.find_all('td')
        date_data = None
        if len(cols) > 2:
            # .get() yields None when the third cell has no data-date
            # attribute, instead of raising KeyError for that row.
            date_data = cols[2].attrs.get('data-date')
        texts = [col.text.strip() for col in cols]
        dat = [text for text in texts if text]
        if date_data:
            dat.append(date_data)
        data.append(dat)
PS: the above snippet can be optimized :-)
Some td tags in that table contain attributes. You can access a td's attributes through its attrs property, which is a dict:
>>> td = soup.select('tr')[1].select('td')[2]
>>> td
<td data-behavior="date_time" data-date="2017-10-01T22:00Z"><a data-dateformat="time1" href="/nba/game?gameId=400978817" name="&lpos=nba:schedule:time"></a></td>
>>> td.attrs
{'data-date': '2017-10-01T22:00Z', 'data-behavior': 'date_time'}
>>> td.attrs['data-date']
'2017-10-01T22:00Z'
To that end, you can create a function that returns the date if that attribute is present or just return the text for a td:
import requests
import bs4
def date_or_text(td):
    """Return the cell's 'data-date' attribute if it has one, else its text.

    The text is stripped of surrounding whitespace so that a cell holding
    only whitespace yields an empty string and can be dropped by a simple
    truthiness filter.
    """
    if 'data-date' in td.attrs:
        return td.attrs['data-date']
    return td.text.strip()
def extract_game_information(tr):
    """Return the non-empty date_or_text() values of the row's td cells, in order."""
    kept = []
    for td in tr.select('td'):
        value = date_or_text(td)
        if value:
            kept.append(value)
    return kept
# Download the schedule page and parse it with the lxml backend.
response = requests.get('http://www.espn.com/nba/schedule/_/date/20171001')
soup = bs4.BeautifulSoup(response.text, 'lxml')
# One list per <tr> found anywhere in the page.
data = [extract_game_information(tr) for tr in soup.select('tr')]

pandas shape issues when applying function returning multiple new columns

I need to return multiple calculated columns for each row of a pandas dataframe.
This error: ValueError: Shape of passed values is (4, 2), indices imply (4, 3) is raised when the apply function is executed in the following code snippet:
import pandas as pd
# Four-row example frame; the 'categories' column holds a Python list per row.
my_df = pd.DataFrame({
'datetime_stuff': ['2012-01-20', '2012-02-16', '2012-06-19', '2012-12-15'],
'url': ['http://www.something', 'http://www.somethingelse', 'http://www.foo', 'http://www.bar' ],
'categories': [['foo', 'bar'], ['x', 'y', 'z'], ['xxx'], ['a123', 'a456']],
})
# Parse the date strings, then sort the frame in place by that column.
my_df['datetime_stuff'] = pd.to_datetime(my_df['datetime_stuff'])
my_df.sort_values(['datetime_stuff'], inplace=True)
print(my_df.head())
def calculate_stuff(row):
    """Compute the two derived values for one row.

    Returns a tuple ``(calculated_column_x, another_column)``.
    ``calculated_column_x`` is the row's url with '_other_stuff_' appended
    when the url starts with 'http', otherwise None. ``another_column`` is
    always the string 'deduction_from_fields'.
    """
    if row['url'].startswith('http'):
        # isinstance also accepts list subclasses, unlike an exact type() test.
        # `categories` is not used further in this reduced example.
        categories = row['categories'] if isinstance(row['categories'], list) else []
        calculated_column_x = row['url'] + '_other_stuff_'
    else:
        calculated_column_x = None
    another_column = 'deduction_from_fields'
    return calculated_column_x, another_column
print(my_df.shape)
# This is the statement the question reports as raising
# "ValueError: Shape of passed values is (4, 2), indices imply (4, 3)":
# calculate_stuff returns a 2-tuple per row while the frame has 3 columns.
my_df['calculated_column_x'], my_df['another_column'] = zip(*my_df.apply(calculate_stuff, axis=1))
Each row of the dataframe I am working on is more complicated than the example above, and the function calculate_stuff I am applying is using many different columns for each row, then returning multiple new columns.
However, the previous example still raises this ValueError related to the shape of the dataframe that I am not able to understand how to fix.
How to create multiple new columns (for each row) that can be calculated starting from the existing columns?
When you return a list or tuple from a function that is being applied, pandas attempts to shoehorn it back into the dataframe you ran apply over. Instead, return a series.
Reconfigured Code
# Same four-row example frame as in the question.
my_df = pd.DataFrame({
'datetime_stuff': ['2012-01-20', '2012-02-16', '2012-06-19', '2012-12-15'],
'url': ['http://www.something', 'http://www.somethingelse', 'http://www.foo', 'http://www.bar' ],
'categories': [['foo', 'bar'], ['x', 'y', 'z'], ['xxx'], ['a123', 'a456']],
})
# Parse the date strings, then sort the frame in place by that column.
my_df['datetime_stuff'] = pd.to_datetime(my_df['datetime_stuff'])
my_df.sort_values(['datetime_stuff'], inplace=True)
def calculate_stuff(row):
    """Compute the two derived values for one row and return them as a Series.

    The returned Series is indexed by 'calculated_column_x' and
    'another_column'. 'calculated_column_x' is the row's url with
    '_other_stuff_' appended when the url starts with 'http', otherwise None.
    'another_column' is always the string 'deduction_from_fields'.
    """
    if row['url'].startswith('http'):
        # isinstance also accepts list subclasses, unlike an exact type() test.
        # `categories` is not used further in this reduced example.
        categories = row['categories'] if isinstance(row['categories'], list) else []
        calculated_column_x = row['url'] + '_other_stuff_'
    else:
        calculated_column_x = None
    another_column = 'deduction_from_fields'
    # Returning a labelled Series (rather than a tuple) is the change that
    # distinguishes this version from the question's.
    return pd.Series(
        (calculated_column_x, another_column),
        index=['calculated_column_x', 'another_column'],
    )
# Applies calculate_stuff to each row and joins the result onto my_df. The
# joined frame is the value of this expression and is not assigned to a name
# here; the table printed below is that value.
my_df.join(my_df.apply(calculate_stuff, axis=1))
categories datetime_stuff url calculated_column_x another_column
0 [foo, bar] 2012-01-20 http://www.something http://www.something_other_stuff_ deduction_from_fields
1 [x, y, z] 2012-02-16 http://www.somethingelse http://www.somethingelse_other_stuff_ deduction_from_fields
2 [xxx] 2012-06-19 http://www.foo http://www.foo_other_stuff_ deduction_from_fields
3 [a123, a456] 2012-12-15 http://www.bar http://www.bar_other_stuff_ deduction_from_fields

Curdoc() keeps adding plots, want to replace

I have written a program that creates a graph based on input from a dropdown list. I am using curdoc().add_root() from bokeh to show my graphs on a server, as show() does not work. However, whenever I choose a new option, instead of replacing the current graph, it creates a new one below it. I have tried curdoc().clear(), but it does not do what I want. How do I make this replace the graph without deleting the dropdown list, which is what curdoc().clear() does? Here's my code:
import csv
import bokeh.plotting
from bokeh.plotting import figure, curdoc
from bokeh.io import output_file, show
from bokeh.layouts import widgetbox
from bokeh.models.widgets import MultiSelect
from bokeh.io import output_file, show, vform
from bokeh.layouts import row
from collections import defaultdict
def _load_columns(path):
    """Read a comma-separated file into {first field: [fields 1 to 5]}.

    Each line is stripped and split on ','. The first field is the key and
    the next five fields are appended, as strings, to that key's list, so
    repeated keys accumulate values. Blank lines are skipped.

    Raises ValueError for a non-blank line with fewer than six fields.
    """
    table = defaultdict(list)
    with open(path) as f:
        # The loop variable is deliberately not called `row`, so that it does
        # not rebind the `row` imported from bokeh.layouts.
        for line in f:
            fields = line.strip().split(',')
            if fields == ['']:
                continue
            if len(fields) < 6:
                raise ValueError(
                    'expected at least 6 comma-separated fields in %s, got: %r'
                    % (path, line))
            table[fields[0]].extend(fields[1:6])
    return table


columns = _load_columns('my_data.csv')
columns1 = _load_columns('my_data1.csv')
from bokeh.layouts import widgetbox
from bokeh.models.widgets import Dropdown
from bokeh.plotting import curdoc
# Menu entries are 2-tuples.
# NOTE(review): presumably (label, value) pairs, so choosing "NY" would set
# dropdown.value to "New York" - confirm against the Bokeh Dropdown docs.
menu = [("NY", "New York"), ("California", "California"), ("Ohio", "Ohio")]
dropdown = Dropdown(label="Dropdown button", button_type="warning", menu=menu)
# `count` is not referenced anywhere else in the visible code.
count = 0
#def function_to_call(attr, old, new):
#print dropdown.value
def myfunc(attr, old, new):
    """Dropdown callback: draw the plot for the selected entry.

    Reads the selection from ``dropdown.value``, looks up its two series in
    ``columns`` (scaled down by 10000) and ``columns1``, prints both series
    and their correlation matrix, and adds a new figure to the current
    document. A figure added by an earlier call is removed first, so the new
    plot replaces the old one instead of appearing below it; the dropdown is
    left untouched.

    The three parameters are not used; they are only there so the function
    can be registered with ``dropdown.on_change``.
    """
    import numpy

    plot_name = "diabetes_stats_plot"  # tag used to find this callback's plot again
    income_scale = 10000

    aaa = dropdown.value
    xy = [float(i) / income_scale for i in columns[aaa]]
    # Parenthesised single-argument print: same output on Python 2.7, and
    # valid syntax on Python 3 as well.
    print(xy)
    omega = [float(i) for i in columns1[aaa]]
    print(omega)
    corr123 = numpy.corrcoef(omega, xy)
    print(corr123)

    a = [2004, 2005, 2006, 2007, 2008]
    p = figure(name=plot_name, tools="pan,box_zoom,reset,save",
               title="Diabetes and Stats",
               x_axis_label='Years', y_axis_label='percents')
    # add some renderers
    per = "Diabetes% " + aaa
    p.line(a, omega, legend=per)
    p.circle(a, omega, legend=per, fill_color="white", line_color="green", size=8)
    p.line(a, xy, legend="Per Capita Income/10000")
    p.circle(a, xy, legend="Per Capita Income/10000", fill_color="red", line_color="red", size=8)
    p.legend.location = "top_left"

    # Remove only the plot left by a previous call. curdoc().clear() would
    # also drop the dropdown, which is why it is not used here. The dropdown
    # is not re-added either: it is added once at module level.
    doc = curdoc()
    stale_plots = [r for r in doc.roots if getattr(r, 'name', None) == plot_name]
    for stale in stale_plots:
        doc.remove_root(stale)
    doc.add_root(p)
#bokeh.plotting.reset_output
# Call myfunc whenever the dropdown's 'value' property changes, and put the
# dropdown itself into the current document.
dropdown.on_change('value', myfunc)
curdoc().add_root(dropdown)

Python 2.7 Interactive Visualisation

I'm a new programmer who has been trying for a few days to create a dropdown list whose input then creates a graph.
For my graph, I'm using Bokeh to create an HTML file graph, plotting the per-capita income of a few places as well as their percentage of diabetes. However, I have been trying to get it to work with a dropdown list for 2 weeks now and I simply cannot make it work.
I can create the file, but only when the user enters the input by typing. How Can I make this work with a person selecting a place from a dropdown list and the file showing that places graph as output. Here's my code.
Edit:
I want the selected value from the dropdown list to be sent as the value aaa to the program. I know I should turn my graph creating part of the program into a function. But how do I get the value of a dropdown list as the variable aaa?
import csv
from bokeh.plotting import figure, curdoc
from bokeh.io import output_file, show
from bokeh.layouts import widgetbox
from bokeh.models.widgets import Dropdown
from collections import defaultdict
import numpy


def _read_columns(path):
    """Read a comma-separated file into {first field: [fields 1 to 5]}.

    Each line is stripped and split on ','. The first field is the key and
    the next five fields are appended, as strings, to that key's list.
    Blank lines are skipped.

    Raises ValueError for a non-blank line with fewer than six fields.
    """
    table = defaultdict(list)  # each value in each row is appended to a list
    with open(path) as f:
        for line in f:
            fields = line.strip().split(',')
            if fields == ['']:
                continue
            if len(fields) < 6:
                raise ValueError(
                    'expected at least 6 comma-separated fields in %s, got: %r'
                    % (path, line))
            table[fields[0]].extend(fields[1:6])
    return table


# raw_input only exists on Python 2; fall back to input() elsewhere so the
# prompt behaves the same on both.
try:
    _prompt = raw_input
except NameError:
    _prompt = input

aaa = _prompt("Write State, not Puerto Rico, Hawaii, or DC: ")

# First series for the chosen key, scaled down by 10000.
columns = _read_columns('my_data.csv')
myInt = 10000
xy = [float(i) / myInt for i in columns[aaa]]
# Parenthesised single-argument print: same output on Python 2.7, and valid
# syntax on Python 3 as well.
print(xy)

# Second series for the chosen key.
columns1 = _read_columns('my_data1.csv')
omega = [float(i) for i in columns1[aaa]]
print(omega)

corr123 = numpy.corrcoef(omega, xy)
print(corr123)

a = [2004, 2005, 2006, 2007, 2008]
output_file("lines.html")
p = figure(tools="pan,box_zoom,reset,save", title="Diabetes and Stats",
           x_axis_label='Years', y_axis_label='percents')
# add some renderers
per = "Diabetes% " + aaa
p.line(a, omega, legend=per)
p.circle(a, omega, legend=per, fill_color="white", line_color="green", size=8)
p.line(a, xy, legend="Per Capita Income/10000")
p.circle(a, xy, legend="Per Capita Income/10000", fill_color="red", line_color="red", size=8)
p.legend.location = "top_left"
show(p)