How to write value inside existing Excel sheet? - python-2.7

Here is the code with which I am trying to write a value into a particular cell of an existing Excel sheet, but the value does not appear in the sheet. How can I write that value? I am using xlutils.copy.
from datetime import datetime, timedelta, date
from xlrd import open_workbook
from xlwt import Workbook
from xlutils.copy import copy
import xlrd
import datetime
book = open_workbook('Data.xlsx')
sheet = book.sheet_by_index(0)
# read header values into the list
keys = [sheet.cell(0, col_index).value for col_index in xrange(sheet.ncols)]
dict_list = []
#read the excel sheet data into list
for row_index in xrange(1, sheet.nrows):
d = {keys[col_index]: sheet.cell(row_index, col_index).value
for col_index in xrange(sheet.ncols)}
dict_list.append(d)
TotalEffort = 0
#convert the integer date to YMD format
for count in range(len(dict_list)):
year, month, day, hour, minute, second = xlrd.xldate_as_tuple(dict_list[count]["Date"],book.datemode)
#print week number
if datetime.date.today().isocalendar()[1] == date(year, month, day).isocalendar()[1]:
TotalEffort = TotalEffort+dict_list[count]["Effort"]
weeknum = str(datetime.date.today().isocalendar()[1])
Total = str(TotalEffort)
print " Effort for week"+weeknum+" is: "+Total+"hours"
rb = open_workbook('output.xlsx')
ws = rb.sheet_by_index(0)
for rowidx in range(ws.nrows):# number of rows in sheets
row = ws.row(rowidx)# count row from 0 and get it frm sheet
for colidx, cell in enumerate(row):#read all rows in sheets
if cell.value == "search word":
print 'row ' ,rowidx
print 'column' ,colidx
cur_row = rowidx+2
cur_col = colidx+36
wb = copy(rb)
#pic first sheet
shw = wb.get_sheet(0)
value = str(Total)
#writing to shw
shw.write(cur_row,cur_col,'value')

Related

Python script | long running | Need suggestions to optimize

I have written this script to generate a dataset which would contain 15 minute time intervals based on the inputs provided for operational hours for all days of a week for 365 days.
example: Let us say Store 1 opens at 9 AM and closes at 9 PM on all days. That is 12 hours everyday. 12*4 = 48(15 minute periods a day). 48 * 365 = 17520 (15 minute periods for a year).
The sample dataset only contains 5 sites but there are about 9000 sites that this script needs to generate data for.
The script obviously runs for a handful of sites(100) and couple of days(2) but needs to run for sites(9000) and 365 days.
Looking for suggestions to make this run faster. This will be running on a local machine.
input data: https://drive.google.com/open?id=1uLYRUsJ2vM-TIGPvt5RhHDhTq3vr4V2y
output data: https://drive.google.com/open?id=13MZCQXfVDLBLFbbmmVagIJtm6LFDOk_T
Please let me know if I can help with anything more to get this answered.
def datetime_range(start, end, delta):
    """Yield successive values from start up to, but not including, end.

    Each yielded value is the previous one advanced by delta; nothing is
    yielded when start is not less than end.
    """
    moment = start
    while True:
        if not moment < end:
            return
        yield moment
        moment += delta
import pandas as pd
import numpy as np
import cProfile
from datetime import timedelta, date, datetime
# inputs
empty_data = pd.DataFrame(columns=['store','timestamp'])
start_dt = date(2019, 1, 1)
days = 365
data = "input data | attached to the post"
quarter_hour = timedelta(minutes=15)
# One small frame per (day, store) is collected here and concatenated once at
# the end. Calling pd.concat inside the loop copies every row accumulated so
# far on each iteration, which makes the total work quadratic.
frames = []
for i in range(days):
    # The date and its weekday depend only on the day, so compute them once
    # per day rather than once per store.
    curr_date = start_dt + timedelta(days=i)
    weekno = curr_date.weekday()
    # weekday(): 0-4 are Monday-Friday, 5 is Saturday, 6 is Sunday
    if weekno < 5:
        prefix = 'm_f'
    elif weekno == 5:
        prefix = 'sat'
    else:
        prefix = 'sun'
    for j in range(len(data.store)):
        opens = datetime(curr_date.year, curr_date.month, curr_date.day,
                         data[prefix + '_open_hrs'].iloc[j],
                         data[prefix + '_open_min'].iloc[j])
        closes = datetime(curr_date.year, curr_date.month, curr_date.day,
                          data[prefix + '_close_hrs'].iloc[j],
                          data[prefix + '_close_min'].iloc[j])
        dts = [dt.strftime('%Y-%m-%d %H:%M')
               for dt in datetime_range(opens, closes, quarter_hour)]
        vert = pd.DataFrame(dts, columns=['timestamp'])
        vert['store'] = data['store'].iloc[j]
        frames.append(vert)
# The earlier version put each new frame in front of the accumulated data, so
# the newest frame came first; reversing the list keeps that row order, and
# the empty seed frame stays last exactly as before.
empty_data = pd.concat(frames[::-1] + [empty_data])
final_data = empty_data
I think the most time consuming tasks in your script are the datetime calculations.
You should try to make all of those calculations using UNIX Time. It basically represents time as an integer that counts seconds... so you could take two UNIX dates and see the difference just by doing simple subtraction.
In my opinion you should perform all the operations like that... and when the process has finished you can make all the datetime conversions to a more readable date format.
Another thing you should change in your script is the nearly identical code that is repeated several times. Removing the repetition won't improve performance, but it improves readability, makes debugging easier, and builds your skills as a programmer. As a simple example I have refactored some of the code (you can probably do better than I did, but this is just an example).
def datetime_range(start, end, delta):
    """Generate start, start + delta, ... for as long as the value is < end."""
    cursor = start
    while True:
        if cursor < end:
            yield cursor
            cursor += delta
        else:
            break
from datetime import timedelta, date, datetime
import numpy as np
import cProfile
import pandas as pd
# inputs
empty_data = pd.DataFrame(columns=['store', 'timestamp'])
start_dt = date(2019, 1, 1)
days = 365
data = "input data | attached to the post"
for i in range(days):
    for j in range(len(data.store)):
        curr_date = start_dt + timedelta(days=i)
        curr_date_year, curr_date_month, curr_date_day = (
            curr_date.year, curr_date.month, curr_date.day)
        weekno = curr_date.weekday()
        # column-name prefix for the opening hours that apply on this weekday
        week_range = 'm_f' if weekno < 5 else ('sat' if weekno == 5 else 'sun')
        open_hrs = data[week_range + '_open_hrs'].iloc[j]
        open_min = data[week_range + '_open_min'].iloc[j]
        first_time = datetime(curr_date_year, curr_date_month, curr_date_day, open_hrs, open_min)
        close_hrs = data[week_range + '_close_hrs'].iloc[j]
        close_min = data[week_range + '_close_min'].iloc[j]
        second_time = datetime(curr_date_year, curr_date_month, curr_date_day, close_hrs, close_min)
        # one formatted timestamp per 15-minute step between opening and closing
        dts = []
        for dt in datetime_range(first_time, second_time, timedelta(minutes=15)):
            dts.append(dt.strftime('%Y-%m-%d %H:%M'))
        vert = pd.DataFrame(dts, columns=['timestamp'])
        vert['store'] = data['store'].iloc[j]
        # the new rows go in front of everything accumulated so far
        empty_data = pd.concat([vert, empty_data])
final_data = empty_data
Good luck!

Searching a particular cell value in excel using row and column names with python

Below is my code. I'm trying to capture rowid and columnid of a particular 'cell' element and that cell value in an excel sheet using row name and column name, but I'm unable to capture it.
from xlrd import open_workbook
book = open_workbook("D:\A2.xlsx")
for sheet in book.sheets():
for rowidx in range(sheet.nrows):
row = sheet.row(rowidx)
for colidx,cell in enumerate(row):
if cell.value == "PES":#row value
print "Found Row Element"
for column in range(sheet.ncols):
col = sheet.col(column)
for rowid,cell1 in enumerate(col):
if cell1.value == "# Responses PCSAT YTD":#column value
print rowidx,colidx
print "Column Element Found"
print sheet.name
print (sheet.cell1(rowidx,colidx).value)#must be cell value
print "***************"
Any help?
Is this what you wanted?
from xlrd import open_workbook
# Report the position of every cell whose value is "PES", in every sheet.
book = open_workbook("book.xlsx")
for sheet in book.sheets():
    for rowidx in range(sheet.nrows):
        for colidx, cell in enumerate(sheet.row(rowidx)):
            if cell.value != "PES":  # row value
                continue
            print ("Found Row Element")
            print(rowidx, colidx)

Web Scraping - Get the 'blahblah' out from <td foo=blahblah>TEXT</td> using bs4?

I'm trying to scrape a few schedule tables from ESPN: http://www.espn.com/nba/schedule/_/date/20171001
import requests
import bs4
response = requests.get('http://www.espn.com/nba/schedule/_/date/20171001')
soup = bs4.BeautifulSoup(response.text, 'lxml')
print soup.prettify()
table = soup.find_all('table')
data = []
for i in table:
rows = i.find_all('tr')
for row in rows:
cols = row.find_all('td')
cols = [col.text.strip() for col in cols]
data.append([col for col in cols if col])
My code works fine except the output is missing the date info:
[
"Phoenix PHX",
"Utah UTAH",
"394 tickets available from $6"
],
[],
[
"Miami MIA",
"Orlando ORL",
"1,582 tickets available from $12"
]
After some investigation, I realized that the date and time information is wrapped within the tags like so:
<td data-behavior="date_time" data-date="2017-10-07T23:00Z"><a data-dateformat="time1" href="/nba/game?gameId=400978807" name="&lpos=nba:schedule:time"></a></td>
I see this on other websites from time to time as well but never really understood why they do it this way. How can I extract text inside an open tag to get the "2017-10-07T23:00Z" in my output?
The attrs property contains a dictionary of the tag's attributes, which you can use to fetch their values. I have added a length check because some empty rows are present.
Can you try modifying the for loop as below:
# For every table row, collect the non-empty cell texts and, when the third
# cell carries a data-date attribute, append that date to the row's list.
for i in table:
    rows = i.find_all('tr')
    for row in rows:
        cols = row.find_all('td')
        date_data = None
        if len(cols) > 2:
            # .get() instead of [...]: a third cell without a data-date
            # attribute yields None here rather than raising KeyError.
            date_data = cols[2].attrs.get('data-date')
        cols = [col.text.strip() for col in cols]
        dat = [col for col in cols if col]
        if date_data:
            dat.append(date_data)
        data.append(dat)
PS: the above snippet can be optimized :-)
Some td tags in that table contain attributes. You can access a td's attributes through its attrs attribute, which is a dict:
>>> td = soup.select('tr')[1].select('td')[2]
>>> td
<td data-behavior="date_time" data-date="2017-10-01T22:00Z"><a data-dateformat="time1" href="/nba/game?gameId=400978817" name="&lpos=nba:schedule:time"></a></td>
>>> td.attrs
{'data-date': '2017-10-01T22:00Z', 'data-behavior': 'date_time'}
>>> td.attrs['data-date']
'2017-10-01T22:00Z'
To that end, you can create a function that returns the date if that attribute is present or just return the text for a td:
import requests
import bs4
def date_or_text(td):
    """Return the cell's ``data-date`` attribute if present, else its text.

    ``td`` is expected to expose an ``attrs`` mapping and a ``text`` string
    (as a BeautifulSoup tag does). The text is stripped, matching the other
    snippets here, so a cell holding only whitespace becomes an empty string
    and can be dropped by a simple truthiness filter.
    """
    if 'data-date' in td.attrs:
        return td.attrs['data-date']
    return td.text.strip()
def extract_game_information(tr):
    """Return the non-empty ``date_or_text`` values of the row's td cells."""
    game_info = []
    for td in tr.select('td'):
        value = date_or_text(td)
        if value:
            game_info.append(value)
    return game_info
# Fetch the schedule page and build one list of cell values per table row.
response = requests.get('http://www.espn.com/nba/schedule/_/date/20171001')
soup = bs4.BeautifulSoup(response.text, 'lxml')
data = []
for tr in soup.select('tr'):
    data.append(extract_game_information(tr))

Python 2.7 Interactive Visualisation

I'm a new programmer who has been trying for a few days to create a dropdown list whose selected value is then used to create a graph.
For my graph, I'm using Bokeh to create an HTML file that plots the per-capita income of a few places together with their percentage of diabetes. However, I have been trying to get it to work with a dropdown list for 2 weeks now and I simply cannot make it work.
I can create the file, but only when the user enters the input by typing. How Can I make this work with a person selecting a place from a dropdown list and the file showing that places graph as output. Here's my code.
Edit:
I want the selected value from the dropdown list to be sent as the value aaa to the program. I know I should turn my graph creating part of the program into a function. But how do I get the value of a dropdown list as the variable aaa?
import csv
from bokeh.plotting import figure, curdoc
from bokeh.io import output_file, show
from bokeh.layouts import widgetbox
from bokeh.models.widgets import Dropdown
aaa = raw_input("Write State, not Puerto Rico, Hawaii, or DC: ")
from collections import defaultdict
columns = defaultdict(list) # each value in each column is appended to a list
columns1 = defaultdict(list)
with open('my_data.csv') as f:
for row in f:
row = row.strip()# read a row as {column1: value1, column2: value2,...}
row = row.split(',')
columns[row[0]].append(row[1])
columns[row[0]].append(row[2])
columns[row[0]].append(row[3])
columns[row[0]].append(row[4])
columns[row[0]].append(row[5])
xy = (columns[aaa])
xy = [float(i) for i in xy]
myInt = 10000
xy = [x / myInt for x in xy]
print xy
with open('my_data1.csv') as f:
for row in f:
row = row.strip()# read a row as {column1: value1, column2: value2,...}
row = row.split(',')
columns1[row[0]].append(row[1])
columns1[row[0]].append(row[2])
columns1[row[0]].append(row[3])
columns1[row[0]].append(row[4])
columns1[row[0]].append(row[5])
omega = (columns1[aaa])
omega = [float(i) for i in omega]
print omega
import numpy
corr123 = numpy.corrcoef(omega,xy)
print corr123
a = [2004, 2005, 2006, 2007, 2008]
output_file("lines.html")
p = figure(tools="pan,box_zoom,reset,save", title="Diabetes and Stats",
x_axis_label='Years', y_axis_label='percents')
# add some renderers
per = "Diabetes% " + aaa
p.line(a, omega, legend=per)
p.circle(a, omega, legend=per, fill_color="white",line_color="green", size=8)
p.line(a, xy, legend="Per Capita Income/10000")
p.circle(a, xy, legend="Per Capita Income/10000", fill_color="red", line_color="red", size=8)
p.legend.location="top_left"
show(p)

searching excel using xlrd

I am trying to teach myself how to use xlrd for a (conceptually) simple task:
I want to take a string through raw_input from the user and search an excel sheet for the string.
when found I want the program to print the cell row only
here is my non-working code to start with:
import xlrd
from xlrd import open_workbook
book = open_workbook('simple.xls')
sheet = book.sheet_by_index(0)
city = raw_input("> ")
for rowi in range(sheet.nrows):
row = sheet.row(rowi)
for coli, cell in enumerate(row):
if cell.value == city:
loc = cell.row
??????????????
cell = sheet.cell(loc, 9)
print "The Ordinance Frequency is %r" % cell.value
Try cycling through the columns in the same way that you cycle through rows
# Scan every cell of the sheet; whenever a cell equals `city`, remember its
# row index in `loc` (after the loops, `loc` holds the last matching row).
for r in range(sheet.nrows):
    for c in range(sheet.ncols):
        cell = sheet.cell(r, c)
        if cell.value == city:
            loc = r  # index of interesting row