only apply date format to date columns with xlwt - python-2.7

I have results from a database search where some columns have dates, but not others. I need help to write
data = [['556644', 'Mr', 'So', 'And', 'So', Decimal('0.0000'), datetime.datetime(2012, 2, 25, 0, 0), '', False, datetime.datetime(2013, 6, 30, 0, 0)],...]
into an Excel spreadsheet so that the date format (for example DD/MM/YYYY)
is only applied to the datetime columns. I'm quite new to Python and I've been banging my head on this for quite a few days now. Thanks!

After simplifying your self-answer:
date_xf = easyxf(num_format_str='DD/MM/YYYY') # sets date format in Excel
data = [list(n) for n in cursor.fetchall()]
for row_index, row_contents in enumerate(data):
    for column_index, cell_value in enumerate(row_contents):
        if isinstance(cell_value, datetime.datetime):
            sheet1.write(row_index+1, column_index, cell_value, date_xf)
        else:
            sheet1.write(row_index+1, column_index, cell_value)

The data is being imported from the SQL server with pyodbc, so that's where cursor.fetchall() is coming from:
data = [list(n) for n in cursor.fetchall()]
for row_index, row_contents in enumerate(data):
for column_index, cell_value in enumerate(row_contents):
if isinstance(data[row_index][column_index],
xf = easyxf(num_format_str='DD/MM/YYYY') # sets date format in Excel
if xf:
sheet1.write(row_index+1, column_index, cell_value, xf)
sheet1.write(row_index+1, column_index, cell_value)
Done! :)

If you want to write a date to a cell in Excel,
add an extra argument to the write call - the style for the cell:
import datetime
import xlwt
workbook = xlwt.Workbook()
worksheet = workbook.add_sheet('test')
date_style = xlwt.easyxf(num_format_str='YYYY/MM/DD')
worksheet.write(0, 0, 'Today')
worksheet.write(1, 0, datetime.datetime.now(), date_style)
workbook.save('C:\\data.xls')


Replace Null values with median in pyspark

How can I replace null values with the median in the columns Age and Height of the data set df below?
df = spark.createDataFrame([(1, 'John', 1.79, 28,'M', 'Doctor'),
(2, 'Steve', 1.78, 45,'M', None),
(3, 'Emma', 1.75, None, None, None),
(4, 'Ashley',1.6, 33,'F', 'Analyst'),
(5, 'Olivia', 1.8, 54,'F', 'Teacher'),
(6, 'Hannah', 1.82, None, 'F', None),
(7, 'William',None, 42,'M', 'Engineer'),
, ['Id', 'Name', 'Height', 'Age', 'Gender', 'Profession'])
In the post Replace missing values with mean - Spark Dataframe I used the function given
from pyspark.ml.feature import Imputer
imputer = Imputer(
    inputCols=df.columns,
    outputCols=["{}_imputed".format(c) for c in df.columns])
It throws me an error.
IllegalArgumentException: 'requirement failed: Column Id must be of type equal to one of the following types: [DoubleType, FloatType] but was actually of type LongType.'
So please help.
Thank you
It's likely an initial casting error (I had some strings I needed to be floats). To convert all cols to floats do:
from pyspark.sql.functions import col
df = df.select(*(col(c).cast("float").alias(c) for c in df.columns))
Then you should be fine to impute. Note: I set my strategy to median rather than mean.
from pyspark.ml.feature import Imputer
imputer = Imputer(
    inputCols=df.columns,
    outputCols=["{}_imputed".format(c) for c in df.columns]
).setStrategy("median")
# Add imputation cols to df
df = imputer.fit(df).transform(df)
I'd be interested in a more elegant solution but I separately imputed the categoricals from the numerics. To impute the categoricals I got the most common value and filled the blanks with it using the when and otherwise functions:
import pyspark.sql.functions as F
for col_name in ['Name', 'Gender', 'Profession']:
common = df.dropna().groupBy(col_name).agg(F.count("*")).orderBy('count(1)', ascending=False).first()[col_name]
df = df.withColumn(col_name, F.when(F.isnull(col_name), common).otherwise(df[col_name]))
To impute the numerics, before running the imputer lines I simply cast the Age and Id columns as doubles, which circumvents the issue for the numeric fields, and I restrict the imputer to the numerical columns.
from pyspark.ml.feature import Imputer
df = df.withColumn("Age", df['Age'].cast('double')).withColumn('Id', df['Id'].cast('double'))
imputer = Imputer(
inputCols=['Id', 'Height', 'Age'],
outputCols=['Id', 'Height', 'Age'])

ValueError: invalid literal for float(): when adding annotation in pandas

I get this error when I try to add an annotation to my plot - ValueError: invalid literal for float(): 10_May.
my dataframe:
my code (I use to_datetime and strftime before plotting, as I needed to sort dates which were stored as strings):
# dealing with dates as strings
grouped.index = pd.to_datetime(grouped.index, format='%d_%b')
grouped = grouped.sort_index()
grouped.index = grouped.index.strftime('%d_%b')
(grouped.index[9], grouped['L'][9]),
xytext=(15, 15),
textcoords='offset points',
grouped.index[9] returns u'10_May' while grouped['L'][9] returns 10.0.
I know that pandas expects the index to be float, but I thought I could access it via df.index[]. I would appreciate your suggestions.
What works for me is to plot first and then get the index position with Index.get_loc:
ax = df.plot()
(df.index.get_loc(df.index[9]), df['L'][9]),
xytext=(15, 15),
textcoords='offset points',
df = pd.DataFrame({'L':[3,5,0,1]}, index=['4_May','3_May','1_May', '2_May'])
#print (df)
df.index = pd.to_datetime(df.index, format='%d_%b')
df = df.sort_index()
df.index = df.index.strftime('%d_%b')
(df.index.get_loc(df.index[2]), df['L'][2]),
xytext=(15, 15),
textcoords='offset points',
More general solution with get_loc + idxmax + max:
ax = df.plot()
(df.index.get_loc(df['L'].idxmax()), df['L'].max()),
xytext=(15, 15),
textcoords='offset points',

Put information from a database file into lists

import sqlite3
db = sqlite3.connect('newdb.db')
team_list = ['Munster', 'Leinster', 'Ulster', 'Glasgow']
cursor = db.cursor()
for i in range(len(team_list)):
team_names = team_list[i].upper()
searchStr = '%' + team_names + '%'
cursor.execute('select * from tickets where Name LIKE ?', (searchStr,))
teams_points = cursor.fetchall()
print teams_points
This is my python code used to display all data in the table 'tickets' in newdb.db. I have a list with the team names and i want to be able to search these team names in the database and calculate information on tickets sold.
picture of database
[(u'MUNSTER', 5, u'First Round'), (u'MUNSTER', 5, u'First Round'),
(u'MUNSTER', 8, u'Second Round'), (u'MUNSTER', 10, u'Both Rounds')]
[(u'LEINSTER', 2, u'Second Round'), (u'LEINSTER', 16, u'First Round'),
(u'LEINSTER', 5, u'Both Rounds'), (u'LEINSTER', 6, u'Both Rounds'),
(u'LEINSTER', 3, u'First Round')]
[(u'ULSTER', 10, u'Second Round')]
[(u'GLASGOW', 4, u'First Round')]
Above is my output when I run the script. I want to be able to put each team into a list as
team_list=['team_name', 'total first round tickets', 'second round tickets']
munster_list = ['MUNSTER', '20', '18']
leinster_list = ['LEINSTER','30','13']
ulster_list = ['ULSTER','0','10']
glasgow_list = ['GLASGOW','4','0']
so then to print the list I can just use print munster_list
Use GROUP BY to get one output row from the rows in each group. Use CASE expressions to sum up only certain values:
SELECT Name,
       sum(CASE WHEN Type IN ('First Round', 'Both Rounds')
                THEN Amount
                ELSE 0
           END) AS "first round tickets",
       sum(CASE WHEN Type IN ('Second Round', 'Both Rounds')
                THEN Amount
                ELSE 0
           END) AS "second round tickets"
FROM tickets
GROUP BY Name

How to write value inside existing Excel sheet?

Here is the code with which I am trying to write a value into a particular cell of an existing Excel sheet, but the value is not being written to that sheet. How can I write the value? I am using xlutils.copy.
from datetime import datetime, timedelta, date
from xlrd import open_workbook
from xlwt import Workbook
from xlutils.copy import copy
import xlrd
import datetime
book = open_workbook('Data.xlsx')
sheet = book.sheet_by_index(0)
# read header values into the list
keys = [sheet.cell(0, col_index).value for col_index in xrange(sheet.ncols)]
dict_list = []
#read the excel sheet data into list
for row_index in xrange(1, sheet.nrows):
d = {keys[col_index]: sheet.cell(row_index, col_index).value
for col_index in xrange(sheet.ncols)}
TotalEffort = 0
#convert the integer date to YMD format
for count in range(len(dict_list)):
year, month, day, hour, minute, second = xlrd.xldate_as_tuple(dict_list[count]["Date"],book.datemode)
#print week number
if[1] == date(year, month, day).isocalendar()[1]:
TotalEffort = TotalEffort+dict_list[count]["Effort"]
weeknum = str([1])
Total = str(TotalEffort)
print " Effort for week"+weeknum+" is: "+Total+"hours"
rb = open_workbook('output.xlsx')
ws = rb.sheet_by_index(0)
for rowidx in range(ws.nrows):# number of rows in sheets
row = ws.row(rowidx)# count row from 0 and get it frm sheet
for colidx, cell in enumerate(row):#read all rows in sheets
if cell.value == "search word":
print 'row ' ,rowidx
print 'column' ,colidx
cur_row = rowidx+2
cur_col = colidx+36
wb = copy(rb)
#pic first sheet
shw = wb.get_sheet(0)
value = str(Total)
#writing to shw

django compare date with date

I am trying to reject lines whose date is earlier than the latest date already imported.
timelimit = Operation.objects.filter(account = 3).aggregate(Max('date'))
for row in csv.reader(reencode(f), delimiter=';', quotechar='"')
if row != []:
if row[0]>timelimit:[0]
row looks like:
Of course comparison row[0]>timelimit is wrong - but what is correct?
#this will convert your string("2012-01-12") to a datetime object
from datetime import datetime
>>> x = datetime.strptime(row[0], "%Y-%m-%d")
>>> x
datetime.datetime(2012, 1, 12, 0, 0)
And then you can convert timelimit into a datetime object too, like so:
timelimit = datetime(2011, 10, 10)
and then comparing these two is trivial:
x > timelimit