date format cannot convert in data format - python-2.7

I am trying to create an application where it pulls the information from website using python and I am using datetime formats.
def constructYFURL(ticker,start_date,end_date,freq):
start_date = datetime.strptime(start_date,"%Y-%m-%d").date()
end_date = datetime.strptime(end_date,"%Y-%m-%d").date()
s=ticker.replace("^","%5E")
if start_date.month-1<10:
a="0"+str(start_date.month-1)
else:
a=str(start_date.month-1)
# Also the month always has 2 digits
b=str(start_date.day)
c=str(start_date.year)
if end_date.month - 1 < 10:
d = "0" + str(end_date.month - 1)
else:
d = str(end_date.month - 1)
e=str(end_date.day)
f=str(end_date.year)
`enter code here`g=freq
yfURL = "http://real-chart.finance.yahoo.com/table.csv? s="+s+"&a="+a+"&b="+b+"&c="+c+"&d="+d+"&e="+e+"&f="+f+"&g="+g+"&ignore=.csv"
return yfURL
from download import constructYFURL
ticker = "AAPL"
start_date = "2016-01-01"
end_date = " 2017-03-06"
freq = "d"
yfURL = constructYFURL(ticker,start_date,end_date,freq)
print yfURL
I am getting a following error saying:
ValueError: time data ' 2017-03-06' does not match format '%Y-%m-%d'

You have an extra leading space in:
end_date = " 2017-03-06"
remove it and the error you're getting goes away and the program runs
Better formatted code:
def constructYFURL(ticker,start_date,end_date,freq):
start_date = datetime.strptime(start_date,"%Y-%m-%d").date()
end_date = datetime.strptime(end_date,"%Y-%m-%d").date()
s=ticker.replace("^","%5E")
if start_date.month-1<10:
a="0"+str(start_date.month-1)
else:
a=str(start_date.month-1)
# Also the month always has 2 digits
b=str(start_date.day)
c=str(start_date.year)
if end_date.month - 1 < 10:
d = "0" + str(end_date.month - 1)
else:
d = str(end_date.month - 1)
e=str(end_date.day)
f=str(end_date.year)
g=freq
yfURL = "http://real-chart.finance.yahoo.com/table.csv? s="+s+"&a="+a+"&b="+b+"&c="+c+"&d="+d+"&e="+e+"&f="+f+"&g="+g+"&ignore=.csv"
return yfURL
ticker = "AAPL"
start_date = "2016-01-01"
end_date = "2017-03-06"
freq = "d"
yfURL = constructYFURL(ticker,start_date,end_date,freq)
print yfURL

Related

boost.python : pandas dataframe to c++

I want to use boost.python to use multi-index columns dataframe in c++.
※multi-index columns dataframe is like
I changed the type of multi-index columns dataframe into csv.
My csv file looks like this on spreadsheet
The reason why I want to use this data is for backtest. This is my backtest code in python that I want to translate to c++.
import pandas as pd
import numpy as np
from utils import load_data, load_list_csv, to_int
class No_Strategy():
def __init__(self, codes, unit, cash, position):
self.codes = codes
self.unit = unit
self.cash = cash
self.buy_signal = [0]*len(codes)
self.sell_signal = [0]*len(codes)
self.valid = 0
self.position = position
self.pass_st = 0 # 전략에 들어가지도 못한 경우
def set_data(self, prev_fs_row, fs_row, indi_row):
self.prev_fs = prev_fs_row
self.fs = fs_row # multi dimensional df
self.indi = indi_row
def _strat(self, prev_fs, curr_fs, curr_indi):
curr_rev = prev_rev = curr_ni = prev_ni = ni_growth = curr_asset = noncurr_asset = curr_asset_rat = 0
try:
prev_rev = int(prev_fs['매출액'].replace(",",""))
curr_rev = int(curr_fs['매출액'].replace(",",""))
except:
self.pass_st += 1
return 0, 0
rev_growth=(curr_rev-prev_rev)/prev_rev
try:
prev_ni = int(prev_fs['당기순이익'].replace(",",""))
curr_ni = int(curr_fs['당기순이익'].replace(",",""))
except:
self.pass_st += 1
return 0, 0
ni_growth=(curr_ni-prev_ni)/prev_ni
try:
curr_asset = int(curr_fs['유동자산'].replace(",",""))
noncurr_asset = int(curr_fs['비유동자산'].replace(",",""))
except:
self.pass_st += 1
return 0, 0
curr_asset_rat = curr_asset / noncurr_asset
#### this is the buy strategy! You can change the below ####
if (curr_indi.golden_cross) or (curr_indi.rsi_k < 0.65) :
return 1, 0
#### ************************************************** ####
#### this is the sell strategy! You can change the below ####
if (curr_indi.dead_cross):
return 0, 1
#### ************************************************** ####
return 0, 0
def run(self):
for i, code in enumerate(self.codes):
self.valid = 0
prev_fs = self.prev_fs[code]
curr_fs = self.fs[code]
curr_indi = self.indi[code]
prev_fs_cell = None
curr_fs_cell = None
try:
prev_fs_cell = prev_fs.iloc[0].replace(",","")
try:
curr_fs_cell = curr_fs.iloc[0].replace(",","")
except:
self.pass_st += 1
pass
except:
self.pass_st += 1
pass
if (curr_fs_cell != None) & (prev_fs_cell != None):
self.valid = 1
buy, sell = self._strat(prev_fs, curr_fs, curr_indi)
if self.valid == 0:
self.pass_st += 1
continue
else: # buy or sell signal get
price = curr_indi['close']
if buy:
if self.cash >= self.unit * price:
self.buy_signal[i] = self.unit
self.position[i] += self.unit
self.cash -= price * self.unit
elif sell:
if self.position[i] > 0 :
sell_num = self.position[i] - int(self.position[i]/2)
self.sell_signal[i] = sell_num
self.position[i] = int(self.position[i]/2) # 1-> 1 sell, 4 -> 2 sell ....
self.cash += price * sell_num
##title
class Broker():
def __init__(self, codes):
self.cash = 200000000 #2억
self.cash_df = None #pd.DataFrame(columns=['cash'])
self.position = [0]*len(codes)
self.position_df = None #pd.DataFrame(columns=codes) # for accumulated profit calculation
self.buy_signal = None #pd.DataFrame(columns=codes) # codes = KOSPI_stock_names
self.sell_signal = None #pd.DataFrame(columns=codes)
self.codes = codes # 012934, 3281, ...
self.unit = 1 # 주식 매매 단위
self.pass_st = 0
def set_strat(self, strategy):
self.strategy = strategy # class
def set_time(self, time_index): # time_index type: pd.Index / time range for indi df
self.buy_signal = pd.DataFrame(columns = self.codes, index = time_index) #set_index(time_index)
self.sell_signal = pd.DataFrame(columns = self.codes, index = time_index) #.set_index(time_index)
self.position_df = pd.DataFrame(columns = self.codes, index = time_index)
self.cash_df = pd.DataFrame(columns = ['cash'], index = time_index)#.set_index(time_index)
self.time_index = time_index
def set_data(self, fs, indi, price):
self.fs = fs # multi dimensional df / start: 0th - nth
self.indi = indi # multi dimensional df / start : 1th - nth
self.price = price # 2 dimensional (date X codes : close price)
def update_data(self, strategy, date):
self.cash = strategy.cash
self.cash_df.loc[date] = strategy.cash
self.position = strategy.position
self.position_df.loc[date] = strategy.position #list
self.buy_signal.loc[date] = strategy.buy_signal #list
self.sell_signal.loc[date] = strategy.sell_signal #list
self.pass_st += strategy.pass_st
def run(self):
for date in self.time_index: #아마 수정해야 할 확률 높음
if date.year == 2021:
break
else:
prev_fs_row = self.fs.loc[date.year-1] # ex: 2014
fs_row = self.fs.loc[date.year] # 2015
indi_row = self.indi.loc[date] # 2015
strategy = self.strategy(self.codes, self.unit, self.cash, self.position)
strategy.set_data(prev_fs_row, fs_row, indi_row)
strategy.run()
self.update_data(strategy, date)
def performance(self):
# !!!! 2020년까지의 결과만 성능 평가 ####
cash_df = self.cash_df[self.cash_df.index < '2021']
position_df = self.position_df[self.position_df.index < '2021']
price = self.price[self.price.index < '2021']
buy_signal = self.buy_signal[self.buy_signal.index < '2021']
sell_signal = self.sell_signal[self.sell_signal.index < '2021']
last_price = price.iloc[-1]
total_remain_num = self.position # last(2020) position data
total_buy = (price * buy_signal).sum(axis=1).sum()
total_sell = (price * sell_signal).sum(axis=1).sum()
total_remain = (last_price * total_remain_num).sum()
print(f'remain 개수: {total_remain_num}, total_remain: {total_remain} total_buy: {total_buy}, total_sell={total_sell}')
profit = total_sell + total_remain - total_buy
try:
return_mean = profit / total_buy
except:
print("no buy")
return
accum_df = (cash_df['cash'] + ((price.fillna(0) * position_df).sum(axis=1))).to_frame() # row sum
daily_return_df = (accum_df - accum_df.shift(1))/accum_df.shift(1)-1
SSE = ((daily_return_df - return_mean)**2).sum().item()
std = np.sqrt(SSE/(accum_df.shape[0]-1)) # route(sigma(x-x_bar)^2 / (n-1))
sharp = return_mean / std
self.return_mean = return_mean
self.sharp = sharp
print(f'return_mean: {return_mean}, sharp: {sharp}')
code_path = GDRIVE_DATA_PATH + 'codes.csv'
fs_path = GDRIVE_DATA_PATH + 'fs_total.csv'
indi_path = GDRIVE_DATA_PATH + 'indi_total.csv'
price_path = GDRIVE_DATA_PATH + 'prices.csv'
fs_total = load_data("fs_total.csv")
indi_total = load_data("indi_total.csv") # stock price and indicator(Golden cross, RSI, etc.)
prices = load_data("prices.csv") # stock close price data rows:date, cols: stock code.
time_index = indi_total.index # time index of indi_total multi-index columns
broker = Broker(codes)
broker.set_strat(No_Strategy)
broker.set_time(time_index)
broker.set_data(fs_total, indi_total, prices)
broker.run()
broker.performance()
I want to translate it not changing much in flow of the code.
But I cannot find how to get multi-index columns dataframe in c++, and transfer its row data to No_Strategy to decide whether invest into the stock.
※ I uploaded similar question before and get thankful answer, but it is too complicated for me so I question one more time with detail information.
look at https://github.com/hosseinmoein/DataFrame. It has about 95% of Pandas functionality in a much faster framework

Cannot use SORTED for a list

The error occurs in the last step of this code when I try to sort a list using SORTED. I get the error message that a "list object is not callable". I want to use the SORTED and not the SORT function for this.
from csv import reader
import datetime as dt
open_file = open("hacker_news.csv")
read_file = reader(open_file)
hn = list(read_file)
headers = hn[0]
hn = hn[1:]
def explore_data(dataset,start,finish,col_rows=True):
dataset_slice = dataset[start:finish]
for row in dataset_slice:
print(row)
print('\n')
if col_rows:
print('rows:' , len(dataset))
print('columns:' , len(dataset[0]))
ask_posts = []
show_posts = []
other_posts = []
for row in hn:
title = row[1]
if title.lower().startswith("ask hn"):
ask_posts.append(row)
elif title.lower().startswith("show hn"):
show_posts.append(row)
else:
other_posts.append(row)
total_ask_comments = 0
total_show_comments = 0
total = 0
for row in ask_posts:
total += 1
num_comments = int(row[4])
total_ask_comments += num_comments
avg_ask_comments = total_ask_comments/total
print(avg_ask_comments)
for row in show_posts:
total += 1
num_comments = int(row[4])
total_show_comments += num_comments
avg_show_comments = total_show_comments/total
print(avg_show_comments)
result_list = []
for row in ask_posts:
created_at = row[6]
num_comments = int(row[4])
result_list.append([created_at,num_comments])
counts_by_hour = {}
comments_by_hour = {}
for row in result_list:
comment = row[1]
date_time = row[0]
date_time = dt.datetime.strptime(date_time,'%m/%d/%Y %H:%M')
hour = date_time.strftime('%H')
if hour not in counts_by_hour:
counts_by_hour[hour] = 1
comments_by_hour[hour] = comment
else:
counts_by_hour[hour] += 1
comments_by_hour[hour] += comment
for hour in counts_by_hour:
if hour in comments_by_hour:
avg = round(comments_by_hour[hour]/counts_by_hour[hour],2)
avg_by_hour.append([hour,avg])
for row in avg_by_hour:
swap_avg_by_hour.append([row[1], row[0]])
print(swap_avg_by_hour)
sorted_swap = sorted(swap_avg_by_hour, reverse=True)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-49-307863a4b1cd> in <module>
6 print(swap_avg_by_hour)
7
----> 8 sorted_swap = sorted(swap_avg_by_hour, reverse=True)
9
10 print(sorted_swap)
TypeError: 'list' object is not callable
Not sure how to upload the csv as don't see an upload option. Is there an obvious error in the code? or can someone help with instructions to upload the csv file?
You need to declare these 2 lists before appnding avg_by_hour = [] swap_avg_by_hour= []

For loop in Django

I have two question. First question: Python script that shows me the precipitation for a certain period.For example, I'm getting an initial year-month and a final year-month.
Initial:
year:2000
month:3
Final
year1:2005
month:4
Now, instead of seeing:
2000/3,2000/4,2000/5,2000/6..........2005/1,2005/2,2005/3,2005/4
she works like this(look in the hooked picture):
2000/3, 2000/4, 2001/3, 2001/4........2005/3,2005/4.
I want to work for me like the first case.
def period_month_prec(year,month,year1,month1):
for i in range (year,year1+1,1):
for j in range(month,month1+1,1):
......................
Second question: How to write the output(picture) from the script in csv.fileenter image description here
This is what my views.py script looks like , which saves me only the first result:
def monthly_period(request):
if request.method == "POST" :
form = PeriodMonthlyForm(request.POST)
if form.is_valid():
data = form.cleaned_data
year = data.get('year')
month = data.get('month')
year1 = data.get('year1')
month1 = data.get('month1')
lon = data.get('lon')
lat = data.get ('lat')
inter = data.get('inter')
point = period_month_prec(year,month,year1,month1,lon,lat)
args = {'point':point}
response = HttpResponse(content_type='text/txt')
response['Content-Disposition'] = 'attachment; filename="precipitation.txt"'
writer = csv.writer(response)
writer.writerow([point])
return response
else:
form = PeriodMonthlyForm()
active_period_monthly = True
return render (request, 'carpatclimapp/home.html',{'form':form, 'active_period_monthly': active_period_monthly})
Ok, i have forms like this:
Forms
You set initial values(red color) and end interval(blue color). For this given interval, the lon and lat are defined for the point in which we want to perform interpolation. When you press the submit button, it starts with interpolation for a defined period. The loop problem is because it works only for the defined months (we see from the 2nd picture that it only works in the interval 1-6) but not for 7,8,9,10,11,12 months between these years.
Initial: year:2000, month:3
Final: year1:2001, month:4
for this she's doing it like this: 2000/3,2000/4,2001/3,2001/4
I do not want that, I want this: 2000/3,2000/4,2000/5,2000/6,2000/7.....2000/12,2001/1,2001/2,2001/3,2001/4.
this is me code :
def period_month_prec(year,month,year1,month1,lon,lat):
cnx = sqlite3.connect(DB1)
cursor = cnx.cursor()
table = 'monthly'
year = int(year)
year1 = int(year1)
month = int(month)
month1 = int(month1)
for i in range (year,year1+1,1):
for j in range(month,month1+1,1):
query = '''
SELECT dates, cell, prec FROM %s WHERE dates = "%s-%s" ;
''' % (table,i,j)
df = pd.read_sql_query(query, cnx)
tacka = '''SELECT id, lon, lat,country,altitude FROM %s;''' % 'grid1'
grid1 = pd.read_sql_query(tacka, cnx)
podaci = pd.merge(df,grid1,left_on='cell',right_on='id')
podaci_a = podaci.drop(['cell','id','country','altitude'],axis=1)
lon_n = podaci_a['lon'].values
lat_n = podaci_a['lat'].values
prec =podaci_a['prec'].values
x_masked, y_masked, prec_p = remove_nan_observations(lon_n, lat_n, prec)
xy = np.vstack([x_masked,y_masked]).T
xi = ([lon,lat])
inter_point = interpolate_to_points(xy,prec_p,xi, interp_type='linear'
return (i,j,lon,lat,inter_point)
The results that come out look like this:
loop with calculations
The second question was how to save these results(2nd picture) in the csv file, how to write correctly views.py. Currently she looks like this :
def monthly_period(request):
if request.method == "POST" :
form = PeriodMonthlyForm(request.POST)
if form.is_valid():
data = form.cleaned_data
year = data.get('year')
month = data.get('month')
year1 = data.get('year1')
month1 = data.get('month1')
lon = data.get('lon')
lat = data.get ('lat')
inter = data.get('inter')
point = period_month_prec(year,month,year1,month1,lon,lat)
args = {'point':point}
response = HttpResponse(content_type='text/txt')
response['Content-Disposition'] = 'attachment; filename="precipitation.txt"'
writer = csv.writer(response)
writer.writerow([point])
return response
else:
form = PeriodMonthlyForm()
active_period_monthly = True
return render (request, 'carpatclimapp/home.html',{'form':form, 'active_period_monthly': active_period_monthly})
I hope I'm a little clearer now

unsupported operand types in django

first date
d = datetime.datetime.utcnow().replace(tzinfo=utc)
second date
checkin = models.DateTimeField(default = timezone.now)
e = Checkin.objects.all().values()
t = last value in 'e'
co = d.time()
ci = t.time()
I want difference between 'co' and 'ci'
It looks like you probably need to make both of your datetime objects time zone aware.
I have a local Page model that has a DateTimeField called first_published_at. Here's how I handle:
target_tz = datetime.tzinfo('utc')
now_dt = datetime.datetime.utcnow().replace(tzinfo=target_tz)
inst = m.Page.objects.get(pk=18)
model_dt = inst.first_published_at.replace(tzinfo=target_tz)
print(now_dt - model_dt) # 734 days, 12:46:53.059321
Or you could make both of them timezone naive:
target_tz = None
now_dt = datetime.datetime.utcnow().replace(tzinfo=target_tz)
inst = m.Page.objects.get(pk=18)
model_dt = inst.first_published_at.replace(tzinfo=target_tz)
print(now_dt - model_dt) # Same as above

Django: Total birthdays each day for the next 30 days

I've got a model similar to this:
class Person(models.Model):
name = models.CharField(max_length=40)
birthday = DateTimeField() # their next birthday
I would like to get a list of the total birthdays for each day for the next 30 days. So for example, the list would look like this:
[[9, 0], [10, 3], [11, 1], [12, 1], [13, 5], ... #30 entries in list
Each list entry in the list is a date number followed by the number of birthdays on that day. So for example on the 9th of May there are 0 birthdays.
UPDATES
My db is sqlite3 - will be moving to postgres in the future.
from django.db.models import Count
import datetime
today = datetime.date.today()
thirty_days = today + datetime.timedelta(days=30)
birthdays = dict(Person.objects.filter(
birthday__range=[today, thirty_days]
).values_list('birthday').annotate(Count('birthday')))
for day in range(30):
date = today + datetime.timedelta(day)
print "[%s, %s]" % (date, birthdays.get(date, 0))
I would get the list of days and birthday count this way:
from datetime import date, timedelta
today = date.today()
thirty_days = today + timedelta(days=30)
# get everyone with a birthday
people = Person.objects.filter(birthday__range=[today, thirty_days])
birthday_counts = []
for date in [today + timedelta(x) for x in range(30)]:
# use filter to get only birthdays on given date's day, use len to get total
birthdays = [date.day, len(filter(lambda x: x.birthday.day == date.day, people))]
birthday_counts.append(birthdays)
Something like this --
from datetime import date, timedelta
class Person(models.Model):
name = models.CharField(max_length=40)
birthday = models.DateField()
#staticmethod
def upcoming_birthdays(days=30):
today = date.today()
where = 'DATE_ADD(birthday, INTERVAL (YEAR(NOW()) - YEAR(birthday)) YEAR) BETWEEN DATE(NOW()) AND DATE_ADD(NOW(), INTERVAL %S DAY)'
birthdays = Person.objects.extra(where=where, params=[days]).values_list('birthday', flat=True)
data = []
for offset in range(0, days):
i = 0
d = today + timedelta(days=offset)
for b in birthdays:
if b.day == d.day and b.month == d.month:
i += 1
data.append((d.day, i))
return data
print Person.upcoming_birthdays()
(Queryset of people with a birthday in the next X days)
Found cool solution for this!
For me it works!
from datetime import datetime, timedelta
import operator
from django.db.models import Q
def birthdays_within(days):
now = datetime.now()
then = now + timedelta(days)
# Build the list of month/day tuples.
monthdays = [(now.month, now.day)]
while now <= then:
monthdays.append((now.month, now.day))
now += timedelta(days=1)
# Tranform each into queryset keyword args.
monthdays = (dict(zip(("birthday__month", "birthday__day"), t))
for t in monthdays)
# Compose the djano.db.models.Q objects together for a single query.
query = reduce(operator.or_, (Q(**d) for d in monthdays))
# Run the query.
return Person.objects.filter(query)
But it get a list of persons that have a birthday in date range. You should change a bit.