Cannot use SORTED for a list - list

The error occurs in the last step of this code when I try to sort a list using SORTED. I get the error message that a "list object is not callable". I want to use the SORTED and not the SORT function for this.
from csv import reader
import datetime as dt
open_file = open("hacker_news.csv")
read_file = reader(open_file)
hn = list(read_file)
headers = hn[0]
hn = hn[1:]
def explore_data(dataset,start,finish,col_rows=True):
dataset_slice = dataset[start:finish]
for row in dataset_slice:
print(row)
print('\n')
if col_rows:
print('rows:' , len(dataset))
print('columns:' , len(dataset[0]))
ask_posts = []
show_posts = []
other_posts = []
for row in hn:
title = row[1]
if title.lower().startswith("ask hn"):
ask_posts.append(row)
elif title.lower().startswith("show hn"):
show_posts.append(row)
else:
other_posts.append(row)
total_ask_comments = 0
total_show_comments = 0
total = 0
for row in ask_posts:
total += 1
num_comments = int(row[4])
total_ask_comments += num_comments
avg_ask_comments = total_ask_comments/total
print(avg_ask_comments)
for row in show_posts:
total += 1
num_comments = int(row[4])
total_show_comments += num_comments
avg_show_comments = total_show_comments/total
print(avg_show_comments)
result_list = []
for row in ask_posts:
created_at = row[6]
num_comments = int(row[4])
result_list.append([created_at,num_comments])
counts_by_hour = {}
comments_by_hour = {}
for row in result_list:
comment = row[1]
date_time = row[0]
date_time = dt.datetime.strptime(date_time,'%m/%d/%Y %H:%M')
hour = date_time.strftime('%H')
if hour not in counts_by_hour:
counts_by_hour[hour] = 1
comments_by_hour[hour] = comment
else:
counts_by_hour[hour] += 1
comments_by_hour[hour] += comment
for hour in counts_by_hour:
if hour in comments_by_hour:
avg = round(comments_by_hour[hour]/counts_by_hour[hour],2)
avg_by_hour.append([hour,avg])
for row in avg_by_hour:
swap_avg_by_hour.append([row[1], row[0]])
print(swap_avg_by_hour)
sorted_swap = sorted(swap_avg_by_hour, reverse=True)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-49-307863a4b1cd> in <module>
6 print(swap_avg_by_hour)
7
----> 8 sorted_swap = sorted(swap_avg_by_hour, reverse=True)
9
10 print(sorted_swap)
TypeError: 'list' object is not callable
Not sure how to upload the csv as don't see an upload option. Is there an obvious error in the code? or can someone help with instructions to upload the csv file?

You need to declare these 2 lists before appnding avg_by_hour = [] swap_avg_by_hour= []

Related

boost.python : pandas dataframe to c++

I want to use boost.python to use multi-index columns dataframe in c++.
※multi-index columns dataframe is like
I changed the type of multi-index columns dataframe into csv.
My csv file looks like this on spreadsheet
The reason why I want to use this data is for backtest. This is my backtest code in python that I want to translate to c++.
import pandas as pd
import numpy as np
from utils import load_data, load_list_csv, to_int
class No_Strategy():
def __init__(self, codes, unit, cash, position):
self.codes = codes
self.unit = unit
self.cash = cash
self.buy_signal = [0]*len(codes)
self.sell_signal = [0]*len(codes)
self.valid = 0
self.position = position
self.pass_st = 0 # 전략에 들어가지도 못한 경우
def set_data(self, prev_fs_row, fs_row, indi_row):
self.prev_fs = prev_fs_row
self.fs = fs_row # multi dimensional df
self.indi = indi_row
def _strat(self, prev_fs, curr_fs, curr_indi):
curr_rev = prev_rev = curr_ni = prev_ni = ni_growth = curr_asset = noncurr_asset = curr_asset_rat = 0
try:
prev_rev = int(prev_fs['매출액'].replace(",",""))
curr_rev = int(curr_fs['매출액'].replace(",",""))
except:
self.pass_st += 1
return 0, 0
rev_growth=(curr_rev-prev_rev)/prev_rev
try:
prev_ni = int(prev_fs['당기순이익'].replace(",",""))
curr_ni = int(curr_fs['당기순이익'].replace(",",""))
except:
self.pass_st += 1
return 0, 0
ni_growth=(curr_ni-prev_ni)/prev_ni
try:
curr_asset = int(curr_fs['유동자산'].replace(",",""))
noncurr_asset = int(curr_fs['비유동자산'].replace(",",""))
except:
self.pass_st += 1
return 0, 0
curr_asset_rat = curr_asset / noncurr_asset
#### this is the buy strategy! You can change the below ####
if (curr_indi.golden_cross) or (curr_indi.rsi_k < 0.65) :
return 1, 0
#### ************************************************** ####
#### this is the sell strategy! You can change the below ####
if (curr_indi.dead_cross):
return 0, 1
#### ************************************************** ####
return 0, 0
def run(self):
for i, code in enumerate(self.codes):
self.valid = 0
prev_fs = self.prev_fs[code]
curr_fs = self.fs[code]
curr_indi = self.indi[code]
prev_fs_cell = None
curr_fs_cell = None
try:
prev_fs_cell = prev_fs.iloc[0].replace(",","")
try:
curr_fs_cell = curr_fs.iloc[0].replace(",","")
except:
self.pass_st += 1
pass
except:
self.pass_st += 1
pass
if (curr_fs_cell != None) & (prev_fs_cell != None):
self.valid = 1
buy, sell = self._strat(prev_fs, curr_fs, curr_indi)
if self.valid == 0:
self.pass_st += 1
continue
else: # buy or sell signal get
price = curr_indi['close']
if buy:
if self.cash >= self.unit * price:
self.buy_signal[i] = self.unit
self.position[i] += self.unit
self.cash -= price * self.unit
elif sell:
if self.position[i] > 0 :
sell_num = self.position[i] - int(self.position[i]/2)
self.sell_signal[i] = sell_num
self.position[i] = int(self.position[i]/2) # 1-> 1 sell, 4 -> 2 sell ....
self.cash += price * sell_num
##title
class Broker():
def __init__(self, codes):
self.cash = 200000000 #2억
self.cash_df = None #pd.DataFrame(columns=['cash'])
self.position = [0]*len(codes)
self.position_df = None #pd.DataFrame(columns=codes) # for accumulated profit calculation
self.buy_signal = None #pd.DataFrame(columns=codes) # codes = KOSPI_stock_names
self.sell_signal = None #pd.DataFrame(columns=codes)
self.codes = codes # 012934, 3281, ...
self.unit = 1 # 주식 매매 단위
self.pass_st = 0
def set_strat(self, strategy):
self.strategy = strategy # class
def set_time(self, time_index): # time_index type: pd.Index / time range for indi df
self.buy_signal = pd.DataFrame(columns = self.codes, index = time_index) #set_index(time_index)
self.sell_signal = pd.DataFrame(columns = self.codes, index = time_index) #.set_index(time_index)
self.position_df = pd.DataFrame(columns = self.codes, index = time_index)
self.cash_df = pd.DataFrame(columns = ['cash'], index = time_index)#.set_index(time_index)
self.time_index = time_index
def set_data(self, fs, indi, price):
self.fs = fs # multi dimensional df / start: 0th - nth
self.indi = indi # multi dimensional df / start : 1th - nth
self.price = price # 2 dimensional (date X codes : close price)
def update_data(self, strategy, date):
self.cash = strategy.cash
self.cash_df.loc[date] = strategy.cash
self.position = strategy.position
self.position_df.loc[date] = strategy.position #list
self.buy_signal.loc[date] = strategy.buy_signal #list
self.sell_signal.loc[date] = strategy.sell_signal #list
self.pass_st += strategy.pass_st
def run(self):
for date in self.time_index: #아마 수정해야 할 확률 높음
if date.year == 2021:
break
else:
prev_fs_row = self.fs.loc[date.year-1] # ex: 2014
fs_row = self.fs.loc[date.year] # 2015
indi_row = self.indi.loc[date] # 2015
strategy = self.strategy(self.codes, self.unit, self.cash, self.position)
strategy.set_data(prev_fs_row, fs_row, indi_row)
strategy.run()
self.update_data(strategy, date)
def performance(self):
# !!!! 2020년까지의 결과만 성능 평가 ####
cash_df = self.cash_df[self.cash_df.index < '2021']
position_df = self.position_df[self.position_df.index < '2021']
price = self.price[self.price.index < '2021']
buy_signal = self.buy_signal[self.buy_signal.index < '2021']
sell_signal = self.sell_signal[self.sell_signal.index < '2021']
last_price = price.iloc[-1]
total_remain_num = self.position # last(2020) position data
total_buy = (price * buy_signal).sum(axis=1).sum()
total_sell = (price * sell_signal).sum(axis=1).sum()
total_remain = (last_price * total_remain_num).sum()
print(f'remain 개수: {total_remain_num}, total_remain: {total_remain} total_buy: {total_buy}, total_sell={total_sell}')
profit = total_sell + total_remain - total_buy
try:
return_mean = profit / total_buy
except:
print("no buy")
return
accum_df = (cash_df['cash'] + ((price.fillna(0) * position_df).sum(axis=1))).to_frame() # row sum
daily_return_df = (accum_df - accum_df.shift(1))/accum_df.shift(1)-1
SSE = ((daily_return_df - return_mean)**2).sum().item()
std = np.sqrt(SSE/(accum_df.shape[0]-1)) # route(sigma(x-x_bar)^2 / (n-1))
sharp = return_mean / std
self.return_mean = return_mean
self.sharp = sharp
print(f'return_mean: {return_mean}, sharp: {sharp}')
code_path = GDRIVE_DATA_PATH + 'codes.csv'
fs_path = GDRIVE_DATA_PATH + 'fs_total.csv'
indi_path = GDRIVE_DATA_PATH + 'indi_total.csv'
price_path = GDRIVE_DATA_PATH + 'prices.csv'
fs_total = load_data("fs_total.csv")
indi_total = load_data("indi_total.csv") # stock price and indicator(Golden cross, RSI, etc.)
prices = load_data("prices.csv") # stock close price data rows:date, cols: stock code.
time_index = indi_total.index # time index of indi_total multi-index columns
broker = Broker(codes)
broker.set_strat(No_Strategy)
broker.set_time(time_index)
broker.set_data(fs_total, indi_total, prices)
broker.run()
broker.performance()
I want to translate it not changing much in flow of the code.
But I cannot find how to get multi-index columns dataframe in c++, and transfer its row data to No_Strategy to decide whether invest into the stock.
※ I uploaded similar question before and get thankful answer, but it is too complicated for me so I question one more time with detail information.
look at https://github.com/hosseinmoein/DataFrame. It has about 95% of Pandas functionality in a much faster framework

Get different result when use i iteration and np.sum function

I have a pandas Dataframe, I want to get the sum of the 'daily return' column of the data frame for every 60 days. The following is my code:
day = days() #days is a function to count business days.
for day>60:
for i in range(day-60,day):
current_x = sh600004['daily return'][i]
x_list.append(current_x)
x_sum = sum(x_list)
print x_sum
Here's what I got
To test the result, I used the following code:
y = sh600004
y.apply(lambda x: x.sum())
and I got different result.
The sum of 'daily return' column is not the same. When I print out my dataframe sh600004, I realize the data in x_sum is same as sh600004['daily return'], not the sum of it.
What do I need to do to get the sum of every 60 days of the daily return ? Can anyone help, please?
I don't know is that gonna help or not, but here's my the code I wrote so far:
#calculate daily return
daily_close = sh600004['close']
daily_pct_c = daily_close.pct_change().fillna(0)
sh600004['daily return'] = daily_pct_c
def days():
day = np.busday_count((datetime.datetime.strptime(sh600004['date'][0], '%Y/%m/%d')),pd.to_datetime(date), weekmask='1111100', holidays=holiday_list)
return day
def xn_deviation():
x_list = []
deviation_list = []
z_list = []
diff_list = []
result_list = []
day = days()
for i in range(0, 60):
current_x = sh600004['daily return'][i]
x_list.append(current_x)
x_sum = sum(x_list)
x_average = x_sum/len(x_list) #xn average
x_deviation = current_x - x_average #xn deviation
deviation_list.append(x_deviation)
dev_sum = sum(deviation_list) #calculate Z
z_list.append(dev_sum) #deviation sum list
r = max(z_list)-min(z_list) #calculate widest deviation
diff = np.square(current_x - x_average)
diff_list.append(diff)
sum_diff = sum(diff_list)
s = np.sqrt(sum_diff/len(x_list))
result_list = [r,s]
return result_list
else:
for i in range(day-60,day):
#same code as before
#loop
for date in sh600004.index:
days()
xn_deviation()

Memory Error when exporting data to csv file

Hello I was hoping someone could help me with my college coursework, I have an issue with my code. I keep running into a memory error with my data export.
Is there any way I can reduce the memory that is being used or is there a different approach I can take?
For the course work I am given a file of 300 records about customer orders from a CSV file and then I have to export the Friday records to a new CSV file. Also I am required to print the most popular method for customer's orders and the total money raised from the orders but I have an easy plan for that.
This is my first time working with CSV so I'm not sure how to do it. When I run the program it tends to crash instantly or stop responding. Once it appeared with 'MEMORY ERROR' however that is all it appeared with. I'm using a college provided computer so I am not sure on the exact specs but I know it runs 4GB of memory.
defining count occurences predefined function
def countOccurences(target,array):
counter = 0
for element in array:
if element == target:
counter= counter + 1
print counter
return counter
creating user defined functions for the program
dataInput function used for collecting data from provided file
def dataInput():
import csv
recordArray = []
customerArray = []
f = open('E:\Portable Python 2.7.6.1\Choral Shield Data File(CSV).csv')
csv_f = csv.reader(f)
for row in csv_f:
customerArray.append(row[0])
ticketID = row[1]
day, area = datasplit(ticketID)
customerArray.append(day)
customerArray.append(area)
customerArray.append(row[2])
customerArray.append(row[3])
recordArray.append(customerArray)
f.close
return recordArray
def datasplit(variable):
day = variable[0]
area = variable[1]
return day,area
def dataProcessing(recordArray):
methodArray = []
wed_thursCost = 5
friCost = 10
record = 0
while record < 300:
method = recordArray[record][4]
methodArray.append(method)
record = record+1
school = countOccurences('S',methodArray)
website = countOccurences('W',methodArray)
if school > website:
school = True
elif school < website:
website = True
dayArray = []
record = 0
while record < 300:
day = recordArray[record][1]
dayArray.append(day)
record = record + 1
fridays = countOccurences('F',dayArray)
wednesdays = countOccurences('W',dayArray)
thursdays = countOccurences('T', dayArray)
totalFriCost = fridays * friCost
totalWedCost = wednesdays * wed_thursCost
totalThurCost = thursdays * wed_thursCost
totalCost = totalFriCost + totalWedCost + totalThurCost
return totalCost,school,website
My first attempt to writing to a csv file
def dataExport(recordArray):
import csv
fridayRecords = []
record = 0
customerIDArray = []
ticketIDArray = []
numberArray = []
methodArray = []
record = 0
while record < 300:
if recordArray[record][1] == 'F':
fridayRecords.append(recordArray[record])
record = record + 1
with open('\Courswork output.csv',"wb") as f:
writer = csv.writer(f)
for record in fridayRecords:
writer.writerows(fridayRecords)
f.close
My second attempt at writing to the CSV file
def write_file(recordArray): # write selected records to a new csv file
CustomerID = []
TicketID = []
Number = []
Method = []
counter = 0
while counter < 300:
if recordArray[counter][2] == 'F':
CustomerID.append(recordArray[counter][0])
TicketID.append(recordArray[counter][1]+recordArray[counter[2]])
Number.append(recordArray[counter][3])
Method.append(recordArray[counter][4])
fridayRecords = [] # a list to contain the lists before writing to file
for x in range(len(CustomerID)):
one_record = CustomerID[x],TicketID[x],Number[x],Method[x]
fridayRecords.append(one_record)
#open file for writing
with open("sample_output.csv", "wb") as f:
#create the csv writer object
writer = csv.writer(f)
#write one row (item) of data at a time
writer.writerows(recordArray)
f.close
counter = counter + 1
#Main Program
recordArray = dataInput()
totalCost,school,website = dataProcessing(recordArray)
write_file(recordArray)
In the function write_file(recordArray) in your second attempt the counter variable counter in the first while loop is never updated so the loop continues for ever until you run out of memory.

date format cannot convert in data format

I am trying to create an application where it pulls the information from website using python and I am using datetime formats.
def constructYFURL(ticker,start_date,end_date,freq):
start_date = datetime.strptime(start_date,"%Y-%m-%d").date()
end_date = datetime.strptime(end_date,"%Y-%m-%d").date()
s=ticker.replace("^","%5E")
if start_date.month-1<10:
a="0"+str(start_date.month-1)
else:
a=str(start_date.month-1)
# Also the month always has 2 digits
b=str(start_date.day)
c=str(start_date.year)
if end_date.month - 1 < 10:
d = "0" + str(end_date.month - 1)
else:
d = str(end_date.month - 1)
e=str(end_date.day)
f=str(end_date.year)
`enter code here`g=freq
yfURL = "http://real-chart.finance.yahoo.com/table.csv? s="+s+"&a="+a+"&b="+b+"&c="+c+"&d="+d+"&e="+e+"&f="+f+"&g="+g+"&ignore=.csv"
return yfURL
from download import constructYFURL
ticker = "AAPL"
start_date = "2016-01-01"
end_date = " 2017-03-06"
freq = "d"
yfURL = constructYFURL(ticker,start_date,end_date,freq)
print yfURL
I am getting a following error saying:
ValueError: time data ' 2017-03-06' does not match format '%Y-%m-%d'
You have an extra leading space in:
end_date = " 2017-03-06"
remove it and the error you're getting goes away and the program runs
Better formatted code:
def constructYFURL(ticker,start_date,end_date,freq):
start_date = datetime.strptime(start_date,"%Y-%m-%d").date()
end_date = datetime.strptime(end_date,"%Y-%m-%d").date()
s=ticker.replace("^","%5E")
if start_date.month-1<10:
a="0"+str(start_date.month-1)
else:
a=str(start_date.month-1)
# Also the month always has 2 digits
b=str(start_date.day)
c=str(start_date.year)
if end_date.month - 1 < 10:
d = "0" + str(end_date.month - 1)
else:
d = str(end_date.month - 1)
e=str(end_date.day)
f=str(end_date.year)
g=freq
yfURL = "http://real-chart.finance.yahoo.com/table.csv? s="+s+"&a="+a+"&b="+b+"&c="+c+"&d="+d+"&e="+e+"&f="+f+"&g="+g+"&ignore=.csv"
return yfURL
ticker = "AAPL"
start_date = "2016-01-01"
end_date = "2017-03-06"
freq = "d"
yfURL = constructYFURL(ticker,start_date,end_date,freq)
print yfURL

loop doesn't iterate over all the csv file read, python2, pycharm

here is my code:
import csv
inp1 = raw_input('Enter your Hijjri year:')
intinp1 = int(inp1)
majmouaopen = open('Majmoua.csv')
majmouaread = csv.reader(majmouaopen)
majmouaread.next()
mabsoutaopen = open('Mabsouta.csv')
mabsoutaread = csv.reader(mabsoutaopen)
mabsoutaread.next()
hijrimiladimonthsopened = open('MiladiHijrimonths.csv')
hijrimiladimonthsread = csv.reader(hijrimiladimonthsopened)
yearslist = []
years = []
yearssection = []
monthssection = []
minutessection = []
def miladifromhijri(intinp1):#, inp2, intinp3):
fulyear = intinp1 - 1
n = 0
for row in majmouaread:
print row
introw = int(row[0])
if introw <= fulyear:
n += 1
years.append(introw)
continue
if n == len(years):
near = years[::-1][0]
nearlessyear = near
break
for row in majmouaread:
print row
my problem is with the last loop, it doesn't print all of the majmouaread files. for the first loop, which is the same, it does print all of the csv file rows.
What is causing the probblem, is it something in the code? or something happened to the csv file read? It looks fine with first loop?