I have a pandas DataFrame and I want to compute the sum of its 'daily return' column for every 60 days. The following is my code:
# Sketch of the attempted 60-day sum. The snippet carries no indentation, so the
# nesting of the loops below is not visible here.
day = days() #days is a function to count business days.
# NOTE(review): `for day>60:` is not valid Python; a test such as `if day > 60:`
# was presumably intended — confirm.
for day>60:
for i in range(day-60,day):
current_x = sh600004['daily return'][i]
x_list.append(current_x)
# NOTE(review): x_list is not created or cleared anywhere in this snippet.
x_sum = sum(x_list)
# Python 2 print statement.
print x_sum
Here's what I got
To test the result, I used the following code:
# Cross-check: with apply()'s default axis the lambda receives one column at a
# time, so this yields one total per column of the frame.
y = sh600004
y.apply(lambda x: x.sum())
and I got a different result.
The sum of the 'daily return' column is not the same. When I print out my DataFrame sh600004, I realize that the data in x_sum is the same as sh600004['daily return'] itself, not its sum.
What do I need to do to get the sum of the daily return for every 60 days? Can anyone help, please?
I don't know whether this will help, but here is the code I have written so far:
# Calculate the daily return: the percentage change of the close price from
# one row to the next.
daily_close = sh600004['close']
# pct_change() leaves the first row as NaN (there is no previous row);
# fillna(0) replaces every NaN with 0.
daily_pct_c = daily_close.pct_change().fillna(0)
sh600004['daily return'] = daily_pct_c
def days():
    """Count business days from the first row's date up to the current ``date``.

    Reads three module-level names: ``sh600004`` (its ``'date'`` column is
    parsed with the format ``'%Y/%m/%d'``), ``date`` and ``holiday_list``.

    Returns:
        The number of Monday-to-Friday days, excluding ``holiday_list``,
        counted by ``np.busday_count`` from the first date (inclusive) to
        ``date`` (exclusive).
    """
    first_day = datetime.datetime.strptime(sh600004['date'][0], '%Y/%m/%d').date()
    # np.busday_count works on day-precision dates. datetime and Timestamp
    # objects carry sub-day precision and fail its safe cast, so both ends
    # are reduced to plain dates first.
    current_day = pd.to_datetime(date).date()
    day = np.busday_count(first_day, current_day, weekmask='1111100', holidays=holiday_list)
    return day
# Builds running statistics from sh600004['daily return'] and returns them as
# [r, s]: r is the spread (max - min) of the accumulated deviation sums, s is
# the square root of the mean squared deviation.
# NOTE(review): the snippet carries no indentation and the `else:` below has no
# visible `if`, so the intended branching (presumably on `day` versus 60) and
# the exact loop nesting must be confirmed before this can run.
def xn_deviation():
x_list = []
deviation_list = []
z_list = []
diff_list = []
result_list = []
# Business-day count; used below as the end of the second index window.
day = days()
for i in range(0, 60):
current_x = sh600004['daily return'][i]
x_list.append(current_x)
x_sum = sum(x_list)
x_average = x_sum/len(x_list) #xn average
x_deviation = current_x - x_average #xn deviation
deviation_list.append(x_deviation)
dev_sum = sum(deviation_list) #calculate Z
z_list.append(dev_sum) #deviation sum list
r = max(z_list)-min(z_list) #calculate widest deviation
diff = np.square(current_x - x_average)
diff_list.append(diff)
sum_diff = sum(diff_list)
s = np.sqrt(sum_diff/len(x_list))
result_list = [r,s]
return result_list
# NOTE(review): dangling `else:`; the body of the second loop was elided by the author.
else:
for i in range(day-60,day):
#same code as before
#loop
# `date` is not passed as an argument: days() reads it as a module-level name.
# The value returned by days() is discarded here; xn_deviation() calls days()
# again itself, and its own return value is discarded as well.
for date in sh600004.index:
days()
xn_deviation()
Related
I need a hand; I don't understand how to fix this.
Look at the photo to better understand what I mean: as soon as the "new" field is populated, its value is subtracted from the total. I don't want it to be subtracted immediately, but in the next round.
How can I do that?
# Django view: for today and each of the 4 preceding days, collects the users
# who joined on that day plus a running user total, then renders
# 'andamento.html' with the three lists.
def UserAndamentoListView(request):
giorni = []
# `mese` is created but not used in the visible code.
mese = []
new_user = []
tot_user = []
tot = User.objects.all().count()
for day in range(5):
giorni.append(datetime.datetime.now() - datetime.timedelta(days=day))
new = User.objects.filter(date_joined__contains = giorni[day].date())
new_user.append(new)
# The day's new users are subtracted BEFORE the total is recorded, so
# tot_user[day] already excludes the users who joined on that same day.
tot -= new.count()
tot_user.append(tot)
context = {'giorni': giorni, 'new_user':new_user, 'tot_user': tot_user}
return render(request, 'andamento.html', context)
Just subtract after appending the total
for day in range(5):
    giorni.append(datetime.datetime.now() - datetime.timedelta(days=day))
    new = User.objects.filter(date_joined__contains=giorni[day].date())
    new_user.append(new)
    # Record the total as it stands for this day first ...
    tot_user.append(tot)
    # ... and only then subtract this day's new users, so the reduced value
    # is first seen by the next iteration.
    tot -= new.count()
The value of tot is changed at the end of the current loop iteration but is not used until the next trip around the loop.
So I have an array with some daily price variations (VarP) and an initial price value (p_0 = 500), and I want to get a new array with the prices that result from these variations.
For example:
p_0 = 500
p_1 = p_0*VarP[0]
p_2 = p_1*VarP[1]
.
.
.
p_n = p_(n-1)*VarP[n-1]
How can I do this with a function in Python?
Please try this code
def prices_from_variations(VarP, p_0=500):
    """Return the price series produced by applying ``VarP`` to ``p_0``.

    Args:
        VarP: Iterable of multiplicative daily variations.
        p_0: Initial price.

    Returns:
        A list ``[p_0, p_1, ..., p_n]`` with ``p_k = p_(k-1) * VarP[k-1]``.
        It has one more element than ``VarP``, so the last variation is
        applied too; an empty ``VarP`` yields ``[p_0]``.
    """
    prices = [p_0]
    for variation in VarP:
        prices.append(prices[-1] * variation)
    return prices


newArr = prices_from_variations(VarP)
The error occurs in the last step of this code, when I try to sort a list using sorted(). I get the error message "'list' object is not callable". I want to use the sorted() function rather than the list.sort() method for this.
from csv import reader
import datetime as dt
# Read the whole CSV into memory. The context manager closes the file once it
# has been read; newline="" lets the csv module handle line endings itself.
with open("hacker_news.csv", newline="") as open_file:
    read_file = reader(open_file)
    hn = list(read_file)
# The first row holds the column names; everything after it is data.
headers = hn[0]
hn = hn[1:]
def explore_data(dataset, start, finish, col_rows=True):
    """Print the rows ``dataset[start:finish]`` and, optionally, the table size.

    Args:
        dataset: List of rows, each row itself a sequence.
        start: Index of the first row to print.
        finish: Index one past the last row to print.
        col_rows: When true, also print the number of rows and of columns.
    """
    for row in dataset[start:finish]:
        print(row)
        print('\n')  # blank spacer after each row
    if col_rows:
        print('rows:', len(dataset))
        # An empty dataset has no first row to measure; report 0 columns
        # instead of raising IndexError.
        print('columns:', len(dataset[0]) if dataset else 0)
# Split the posts by title prefix (case-insensitive) into Ask HN, Show HN and
# everything else.
ask_posts = []
show_posts = []
other_posts = []
for row in hn:
    title = row[1]
    lowered = title.lower()  # lower-case once rather than once per test
    if lowered.startswith("ask hn"):
        ask_posts.append(row)
    elif lowered.startswith("show hn"):
        show_posts.append(row)
    else:
        other_posts.append(row)
# Average number of comments per Ask HN post (column 4 holds the comment count).
total_ask_comments = sum(int(row[4]) for row in ask_posts)
# Guard against an empty list so the script does not stop with ZeroDivisionError.
avg_ask_comments = total_ask_comments / len(ask_posts) if ask_posts else 0
print(avg_ask_comments)

# Average number of comments per Show HN post. Each average is divided by the
# length of its own list; a single shared counter carried over from the Ask HN
# pass would inflate the divisor and understate this value.
total_show_comments = sum(int(row[4]) for row in show_posts)
avg_show_comments = total_show_comments / len(show_posts) if show_posts else 0
print(avg_show_comments)
# Pair each Ask HN post's creation timestamp (column 6) with its comment count (column 4).
result_list = []
for row in ask_posts:
created_at = row[6]
num_comments = int(row[4])
result_list.append([created_at,num_comments])
# Tally, per hour of day, the number of posts and the number of comments.
counts_by_hour = {}
comments_by_hour = {}
for row in result_list:
comment = row[1]
date_time = row[0]
date_time = dt.datetime.strptime(date_time,'%m/%d/%Y %H:%M')
# Zero-padded hour as a string, e.g. '09'; used as the dictionary key.
hour = date_time.strftime('%H')
if hour not in counts_by_hour:
counts_by_hour[hour] = 1
comments_by_hour[hour] = comment
else:
counts_by_hour[hour] += 1
comments_by_hour[hour] += comment
# Average comments per post for each hour, rounded to two decimals.
# NOTE(review): avg_by_hour and swap_avg_by_hour are appended to below but are
# not created anywhere in the visible code.
for hour in counts_by_hour:
if hour in comments_by_hour:
avg = round(comments_by_hour[hour]/counts_by_hour[hour],2)
avg_by_hour.append([hour,avg])
# Swap to [average, hour] so that sorting orders the entries by average first.
for row in avg_by_hour:
swap_avg_by_hour.append([row[1], row[0]])
print(swap_avg_by_hour)
# NOTE(review): this relies on `sorted` still being the built-in function; if the
# name was rebound to a list earlier in the session, this call fails.
sorted_swap = sorted(swap_avg_by_hour, reverse=True)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-49-307863a4b1cd> in <module>
6 print(swap_avg_by_hour)
7
----> 8 sorted_swap = sorted(swap_avg_by_hour, reverse=True)
9
10 print(sorted_swap)
TypeError: 'list' object is not callable
Not sure how to upload the csv as don't see an upload option. Is there an obvious error in the code? or can someone help with instructions to upload the csv file?
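You need to declare these two lists before appending to them: avg_by_hour = [] and swap_avg_by_hour = []. Note also that "'list' object is not callable" on the sorted(...) line means the name sorted was rebound to a list earlier in the session (for example by an assignment such as sorted = [...]); running del sorted or restarting the kernel restores the built-in function.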
I have two questions. First question: I have a Python script that shows me the precipitation for a certain period. For example, I am given an initial year-month and a final year-month.
Initial:
year:2000
month:3
Final
year1:2005
month:4
Now, instead of seeing:
2000/3,2000/4,2000/5,2000/6..........2005/1,2005/2,2005/3,2005/4
it works like this (see the attached picture):
2000/3, 2000/4, 2001/3, 2001/4........2005/3,2005/4.
I want it to work as in the first case.
def period_month_prec(year,month,year1,month1):
for i in range (year,year1+1,1):
for j in range(month,month1+1,1):
......................
Second question: How do I write the output of the script (shown in the picture) to a CSV file? [image: enter image description here]
This is what my views.py looks like; it saves only the first result:
# Django view for the monthly-period form: on a valid POST it runs
# period_month_prec() and returns the result as a downloadable text file.
# NOTE(review): the snippet carries no indentation, so it is not visible which
# `if` the `else:` belongs to, nor on which paths the final render() runs.
def monthly_period(request):
if request.method == "POST" :
form = PeriodMonthlyForm(request.POST)
if form.is_valid():
data = form.cleaned_data
year = data.get('year')
month = data.get('month')
year1 = data.get('year1')
month1 = data.get('month1')
lon = data.get('lon')
lat = data.get ('lat')
# `inter` (and `args` below) are assigned but not used in the visible code.
inter = data.get('inter')
point = period_month_prec(year,month,year1,month1,lon,lat)
args = {'point':point}
response = HttpResponse(content_type='text/txt')
response['Content-Disposition'] = 'attachment; filename="precipitation.txt"'
writer = csv.writer(response)
# `point` is wrapped in a one-element list, so the whole return value of
# period_month_prec() is written as a single cell of a single row.
writer.writerow([point])
return response
else:
form = PeriodMonthlyForm()
active_period_monthly = True
return render (request, 'carpatclimapp/home.html',{'form':form, 'active_period_monthly': active_period_monthly})
OK, I have a form like this:
Forms
You set the initial values (red) and the end of the interval (blue). For this interval, lon and lat define the point at which we want to perform the interpolation. When you press the submit button, the interpolation is run for the defined period. The problem with the loop is that it only covers the months that lie between the two given month values (the second picture shows that it only works for months 1-6) and skips months 7, 8, 9, 10, 11 and 12 in the years in between.
Initial: year:2000, month:3
Final: year1:2001, month:4
For this input it currently produces: 2000/3, 2000/4, 2001/3, 2001/4
I do not want that; I want this: 2000/3,2000/4,2000/5,2000/6,2000/7.....2000/12,2001/1,2001/2,2001/3,2001/4.
This is my code:
# Queries the 'monthly' table of the SQLite database DB1 for each (year, month)
# pair visited by the loops, joins the rows with the 'grid1' table and
# interpolates the precipitation values to the point (lon, lat).
# NOTE(review): the snippet carries no indentation, so the nesting of the
# statements below (in particular of the final `return`) is not visible.
def period_month_prec(year,month,year1,month1,lon,lat):
cnx = sqlite3.connect(DB1)
cursor = cnx.cursor()
table = 'monthly'
year = int(year)
year1 = int(year1)
month = int(month)
month1 = int(month1)
# The inner range reuses month..month1 for every year, so months outside that
# span are never visited (e.g. 2000/3 to 2001/4 visits only months 3 and 4 of
# each year).
for i in range (year,year1+1,1):
for j in range(month,month1+1,1):
# The query text is built with % string formatting from table, i and j.
query = '''
SELECT dates, cell, prec FROM %s WHERE dates = "%s-%s" ;
''' % (table,i,j)
df = pd.read_sql_query(query, cnx)
tacka = '''SELECT id, lon, lat,country,altitude FROM %s;''' % 'grid1'
grid1 = pd.read_sql_query(tacka, cnx)
# Attach the grid coordinates to each precipitation row via cell == id.
podaci = pd.merge(df,grid1,left_on='cell',right_on='id')
podaci_a = podaci.drop(['cell','id','country','altitude'],axis=1)
lon_n = podaci_a['lon'].values
lat_n = podaci_a['lat'].values
prec =podaci_a['prec'].values
x_masked, y_masked, prec_p = remove_nan_observations(lon_n, lat_n, prec)
xy = np.vstack([x_masked,y_masked]).T
xi = ([lon,lat])
# NOTE(review): the call below is missing its closing parenthesis.
inter_point = interpolate_to_points(xy,prec_p,xi, interp_type='linear'
# NOTE(review): if this return sits inside the loops, the function stops after
# the first (year, month) pair — confirm against the original indentation.
return (i,j,lon,lat,inter_point)
The results that come out look like this:
loop with calculations
The second question was how to save these results (second picture) to a CSV file, i.e. how to write views.py correctly. Currently it looks like this:
# Django view for the monthly-period form: on a valid POST it runs
# period_month_prec() and returns the result as a downloadable text file.
# NOTE(review): the snippet carries no indentation, so it is not visible which
# `if` the `else:` belongs to, nor on which paths the final render() runs.
def monthly_period(request):
if request.method == "POST" :
form = PeriodMonthlyForm(request.POST)
if form.is_valid():
data = form.cleaned_data
year = data.get('year')
month = data.get('month')
year1 = data.get('year1')
month1 = data.get('month1')
lon = data.get('lon')
lat = data.get ('lat')
# `inter` (and `args` below) are assigned but not used in the visible code.
inter = data.get('inter')
point = period_month_prec(year,month,year1,month1,lon,lat)
args = {'point':point}
response = HttpResponse(content_type='text/txt')
response['Content-Disposition'] = 'attachment; filename="precipitation.txt"'
writer = csv.writer(response)
# A single writerow() call emits exactly one row; `point` is wrapped in a
# one-element list, so that row has a single cell.
writer.writerow([point])
return response
else:
form = PeriodMonthlyForm()
active_period_monthly = True
return render (request, 'carpatclimapp/home.html',{'form':form, 'active_period_monthly': active_period_monthly})
I hope I'm a little clearer now
I have a list of data frames:
data_frames = [sort,sort1,sort2]
I'd like to iterate over them and store some stats in a new df. I feel like this is something trivial but the function below returns an empty data frame df_concat = df_stats(data_frames). What am I missing? Will appreciate your help.
Create an example data set:
import pandas as pd
# Raw column data for three small example frames; each one has the columns
# 'number' and 'PVs'.
data = {'number': [23, 56, 89], 'PVs': [23456, 34456, 6789]}
data1 = {'number': [28, 52, 12], 'PVs': [3423456, 2334456, 36789]}
data2 = {'number': [123, 5, 86], 'PVs': [2345655, 934456, 16789]}

# Build the three frames in one pass over the dictionaries.
sort, sort1, sort2 = map(pd.DataFrame, (data, data1, data2))
The function to iterate over data frames:
# Attempts to collect count / sum / mean / median statistics for each frame in
# `data_frames` into one result frame.
# NOTE(review): `df` starts with no rows, and every statement below assigns a
# scalar to a whole column rather than to a new row; this appears to be why the
# returned frame is empty — confirm against pandas' column-assignment rules.
# The snippet carries no indentation, so the position of `return df` relative
# to the loop is not visible.
def df_stats(data_frames):
df = pd.DataFrame()
for data in data_frames:
df['Number'] = data.number.count()
# The PVs statistics are stored as strings with thousands separators and no decimals.
df["Total PVs"] = '{0:,.0f}'.format(data.PVs.sum())
df["Average"] = '{0:,.0f}'.format(data.PVs.mean())
df["Median"] = '{0:,.0f}'.format(data.PVs.median())
return df
We can use pd.concat + groupby rather than a for loop:
# Label the frames 1, 2, 3, ... (one key per input frame, however many there
# are) so that groupby(level=0) aggregates each original frame separately.
pd.concat(data_frames, keys=list(range(1, len(data_frames) + 1))).groupby(level=0).agg(
    {'number': 'count', 'PVs': ['sum', 'mean', 'median']}
)
Out[1117]:
number PVs
count sum mean median
1 3 64701 2.156700e+04 23456
2 3 5794701 1.931567e+06 2334456
3 3 3296900 1.098967e+06 934456
Also, if you want to keep using your own function, you can fix it like this:
df = pd.DataFrame()
for i, data in enumerate(data_frames):
    # Address one cell at a time with .at[row, column]: writing to row label i
    # adds that row to the frame, so every input frame gets its own row.
    df.at[i, 'Number'] = data.number.count()
    df.at[i, "Total PVs"] = '{0:,.0f}'.format(data.PVs.sum())
    df.at[i, "Average"] = '{0:,.0f}'.format(data.PVs.mean())
    df.at[i, "Median"] = '{0:,.0f}'.format(data.PVs.median())
df
Out[1121]:
Number Total PVs Average Median
0 3.0 64,701 21,567 23,456
1 3.0 5,794,701 1,931,567 2,334,456
2 3.0 3,296,900 1,098,967 934,456
Try this:
# Example DataFrames
data1 = pd.DataFrame({'number': [23, 56, 89], 'PVs': [23456, 34456, 6789]},
                     columns=['number', 'PVs'])
data2 = pd.DataFrame({'number': [28, 52, 12], 'PVs': [3423456, 2334456, 36789]},
                     columns=['number', 'PVs'])
data3 = pd.DataFrame({'number': [123, 5, 86], 'PVs': [2345655, 934456, 16789]},
                     columns=['number', 'PVs'])


def df_stats(dataFrame):
    """Return a one-row DataFrame of summary statistics for ``dataFrame``.

    Args:
        dataFrame: DataFrame with the columns ``number`` and ``PVs``.

    Returns:
        A DataFrame with a single row and the columns ``Number`` (count of
        non-null ``number`` values), ``Total PVs``, ``Average`` and ``Median``.
        The three ``PVs`` statistics are strings with thousands separators
        and no decimals.
    """
    pvs = dataFrame['PVs']
    stats = {
        'Number': dataFrame['number'].count(),
        'Total PVs': '{0:,.0f}'.format(pvs.sum()),
        'Average': '{0:,.0f}'.format(pvs.mean()),
        'Median': '{0:,.0f}'.format(pvs.median()),
    }
    # Build the row explicitly rather than relying on a label-based row
    # assignment followed by column overwrites.
    return pd.DataFrame([stats], columns=['Number', 'Total PVs', 'Average', 'Median'])


# Create a list of DataFrames to iterate over
data_frames = [data1, data2, data3]

# Compute one stats row per frame and concatenate them in a single call;
# growing the result inside a loop would copy it on every iteration.
result = pd.concat([df_stats(dataFrame) for dataFrame in data_frames],
                   ignore_index=True)
result.head(3)
The output is:
Number Total PVs Average Median
0 3 64,701 21,567 23,456
1 3 5,794,701 1,931,567 2,334,456
2 3 3,296,900 1,098,967 934,456
The function below works:
dict_df ={'df1':sort1,'df':sort,'df2':sort2}
def df_stats(dict_df):
    """Summarise each DataFrame in ``dict_df`` as one row of a result frame.

    Args:
        dict_df: Mapping of a label to a DataFrame that has the columns
            ``number`` and ``PVs``.

    Returns:
        A DataFrame indexed by the mapping's keys (in iteration order) with
        the columns ``Number``, ``Total PVs``, ``Average`` and ``Median``.
        The three ``PVs`` statistics are strings with thousands separators
        and no decimals. An empty mapping yields an empty frame that still
        has the four columns.
    """
    columns = ['Number', 'Total PVs', 'Average', 'Median']
    # Collect all rows first and build the frame once, instead of filling a
    # pre-allocated frame cell by cell.
    rows = [
        [
            data.number.count(),
            '{0:,.0f}'.format(data.PVs.sum()),
            '{0:,.0f}'.format(data.PVs.mean()),
            '{0:,.0f}'.format(data.PVs.median()),
        ]
        for data in dict_df.values()
    ]
    return pd.DataFrame(rows, index=list(dict_df), columns=columns)
Output:
Number Total PVs Average Median
df2 3 3,296,900 1,098,967 934,456
df1 3 5,794,701 1,931,567 2,334,456
df 3 64,701 21,567 23,456