average spending per day - django model - django

I have a model that looks something like that:
class Payment(TimeStampModel):
timestamp = models.DateTimeField(auto_now_add=True)
amount = models.FloatField()
creator = models.ForeignKey(to='Payer')
What is the correct way to calculate average spending per day?
I can aggregate by day, but then the days when a payer does not spend anything won't count, which is not correct
UPDATE:
So, let's say I have only two records in my db, one from March 1, and one from January 1. The average spending per day should be something
(Sum of all spendings) / (March 1 - January 1)
that is divided by 60
however this of course give me just an average spending per item, and number of days will give me 2:
for p in Payment.objects.all():
print(p.timestamp, p.amount)
p = Payment.objects.all().dates('timestamp','day').aggregate(Sum('amount'), Avg('amount'))
print(p
Output:
2019-03-05 17:33:06.490560+00:00 456.0
2019-01-05 17:33:06.476395+00:00 123.0
{'amount__sum': 579.0, 'amount__avg': 289.5}

You can aggregate min and max timestamp and the sum of amount:
from django.db.models import Min, Max, Sum
def average_spending_per_day():
aggregate = Payment.objects.aggregate(Min('timestamp'), Max('timestamp'), Sum('amount'))
min_datetime = aggregate.get('timestamp__min')
if min_datetime is not None:
min_date = min_datetime.date()
max_date = aggregate.get('timestamp__max').date()
total_amount = aggregate.get('amount__sum')
days = (max_date - min_date).days + 1
return total_amount / days
return 0
If there is a min_datetime then there is some data in the db table, and there is also max date and total amount, otherwise we return 0 or whatever you want.

It depends on your backend, but you want to divide the sum of amount by the difference in days between your max and min timestamp. In Postgres, you can simply subtract two dates to get the number of days between them. With MySQL there is a function called DateDiff that takes two dates and returns the number of days between them.
class Date(Func):
function = 'DATE'
class MySQLDateDiff(Func):
function = 'DATEDIFF'
def __init__(self, *expressions, **extra):
expressions = [Date(exp) for exp in expressions]
extra['output_field'] = extra.get('output_field', IntegerField())
super().__init__(*expressions, **extra)
class PgDateDiff(Func):
template = "%(expressions)s"
arg_joiner = ' - '
def __init__(self, *expressions, **extra):
expressions = [Date(exp) for exp in expressions]
extra['output_field'] = extra.get('output_field', IntegerField())
super().__init__(*expressions, **extra)
agg = {
avg_spend: ExpressionWrapper(
Sum('amount') / (PgDateDiff(Max('timestamp'), Min('timestamp')) + Value(1)),
output_field=DecimalField())
}
avg_spend = Payment.objects.aggregate(**agg)
That looks roughly right to me, of course, I haven't tested it. Of course, use MySQLDateDiff if that's your backend.

Related

How to get only date after subtraction of two date field

I'm trying to have subtraction of two data fields and the result in days. But I'm having time also at the output. How do I get only days not the time.
Here is my code:
class ItemTable(models.Model):
expdate = models.DateField("EXP Date", null=True, blank=True)
def days_to_exp(self):
if self.expdate:
now = datetime.date.today()
exp = str(self.expdate - now)
if exp > "1":
return exp
elif exp < "1" and exp > "0":
return "Today"
else:
return "Expired"
output:
12 days, 0:00:00,
4 days, 0:00:00... etc
I just want the result as:
12 days,
4 days..... etc
The result of subtracting one datetime.date from another is a timedelta object. You can access the .days attribute of that timedelta object to get what you're after.
> today = datetime.now().date()
> tomorrow = today + timedelta(days=1)
> (tomorrow - today).days
1
> (today - tomorrow).days
-1
Result of subtraction between datetime.date instances is an object with type of datetime.timedelta that represents a duration not datetime.date nor datetime.datatime. you can get how long a timedelta is by accessing it's .days property.
for example:
result = now().today() - (now()+timedelta(days=10))
assert(result.days==10)

django query aggregate function is slow?

I am working with Django to see how to handle large databases. I use a database with fields name, age, date of birth(dob) and height. The database has about 500000 entries. I have to find the average height of persons of (1) same age and (2) born in same year. The aggregate function in querying table takes about 10s. Is it usual or am I missing something?
For age:
age = [i[0] for i in Data.objects.values_list('age').distinct()]
ht = []
for each in age:
aggr = Data.objects.filter(age=each).aggregate(ag_ht=Avg('height')
ht.append(aggr)
From dob,
age = [i[0].year for i in Data.objects.values_list('dob').distinct()]
for each in age:
aggr = Data.objects.filter(dob__contains=each).aggregate(ag_ht=Avg(‌​'height')
ht.append(aggr)
The year has to be extracted from dob. It is SQLite and I cannot use __year (join).
For these queries to be efficient, you have to create indexes on the age and dob columns.
You will get a small additional speedup by using covering indexes, i.e., using two-column indexes that also include the height column.
full version with time compare loop and query set version
import time
from dd.models import Data
from django.db.models import Avg
from django.db.models.functions import ExtractYear
for age
start = time.time()
age = [i[0] for i in Data.objects.values_list('age').distinct()]
ht = []
for each in age:
aggr = Data.objects.filter(age=each).aggregate(ag_ht=Avg('height'))
ht.append(aggr)
end = time.time()
loop_time = end - start
start = time.time()
qs = Data.objects.values('age').annotate(ag_ht=Avg('height')).order_by('age')
ht_qs = qs.values_list('age', 'ag_ht')
end = time.time()
qs_time = end - start
print loop_time / qs_time
for dob year, with easy refactoring your version(add set in the years)
start = time.time()
years = set([i[0].year for i in Data.objects.values_list('dob').distinct()])
ht_year_loop = []
for each in years:
aggr = Data.objects.filter(dob__contains=each).aggregate(ag_ht=Avg('height'))
ht_year_loop.append((each, aggr.get('ag_ht')))
end = time.time()
loop_time = end - start
start = time.time()
qs = Data.objects.annotate(dob_year=ExtractYear('dob')).values('dob_year').annotate(ag_ht=Avg('height'))
ht_qs = qs.values_list('dob_year', 'ag_ht')
end = time.time()
qs_time = end - start
print loop_time / qs_time

How to get the number of Days in a Specific Month between Two Dates in Python

I have two date fields campaign_start_date and campaign_end_date. I want to count the number of days in each month that comes in-between the campaign_start_date and campaign_end_date.
eg:
campaign_start_date = September 7 2017
campaign_end_date = November 6 2017
The solution should be :
Total No:of days = 61 days
No: of months = 3 months
Month 1 = 9/7/2017 to 9/30/2017
Month 2 = 10/1/2017 to 10/31/2017
Month 3 = 11/1/2017 to 11/6/2017
No:of days in Month 1 = 24 days
No:of days in Month 2 = 31 days
No:of days in Month 3 = 6 days
How can I achieve this using Python?
So far I have achieved:
#api.multi
def print_date(self):
start_date = datetime.strptime(self.start_date, "%Y-%m-%d %H:%M:%S")
end_date = datetime.strptime(self.end_date, "%Y-%m-%d %H:%M:%S")
campaign_start_date = date(start_date.year,start_date.month,start_date.day)
campaign_end_date = date(end_date.year,end_date.month,end_date.day)
duration = (campaign_end_date-campaign_start_date).days
return True
Calculate the duration in days:
from datetime import date
campaign_start_date = date(2017, 9, 7)
campaign_end_date = date(2017, 10, 6)
duration = (campaign_end_date-campaign_start_date).days
print campaign_start_date, campaign_end_date, duration
Some hints for further calculations:
import calendar
campaign_end_month_start = campaign_end_date.replace(day=1)
days_in_month_campaign_end = (campaign_end_date - campaign_end_month_start).days + 1
range_startmonth = calendar.monthrange(campaign_start_date.year, campaign_start_date.month)
campaign_start_month_ends = campaign_start_date.replace(day=range_startmonth[1])
days_in_month_campaign_begins = (campaign_start_month_ends - campaign_start_date).days
This way you can calculate the number of days in each month of the campaign (keep in mind to check if campaign_end_date is in another month than campaign_start_date
For calculations you can also access the fields of a date, e.g.
campaign_start_date.day
campaign_start_date.month
campaign_start_date.year
To calculate the number of involved month in your campaign and to get a list of the month to calculate the duration per month you can use this (based on the answer of m.antkowicz in Python: get all months in range?). It's important to set the day to 1 (current = current.replace(day=1)) before and inside the loop, otherwise you skip a month when your startdate is 31st of a month and the next month is shorter than 31 days or if you have a longer period:
from datetime import date, datetime, timedelta
current = campaign_start_date
result = [current]
current = current.replace(day=1)
while current <= campaign_end_date:
current += timedelta(days=32)
current = current.replace(day=1)
result.append(datetime(current.year, current.month, 1))
print result, len(result)
which prints (when you use current.strftime('%Y-%m-%d'):
['2017-09-07', '2017-10-01', '2017-11-01'] 3
now you can loop over the result list and calculate the number of days per months:
durations= []
for curr in result:
curr_range = calendar.monthrange(curr.year, curr.month)
curr_duration = (curr_range[1] - curr.day)+1
if (curr.month < campaign_end_date.month):
durations.append(curr_duration)
else:
durations.append(campaign_end_date.day)
print durations
which gives you the desired "No:of days in Month x" as a list:
[24, 31, 6]
This is the robust solution which takes care of dates from different years.
def get_months_and_durations(start_date,end_date):
current = start_date
result = [current]
current = current.replace(day=1)
while current <= end_date:
current += timedelta(days=32)
current = current.replace(day=1)
result.append(datetime(current.year, current.month, 1).date())
durations= []
for curr in result[:-1]:
curr_range = calendar.monthrange(curr.year, curr.month)
curr_duration = (curr_range[1] - curr.day)+1
if ((curr.month == end_date.month) & (curr.year == end_date.year)):
durations.append(end_date.day)
else:
durations.append(curr_duration)
return result[:-1],durations

Number of events occurred on each hour in particular day

I have model containing "caller_name" and "call_datetime" field.
I was able to get number of calls occurred on each day in particular month:
while start_date <= end_date:
calls = CDR.objects.filter(start_time__year=str(start_date.year), start_time__month=str(start_date.month),
start_time__day=str(start_date.day))
print "Number of calls:", len(calls)
start_date = start_date + datetime.timedelta(days=1)
Similarlly, I tried to get number of calls on each hour in particular date.
for i in range(24):
calls = CDR.objects.filter(start_time__year=str(start_date.year), start_time__month=str(start_date.month),start_time__day=str(start_date.day), start_time__hour=str(i))
Found out that "start_time__hour" is not implemented, but in their any way to achieve this?
Try this workaround:
day_calls = CDR.objects.filter(start_time__year=str(start_date.year), start_time__month=str(start_date.month),start_time__day=str(start_date.day))
hour_calls = day_calls.extra(select={'hours': 'DATE_FORMAT(start_date, "%%H")'})\
.values_list('hours', flat=True)\
.distinct()\
.order_by('hours')
You could either use raw SQL with the .extra() method or something like this:
for i in range(24):
dt1 = start_time.replace(hour=i)
dt2 = dt1 + datetime.timedelta(hours=1)
calls = CDR.objects.filter(start_time__gte=dt1, start_time__lt=dt2)

Django: Total birthdays each day for the next 30 days

I've got a model similar to this:
class Person(models.Model):
name = models.CharField(max_length=40)
birthday = DateTimeField() # their next birthday
I would like to get a list of the total birthdays for each day for the next 30 days. So for example, the list would look like this:
[[9, 0], [10, 3], [11, 1], [12, 1], [13, 5], ... #30 entries in list
Each list entry in the list is a date number followed by the number of birthdays on that day. So for example on the 9th of May there are 0 birthdays.
UPDATES
My db is sqlite3 - will be moving to postgres in the future.
from django.db.models import Count
import datetime
today = datetime.date.today()
thirty_days = today + datetime.timedelta(days=30)
birthdays = dict(Person.objects.filter(
birthday__range=[today, thirty_days]
).values_list('birthday').annotate(Count('birthday')))
for day in range(30):
date = today + datetime.timedelta(day)
print "[%s, %s]" % (date, birthdays.get(date, 0))
I would get the list of days and birthday count this way:
from datetime import date, timedelta
today = date.today()
thirty_days = today + timedelta(days=30)
# get everyone with a birthday
people = Person.objects.filter(birthday__range=[today, thirty_days])
birthday_counts = []
for date in [today + timedelta(x) for x in range(30)]:
# use filter to get only birthdays on given date's day, use len to get total
birthdays = [date.day, len(filter(lambda x: x.birthday.day == date.day, people))]
birthday_counts.append(birthdays)
Something like this --
from datetime import date, timedelta
class Person(models.Model):
name = models.CharField(max_length=40)
birthday = models.DateField()
#staticmethod
def upcoming_birthdays(days=30):
today = date.today()
where = 'DATE_ADD(birthday, INTERVAL (YEAR(NOW()) - YEAR(birthday)) YEAR) BETWEEN DATE(NOW()) AND DATE_ADD(NOW(), INTERVAL %S DAY)'
birthdays = Person.objects.extra(where=where, params=[days]).values_list('birthday', flat=True)
data = []
for offset in range(0, days):
i = 0
d = today + timedelta(days=offset)
for b in birthdays:
if b.day == d.day and b.month == d.month:
i += 1
data.append((d.day, i))
return data
print Person.upcoming_birthdays()
(Queryset of people with a birthday in the next X days)
Found cool solution for this!
For me it works!
from datetime import datetime, timedelta
import operator
from django.db.models import Q
def birthdays_within(days):
now = datetime.now()
then = now + timedelta(days)
# Build the list of month/day tuples.
monthdays = [(now.month, now.day)]
while now <= then:
monthdays.append((now.month, now.day))
now += timedelta(days=1)
# Tranform each into queryset keyword args.
monthdays = (dict(zip(("birthday__month", "birthday__day"), t))
for t in monthdays)
# Compose the djano.db.models.Q objects together for a single query.
query = reduce(operator.or_, (Q(**d) for d in monthdays))
# Run the query.
return Person.objects.filter(query)
But it get a list of persons that have a birthday in date range. You should change a bit.