complex sums divided by month

complex sums divided by month - django

models:
class Category(models.Model):
name = models.CharField(max_length=100)
class Operation(models.Model):
date = models.DateField()
value = models.DecimalField(max_digits = 9, decimal_places = 2)
category = models.ForeignKey(Category, null = True)
comments = models.TextField(null = True)
Now I want to create a view, with 13 columns:
name of category | -11 | -10 | -9 | ... | -1 | 0
eg.
...food.. | $123.00 | $100.14 | ... | $120.13| $54.12
.clothes.| $555.23 | $232.23 | ... | $200.12| $84.44
where $123.00 for example is a sum of values of operations with category food, made 11 months ago, $100.14 - 10 months ago and so on - $54.12 is sum of current month, 555.23 => the same but category clothes...
I googled a lot, but most of examples are simple - without related class (category)
The correct answer after suggestion of Answer 1:
def get_month_sum_series(self):
import qsstats, datetime
from django.db.models import Sum
qss = qsstats.QuerySetStats(self.operation_set.all(), date_field='date', aggregate_field='value',aggregate_class=Sum)
today = datetime.date.today()
year_ago = today - datetime.timedelta(days=365)
return qss.time_series( start_date=year_ago, end_date=today, interval='months')

Take a look at django-qsstats. It has a time_series feature which will allow you to get the whole series of data for the entire period in one request. In your case I'd create a method in Category, something like:
def price_series(self):
return qsstats.time_series(queryset=self.operation_set.all(), start_date=year_ago, end_date=now, interval='months')
Of course, you'll need to set up year_ago and now variables (for example, using datetime module functions).

Related

How to group by two columns on queryset in Django2?

I am getting a little confused about how to use .annotate on a queryset.
To be quick: I have a model:
class Row(models.Model):
order = models.ForeignKey('order.Header', blank=True, null=True)
qty = models.IntegerField(blank=True, null=True, default=0)
name = models.CharField(default='', blank=True, null=True)
total = models.DecimalField(max_digits=10, decimal_places=2,default=0, blank=True, null=True)
profit = models.DecimalField(max_digits=10,decimal_places=2,default=0, blank=True, null=True)
profit_percent = models.DecimalField(max_digits=6,decimal_places=2,default=0, blank=True, null=True)
month_sold = models.IntegerField(default=0)
month_painted = models.IntegerField(default=0)
area_painted_1 = models.DecimalField(max_digits=5,decimal_places=2,default=0, blank=True, null=True)
area_painted_2 = models.DecimalField(max_digits=5,decimal_places=2,default=0, blank=True, null=True)
What I need to do is to create a kind of a summary, that will tell me month by month, a sum of Total, Profit Avg of profit, and also a sum of the painted area.
Something like that:
+-------+-------+--------+----------+--------+--------+
| month | Total | Profit | Profit % | area_1 | area_2 |
+-------+-------+--------+----------+--------+--------+
| 0 | 23000 | 3000 | 13% | 55 | 12 |
| Jan | 10000 | 1000 | 10% | 43 | 44 |
| April | 20000 | 1000 | 5% | 99 | 134 |
+-------+-------+--------+----------+--------+--------+
I tried to achieve that with .annotate:
# Monthly summary grouped by month_sold.  The whole chain is wrapped in
# parentheses so every continuation line parses without trailing backslashes.
result = (
    Row.objects
    .values('month_sold')
    # NOTE(review): the "+ 1" offsets the summed total - presumably to keep
    # the division below from hitting zero; confirm this is intended.
    .annotate(total=Sum('total') + 1)
    .annotate(profit=Sum('profit'))
    .annotate(profit_percent=Round(F('profit') / F('total') * 100, 2))
    .annotate(area_2=Sum('area_painted_2'))
    .annotate(area_1=Sum('area_painted_1'))
    .values('month_sold', 'total', 'profit', 'profit_percent',
            'area_1', 'area_2')
    # Order by the grouping column (the original spelled it 'moth_sold').
    .order_by('month_sold')
)
But obviously, it groups by month_sold. So total, profit values are good, but I don't know how to get area_1 and _2 by month_painted.
Any indications or ideas how can I solve it?

I'm not sure I've got you right. In your table "Something like that", do you want month to refer to different fields in your model (either month_sold or month_painted) depending on what aggregate you're looking at? So for Total and Profit, it's month_sold, and for area_1 and area_2 it's month_painted?
If that's the case, you're not going to achieve it with one single query. In raw SQL, you could join the table with itself on month_sold = month_painted; in Django's ORM, I believe you'd need subqueries for each aggregate that is not grouped on the month type of the main query. For instance:
# Per-month sum of area_painted_1, keyed on month_painted and correlated with
# the outer query's month_sold.
sq1 = (
    Row.objects
    .filter(month_painted=OuterRef('month_sold'))
    .values('month_painted')
    .annotate(area_1=Sum('area_painted_1'))
    .values('area_1')
)
# Same as sq1, for area_painted_2.
sq2 = (
    Row.objects
    .filter(month_painted=OuterRef('month_sold'))
    .values('month_painted')
    .annotate(area_2=Sum('area_painted_2'))
    .values('area_2')
)
# Main query: totals and profit grouped by month_sold, with the painted-area
# sums pulled in through the correlated subqueries above.
result = (
    Row.objects
    .values('month_sold')
    .annotate(total=Sum('total') + 1)
    .annotate(profit=Sum('profit'))
    .annotate(profit_percent=Round(F('profit') / F('total') * 100, 2))
    # The area columns are DecimalFields, so their sums are decimals too;
    # declaring an IntegerField here would misdescribe the value.
    .annotate(area_1=Subquery(sq1, output_field=models.DecimalField()))
    .annotate(area_2=Subquery(sq2, output_field=models.DecimalField()))
    .values('month_sold', 'total', 'profit', 'profit_percent',
            'area_1', 'area_2')
    .order_by('month_sold')
)
Which month fields (month_sold or month_painted) the main query and the subqueries are based on depends on which month type you want to be the outer part of the outer join, i.e. which month type you want to include even if there are no corresponding values for the other month type. To include both (= FULL OUTER JOIN) using the ORM, you'd first have to get a list of all months (whether painted or sold), and then pull in the other columns as individual subqueries.

Two forms in one model. Combining values for table databases

I have simple model in django that looks like :
class TimeTable(models.Model):
title = models.CharField(max_length=100)
start_time= models.CharField(choices=MY_CHOICES, max_length=10)
end_time = models.CharField(choices=MY_CHOICES, max_length=10)
day1 = models.BooleanField(default=False)
day2 = models.BooleanField(default=False)
day3 = models.BooleanField(default=False)
day4 = models.BooleanField(default=False)
day5 = models.BooleanField(default=False)
day6 = models.BooleanField(default=False)
day7 = models.BooleanField(default=False)
for this model I have 2 forms :
class TimeTableForm(ModelForm):
    """ModelForm exposing the title and start/end time fields of TimeTable."""

    class Meta:
        model = TimeTable
        # Names must match the model attributes exactly; the original
        # 'title ' (trailing space) does not name any field on TimeTable.
        fields = ['title', 'start_time', 'end_time']
class WeekDayForm(ModelForm):
class Meta:
model = TimeTable
fields = ['day1', 'day2', 'day3', 'day4', 'day5', 'day6', 'day7']
Now In views.py I need to save this values into database
def schedule(request):
if request.method == 'POST':
form = TimeTableForm(request.POST)
day_week = WeekDayForm(request.POST)
if all([form.is_valid(), day_week.is_valid()]):
form.save()
day_week.save()
I'm new in Django so I was thinking that this values will be combined into one and I will get proper data but for every one submit I get two separated objects in database for example
title | 8:00 | 10:00 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| | | 0 | 1 | 1 | 1 | 1 | 0 | 0 |
but I should get
title | 8:00 | 10:00 | 0 | 1 | 1 | 1 | 1 | 0 | 0 |
As you can see, the values of these two forms are saved separately in the database (this is probably obvious to someone more familiar with Django). Is there a way to combine the two into one object?

OR definition of filters when using relations in django filter

I have three models with a simple relation as below:
models.py
class Person(models.Model):
first_name = models.CharField(max_length=20)
last_name = models.CharField(max_length=20)
class PersonSession(models.Model):
start_time = models.DateTimeField(auto_now_add=True)
end_time = models.DateTimeField(null=True,
blank=True)
person = models.ForeignKey(Person, related_name='sessions')
class Billing(models.Model):
DEBT = 'DE'
BALANCED = 'BA'
CREDIT = 'CR'
session = models.OneToOneField(PersonSession,
blank=False,
null=False,
related_name='billing')
STATUS = ((BALANCED, 'Balanced'),
(DEBT, 'Debt'),
(CREDIT, 'Credit'))
status = models.CharField(max_length=2,
choices=STATUS,
blank=False,
default=BALANCED
)
views.py
class PersonFilter(django_filters.FilterSet):
start_time = django_filters.DateFromToRangeFilter(name='sessions__start_time',
distinct=True)
billing_status = django_filters.ChoiceFilter(name='sessions__billing__status',
choices=Billing.STATUS,
distinct=True)
class Meta:
model = Person
fields = ('first_name', 'last_name')
class PersonList(generics.ListCreateAPIView):
    """List/create endpoint for Person, filtered through PersonFilter."""
    queryset = Person.objects.all()
    serializer_class = PersonSerializer
    # The trailing comma makes this a one-element tuple; without it the
    # parentheses are plain grouping and the attribute is the class itself.
    filter_backends = (django_filters.rest_framework.DjangoFilterBackend,)
    filter_class = PersonFilter
I want to get billings from person endpoint which have DE status in billing and are between a period of time:
api/persons?start_time_0=2018-03-20&start_time_1=2018-03-23&billing_status=DE
But the result is not what I were looking for, this returns all persons has a session in that period and has a billing with the DE status, whether that billing is on the period or not.
In other words, it seems to use an OR operation between the two filter fields. I think this post is related to this issue, but so far I could not find a way to get the result I want. I am using Django 1.10.3.
Edit
I try to write an example to show what I need and what I get from django filter. If I get persons using below query in the example, I got just two person:
select *
from
test_filter_person join test_filter_personsession on test_filter_person.id=test_filter_personsession.person_id join test_filter_billing on test_filter_personsession.id=test_filter_billing.session_id
where
start_time > '2000-02-01' and start_time < '2000-03-01' and status='DE';
Which gets me just person 1 and 2. But if I get somethings expected similar from url I would get all of persons, the similar url (at least one which I expected to be the same) is as below:
http://address/persons?start_time_0=2000-02-01&start_time_1=2000-03-01&billing_status=DE
Edit2
This is the data that my queries in the example are upon and using them you can see what must returns in queries that I mentioned above:
id | first_name | last_name | id | start_time | end_time | person_id | id | status | session_id
----+------------+-----------+----+---------------------------+---------------------------+-----------+----+--------+------------
0 | person | 0 | 0 | 2000-01-01 16:32:00+03:30 | 2000-01-01 17:32:00+03:30 | 0 | 0 | DE | 0
0 | person | 0 | 1 | 2000-02-01 16:32:00+03:30 | 2000-02-01 17:32:00+03:30 | 0 | 1 | BA | 1
0 | person | 0 | 2 | 2000-03-01 16:32:00+03:30 | 2000-03-01 17:32:00+03:30 | 0 | 2 | DE | 2
1 | person | 1 | 3 | 2000-01-01 16:32:00+03:30 | 2000-01-01 17:32:00+03:30 | 1 | 3 | BA | 3
1 | person | 1 | 4 | 2000-02-01 16:32:00+03:30 | 2000-02-01 17:32:00+03:30 | 1 | 4 | DE | 4
1 | person | 1 | 5 | 2000-03-01 16:32:00+03:30 | 2000-03-01 17:32:00+03:30 | 1 | 5 | DE | 5
2 | person | 2 | 6 | 2000-01-01 16:32:00+03:30 | 2000-01-01 17:32:00+03:30 | 2 | 6 | DE | 6
2 | person | 2 | 7 | 2000-02-01 16:32:00+03:30 | 2000-02-01 17:32:00+03:30 | 2 | 7 | DE | 7
2 | person | 2 | 8 | 2000-03-01 16:32:00+03:30 | 2000-03-01 17:32:00+03:30 | 2 | 8 | BA | 8
Edit3
I try using prefetch_related to join tables and get results as I expected because I thought that extra join causes this problem but this did not work and I still get the same result and this had not any effects.
Edit4
This issue has the same problem.

I don't have a solution yet; but I thought a concise summary of the problem will set more and better minds than mine at work!
From what I understand; your core issue is a result of two pre-conditions:
The fact that you have two discrete filters defined on a related model; resulting in filter spanning-multi-valued-relationships
The way FilterSet implements filtering
Let us look at these in more detail:
filter spanning-multi-valued-relationships
This is a great resource to understand issue pre-condition #1 better:
https://docs.djangoproject.com/en/2.0/topics/db/queries/#spanning-multi-valued-relationships
Essentially, the start_time filter adds a .filter(sessions__start_time=value) to your Queryset, and the billing_status filter adds a separate .filter(sessions__billing__status=value) to it. This results in the "spanning-multi-valued-relationships" issue described above, meaning each filter may be satisfied by a different related session (which behaves like an OR across sessions) instead of requiring both conditions on the same session (the AND you need).
This got me thinking, why don't we see the same issue in the start_time filter; but the trick here is that it is defined as a DateFromToRangeFilter; it internally uses a single filter query with the __range= construct. If instead it did sessions__start_time__gt= and sessions__start_time__lt=, we would have the same issue here.
The way FilterSet implements filtering
Talk is cheap; show me the code
@property
def qs(self):
    """Return the filtered queryset, computing and caching it on first access.

    The result is stored on ``self._qs``; later accesses return that cached
    value.  An unbound filterset yields ``self.queryset.all()``.  When the
    form is invalid, ``self.strict`` decides whether a ``ValidationError``
    is raised, an empty queryset is returned, or the errors are ignored.
    Otherwise every filter whose cleaned value is not ``None`` is applied
    in turn, each one receiving the queryset produced by the previous one.
    """
    if not hasattr(self, '_qs'):
        if not self.is_bound:
            self._qs = self.queryset.all()
            return self._qs

        if not self.form.is_valid():
            if self.strict == STRICTNESS.RAISE_VALIDATION_ERROR:
                raise forms.ValidationError(self.form.errors)
            elif self.strict == STRICTNESS.RETURN_NO_RESULTS:
                self._qs = self.queryset.none()
                return self._qs
            # else STRICTNESS.IGNORE... ignoring

        # start with all the results and filter from there
        qs = self.queryset.all()
        for name, filter_ in six.iteritems(self.filters):
            value = self.form.cleaned_data.get(name)

            if value is not None:  # valid & clean data
                # Each filter adds its own successive .filter() call.
                qs = filter_.filter(qs, value)

        self._qs = qs

    return self._qs
As you can see, the qs property is resolved by iterating over a list of Filter objects, passing the initial qs through each of them successively and returning the result. See qs = filter_.filter(qs, value)
Each Filter object here defines a specific def filter operation, that basically takes the Queryset and then adds a successive .filter to it.
Here's an example from the BaseFilter class
def filter(self, qs, value):
if isinstance(value, Lookup):
lookup = six.text_type(value.lookup_type)
value = value.value
else:
lookup = self.lookup_expr
if value in EMPTY_VALUES:
return qs
if self.distinct:
qs = qs.distinct()
qs = self.get_method(qs)(**{'%s__%s' % (self.name, lookup): value})
return qs
The line of code that matters is: qs = self.get_method(qs)(**{'%s__%s' % (self.name, lookup): value})
So the two pre-conditions create the perfect storm for this issue.

This worked for me:
class FooFilterSet(FilterSet):
    def filter_queryset(self, queryset):
        """
        Override the base method so that, instead of chaining one `.filter()`
        call per filter, the lookup expressions are accumulated and applied in
        a single `.filter()` call - giving an explicit "AND" across
        multi-valued relationships.

        NOTE(review): this builds lookups from `field_name` and `lookup_expr`
        only and never calls the individual filters' own `filter()` methods,
        so any per-filter behaviour implemented there is presumably skipped -
        confirm against the filters actually declared on the subclass.
        """
        filter_kwargs = {}
        for name, value in self.form.cleaned_data.items():
            if value not in EMPTY_VALUES:
                declared = self.filters[name]
                lookup = '%s__%s' % (declared.field_name, declared.lookup_expr)
                filter_kwargs[lookup] = value
        queryset = queryset.filter(**filter_kwargs)
        # The message no longer interpolates the loop variable `name`, which
        # is unbound when cleaned_data is empty.
        assert isinstance(queryset, models.QuerySet), \
            "Expected '%s.filter_queryset' to return a QuerySet, but got a %s instead." \
            % (type(self).__name__, type(queryset).__name__)
        return queryset
Overriding the filter_queryset method so that it accumulates the expressions and applies them in a single .filter() call

how to merge two annotated querysets into one result

Model:
class Foo(models.Model):
    # The base class is `models.Model` (capital M), as used by Bar1 and Bar2.
    name = models.CharField(max_length=50, blank=True, unique=True)
class Bar1(models.Model):
foo = models.ForeignKey('Foo')
value = models.DecimalField(max_digits=10,decimal_places=2)
class Bar2(models.Model):
foo = models.ForeignKey('Foo')
value = models.DecimalField(max_digits=10,decimal_places=2)
Classes Bar1 and Bar2 are unrelated, so I can't merge them into one class, which would solve the problem. This is only an example to show the problem as purely as possible.
first = Foo.objects.all().annotate(Sum("bar1__value"))
second = Foo.objects.all().annotate(Sum("bar2__value"))
each of this querysets contains correct values.
I can't merge it into:
both = Foo.objects.all().annotate(Sum("bar1__value")).annotate(Sum("bar2__value"))
Because the sum values get multiplied - this is unfortunately the expected behaviour - because of the JOINs.
And now the problem - how to merge/join first and second to get the both?
Example:
Bar 1:
foo | value
--------------
A | 10
B | 20
B | 20
Bar 2:
foo | value
--------------
A | -0.10
A | -0.10
B | -0.25
both (value differs depends on order of entering bar1 and bar2)
foo | bar1__value__sum | bar2__value__sum
---------------------------------
A | 20 | -0.20
B | 40 | -0.50
expected result:
foo | bar1__value__sum | bar2__value__sum
---------------------------------
A | 10 | -0.20
B | 40 | -0.25
I couldn't use itertools.chains because the result is:
foo | bar1__value__sum | bar2__value__sum
---------------------------------
A | null | -0.20
B | null | -0.25
A | 10 | null
B | 40 | null

Your problem is a known limitation of Django's ORM: https://code.djangoproject.com/ticket/10060.
If you're ok with doing two queries, here's one option:
result = Foo.objects.annotate(b1_sum=Sum("bar1__value"))
bar2_sums = Foo.objects.annotate(b2_sum=Sum("bar2__value")).in_bulk()
for foo in result:
foo.b2_sum = bar2_sums.get(foo.pk).b2_sum

Following @emulbreh's answer, I read the ticket and found a solution. I went this way and wrote this:
models.py:
from django.db.models.expressions import RawSQL
from django.db.models.query import QuerySet
(...)
class NewManager(models.Manager):
"""A re-usable Manager to access a custom QuerySet"""
def __getattr__(self, attr, *args):
try:
return getattr(self.__class__, attr, *args)
except AttributeError:
# don't delegate internal methods to the queryset
if attr.startswith('__') and attr.endswith('__'):
raise
return getattr(self.get_query_set(), attr, *args)
def get_query_set(self):
return self.model.QuerySet(self.model, using=self._db)
class Foo(models.Model):
name = models.CharField(max_length = 50, blank = True, unique = True)
objects =NewManager()
def __str__(self):
return self.name
class QuerySet(QuerySet):
def annotate_sum(self, modelClass, field_name):
annotation_name="%s__%s__%s" % (modelClass._meta.model_name,field_name,'sum')
raw_query = "SELECT SUM({field}) FROM {model2} WHERE {model2}.{model3}_id = {model1}.id".format(
field = field_name,
model3 = self.model._meta.model_name,
model2 = modelClass._meta.db_table,
model1 = self.model._meta.db_table
)
debug.debug("%s" % raw_query)
annotation = {annotation_name: RawSQL(raw_query, [])}
return self.annotate(**annotation)
And views.py:
both = Foo.objects.annotate_sum(Bar1, 'value').annotate_sum( Bar2, 'value')
the sql result is exact what I want:
SELECT "app_foo"."id", "app_foo"."name", (SELECT SUM(value) FROM app_bar1 WHERE app_bar1.foo_id = app_foo.id) AS "bar1__value__sum", (SELECT SUM(value) FROM app_bar2 WHERE app_bar2.foo_id = app_foo.id) AS "bar2__value__sum" FROM "app_foo"
Of course it isn't perfect - it needs some error checking (e.g. double quotes) or aliases, but i think this is the right direction

I landed on this page after having a similar problem, but with Count instead of Sum.
The simplest solution is to use Count(<field>, distinct=True) on the 2nd Count, i.e.
both = Foo.objects.all().annotate(Count("bar1__value")
).annotate(Count("bar2__value", distinct=True))
References:
ticket 10060/comment:60, linked in @emulbreh's answer
django 2.0 docs / Aggregation # Combining multiple aggregations

Django QuerySet union operator are not commutative after annotated filter

... and return unexpected results (in Django 1.6.5)
My models.py
class Member(models.Model):
...
class Donation(models.Model):
year = models.PositiveSmallIntegerField()
cheque_amount = models.DecimalField(default=0, max_digits=8, decimal_places=2)
donor = models.ForeignKey(Member)
...
class SpecialTitle(models.Model):
chair_title = models.CharField(max_length=128, blank=True)
member = models.OneToOneField(Member)
...
I'd like the union of the two querysets in one of my admin filters
donors = queryset.filter(
donation__year__exact=2014
).annotate(sum_donation=Sum('donation__cheque_amount')).filter(sum_donation__gte=1000)
chairs = queryset.filter(specialtitle__chair_title__iendswith='Chair')
Here is the puzzling part (in Django manager shell)
>>> donors | chairs == chairs | donors
False
>>> donors.count(); chairs.count()
189
17
>>> (donors | chairs).count(); (chairs | donors).count()
193
291
>>> (donors | chairs).distinct().count(); (chairs | donors).distinct().count()
193
207
And none of them are the correct results. I'd expect a set operation to be
>>> set(donors) | set(chairs) == set(chairs) | set(donors)
True
>>> set(donors) & set(chairs) == set(chairs) & set(donors)
True
>>>
And they return the correct results. However, Django admin filter demands a QuerySet, not a python set (or list)
Why is this? How do I get a proper union of Django QuerySet (of the same type) after annotated filter?
Thank you.

It appears I had no other choice but to use the python set union operator and hit the database again for the desired result.
donors = queryset.filter(
donation__year__exact=2014
).annotate(sum_donation=Sum('donation__cheque_amount')).filter(sum_donation__gte=1000)
chairs = queryset.filter(specialtitle__chair_title__iendswith='Chair')
result = queryset.filter(pk__in=[person.id for person in set(donors) | set(chairs)])

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

complex sums divided by month - django

Related

How to group by two columns on queryset in Django2?

Two forms in one model. Combining values for table databases

OR definition of filters when using relations in django filter

how to merge two annotated querysets into one result

Django QuerySet union operator are not commutative after annotated filter

Categories

Resources