Django annotate exclude with Case & When (Conditional Expression) - django

I'm using Django 2.2
While building a queryset, I want counts of a related model based on a few conditions, like:
# Per-user queryset annotated with four counts over the related `video` rows.
queryset = self.model.objects.filter(user=self.request.user).annotate(
# Unconditional count of related videos.
count_videos=Count('video'),
# Count restricted to statuses listed in Video.STATUS_LIST_COMPLETED.
count_completed=Count(
Case(
When(video__status__in=Video.STATUS_LIST_COMPLETED)
)
),
# Count restricted to statuses listed in Video.STATUS_LIST_FAILED.
count_failed=Count(
Case(
When(video__status__in=Video.STATUS_LIST_FAILED)
)
),
# Intended: count videos whose status is NOT in the exclude list.
# NOTE(review): `__not_in` does not appear to be a built-in Django lookup;
# this is the annotation the question asks how to express.
count_pending=Count(
Case(
When(
video__status__not_in=Video.STATUS_LIST_PENDING_EXCLUDE
)
)
)
)
Here the first three counts work, but for the last one, count_pending, I have to count against exclude(), i.e., count the number of records whose status is not in the passed list.
How can I use exclude with the above statement?

We can negate the value we pass to the filter= parameter [Django-doc]:
from django.db.models import Count, Q
# Annotate each of the current user's objects with per-status video counts.
# Count(..., filter=Q(...)) limits which related rows are counted; prefixing
# the Q object with ~ negates the condition, which gives exclude()-style
# semantics inside an aggregate.
queryset = self.model.objects.filter(user=self.request.user).annotate(
    count_videos=Count('video'),
    count_completed=Count(
        'video',
        # The status lists are attributes of Video, as in the filters below.
        filter=Q(video__status__in=Video.STATUS_LIST_COMPLETED)
    ),
    count_failed=Count(
        'video',
        filter=Q(video__status__in=Video.STATUS_LIST_FAILED)
    ),
    count_pending=Count(
        'video',
        # Negated membership test: count videos whose status is not listed.
        filter=~Q(video__status__in=Video.STATUS_LIST_PENDING_EXCLUDE)
    )
)
This will result in a query like:
SELECT model.*,
COUNT(
CASE WHEN NOT video.status IN STATUS_LIST_PENDING_EXCLUDE
AND video.status IS NOT NULL
THEN video.id
ELSE NULL END
) AS count_pending
FROM model
LEFT OUTER JOIN video ON model.id = video.model_id
GROUP BY model.id

Apologies for replying to a super old question, but this one ranks high in searches for this topic. I needed something very similar: I wanted a count but had some odd conditions I couldn't work out with ~Q, and landed on an annotate that looked like the following. I'm posting it here in case someone happens to need something similar.
I required a count of Reviews completed and of those in progress, but if the review.status was UNTOUCHED it wasn't to be counted in either the 'in progress' or the 'completed' bin. I used Case with the default value set to 1 for the "not" condition (not completed), then wrapped the Case in a Sum as shown. There were about 9 different statuses that indicated 'in progress' and I didn't want to name them all.
.values(___bunch_of_group_by_fields_here___)\
.annotate(
completed=Sum(Case(
When(status__in=[Review.REVIEW_COMPLETE,
], then=Value(1)),
default=Value(0),
output_field=IntegerField(),
)),
# essentially: ( not (review complete or untouched) )
# gets all the status between untouched (default first step) and
# complete (final status in the workflow for a review) without having
# to specify all the in between statuses
inprogress=Sum(Case(
When(status__in=[Review.REVIEW_COMPLETE,
Review.UNTOUCHED
], then=Value(0)),
default=Value(1),
output_field=IntegerField(),
))

Related

Conditional Aggregation of Foreign Key fields

I would like to get the count of foreign key objects with django, the foreign key itself will change conditionally. So, something like the example below.
# Annotates every Game with `filled`: 1 when the GreaterThan(...) condition
# holds, otherwise the default 0.
Game.objects.annotate(
filled=models.Case(
models.When(
# Compares size_max against a Count whose argument is itself chosen by a Case.
GreaterThan(
models.F("size_max"),
(
models.Count(
# Selects which relation to count based on participant_type.
models.Case(
models.When(
participant_type=1, then="players"
),
models.When(
participant_type=2, then="teams",
),
),
),
),
),
then=1,
),
default=0,
)
)
What I'd like to achieve is this:
players and teams are reverse foreign keys to Game. I want to check whether the size_max field of Game exceeds the count of players or teams depending on the participant_type. How would I go about achieving this? Any help would be appreciated.
The above query results in an error - it introduces a GROUP BY with the model name in it. So, something like
GROUP BY ('Game'), "game"."id"
which I have no clue why this happens.

Django workaround to use window function call in an aggregate function?

I'm trying to calculate customer order frequency.
First use a window function to get the previous order date then annotate the days since the last order.
from django.db.models import Avg, F, Window
from django.db.models.functions import ExtractDay, Lag, TruncDate
# Orders annotated with the previous order date and the gap to it.
orders = (
Order.objects
.annotate(
# Lag(paid_at, 1) within each customer_email partition, ordered by paid_at
# ascending.
prev_order_date=Window(
expression=Lag('paid_at', 1),
partition_by=[F('customer_email')],
order_by=F('paid_at').asc(),
),
# Day component of the difference between the two truncated dates.
days_since_last=ExtractDay(
TruncDate('paid_at') - TruncDate('prev_order_date')
),
)
)
Then group by customer_email before calculating the average frequency.
# Groups the annotated orders and averages days_since_last per group.
# NOTE(review): this groups by 'customer' while the accompanying text says
# customer_email; confirm which field is intended.
customer_data = (
orders.values('customer')
.annotate(avg_frequency=Avg('days_since_last'))
)
Unfortunately this throws an error. Does anyone know of a workaround or know of an alternate way to calculate the average frequency?
psycopg2.errors.GroupingError: aggregate function calls cannot contain window function calls
I found the django-cte package through this answer.
Join on the order id then make sure to annotate the result of the window function before grouping.
from django_cte import CTEManager, With
class OrderCTE(Order):
    """Proxy of ``Order`` whose ``objects`` manager is a ``CTEManager``."""

    class Meta:
        # Proxy model: declared with proxy = True on top of Order.
        proxy = True

    objects = CTEManager()
# Previous paid_at for the same customer_email, ordered by paid_at ascending.
previous_paid_at = Window(
    expression=Lag('paid_at', 1),
    partition_by=[F('customer_email')],
    order_by=F('paid_at').asc(),
)
# Day component of the gap between this order's date and the previous one.
gap_in_days = ExtractDay(TruncDate('paid_at') - TruncDate('prev_order_date'))

# Wrap the windowed queryset in a CTE; the aggregate below is then computed
# over the CTE's column rather than directly over the window call.
orders = With(
    Order.objects.annotate(
        prev_order_date=previous_paid_at,
        days_since_last=gap_in_days,
    )
)

per_customer = (
    orders.join(OrderCTE, id=orders.col.id)
    .with_cte(orders)
    # Annotate the window result from the CTE before grouping.
    .annotate(days_since_last=orders.col.days_since_last)
    .values('customer_email')
    .order_by('customer_email')
    .annotate(avg_frequency=Avg('days_since_last'))
)
customer_data = list(
    per_customer.values_list('customer_email', 'avg_frequency')
)

Using Annotate & Arithmetic in a Django subquery

I am trying to improve my understanding of the Django queryset syntax and am hoping that someone could help me check my understanding.
Could this:
# Subquery source: total qty of packing records for the outer product whose
# fifolink is not sold out.
total_packed = (
PackingRecord.objects.filter(
product=OuterRef('pk'), fifolink__sold_out=False
).values('product') # Group by product
.annotate(total=Sum('qty')) # Sum qty for 'each' product
.values('total')
)
# Subquery source: total sale qty over the not-sold-out FifoLinks of the
# outer product.
total_sold = (
FifoLink.objects.filter(
packing_record__product=OuterRef('pk'), sold_out=False
).values('packing_record__product')
.annotate(total=Sum('sale__qty'))
.values('total')
)
# Attach both totals (first row of each subquery), then derive in_stock as
# packed minus sold in a second annotate.
output = obj_set.annotate(
sold=Subquery(total_sold[:1]),
packed=Subquery(total_packed[:1]),
).annotate(
in_stock=F('packed') - F('sold')
)
be safely reduced to this:
# Proposed single-subquery variant: sums (sale qty - packing record qty) over
# the not-sold-out FifoLinks of the outer product.
in_stock = (
FifoLink.objects.filter(
packing_record__product=OuterRef('pk'), sold_out=False
).values('packing_record__product')
.annotate(total=Sum(F('sale__qty')-F('packing_record__qty')))
.values('total')
)
# NOTE(review): `total_sold` is not defined in this snippet; the `in_stock`
# queryset built just above is presumably what was meant. Confirm.
output = obj_set.annotate(
in_stock=Subquery(total_sold[:1]),
)
Basically, I am trying to move the math being completed in the outer .annotate() into the queryset itself by using the fk relationship instead of running two separate querysets. I think this is allowed, but I am not sure if I am understanding it correctly.

Django conditional Subquery aggregate

A simplified example of my model structure would be
# Top of the hierarchy; fields elided in the question.
class Corporation(models.Model):
...
# Each Division belongs to one Corporation.
class Division(models.Model):
corporation = models.ForeignKey(Corporation)
# Each Department belongs to one Division and carries an integer `type`.
class Department(models.Model):
division = models.ForeignKey(Division)
type = models.IntegerField()
Now I want to display a table that displays corporations, where a column will contain the number of departments of a certain type, e.g. type=10. Currently, this is implemented with a helper on the Corporation model that retrieves those, e.g.
class Corporation(models.Model):
...
# Returns the number of Department rows with type=10 whose division belongs
# to this corporation. Runs its own .count() query on every call.
def get_departments_type_10(self):
return (
Department.objects
.filter(division__corporation=self, type=10)
.count()
)
The problem here is that this absolutely murders performance due to the N+1 problem.
I have tried to approach this problem with select_related, prefetch_related, annotate, and subquery, but I haven't been able to get the results I need.
Ideally, each Corporation in the queryset should be annotated with an integer type_10_count which reflects the number of departments of that type.
I'm sure I could do something with raw sql in .extra(), but the docs announce that it is going to be deprecated (I'm on Django 1.11)
EDIT: Example of raw sql solution
# Raw-SQL sketch of the desired result: every corporation row plus a
# correlated scalar subquery aliased as type_10_count.
# NOTE(review): the subquery's `FROM foo_division div ON ...` has an ON clause
# without a preceding JOIN; the SQL is probably illustrative, not runnable.
corps = Corporation.objects.raw("""
SELECT
*,
(
SELECT COUNT(*)
FROM foo_division div ON div.corporation_id = c.id
JOIN foo_department dept ON dept.division_id = div.id
WHERE dept.type = 10
) as type_10_count
FROM foo_corporation c
""")
I think with Subquery we can get SQL similar to one you have provided, with this code
# Number of type-10 departments, grouped by division__corporation via
# .values() + .annotate() [1]. The trailing empty .order_by() strips any
# ordering so that no additional GROUP BY columns appear [2].
departments = (
    Department.objects
    .filter(type=10)
    .values('division__corporation')
    .annotate(count=Count('id'))
    .order_by()
)

# Correlate the grouped queryset with the outer Corporation through its id.
departments_subquery = departments.filter(
    division__corporation=OuterRef('id'),
)

# The rows are already grouped by division__corporation, so selecting just
# 'count' always yields a single row with a single column [3].
type_10_count = Subquery(
    departments_subquery.values('count'),
    output_field=IntegerField(),
)
corporations = Corporation.objects.annotate(
    departments_of_type_10=type_10_count,
)
The generated SQL is
SELECT "corporation"."id", ... (other fields) ...,
(
SELECT COUNT("division"."id") AS "count"
FROM "department"
INNER JOIN "division" ON ("department"."division_id" = "division"."id")
WHERE (
"department"."type" = 10 AND
"division"."corporation_id" = ("corporation"."id")
) GROUP BY "division"."corporation_id"
) AS "departments_of_type_10"
FROM "corporation"
One concern here is that the subquery can be slow with large tables. However, database query optimizers can be smart enough to promote the subquery to an OUTER JOIN; at least I've heard PostgreSQL does this.
1. GROUP BY using .values and .annotate
2. order_by() problems
3. Subquery
You should be able to do this with a Case() expression to query the count of departments that have the type you are looking for:
from django.db.models import Case, IntegerField, Sum, When, Value
# 1 for each joined department row whose type is 10, 0 for any other row.
type_10_flag = Case(
    When(division__department__type=10, then=Value(1)),
    default=Value(0),
    output_field=IntegerField(),
)
# Summing the flags per corporation gives its number of type-10 departments.
Corporation.objects.annotate(type_10_count=Sum(type_10_flag))
I like the following way of doing it:
# Correlated subquery counting the type-10 departments that belong to the
# outer corporation.
departments = Department.objects.filter(
    type=10,
    division__corporation=OuterRef('id')
).annotate(
    # The SQL function name must be passed as function=...; a second
    # positional argument to Func is treated as another expression.
    count=Func('id', function='Count')
).values('count').order_by()
corporations = Corporation.objects.annotate(
    # Refers to the queryset defined above (the original misspelt the name).
    departments_of_type_10=Subquery(departments)
)
More details on this method can be found in this answer: https://stackoverflow.com/a/69020732/10567223

Django 1.8 conditional annotation results in INNER JOIN instead of LEFT OUTER JOIN

The models:
# Parent model; fields elided in the question.
class Bar(GenericModel):
...
# Child model; reachable from Bar through the reverse name 'foo_bar'.
class Foo(GenericModel):
bar = models.ForeignKey(Bar, related_name='foo_bar')
The query:
# Bars annotated with sum_foo: the sum of foo_bar__amount over related rows
# where is_deleted is False; any other row contributes the default 0.
bars = Bar.objects
.prefetch_related('foo_bar')
.annotate(sum_foo=Sum(
Case(
When(foo_bar__is_deleted=False, then='foo_bar__amount'),
default=Value(0),
output_field=IntegerField()
)
)
)
The query above results in an inner join: SELECT ... FROM "bar" INNER JOIN "foo" ON ( "bar"."id" = "foo"."bar_id" ) ...
What I intend to obtain is a LEFT OUTER JOIN (a full list of "bar" objects annotated with "foo.amount" sums, or 0s if no "foo" related to "bar" exists) instead of the INNER JOIN. Is it possible to do this without falling back to raw SQL?
This is a known bug, corrected in Django 1.8.3 (release notes).
As you noted, the issue is that an INNER JOIN is being created, filtering out Bar objects when there's no corresponding relation to Foo objects.
Using a Django version higher than 1.8.3 will solve the issue.
This way seems to work correctly:
# Bars annotated with sum_foo. The When condition also accepts
# foo_bar__is_deleted=None, in addition to the is_deleted=False case.
# The chain is wrapped in parentheses so that the lines starting with "."
# are valid continuation lines.
bars = (
    Bar.objects
    .prefetch_related('foo_bar')
    .annotate(sum_foo=Sum(
        Case(
            When(Q(foo_bar__is_deleted=False) | Q(foo_bar__is_deleted=None),
                 then='foo_bar__amount'),
            default=Value(0),
            output_field=IntegerField()
        )
    ),
    )
)