Django: Sum of annotated field after grouping - django

I have the following two models:
class ProductionSet(models.Model):
product = models.ForeignKey(Product, on_delete=models.CASCADE)
line = models.ForeignKey(Line, on_delete=models.CASCADE)
class ProductionSetEntry(models.Model):
productionset = models.ForeignKey(
ProductionSet, on_delete=models.CASCADE, related_name="set"
)
audited_at = models.DateTimeField(
verbose_name=_("Audited at"), null=True, blank=True
)
What I want to achieve is a result set grouped for each line with two counts: an entries total count and a count for fully audited sets. Sample result:
<QuerySet [{'line': 1, 'sum_fully_audited': 0, 'total': 43}, {'line': 2, 'sum_fully_audited': 0, 'total': 51}, {'line': 3, 'sum_fully_audited': 2, 'total': 4}]>
I'm able to add the fully_audited to each row before grouping by:
ProductionSet.objects.annotate(
audited=Count("set", filter=Q(set__audited_at__isnull=False)),
).annotate(
fully_audited=Case(
When(audited=Count("set"), then=1),
default=0,
output_field=IntegerField(),
)
)
Following query:
ProductionSet.objects.annotate(
audited=Count("set", filter=Q(set__audited_at__isnull=False)),
).annotate(
fully_audited=Case(
When(audited=Count("set"), then=1),
default=0,
output_field=IntegerField(),
)
).values(
"line", "fully_audited"
).annotate(
total=Count("id", distinct=True),
sum_fully_audited=Sum("fully_audited"),
).order_by(
"line"
)
raises django.core.exceptions.FieldError: Cannot compute Sum('fully_audited'): 'fully_audited' is an aggregate while
ProductionSet.objects.annotate(
audited=Count("set", filter=Q(set__audited_at__isnull=False)),
).annotate(
fully_audited=Case(
When(audited=Count("set"), then=1),
default=0,
output_field=IntegerField(),
)
).values(
"line", "fully_audited"
).annotate(
total=Count("id", distinct=True),
).order_by(
"line"
)
results in <QuerySet [{'line': 1, 'fully_audited': 0, 'total': 43}, {'line': 3, 'fully_audi... which looks good so far but obviously has no sum_fully_audited yet.

Related

django orm multiple points ranking

I have a model named Points which stores a user's points based on their actions.
class Points(CreateUpdateModelMixin):
class Action(models.TextChoices):
BASE = 'BASE', _('Base')
REVIEW = 'REVIEW', _('Review')
FOLLOW = 'FOLLOW', _('Follow')
VERIFIED_REVIEW = 'VERIFIED_REVIEW', _('Verified Review')
REFERRAL = 'REFERRAL', _('Referral')
ADD = 'ADD', _('Add')
SUBTRACT = 'SUBTRACT', _('Subtract')
user = models.ForeignKey(User, on_delete=models.CASCADE)
points = models.IntegerField()
action = models.CharField(max_length=64, choices=Action.choices, default=Action.BASE)
class Meta:
db_table = "diner_points"
Please note that there are multiple rows for the same user.
For the past few days I've been trying to write a query to get the total_points of each user and also the rank of that user.
Using:
Django 3.2
MySQL 5.7
I would appreciate your input. Thanks.
I wrote this query and many other like it. But none of them give the results I want.
Let's suppose the data is something like this.
user
points
771
221
1083
160
1083
12
1083
10
771
-15
1083
4
1083
-10
124
0
23
1771
The current query I have written is this...
innerquery = (
DinerPoint.objects
.values("user")
.annotate(total=Sum("points"))
.distinct()
)
query = (
DinerPoint.objects
.annotate(
total = Subquery(
innerquery.filter(user=OuterRef("user")).values("total")
),
rank = Subquery(
DinerPoint.objects
.annotate(
total = Subquery(
innerquery.filter(user=OuterRef("user")).values("total")
),
rank=Func(F("user"), function="Count")
)
.filter(
Q(total__gt=OuterRef("total")) |
Q(total=OuterRef("total"), user__lt=OuterRef("user"))
)
.values("rank")[:1]
)
)
)
query.values('user', 'total', 'rank').distinct().order_by('rank')
But this gives results like this:
<QuerySet [
{'user': 23, 'total': 1771, 'rank': 1},
{'user': 1083, 'total': 176, 'rank': 2},
{'user': 771, 'total': 106, 'rank': 8}, <---- Issue beacuse of dups entries
{'user': 124, 'total': 0, 'rank': 9}
]>
I've tried RANK and DENSE_RANK and didn't get the results I wanted.
The only way I got the results I wanted was through a Common Table Expression (CTE). Unfortunately, I can't use that because production runs MySQL 5.7.
P.S. I'm using the count with a greater-than comparison because of my use case: we also have to compute a user's rank among that user's friends.
The working code using a CTE via django_cte (you can ignore this because of MySQL 5.7 ;) )
def get_queryset(user=None, following=False):
if not user:
user = User.objects.get(username="king")
innerquery = (
DinerPoint.objects
.values("user", "user__username", "user__first_name", "user__last_name", "user__profile_fixed_url",
"user__is_influencer", "user__is_verified", "user__instagram_handle")
.annotate(total=Sum("points"))
.distinct()
)
if following:
innerquery = innerquery.filter(Q(user__in=Subquery(user.friends.values('id'))) |
Q(user = user))
basequery = With(innerquery)
subquery = (
basequery.queryset()
.filter(Q(total__gt=OuterRef("total")) |
Q(total=OuterRef("total"), user__lt=OuterRef("user")))
.annotate(rank=Func(F("user"), function="Count"))
.values("rank")
.with_cte(basequery)
)
query = (
basequery.queryset()
.annotate(rank=Subquery(subquery) + 1)
.select_related("user")
.with_cte(basequery)
)
return query
I have done this using the Func expression field. The final query which works for me is attached below in case you are looking for an answer.
rank=Func(
F("user"),
function="Count",
template="%(function)s(DISTINCT %(expressions)s)",
),
Final query
def get_queryset(self):
following = self.request.query_params.get("following", False)
innerquery = (
DinerPoint.objects.values("user").annotate(total=Sum("points")).distinct()
)
basequery = DinerPoint.objects
if following:
innerquery = innerquery.filter(
Q(user__in=Subquery(self.request.user.friends.values("id")))
| Q(user=self.request.user)
)
basequery = basequery.filter(
Q(user__in=Subquery(self.request.user.friends.values("id")))
| Q(user=self.request.user)
)
query = (
basequery.annotate(
total=Subquery(
innerquery.filter(user=OuterRef("user")).values("total")
),
rank=Subquery(
DinerPoint.objects.annotate(
total=Subquery(
innerquery.filter(user=OuterRef("user")).values("total")
),
rank=Func(
F("user"),
function="Count",
template="%(function)s(DISTINCT %(expressions)s)",
),
)
.filter(
Q(total__gt=OuterRef("total"))
| Q(total=OuterRef("total"), user__lt=OuterRef("user"))
)
.values("rank")
)
+ 1,
)
.values(
"user",
"user__username",
"user__first_name",
"user__last_name",
"user__profile_fixed_url",
"user__is_influencer",
"user__is_verified",
"user__instagram_handle",
"total",
"rank",
)
.distinct()
)
return query

How to get the Primary key of annotate Count

Hi stackoverflow community, my question is about django annotate.
Basically what I am trying to do is to find duplicated value with same values from two different fields in two different tables.
This is my models.py
class Order(models.Model):
id_order = models.AutoField(primary_key=True)
class OrderDelivery(models.Model):
order = models.ForeignKey(Order, on_delete=models.SET_NULL, null=True, blank=True)
delivery_address = models.TextField()
class OrderPickup(models.Model):
order = models.ForeignKey(Order, on_delete=models.SET_NULL, null=True, blank=True)
pickup_date = models.DateField(blank=True, null=True)
This is my current code:
dup_job = Order.objects.filter(
orderpickup__pickup_date__range=(start_date, end_date)
).values(
'orderdelivery__delivery_address',
'orderpickup__pickup_date',
).annotate(
duplicated=Count('orderdelivery__delivery_address')
).filter(
duplicated__gt=1
)
Based on what I have, I am getting result like this (delivery_address is omitted for privacy purpose):
{'orderdelivery__delivery_address': '118A', 'orderpickup__pickup_date': datetime.date(2022, 3, 9), 'duplicated': 2}
{'orderdelivery__delivery_address': '11', 'orderpickup__pickup_date': datetime.date(2022, 3, 2), 'duplicated': 6}
{'orderdelivery__delivery_address': '11 A ', 'orderpickup__pickup_date': datetime.date(2022, 3, 3), 'duplicated': 5}
{'orderdelivery__delivery_address': '21', 'orderpickup__pickup_date': datetime.date(2022, 3, 10), 'duplicated': 3}
{'orderdelivery__delivery_address': '642', 'orderpickup__pickup_date': datetime.date(2022, 3, 7), 'duplicated': 2}
{'orderdelivery__delivery_address': '642', 'orderpickup__pickup_date': datetime.date(2022, 3, 8), 'duplicated': 2}
{'orderdelivery__delivery_address': 'N/A,5', 'orderpickup__pickup_date': datetime.date(2022, 3, 8), 'duplicated': 19}
Is there a way to get the id_order of those 'duplicated'?
I have tried including id_order in .values(), but the output is then inaccurate because the annotation groups by id_order instead of delivery_address.
Thank you in advance
You can get the smallest (or largest) item with a Min [Django-doc] (or Max) aggregate:
from django.db.models import Count, Min

# Group the orders picked up in the date range by (delivery address, pickup
# date) and keep only the groups that occur more than once. Min('id_order')
# exposes the smallest primary key of each duplicated group.
dup_job = Order.objects.filter(
    orderpickup__pickup_date__range=(start_date, end_date)
).values(
    'orderdelivery__delivery_address',
    'orderpickup__pickup_date',
).annotate(
    # Each keyword argument must be comma-terminated; without the comma the
    # call is a SyntaxError.
    min_id_order=Min('id_order'),
    duplicated=Count('orderdelivery__delivery_address'),
).filter(
    duplicated__gt=1
)
or for postgresql, you can make use of the ArrayAgg [Django-doc] to generate a list:
# PostgreSQL only
from django.contrib.postgres.aggregates import ArrayAgg
from django.db.models import Count

# Same grouping as the Min() variant, but ArrayAgg collects the id_order of
# every order in the group into a list instead of returning a single one.
dup_job = Order.objects.filter(
    orderpickup__pickup_date__range=(start_date, end_date)
).values(
    'orderdelivery__delivery_address',
    'orderpickup__pickup_date',
).annotate(
    # NOTE: the key is kept as 'min_id_order' for parity with the Min()
    # variant, but here it holds the list of all ids in the group.
    min_id_order=ArrayAgg('id_order'),
    duplicated=Count('orderdelivery__delivery_address'),
).filter(
    duplicated__gt=1
)

Django: annotate(sum(case(when())))

In my project, I'm trying to aggregate the data based on the status of a 'field'. The 'view' is expected to return a table like so:
SERVICE_CODE
SUCCESS
TECHNICAL_DECLINES
BUSINESS_DECLINES
Service-1
S11
S12
S13
Service-2
S21
S22
S23
where S11,S12... are the aggregates taken based on the value of the field 'STATUS' in the model given below:
models.py
from django.db import models
class Wfmain(models.Model):
ENTITY_NUM = models.IntegerField()
SRV_REF_NUM = models.CharField(max_length=30,primary_key=True)
ITER_SL = models.IntegerField()
STAGE_ID = models.CharField(max_length=30)
ACTION = models.CharField(max_length=30)
STATUS = models.CharField(max_length=30)
REQUEST_DATE = models.DateField()
SERVICE_CODE = models.CharField(max_length=30)
SERVICE_TYPE_CODE = models.CharField(max_length=30)
FUNCTION_CODE = models.CharField(max_length=30)
REQUEST_START_TIME = models.DateField()
class Meta:
db_table = "WFMAIN"
unique_together = (("ENTITY_NUM", "SRV_REF_NUM"),)
The aggregates ought to be grouped by the field 'SERVICE_CODE'
views.py
from django.db.models import When, Case, Sum, IntegerField
from django.shortcuts import render,redirect
from hr.models import *
def financial(request):
S=['0','00','000']
NT=['0','00','000','099','100','101','102','103','104','105','107','108','109','110','111','113','114','116','117','118','119','120','121','122',
'123','124','180','181','182','183','184','185','186','187','188','189','200','201','205','213','217','218','219','220','221','222','223','224',
'230','231','232','233','234','235','236','237','238','239','240','241','248','249','250','256','258','260','262','263','264','265']
B=['S','099','100','101','102','103','104','105','107','108','109','110','111','113','114','116','117','118','119','120','121','122','123','124',
'180','181','182','183','184','185','186','187','188','189','200','201','205','213','217','218','219','220','221','222','223','224','230','231',
'232','233','234','235','236','237','238','239','240','241','248','249','250','256','258','260','262','263','264','265']
wf=Wfmain.objects.using('MBDB')
fin = wf.values('SERVICE_CODE').annotate(
Success=Sum(Case(When(STATUS in S, then=1),
when(STATUS not in S, then=0),
output_field=IntegerField())),
Technical_declines=Sum(Case(When(STATUS not in NT, then=1),
When(STATUS in NT, then=0),
output_field=IntegerField())),
Business_declines=Sum(Case(When(STATUS in B, then=1),
When(STATUS not in B, then=0),
output_field=IntegerField()))).order_by('SERVICE_CODE')
I'm stuck with error: "name 'STATUS' is not defined"
Output #browser:
Please tell me where I went wrong.
You need to express the conditions inside Case(When(…)) as named (keyword) lookup parameters such as STATUS__in=…, rather than as Python expressions. You thus define this with:
from django.db.models import Case, IntegerField, Sum, Value, When

# Per SERVICE_CODE, count rows by STATUS bucket. Each Case() maps a row to
# 1 or 0 and Sum() adds those flags up within the group.
fin = wf.values('SERVICE_CODE').annotate(
    # 1 when STATUS is one of the success codes in S, otherwise 0.
    Success=Sum(Case(
        When(STATUS__in=S, then=Value(1)),
        default=Value(0),
        output_field=IntegerField(),
    )),
    # 0 when STATUS is listed in NT, otherwise 1 (i.e. "not in NT").
    Technical_declines=Sum(Case(
        When(STATUS__in=NT, then=Value(0)),
        default=Value(1),
        output_field=IntegerField(),
    )),
    # 1 when STATUS is one of the codes in B, otherwise 0.
    Business_declines=Sum(Case(
        When(STATUS__in=B, then=Value(1)),
        default=Value(0),
        output_field=IntegerField(),
    )),
).order_by('SERVICE_CODE')

How to annotate queryset with another queryset

Now I'm trying to build complex queryset that uses annotations with conditional related queries.
I have the following models:
class MenuItemCategory(CreateUpdateModel):
name = models.CharField(max_length=255, blank=True, null=True)
class MenuItem(CreateUpdateModel):
category = models.ForeignKey(MenuItemCategory, blank=True, null=True)
name = models.CharField(max_length=255, blank=True, null=True)
class LineItem(models.Model):
order = models.ForeignKey(Orders, blank=True, null=True)
menu_item = models.ForeignKey(MenuItems, blank=True, null=True)
price = models.DecimalField(max_digits=10, decimal_places=2)
quantity = models.DecimalField(max_digits=10, decimal_places=3)
amount = models.DecimalField(max_digits=10, decimal_places=2)
class Order(CreateUpdateModel):
waiter = models.ForeignKey(Employees, blank=True, null=True)
guests_count = models.IntegerField(blank=True, null=True, default=0)
closed_at = models.DateTimeField(blank=True, null=True, db_index=True)
class Employees(CreateUpdateModel):
restaurant = models.ForeignKey(Restaurants, blank=True, null=True)
name = models.CharField(max_length=255, blank=True, null=True)
My goal is to build json with following scheme:
[
{
employee_name: 'Jane',
menu_item_categories: [
{
name: 'Drinks',
line_items_quantity: 10, //times when this waiter brings any item from this category to the customer at the period
amount: 49.00, // price of all drinks sold by this waiter at the period
menu_items: [
name: 'Vodka',
amount: 1.00,
line_items_quantity: 4, # times when this item has been ordered for this waiter at the period
]
}
],
visits: 618,
guests: 813,
cycle_time: 363
}
]
With following serializer:
class EmployeeSerializer(serializers.ModelSerializer):
name = serializers.CharField(max_length=255)
visits = serializers.SerializerMethodField()
guests = serializers.SerializerMethodField()
cycle_time = serializers.SerializerMethodField()
menu_item_categories = serializers.SerializerMethodField()
def get_visits(self, obj):
# works
def get_guests(self, obj):
# works
def get_cycle_time(self, obj):
# works
def get_menu_item_categories(self, obj):
qs = MenuItemCategories.objects.annotate(
line_items_quantity=Count('menuitems__lineitems__order',
filter=Q(
menuitems__lineitems__order__closed_at__range=self.context.get('period'),
menuitems__lineitems__order__waiter=obj)
),
amount=Sum('menuitems__lineitems__amount',
filter=Q(
menuitems__lineitems__order__closed_at__range=self.context.get('period'),
menuitems__lineitems__order__waiter=obj)
),
menu_items=Subquery(
MenuItems.objects.filter(
lineitems__order__closed_at__range=self.context.get('period'),
lineitems__order__waiter=obj
).annotate(amount=Sum('lineitems__amount', filter=Q(lineitems__order__closed_at__range=self.context.get('period'),
lineitems__order__waiter=obj)))
)
)
return MenuItemCategorySerializer(qs, many=True).data
But when I try to build menu_item_categories value - it gives me an error: subquery must return only one column. As I understand, my goal is to annotate categories queryset with custom subquery and my trouble is that I don't understand how subquery works or I use incorrect toolkit to build orm query. So, how can I build this json with orm query and this serializer?
UPD
current query is
SELECT
"menu_item_categories"."id", "menu_item_categories"."created_at",
"menu_item_categories"."updated_at", "menu_item_categories"."restaurant_id",
"menu_item_categories"."name", "menu_item_categories"."is_active",
COUNT("line_items"."order_id") AS "line_items_quantity",
(SELECT
U0."id", U0."created_at", U0."updated_at",
U0."restaurant_id", U0."category_id", U0."name",
SUM(U1."amount") AS "amount"
FROM "menu_items"
U0 INNER JOIN "line_items" U1
ON (U0."id" = U1."menu_item_id")
INNER JOIN "orders" U2
ON (U1."order_id" = U2."id")
WHERE (
U2."waiter_id" = 5 AND U2."closed_at"
BETWEEN 2017-12-20 14:19:16+00:00 AND 2017-12-26 14:19:16+00:00)
GROUP BY U0."id")
AS "menu_items",
SUM("line_items"."amount") AS "amount"
FROM "menu_item_categories"
LEFT OUTER JOIN "menu_items"
ON ("menu_item_categories"."id" = "menu_items"."category_id")
LEFT OUTER JOIN "line_items"
ON ("menu_items"."id" = "line_items"."menu_item_id")
GROUP BY "menu_item_categories"."id",
(
SELECT
U0."id", U0."created_at",
U0."updated_at", U0."restaurant_id",
U0."category_id", U0."name", SUM(U1."amount"
) AS "amount"
FROM "menu_items" U0
INNER JOIN "line_items" U1
ON (U0."id" = U1."menu_item_id")
INNER JOIN "orders" U2
ON (U1."order_id" = U2."id")
WHERE (U2."waiter_id" = 5
AND U2."closed_at"
BETWEEN 2017-12-20 14:19:16+00:00
AND 2017-12-26 14:19:16+00:00)
GROUP BY U0."id")
You can't get that kind of structure directly with one single query, simply because of how RDBMSs work. You can, however, fetch one big result with lots of redundant information, all the way down to the simplest items, and then group the data programmatically to generate your JSON structure; or you can do it in one step by iterating over your querysets:
# Build the nested employee -> category -> menu item structure by iterating
# over querysets. This issues one query for the employees, one per employee
# for the categories and one per category for the menu items. The `...`
# placeholders stand for the aggregate arguments, as in the original answer.
t_range = self.context.get('period')
employees = Employees.objects.filter(
    order__closed_at__range=t_range
).annotate(
    visits=Count(...),
    guests=Count(...),
    cycle_time=Sum(...),
)

result = []
for employee in employees:
    # Categories that have at least one line item on an order of this waiter.
    menu_item_categories = MenuItemCategory.objects.filter(
        menuitem__lineitem__order__waiter=employee
    ).annotate(
        line_items_quantity=Count(...),
        amount=Sum(...),
    )

    _cats = []
    for cat in menu_item_categories:
        # MenuItem has no direct relation to Order; the waiter is reached
        # through LineItem, exactly as in the category lookup above.
        menu_items = cat.menuitem_set.filter(
            lineitem__order__waiter=employee
        ).annotate(
            amount=Sum(...),
            line_items_quantity=Count(...),
        )
        _menu_items = [
            {
                'name': menu_item.name,
                'amount': menu_item.amount,
                'line_items_quantity': menu_item.line_items_quantity,
            }
            for menu_item in menu_items
        ]
        _cats.append({
            'name': cat.name,
            'line_items_quantity': cat.line_items_quantity,
            'amount': cat.amount,
            'menu_items': _menu_items,
        })

    result.append({
        'employee_name': employee.name,
        'visits': employee.visits,
        'guests': employee.guests,
        'cycle_time': employee.cycle_time,
        'menu_item_categories': _cats,
    })
Sure, this will hit the database more than once so unless you prefer performance over this approach, this will do the trick.

Use of Prefetch with an inner select_related in Django

I basically have to display a list of service providers and in each, I need to display the categories of service they offer.
So as an example:
Possible Service Type Categories:
[id: 1, name:'Programming']
[id: 2, name:'Design']
Possible Service Types:
[id: 1, name: 'PHP Service', service_type_category_id: 1]
[id: 2, name: 'JAVA Service', service_type_category_id: 1]
[id: 3, name: 'Web Design Service', service_type_category_id: 2]
Example of Display Results:
Company Blue offers 'Programming'
Company Test offers 'Programming' and 'Design'
Company Orange offers 'Design' ....
I'm trying to write the least number of queries:
I have these models:
class ServiceTypeCategory( BaseModel ):
# Model Attributes
name = models.CharField( _( "name" ), max_length = 40 )
class ServiceType( BaseModel ):
# Model Attributes
service_type_category = models.ForeignKey( 'ServiceTypeCategory', verbose_name = _( 'category' ) )
name = models.CharField( _( "name" ), max_length = 60 )
description = models.TextField( _( "description" ) )
class Provider( BaseModel ):
# Model Attributes
display_name = models.CharField( _( "name" ), max_length = 80 )
# Many to many relations
countries = models.ManyToManyField( 'core.Country' ) # countries this provider support
service_types = models.ManyToManyField( 'ServiceType', through = 'Provider_ServiceTypes', related_name = 'service_types' )
class Provider_ServiceTypes( BaseModel ):
# Model Attributes
service_type = models.ForeignKey( 'ServiceType', verbose_name = _( 'service type' ) )
provider = models.ForeignKey( 'Provider', verbose_name = _( 'provider' ) )
is_top = models.BooleanField( _( "is top service" ), default = False )
Then, to run the query, I have the following:
providers = Provider.objects.select_related(
'user',
).prefetch_related(
Prefetch(
'service_types__service_type_category',
queryset = ServiceTypeCategory.objects
.only( 'name' )
)
).filter(
countries = country_id,
).only(
'id', 'display_name', 'user'
).order_by(
'-user__last_login'
)
This works out well, but it runs the 3 following queries:
SELECT app_provider.id, app_provider.user_id, app_provider.display_name, core_user.id, core_user.password, core_user.last_login, core_user.is_superuser, core_user.created_date, core_user.modified_date, core_user.email, core_user.name, core_user.is_active, core_user.is_admin
FROM app_provider
INNER JOIN app_provider_countries ON ( app_provider.id = app_provider_countries.provider_id )
INNER JOIN core_user ON ( app_provider.user_id = core_user.id )
LEFT OUTER JOIN core_userpersonal ON ( core_user.id = core_userpersonal.user_id )
LEFT OUTER JOIN core_userstats ON ( core_user.id = core_userstats.user_id )
WHERE app_provider_countries.country_id = 204
ORDER BY core_userstats.total_reviews DESC, core_userstats.total_contracts DESC, core_userstats.total_answers DESC, core_user.last_login DESC LIMIT 5
SELECT (app_provider_servicetypes.provider_id) AS _prefetch_related_val_provider_id, app_servicetype.id, app_servicetype.created_date, app_servicetype.modified_date, app_servicetype.service_type_category_id, app_servicetype.name, app_servicetype.description
FROM app_servicetype
INNER JOIN app_provider_servicetypes ON ( app_servicetype.id = app_provider_servicetypes.service_type_id )
WHERE app_provider_servicetypes.provider_id IN (2)
SELECT app_servicetypecategory.id, app_servicetypecategory.name
FROM app_servicetypecategory
WHERE app_servicetypecategory.id IN (1, 2)
Question is: How can I make it run just 2 queries in total? (The last 2 queries should be joined with an INNER JOIN and grouped by service_type_category name.)
Thanks in advance!
Try this:
providers = Provider.objects.select_related(
'user',
).prefetch_related(
Prefetch(
'service_types',
queryset = ServiceType.objects\
.select_related('service_type_category')\
.only( 'service_type_category', 'name' )
)
).filter(
countries = country_id,
).only(
'id', 'display_name', 'user'
).order_by(
'-user__last_login'
)