Django ArrayAgg - filtering annotated array - django

class Language(models.Model):
iso_code = models.CharField()
class Publisher(models.Model)
name = models.CharField()
class Book(modle.Model):
name = models.CharField()
language = models.ForeignKey(Language)
publisher = models.ForeignKey(Publisher, related_name='books')
lang_ids = [1,2]
qs = Publisher.objects.annotate(
x=ArrayAgg(
Case(
When(
books__language__in=lang_ids,
then="books__name"
)
)
)
)
I want to filter the qs as shown here - https://docs.djangoproject.com/en/3.1/ref/contrib/postgres/fields/#len
qs.filter(x__len=2)
Why is it impossible to filter the qs this way? I am getting an error IndexError: tuple index out of range.
Output field in ArrayAgg is ArrayField
class ArrayAgg(OrderableAggMixin, Aggregate):
function = 'ARRAY_AGG'
template = '%(function)s(%(distinct)s%(expressions)s %(ordering)s)'
allow_distinct = True
#property
def output_field(self):
return ArrayField(self.source_expressions[0].output_field)
def convert_value(self, value, expression, connection):
if not value:
return []
return value

In order to get length of an ArrayField, you will need to use a different function, there is cardinality and also array_length. More info here https://www.postgresql.org/docs/current/functions-array.html
If you have just one dimenssion array you can use cardinality
from django.db import models
class ArrayLength(models.Func):
function = 'CARDINALITY'
qs = Publisher.objects.annotate(
x=ArrayAgg(
Case(
When(
books__language__in=lang_ids,
then="books__name"
)
)
)
)
qs = qs.annotate(x_len=ArrayLength('x')) # notice `x_len` is just a name, you can use anything
qs = qs.filter(x_len=2) # the actual filter

Related

Django ORM distinct on only a subset of the queryset

Working in Django Rest Framework (DRF), django-filter, and PostgreSQL, and having an issue with one of our endpoints.
Assume the following:
# models.py
class Company(models.Model):
name = models.CharField(max_length=50)
class Venue(models.Model):
company = models.ForeignKey(to="Company", on_delete=models.CASCADE)
name = models.CharField(max_length=50)
# create some data
company1 = Company.objects.create(name="Proper Ltd")
company2 = Company.objects.create(name="MyCompany Ltd")
Venue.objects.create(name="Venue #1", company=company1)
Venue.objects.create(name="Venue #2", company=company1)
Venue.objects.create(name="Property #1", company=company2)
Venue.objects.create(name="Property #2", company=company2)
# viewset
class CompanyViewSet(viewsets.ReadOnlyModelViewSet):
serializer_class = CompanyVenueSearchSerializer
queryset = (
Venue.objects.all()
.select_related("company")
.order_by("company__name")
)
permission_classes = (ReadOnly,)
http_method_names = ["get"]
filter_backends = (filters.DjangoFilterBackend,)
filterset_class = CompanyVenueListFilter
pagination_class = None
# filterset
class CompanyVenueListFilter(filters.FilterSet):
text = filters.CharFilter(method="name_search")
def name_search(self, qs, name, value):
return qs.filter(
Q(name__icontains=value)
| Q(company__name__icontains=value)
)
class Meta:
model = Venue
fields = [
"name",
"company__name",
]
# serializer
class CompanyVenueSearchSerializer(serializers.ModelSerializer):
company_id = serializers.IntegerField(source="company.pk")
company_name = serializers.CharField(source="company.name")
venue_id = serializers.IntegerField(source="pk")
venue_name = serializers.CharField(source="name")
class Meta:
model = Venue
fields = (
"company_id",
"company_name",
"venue_id",
"venue_name",
)
We now want to allow the user to filter the results by sending a query in the request, e.g. curl -X GET https://example.com/api/company/?text=pr.
The serializer result will look something like:
[
{
"company_id":1,
"company_name":"Proper Ltd",
"venue_id":1,
"venue_name":"Venue #1"
},
{ // update ORM to exclude this dict
"company_id":1,
"company_name":"Proper Ltd",
"venue_id":2,
"venue_name":"Venue #1"
},
{
"company_id":2,
"company_name":"MyCompany Ltd",
"venue_id":3,
"venue_name":"Property #1"
},
{
"company_id":2,
"company_name":"MyCompany Ltd",
"venue_id":4,
"venue_name":"Property #1"
}
]
Expected result:
Want to rewrite the ORM query so that if the filter ("pr") matches the venue__name, return all venues. But if the filter matches the company__name, only return it once, thus in the example above the second dict in the list would be excluded/removed.
Is this possible?
What you can do is to filter Company that matches name filtering and annotate them with the first related Venue and then combine it's results with the second requirement to return venue with name=value
from django.db.models import OuterRef, Q, Subquery
value = "pr"
first_venue = Venue.objects.filter(company__in=OuterRef("id")).order_by("id")
company_qs = Company.objects.filter(name__icontains=value).annotate(
first_venue_id=Subquery(first_venue.values("id")[:1])
)
venue_qs = Venue.objects.filter(
Q(name__icontains=value)
| Q(id__in=company_qs.values_list("first_venue_id", flat=True))
)
The query executed when accessing values of venue_qs looks like
SELECT
"venues_venue"."id",
"venues_venue"."company_id",
"venues_venue"."name"
FROM
"venues_venue"
WHERE
(
UPPER("venues_venue"."name"::TEXT) LIKE UPPER(% pr %)
OR "venues_venue"."id" IN (
SELECT
(
SELECT
U0."id"
FROM
"venues_venue" U0
WHERE
U0."company_id" IN (V0."id")
ORDER BY
U0."id" ASC
LIMIT
1
) AS "first_venue_id"
FROM
"venues_company" V0
WHERE
UPPER(V0."name"::TEXT) LIKE UPPER(% pr %)
)
)
This is how the filter should look like
class CompanyVenueListFilter(filters.FilterSet):
text = filters.CharFilter(method="name_search")
def name_search(self, qs, name, value):
first_venue = Venue.objects.filter(company__in=OuterRef("id")).order_by("id")
company_qs = Company.objects.filter(name__icontains=value).annotate(
first_venue_id=Subquery(first_venue.values("id")[:1])
)
return qs.filter(
Q(name__icontains=value)
| Q(id__in=company_qs.values_list("first_venue_id", flat=True))
)
class Meta:
model = Venue
fields = [
"name",
"company__name",
]
Update for Django 3.2.16
Seems like the query above will not work for such version because it generated a query without parentheses in WHERE clause around V0."id", chunk of query looks like
WHERE
U0."company_id" IN V0."id"
and it makes PostgreSQL complain with error
ERROR: syntax error at or near "V0"
LINE 17: U0."company_id" IN V0."id"
For Django==3.2.16 the filtering method in CompanyVenueListFilter could look like following:
def name_search(self, qs, name, value):
company_qs = Company.objects.filter(name__icontains=value)
venues_qs = (
Venue.objects.filter(company__in=company_qs)
.order_by("company_id", "id")
.distinct("company_id")
)
return qs.filter(Q(name__icontains=value) | Q(id__in=venues_qs.values_list("id")))
The answer is based on other stackoverflow answer and django docs
Django manager annotate first element of m2m as fk
Subquery() expressions
We have a temporary solution, which we're a bit wary about but it seems to do its job. Won't tag this answer as accepted as we're still hoping that someone has a more pythonic/djangoistic solution to the problem.
# viewset
class CompanyViewSet(viewsets.ReadOnlyModelViewSet):
serializer_class = CompanyVenueSearchSerializer
queryset = (
Venue.objects.all()
.select_related("company")
.order_by("company__name")
)
permission_classes = (ReadOnly,)
http_method_names = ["get"]
filter_backends = (filters.DjangoFilterBackend,)
filterset_class = CompanyVenueListFilter
pagination_class = None
def list(self, request, *args, **kwargs):
queryset = self.filter_queryset(self.get_queryset())
serializer = self.get_serializer(queryset, many=True)
text = request.GET.get("text").lower()
first_idx = 0
to_remove = []
for data in serializer.data:
if text in data.get("name").lower() and text not in data.get("venue_name").lower():
if data.get("id") != first_idx:
"""We don't want to remove the first hit of a company whose name matches"""
first_idx = data.get("id")
continue
to_remove.append((data.get("id"), data.get("venue_id")))
return Response(
[
data
for data in serializer.data
if (data.get("id"), data.get("venue_id")) not in to_remove
],
status=status.HTTP_200_OK,
)

django "can't adapt type '__proxy__'" error message

class GenderTypeEnum:
FEMALE = 1
MALE = 2
UNKNOWN = 3
types = (
(FEMALE, _("Female")),
(MALE, _("Male")),
(UNKNOWN, _("Unknown"))
)
class PersonModel(models.Model):
identity = models.CharField(max_length=50, unique=True)
name = models.CharField(max_length=75)
last_name = models.CharField(max_length=75)
gender = models.PositiveIntegerField(choices=GenderTypeEnum.types)
class StaffModel(models.Model):
person = models.ForeignKey('PersonModel', on_delete=models.CASCADE, related_name='staffs')
registration_number = models.CharField(max_length=50, unique=True)
start_date = models.DateField()
finish_date = models.DateField(null=True, blank=True)
I am using the following query to list the gender statistics of the staff
StaffModel.objects.values("person__gender").annotate(count=Count("person__gender"))
output:
[
{"person__gender":1, "count":1},
{"person_gender":2, "count":5}
]
But gender field is a choices field so that, output that I want like this:
[
{"person__gender":1, "gender_exp":"Male", "count":1},
{"person_gender":2, "gender_exp":"Female", "count":5}
]
I created the following class by looking at the answer given to #bachkoi32 Display name of a choice field in Django while using annotate
In order to output, I use this class:
class WithChoices(Case):
def __init__(self, model, field, condition=None, then=None, **lookups):
fields = field.split('__')
for f in fields:
model = model._meta.get_field(f)
if model.related_model:
model = model.related_model
choices = dict(model.flatchoices)
whens = [When(**{field: k, 'then': Value(v)}) for k, v in choices.items()]
return super().__init__(*whens, output_field=CharField())
I changed my query:
qs = StaffModel.objects.values("person__gender").annotate(gender_exp=WithChoices(StaffModel, 'person__gender'), count=Count("person__gender")).values("person__gender","gender_exp","count")
When I want to print the query result, it raise the error;
django.db.utils.ProgrammingError: can't adapt type 'proxy'
qs = StaffModel.objects.values("person__gender").annotate(gender_exp=WithChoices(StaffModel, 'person__gender'), count=Count("person__gender")).values("person__gender","gender_exp","count")
print(qs)
# raise error;
# django.db.utils.ProgrammingError: can't adapt type '__proxy__'
The labels for your choices are lazy translations, these can't be passed as values to a query, they need to be converted to strings using force_str
from django.utils.encoding import force_str
class WithChoices(Case):
def __init__(self, model, field, condition=None, then=None, **lookups):
fields = field.split('__')
for f in fields:
model = model._meta.get_field(f)
if model.related_model:
model = model.related_model
choices = dict(model.flatchoices)
whens = [When(**{field: k, 'then': Value(force_str(v))}) for k, v in choices.items()]
return super().__init__(*whens, output_field=CharField())

Multiplying three models in django and getting the result in views

My model:
class VisData(models.Model):
visdata_id = models.AutoField(primary_key=True,blank=True)
user_name = models.ForeignKey(Customer, null=True, on_delete=models.SET_NULL,blank=True)
title = models.CharField(max_length=200, null=True,blank=True)
buy_sell = models.CharField(max_length=1, null=True,blank=True)
date = models.DateField(auto_now_add=False,null=True,editable=True,blank=True)
hour = models.TimeField(auto_now=False, auto_now_add=False,null=True,editable=True,blank=True)
shares_number = models.DecimalField(decimal_places=0,default=0,max_digits=999,null=True,blank=True)
course = models.DecimalField(decimal_places=2,default=0,max_digits=999,null=True,blank=True)
fare = models.DecimalField(decimal_places=2,default=0,max_digits=999,null=True,blank=True)
def __str__(self):
return self.title
I want to assign:
total_value = (shares_number * (course - fare)) and just print it in terminal
My views:
def summaryPage(request):
visdata = VisData.objects.all()
#print(visdata)
context = {}
return render(request, 'smth/homepage.html', context)
I found some close answers but I couldn't understand the solution nor use them in my code.
What you probably need called aggregation:
from django.db.models import F, Sum
def summaryPage(request):
aggregated_data = VisData.objects.annotate(
intermid_result=F('course') - F('fare')
).annotate(
record_total=F('shares_number') * F('intermid_result')
).aggregate(
total=SUM('record_total')
)
result = aggregated_data['total']
print(result)
...
This query will annotate each record with the value of record_total = shares_number * (course - fare) and then calculate a sum for record_total of all records.
Also try to avoid using camelcase function names in Python. See here for details.

Make order by function inside model class in Django

models:
class Product(models.Model):
product_model = models.CharField(max_length=255)
price = models.DecimalField(default=0 , decimal_places=0 , max_digits=8)
discount_price = models.DecimalField(blank=True, null=True, decimal_places=0, max_digits=8)
def product_all_price(self):
if self.price > 0:
a = sum([stock.quantity for stock in self.product.all()])
if a == 0:
return "0"
if a >= 1:
if self.discount_price:
discount_price = str(self.discount_price)
price = str(self.price)
return 0 ,discount_price, price
else:
return str(self.price)
else:
return "0"
my views:
def products_list(request):
products_list = Product.objects.filter(product_draft=False)
products_list = products_list.order_by('-created_on')
...
How can i order this list by product_all_price in models. show returned "price", above and all returned "0", below
You can order with:
from django.db.models import BooleanField, ExpressionWrapper, Q
Product.objects.filter(
product_draft=False
).alias(
price_non_zero=ExpressionWrapper(
Q(price__gt=0),
output_field=BooleanField()
)
).order_by('-price_non_zero', '-created_on')
Or in before django-3.2:
from django.db.models import BooleanField, ExpressionWrapper, Q
Product.objects.filter(
product_draft=False
).annotate(
price_non_zero=ExpressionWrapper(
Q(price__gt=0),
output_field=BooleanField()
)
).order_by('-price_non_zero', '-created_on')
This will thus list the Products with a price greater than 0 first (sorted by created_on in decending order); and then the items with price 0 (again sorted by created_on in descending order).

Django Query how to sum returned objects method

I have a model:
class TimeStamp(models.Model):
user = models.ForeignKey(User, on_delete=models.CASCADE)
t_in = models.DateTimeField(_("In"), auto_now=False, auto_now_add=False)
t_out = models.DateTimeField(
_("Out"), auto_now=False, auto_now_add=False, blank=True, null=True)
class Meta:
verbose_name = _("TimeStamp")
verbose_name_plural = _("TimeStamps")
def __str__(self):
return str(f'{self.t_in.date()} {self.user.get_full_name()}')
def get_absolute_url(self):
return reverse("TimeStamp_detail", kwargs={"pk": self.pk})
def get_total_hrs(self):
if self.t_out:
t_hrs = abs(self.t_out-self.t_in).total_seconds()/3600
else:
t_hrs = '0'
return str(t_hrs)
then in my views.py
def timeclock_view(request):
week_start = timezone.now().date()
week_start -= timedelta(days=(week_start.weekday()+1) % 7)
week_end = week_start + timedelta(days=7)
obj = request.user.timestamp_set.filter(
t_in__gte=week_start, t_in__lt=week_end)
obj.aggregate(total_hrs=Sum('get_total_hrs'))
if obj:
last = obj.last()
context = {'obj': obj, 'last': last,
'week_start': week_start, 'week_end': week_end, 'week_total': total_hrs, }
else:
context = {}
return render(request, 'timeclock/timeclock_view.html', context)
How do i write this to get a sum of the hrs for the queryset?
obj.aggregate(total_hrs=Sum('get_total_hrs)) is giving me an error:
django.core.exceptions.FieldError: Cannot resolve keyword
'get_total_hrs' into field. Choices are: description, id, note,
note_set, pw, t_in, t_out, user, user_id
Aggregation is done on the database. That means you can use only fields in the aggregation, and not model functions or properties.
I would first implement the logic of get_total_hrs with an annotate() queryset, then use the this queryset to aggregate over the calculated field.
from django.db.models.functions import Abs
from django.db.models import F, ExpressionWrapper, DurationField, Sum
queryset.annotate(
total_hrs=ExpressionWrapper(
Abs(F("t_out") - F("t_in")),
output_field=DurationField()
),
).aggregate(overall_hrs=Sum("total_hrs"))