How to apply a function on the values selected in Django queryset? - django

Say I'm having the below model in Django
class Book(models.Model):
id = models.AutoField(primary_key=True)
volumes = JSONField()
I want to get the length of volumes of all the Books as values -
[
{
"id": 1,
"volumes": [
{
"order": 1
},
{
"order": 2
}
],
"length_of_volumes": 2
},
]
I tried the following, but it's not the proper way to do it as it's not a CharField -
from django.db.models.functions import Length
Books.objects.all().values('id', 'title', length_of_valumes=Length('volumes'))

len('title') will just determine the length of the string 'title' which thus has five characters, so as .values(…), you use .values(length_of_title=5).
You can make use of the Length expression [Django-doc]:
from django.db.models.functions import Length
Books.objects.values('id', 'title', length_of_title=Length('title'))
Note: normally a Django model is given a singular name, so Book instead of Books.

Similar to Willem's answer, but uses annotation. Taken from https://stackoverflow.com/a/34640020/14757226.
from django.db.models.functions import Length
qs = Books.objects.annotate(length_of_title=Length('title')).values('id', 'title', 'length_of_title')
An advantage is that you can then add filter or exclude clauses to query on the length of the title, for example if you only want results where the title is shorter than 10 characters.

You can use dict structure:
values = []
for b in Books.objects.all().values('id', 'title'):
values.append({
'id': b.id,
'title': b.title,
'length_of_title': len(b.title)
})

Related

Django DRF: how to groupby on a foreign fields?

I have a model where users can upvote other users for specific topics. Something like:
#models.py
Class Topic(models.Model):
name = models.StringField()
def __str__(self):
return str(self.name)
Class UserUpvotes(models.Model):
"""Holds total upvotes by user and topic"""
user = models.ForeignKey(User)
topic= models.ForeignKey(Topic)
upvotes = models.PositiveIntegerField(default=0)
Using DRF, I have an API that returns the following: topic_id, topic_name, and upvotes, which is the total upvotes for a given topic.
One of the project requirements is for the API to use these field names specifically: topic_id, topic_name, and upvotes
#serializers.py
class TopicUpvotesSerializer(serializers.ModelSerializer):
topic_name = serializers.StringRelatedField(source="topic")
class Meta:
model = UserUpvotes
fields = ["topic_id", "topic_name", "upvotes"]
My trouble is aggregating these fields. I'm filtering the UserUpvotes by user or team and then aggregating by topic.
Desired output
This is the result I want to get. When I don't perform any aggregations (and there are views where this will be the case), it works.
[
{
"topic_id": 3,
"topic_name": "Korean Studies",
"upvotes": 14
},
{
"topic_id": 12,
"topic_name": "Inflation",
"upvotes": 3
},
]
At first, I tried creating a TopicSerializer, and then assigning it to the topic field in TopicUpvotesSerializer. But then, the resulting JSON would have a nested "topic" field and the aggregation would fail.
Attempt 1
#views.py
def get_queryset(self):
return (
UserUpvotes.objects.filter(user__team=team)
.values("topic")
.annotate(upvotes=models.Sum("upvotes"))
.order_by("-upvotes")
)
My problem is that the topic_id and topic_name fields are not showing. I get something like:
[
{
"topic_name": "3",
"upvotes": 14
},
{
"topic_name": "12",
"upvotes": 3
},
]
Attempt 2
Another queryset attempt:
# views.py
def get_queryset(self):
return (
UserUpvotes.objects.filter(user__team=team)
.values("topic__id", "topic__name")
.annotate(upvotes=models.Sum("upvotes"))
.order_by("-upvotes")
)
Which yields:
[
{
"upvotes": 14
},
{
"upvotes": 3
},
]
The aggregation worked on the queryset level, but the serializer failed to find the correct fields.
Attempt 3
This was the closest I got:
# views.py
def get_queryset(self):
return (
UserUpvotes.objects.filter(user__team=team)
.values("topic__id", "topic__name")
.annotate(upvotes=models.Sum("upvotes"))
.values("topic_id", "topic", "upvotes")
.order_by("-upvotes")[:n]
)
[
{
"topic_id": 3,
"topic_name": "3",
"upvotes": 14
},
{
"topic_id": 12,
"topic_name": "12",
"upvotes": 3
},
]
I have no idea why "topic_name" is simply transforming the "topic_id" into a string, instead of calling the string method.
Work with a serializer for the topic:
class TopicSerializer(serializers.ModelSerializer):
upvotes = serializers.IntegerField(read_only=True)
class Meta:
model = Topic
fields = ['id', 'name', 'upvotes']
then in the ModelViewSet, you annotate:
from django.db.models import Sum
from rest_framework.viewsets import ModelViewSet
class TopicViewSet(ModelViewSet):
serializer_class = TopicSerializer
queryset = Topic.objects.annotate(upvotes=Sum('userupvotes__upvotes'))
Desired output
This is the result I want to get. When I don't perform any aggregations (and there are views where this will be the case), it works.
[
{
"topic_id": 3,
"topic_name": "Korean Studies",
"upvotes": 14
},
{
"topic_id": 12,
"topic_name": "Inflation",
"upvotes": 3
},
]
The serialized FK will always give you the ID of the related model. I am not sure why you name it topic_name if that is equal to an ID. Now, if you really want to get the name field of the Topic model
in the topic_name = serializers.StringRelatedField(source="topic") you should give it a source="topic.name"
However, if you are trying to get the ID of the relation, you can still use ModelSerializer:
class TopicUpvotesSerializer(serializers.ModelSerializer):
class Meta:
model = UserUpvotes
fields = "__all__"
@willem-van-onsem's answer is the correct one for the problem as I had put it.
But... I had another use case (sorry! ◑﹏◐), for when the Users API used the UserUpvotes serializer as a nested field. So I had to find another solution. This is what I eventually ended up with. I'm posting it in case it helps anyone.
class UserUpvotesSerializer(serializers.ModelSerializer):
topic_name = serializers.SerializerMethodField()
def get_topic_name (self, obj):
try:
_topic_name = obj.topic.name
except TypeError:
_topic_name = obj.get("skill__name", None)
return _topic_name
class Meta:
model = UserUpvotes
fields = ["topic_id", "topic_name", "upvotes"]
I still have no idea why the SerializerMethodField works and the StringRelatedField field doesn't. It feels like a bug?
Anyways, the rub here is that, after the values().annotate() aggregation, obj is no longer a QuerySet, but a dict. So accessing name directly will give you a 'UserUpvotes' object is not subscriptable error.
I don’t know if there are any other edge cases I should be aware of (this is when I REALLY miss type hints in Django), but it works so far

How to annotate sum over Django JSONField (Array of objects) data?

I have a model something like this:
# models.py
class MyModel( models.Model ):
orders = models.JsonField(null= True, blank=True, default=list)
category = models.ForeignKey(Category, on_delete=models.CASCADE)
I stored json data in this structure.
[
{
"order_name": "first order",
"price": 200
},
{
"order_name": "second order",
"price": 800
},
{
"order_name": "third order",
"price": 100
}
]
I want to sum price of all json objects ie 200+800+100
One way will be to use jsonb_array_elements to break each value into rows and then use the normal aggregate function.
For example:
from django.db import models
Model.objects.annotate(
# This will break items into multiple rows
annotate_field_1=models.Func(models.F('array_field__items'), function='jsonb_array_elements'),
).aggregate(
# Then calculate the total.
total=models.Count('annotate_field_1'),
)['total']
I haven't worked with JSONArrayField but I did a little bit of research and found that the following example can give you a clue:
MyModel.objects.annotate(
order_price_sum=Sum(
Cast(
KeyTextTransform("price", "orders"), models.FloatField()
)
),
)
I tried to adapt it to your specific question; you can find more helpful information at the following link: https://dev.to/saschalalala/aggregation-in-django-jsonfields-4kg5
Workaround:
I was trying to figure out how to manage JSONArray using annotate in django but it seems to not be well-documented so I share this workaround to achieve the goal:
total = 0
for i in MyModel.objects.exclude(orders__isnull=True).values('orders'):
total += sum([j.get('price',0) for j in i.get('orders') if j is not None])

How to filter an empty list? Many to many relationship

I filter as follows:
queryset = queryset.prefetch_related(
Prefetch('level', queryset=Level.objects.filter(id=level)))
In this case, empty lists remain:
{
...
"level": []
...
},
{
...
"level": [
2
]
...
}
I tried to filter like this:
queryset = queryset.prefetch_related(
Prefetch('level',queryset=Level.objects.filter(id=level).exclude(id__isnull=True)))
But it didn't help.
I also want to know if it is possible to get a value without lists?
{
...
"level": 2
...
}
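You should not filter only in the Prefetch object, since by then it is too late: that filters the prefetched related elements, not the parent objects, so parents without a matching level remain with an empty list. You should filter the queryset itself with: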
queryset.filter(level=level).prefetch_related(
Prefetch('level', queryset=Level.objects.filter(id=level)))
)
You can furthermore annotate the value of Level, and then use that as a field:
from django.db.models import F
queryset.filter(level=level).annotate(
level_value=F('level')
)
Then in the serializer, you can use an IntegerField for example that takes as source='level_value':
from rest_framework import serializers
class MyModelSerializer(serializers.ModelSerializer)
level = IntegerField(source='level_value')
class Meta:
model = MyModel

Prefetching indirectly related items using Django ORM

I'm trying to optimize the queries for my moderation system, built with Django and DRF.
I'm currently stuck with the duplicates retrieval: currently, I have something like
class AdminSerializer(ModelSerializer):
duplicates = SerializerMethodField()
def get_duplicates(self, item):
if item.allowed:
qs = []
else:
qs = Item.objects.filter(
allowed=True,
related_stuff__language=item.related_stuff.language
).annotate(
similarity=TrigramSimilarity('name', item.name)
).filter(similarity__gt=0.2).order_by('-similarity')[:10]
return AdminMinimalSerializer(qs, many=True).data
which works fine, but does at least one additional query for each item to display. In addition, if there are duplicates, I'll do additional queries to fill the AdminMinimalSerializer, which contains fields and related objects of the duplicated item. I can probably reduce the overhead by using a prefetch_related inside the serializer, but that doesn't prevent me from making several queries per item (assuming I have only one related item to prefetch in AdminMinimalSerializer, I'd still have ~2N + 1 queries: 1 for the items, N for the duplicates, N for the related items of the duplicates).
I've already looked at Subquery, but I can't retrieve an object, only an id, and this is not enough in my case. I tried to use it in both a Prefetch object and a .annotate.
I also tried something like Item.filter(allowed=False).prefetch(Prefetch("related_stuff__language__related_stuff_set__items", queryset=Items.filter..., to_attr="duplicates")), but the duplicates property is added to "related_stuff__language__related_stuff_set", so I can't really use it...
I'll welcome any idea ;)
Edit: the real code lives here. Toy example below:
# models.py
from django.db.models import Model, CharField, ForeignKey, CASCADE, BooleanField
class Book(Model):
title = CharField(max_length=250)
serie = ForeignKey(Serie, on_delete=CASCADE, related_name="books")
allowed = BooleanField(default=False)
class Serie(Model):
title = CharField(max_length=250)
language = ForeignKey(Language, on_delete=CASCADE, related_name="series")
class Language(Model):
name = CharField(max_length=100)
# serializers.py
from django.contrib.postgres.search import TrigramSimilarity
from rest_framework.serializers import ModelSerializer, SerializerMethodField
from .models import Book, Language, Serie
class BookAdminSerializer(ModelSerializer):
class Meta:
model = Book
fields = ("id", "title", "serie", "duplicates", )
serie = SerieAdminAuxSerializer()
duplicates = SerializerMethodField()
def get_duplicates(self, book):
"""Retrieve duplicates for book"""
if book.allowed:
qs = []
else:
qs = (
Book.objects.filter(
allowed=True, serie__language=book.serie.language)
.annotate(similarity=TrigramSimilarity("title", book.title))
.filter(similarity__gt=0.2)
.order_by("-similarity")[:10]
)
return BookAdminMinimalSerializer(qs, many=True).data
class BookAdminMinimalSerializer(ModelSerializer):
class Meta:
model = Book
fields = ("id", "title", "serie")
serie = SerieAdminAuxSerializer()
class SerieAdminAuxSerializer(ModelSerializer):
class Meta:
model = Serie
fields = ("id", "language", "title")
language = LanguageSerializer()
class LanguageSerializer(ModelSerializer):
class Meta:
model = Language
fields = ('id', 'name')
I'm trying to find a way to prefetch related objects and duplicates so that I can get rid of the get_duplicates method in BookSerializer, with the N+1 queries it causes, and have only a duplicates field in my BookSerializer.
Regarding data, here would be an expected output:
[
{
"id": 2,
"title": "test2",
"serie": {
"id": 2,
"language": {
"id": 1,
"name": "English"
},
"title": "series title"
},
"duplicates": [
{
"id": 1,
"title": "test",
"serie": {
"id": 1,
"language": {
"id": 1,
"name": "English"
},
"title": "first series title"
}
}
]
},
{
"id": 3,
"title": "random",
"serie": {
"id": 3,
"language": {
"id": 1,
"name": "English"
},
"title": "random series title"
},
"duplicates": []
}
]

Django queryset grouped by count of values in Postgres JSONField

My model:
from django.contrib.postgres.fields import JSONField
class Image(models.Model):
tags = JSONField(null=False, blank=True, default={})
tags field value can be empty, or something like:
[
{"tag": "xxx"},
{"tag": "yyy"},
{"tag": "zzz"}
]
The number of dicts may vary (from 0 to N).
I need to make a query that counts Images grouped by number of tags. Something like:
{
"0": "345",
"1": "1223",
"2": "220",
...
"N": "23"
}
where the key is the number of tags, and the value is the count of Image objects that contains this number of tags.
How can I do that? Thank you for your help!
UPDATE
I modified my code: now I don't use JsonField, but a dedicated model:
class ImageTag(models.Model):
image = models.ForeignKey(Image)
tag = models.CharField()
The question is the same :)