Django Complicated Query - django

I am using django restframework with my Django app and I need to create quite specific query.
Here is the models.py:
class TaskHours(models.Model):
name = models.CharField(max_length=100)
hours = models.FloatField()
task = models.CharField(max_length=100)
date = models.DateField()
views.py:
class TaskHoursView(generics.ListAPIView):
serializer_class = TaskHoursSerializer
queryset = TaskHours.objects.all()
def get_queryset(self):
start_date = self.request.query_params.get('start_date')
end_date = self.request.query_params.get('end_date')
return TaskHours.filter(date__range=[start_date, end_date])
and serializer is default one with class Meta with all fields.
This query is working fine but I need to alter it. In the data there are entries which have same name and same date, but different tasks. What I would need is get all the tasks and hours worked with the same name and date to one object like this:
{
"name": "John",
"date": "2021-04-14",
"task": "cleaning",
"hours": "4.5",
"task": "hoovering",
"hours": "2.0"
}
Now I am receiving it like this:
{
"name": "John",
"date": "2021-04-14",
"task": "cleaning",
"hours": "4.5",
},
{
"name": "John",
"date": "2021-04-14",
"task": "hoovering",
"hours": "2.0"
}
Is there any way how to merge the two objects into one?

You need to slightly modify your serializer in order to create a subquery for each object you're going to serialize.
class TaskHoursSerializer(serializers.ModelSerializer):
tasks = serializers.SerializerMethodField()
class Meta:
model = TaskHours
exclude = ['task', 'hour']
def get_tasks(self, obj):
tasks = TaskHours.objects.filter(name=obj.name, date=obj.date).values_list("task", "hour")
return list(tasks)
And also, you need to change the queryset in your view in order to not have object duplicates.
class TaskHoursView(generics.ListAPIView):
serializer_class = TaskHoursSerializer
queryset = TaskHours.objects.all()
def get_queryset(self):
start_date = self.request.query_params.get('start_date')
end_date = self.request.query_params.get('end_date')
return TaskHours.filter(date__range=[start_date, end_date]).values('name', 'date').distinct()
That should output a field called "tasks" containing each task with the matching hour.

Related

Django DRF: how to groupby on a foreign fields?

I have a model where users can upvote other users for specific topics. Something like:
#models.py
Class Topic(models.Model):
name = models.StringField()
def __str__(self):
return str(self.name)
Class UserUpvotes(models.Model):
"""Holds total upvotes by user and topic"""
user = models.ForeignKey(User)
topic= models.ForeignKey(Topic)
upvotes = models.PositiveIntegerField(default=0)
Using DRF, I have an API that returns the following: topic_id, topic_name, and upvotes, which is the total upvotes for a given topic.
One of the project requirements is for the API to use these field names specifically: topic_id, topic_name, and upvotes
#serializers.py
class TopicUpvotesSerializer(serializers.ModelSerializer):
topic_name = serializers.StringRelatedField(source="topic")
class Meta:
model = UserUpvotes
fields = ["topic_id", "topic_name", "upvotes"]
My trouble is aggregating these fields. I'm filtering the UserUpvotes by user or team and then aggregating by topic.
Desired output
This is the result I want to get. When I don't perform any aggregations (and there are views where this will be the case), it works.
[
{
"topic_id": 3,
"topic_name": "Korean Studies",
"upvotes": 14
},
{
"topic_id": 12,
"topic_name": "Inflation",
"upvotes": 3
},
]
At first, I tried creating a TopicSerializer, and then assigning it to the topic field in TopicUpvotesSerializer. But then, the resulting json would have a nested "topic" field and the aggragation would fail.
Attempt 1
#views.py
def get_queryset(self):
return (
UserUpvotes.objects.filter(user__team=team)
.values("topic")
.annotate(upvotes=models.Sum("upvotes"))
.order_by("-upvotes")
)
My problem is that the topic_id and topic_name fields are not showing. I get something like:
[
{
"topic_name": "3",
"upvotes": 14
},
{
"topic_name": "12",
"upvotes": 3
},
]
Attempt 2
Another queryset attempt:
# views.py
def get_queryset(self):
return (
UserUpvotes.objects.filter(user__team=team)
.values("topic__id", "topic__name")
.annotate(upvotes=models.Sum("upvotes"))
.order_by("-upvotes")
)
Which yields:
[
{
"upvotes": 14
},
{
"upvotes": 3
},
]
The aggregation worked on the queryset level, but the serializer failed to find the correct fields.
Attempt 3
This was the closest I got:
# views.py
def get_queryset(self):
return (
UserUpvotes.objects.filter(user__team=team)
.values("topic__id", "topic__name")
.annotate(upvotes=models.Sum("upvotes"))
.values("topic_id", "topic", "upvotes")
.order_by("-upvotes")[:n]
)
[
{
"topic_name": 3,
"topic_name": "3",
"upvotes": 14
},
{
"topic_name": 12,
"topic_name": "12",
"upvotes": 3
},
]
I have no idea why "topic_name" is simply transforming the "topic_id" into a string, instead of calling the string method.
Work with a serializer for the topic:
class TopicSerializer(serializers.ModelSerializer):
upvotes = serializers.IntegerField(read_only=True)
class Meta:
model = Topic
fields = ['id', 'name', 'upvotes']
then in the ModelViewSet, you annotate:
from django.db.models import Sum
from rest_framework.viewsets import ModelViewSet
class TopicViewSet(ModelViewSet):
serializer_class = TopicSerializer
queryset = Topic.objects.annotate(upvotes=Sum('userupvotes__upvotes'))
Desired output
This is the result I want to get. When I don't perform any aggregations (and there are views where this will be the case), it works.
[
{
"topic_name": 3,
"topic_name": "Korean Studies",
"upvotes": 14
},
{
"topic_name": 12,
"topic_name": "Inflation",
"upvotes": 3
},
]
The serialized FK will always give you the ID of the related model. I am not sure why you name it topic_name if that is equal to an ID. Now, if you really want to get the name field of the Topic model
in the topic_name = serializers.StringRelatedField(source="topic") you should give it a source="topic.name"
However, if you trying to get the ID of the relation you can still use ModelSerializer :
class TopicUpvotesSerializer(serializers.ModelSerializer):
class Meta:
model = UserUpvotes
fields = "__all__"
#willem-van-onsem's answer is the correct one for the problem as I had put it.
But... I had another use case (sorry! ◑﹏◐), for when the Users API used UserUpvotes serializer as a nested field. So I had to find another solution. This is was I eventually ended up with. I'm posting in case it helps anyone.
class UserUpvotesSerializer(serializers.ModelSerializer):
topic_name = serializers.SerializerMethodField()
def get_topic_name (self, obj):
try:
_topic_name = obj.topic.name
except TypeError:
_topic_name = obj.get("skill__name", None)
return _topic_name
class Meta:
model = UserUpvotes
fields = ["topic_id", "topic_name", "upvotes"]
I still have no idea why the SerializerMethodField works and the StringRelatedField field doesn't. It feels like a bug?
Anyways, the rub here is that, after the values().annotate() aggregation, obj is no longer a QuerySet, but a dict. So accessing namedirectly will give you a 'UserUpvotes' object is not subscriptable error.
I don’t know if there are any other edge cases I should be aware of (this is when I REALLY miss type hints in Django), but it works so far

Django (drf) add new parameters/fields only for output with existing queryset(used for output) from database

I am working on a project on drf, where I need to fetch data from database (in thousands), and display selective fields in response.
models.py
class TblDemo(models.Model):
tbl_id = models.IntegerField()
tbl_name = models.CharField(max_length=50)
tbl_size = models.CharField(max_length=50)
tbl_height = models.Charfield(max_length=50)
tbl_material = models.Charfield(max_length=50)
class Meta:
managed = True
db_table = 'tbl_demo'
views.py
class data_fetch(viewsets.ViewSet):
def tbl_list(self, request, format=None)
serializer = Tbl_Serializer_input(data=request.data)
if serializer.is_valid():
queryset1 = tbl_name.objects.all()
queryset2 = queryset1.filter(tbl_name=request.data.get("title"))
serializer = Tbl_Serializer_output(queryset2, many=True)
return Response(serializer.data)
serializers.py
class Tbl_Serializer_input(serializers.Serializer):
title = serializers.CharField(required=True, max_length=50)
refer_no = serializers.CharField(required=True, max_length=50)
class Tbl_Serializer_output(serializers.Serializer):
tbl_id = serializers.CharField(max_length=50)
tbl_name = serializers.CharField(max_length=50)
tbl_size = serializers.CharField(max_length=50)
tbl_height = serializers.Charfield(max_length=50)
output
[
{
"tbl_id":"1",
"tbl_name":"white table",
"tbl_size": "square",
"tbl_height": "2 feet"
},
{
"tbl_id":"2",
"tbl_name":"black table",
"tbl_size": "square",
"tbl_height": "3 feet"
},
.......and so on.
but now, requirement is that I can't change database/model, but need to add some more fields like ("refer_no", "material" and "density", which is overall same in every cases) and with every object in queryset, which will not be stored in database, but it is only for response/output.
so, after adding new parameters my output will be like :
where, "refer_no" is taken directly from input to show output fields.
and "material" and "density" can't be added in database, just need to be hardcoded in middle.
new_output
[
{
"tbl_id":"1",
"refer_no":"abadadf",
"tbl_name":"white table",
"tbl_size": "square",
"tbl_height": "2 feet",
"density": "350gm/in"
"material": "tek wood"
},
{
"tbl_id":"2",
"refer_no":"abadadf",
"tbl_name":"black table",
"tbl_size": "square",
"tbl_height": "3 feet",
"density": "350gm/in",
"material": "tek wood"
},
.......and so on.
I have actually added new fields with query set from database, but when it went for serialization, it is showing circular loop error.
Please help.
You could process the data inside the serializer using the to_representation method.
For instance:
class ModelSerializer(serializers.ModelSerializer):
...
def to_representation(self, instance):
to_repr = super().to_representation(instance)
# add here the "refer_no", "material" and "density" key-value per object
return to_repr

How do I create new models not affecting the nested serializer

I already have a general idea of how it should be done. The only issue that I face now is how to actually send the data. I don't want to create new Projects I just want to add them to the notifications. How do I pass the data, the actual JSON?
class NotificationsScheduleSerializer(ModelSerializer):
projects = ProjectSerializer(many=True) # Thats the Many2Many Field
user = HiddenField(default=CurrentUserDefault())
class Meta:
model = NotificationsSchedule
fields = [
"pk",
"projects",
"period",
"week_day",
"created_at",
"time",
"report_type",
"user",
]
def create(self, validated_data):
breakpoint() # I don't ever get "projects" in validated_data just Empty OrderedDict
projects_data = validated_data.pop("projects", [])
notification = NotificationsSchedule.objects.create(**validated_data)
return notification
class ProjectSerializer(ModelSerializer):
class Meta:
model = Project
fields = ["pk", "name"]
I want to be able to pass something like this.
{
"projects": [290, 289],
"period": "daily",
"week_day": 2,
"time": "16:02:00",
"report_type": "word_report"
}
But it expects dict instead.
"non_field_errors": [
"Invalid data. Expected a dictionary, but got int."
]
You have to set read_only,
projects = ProjectSerializer(many=True, read_only=True)
And when creating Notifications ,
notification = NotificationsSchedule.objects.create(**validated_data)
notification.projects.add(*self.initial_data.get("projects"))
notification.save()

Prefetching indirectly related items using Django ORM

I'm trying to optimize the queries for my moderation system, build with Django and DRF.
I'm currently stuck with the duplicates retrieval: currently, I have something like
class AdminSerializer(ModelSerializer):
duplicates = SerializerMethodField()
def get_duplicates(self, item):
if item.allowed:
qs = []
else:
qs = Item.objects.filter(
allowed=True,
related_stuff__language=item.related_stuff.language
).annotate(
similarity=TrigramSimilarity('name', item.name)
).filter(similarity__gt=0.2).order_by('-similarity')[:10]
return AdminMinimalSerializer(qs, many=True).data
which works fine, but does at least one additional query for each item to display. In addition, if there are duplicates, I'll do additional queries to fill the AdminMinimalSerializer, which contains fields and related objects of the duplicated item. I can probably reduce the overhead by using a prefetch_related inside the serializer, but that doesn't prevent me from making several queries per item (assuming I have only one related item to prefetch in AdminMinimalSerializer, I'd still have ~2N + 1 queries: 1 for the items, N for the duplicates, N for the related items of the duplicates).
I've already looked at Subquery, but I can't retrieve an object, only an id, and this is not enough in my case. I tried to use it in both a Prefetch object and a .annotate.
I also tried something like Item.filter(allowed=False).prefetch(Prefetch("related_stuff__language__related_stuff_set__items", queryset=Items.filter..., to_attr="duplicates")), but the duplicates property is added to "related_stuff__language__related_stuff_set", so I can't really use it...
I'll welcome any idea ;)
Edit: the real code lives here. Toy example below:
# models.py
from django.db.models import Model, CharField, ForeignKey, CASCADE, BooleanField
class Book(Model):
title = CharField(max_length=250)
serie = ForeignKey(Serie, on_delete=CASCADE, related_name="books")
allowed = BooleanField(default=False)
class Serie(Model):
title = CharField(max_length=250)
language = ForeignKey(Language, on_delete=CASCADE, related_name="series")
class Language(Model):
name = CharField(max_length=100)
# serializers.py
from django.contrib.postgres.search import TrigramSimilarity
from rest_framework.serializers import ModelSerializer, SerializerMethodField
from .models import Book, Language, Serie
class BookAdminSerializer(ModelSerializer):
class Meta:
model = Book
fields = ("id", "title", "serie", "duplicates", )
serie = SerieAdminAuxSerializer()
duplicates = SerializerMethodField()
def get_duplicates(self, book):
"""Retrieve duplicates for book"""
if book.allowed:
qs = []
else:
qs = (
Book.objects.filter(
allowed=True, serie__language=book.serie.language)
.annotate(similarity=TrigramSimilarity("title", book.title))
.filter(similarity__gt=0.2)
.order_by("-similarity")[:10]
)
return BookAdminMinimalSerializer(qs, many=True).data
class BookAdminMinimalSerializer(ModelSerializer):
class Meta:
model = Book
fields = ("id", "title", "serie")
serie = SerieAdminAuxSerializer()
class SerieAdminAuxSerializer(ModelSerializer):
class Meta:
model = Serie
fields = ("id", "language", "title")
language = LanguageSerializer()
class LanguageSerializer(ModelSerializer):
class Meta:
model = Language
fields = ('id', 'name')
I'm trying to find a way to prefetch related objects and duplicates so that I can get rid of the get_duplicates method in BookSerializer, with the N+1 queries it causes, and have only a duplicates field in my BookSerializer.
Regarding data, here would be an expected output:
[
{
"id": 2,
"title": "test2",
"serie": {
"id": 2,
"language": {
"id": 1,
"name": "English"
},
"title": "series title"
},
"duplicates": [
{
"id": 1,
"title": "test",
"serie": {
"id": 1,
"language": {
"id": 1,
"name": "English"
},
"title": "first series title"
}
}
]
},
{
"id": 3,
"title": "random",
"serie": {
"id": 3,
"language": {
"id": 1,
"name": "English"
},
"title": "random series title"
},
"duplicates": []
}
]

Django: Most efficient way to create a nested dictionary from querying related models?

In Django, what is the most efficient way to create a nested dictionary of data from querying related and child models?
For example, if I have the following models:
Parent
Children
Pets
I've seen django's model_to_dict method, and that's pretty cool, so I imagine I could loop through each level's queryset and create a bunch of DB calls on each level, for each instance, but is there a better way?
For example, could "prefetch_related" be used to get all three tiers as it is used to get two tiers here?
It would be great to get the dictionary to look something like this:
[
{
"name": "Peter Parent",
"children": [
{
"name": "Chaden Child",
"pets": [
{
"name": "Fanny",
"type:": "fish"
},
{
"name": "Buster",
"type:": "bunny"
}
]
},
{
"name": "Charlete Child",
"pets": [
{
"name": "Dandy",
"type:": "dog"
}
]
}
]
}
]
Edit:
By request this is what the models could look like:
class Pet(models.Model):
name = models.CharField(max_length=50)
type = models.CharField(max_length=50)
def __str__(self):
return self.name
class Child(models.Model):
name = models.CharField(max_length=50)
pets = models.ManyToManyField(Pet)
def __str__(self):
return self.name
class Parent(models.Model):
name = models.CharField(max_length=50)
children = models.ManyToManyField(Child)
def __str__(self):
return self.name
And this is what the raw sql would look like:
SELECT pa.name, ch.name, pe.name, pe.type
FROM aarc_parent pa
JOIN aarc_parent_children pc ON pc.parent_id = pa.id
JOIN aarc_child ch ON ch.id = pc.child_id
JOIN aarc_child_pets cp ON cp.child_id = ch.id
JOIN aarc_pet pe ON pe.id = cp.pet_id
You can use prefetch_related along with list comprehensions. prefetch_related will help in avoiding extra queries every time related object is accessed.
parents = Parent.objects.all().prefetch_related('children__pets')
[{'name': parent.name, 'children': [{'name': child.name, 'pets': [{'name':pet.name, 'type':pet.type} for pet in child.pets.all()]} for child in parent.children.all()]} for parent in parents]