I'm trying to optimize the queries for my moderation system, built with Django and DRF.
I'm currently stuck with the duplicates retrieval: currently, I have something like
class AdminSerializer(ModelSerializer):
    """Serializer exposing a computed ``duplicates`` field for an item."""

    duplicates = SerializerMethodField()

    def get_duplicates(self, item):
        """Return the serialized likely duplicates of ``item``.

        Items flagged ``allowed`` yield an empty result. For any other item,
        up to 10 allowed items sharing its language are returned, ordered by
        decreasing trigram similarity of ``name`` (strictly above 0.2).
        """
        candidates = []
        if not item.allowed:
            same_language = Item.objects.filter(
                allowed=True,
                related_stuff__language=item.related_stuff.language,
            )
            scored = same_language.annotate(
                similarity=TrigramSimilarity('name', item.name)
            )
            close_enough = scored.filter(similarity__gt=0.2)
            candidates = close_enough.order_by('-similarity')[:10]
        return AdminMinimalSerializer(candidates, many=True).data
which works fine, but does at least one additional query for each item to display. In addition, if there are duplicates, I'll do additional queries to fill the AdminMinimalSerializer, which contains fields and related objects of the duplicated item. I can probably reduce the overhead by using a prefetch_related inside the serializer, but that doesn't prevent me from making several queries per item (assuming I have only one related item to prefetch in AdminMinimalSerializer, I'd still have ~2N + 1 queries: 1 for the items, N for the duplicates, N for the related items of the duplicates).
I've already looked at Subquery, but I can't retrieve an object, only an id, and this is not enough in my case. I tried to use it in both a Prefetch object and a .annotate.
I also tried something like Item.filter(allowed=False).prefetch(Prefetch("related_stuff__language__related_stuff_set__items", queryset=Items.filter..., to_attr="duplicates")), but the duplicates property is added to "related_stuff__language__related_stuff_set", so I can't really use it...
I'll welcome any idea ;)
Edit: the real code lives here. Toy example below:
# models.py
from django.db.models import Model, CharField, ForeignKey, CASCADE, BooleanField
class Book(Model):
    """A book that belongs to a serie and carries an ``allowed`` flag."""

    title = CharField(max_length=250)
    # ``Serie`` is declared further down in this module, so it is referenced
    # lazily by name; using the bare class here would raise NameError.
    serie = ForeignKey("Serie", on_delete=CASCADE, related_name="books")
    # New books start out as not allowed.
    allowed = BooleanField(default=False)
class Serie(Model):
    """A serie of books written in one language."""

    title = CharField(max_length=250)
    # ``Language`` is declared further down in this module, so it is
    # referenced lazily by name; the bare class would raise NameError here.
    language = ForeignKey("Language", on_delete=CASCADE, related_name="series")
class Language(Model):
    """A language, described only by its name."""

    name = CharField(
        max_length=100,
    )
# serializers.py
from django.contrib.postgres.search import TrigramSimilarity
from rest_framework.serializers import ModelSerializer, SerializerMethodField
from .models import Book, Language, Serie
# The serializers are declared dependency-first: a nested serializer is
# instantiated while the enclosing class body executes, so it must already
# exist at that point (otherwise the module fails with NameError on import).


class LanguageSerializer(ModelSerializer):
    """Serialize a language as ``id`` and ``name``."""

    class Meta:
        model = Language
        fields = ('id', 'name')


class SerieAdminAuxSerializer(ModelSerializer):
    """Serialize a serie with its language nested."""

    class Meta:
        model = Serie
        fields = ("id", "language", "title")

    language = LanguageSerializer()


class BookAdminMinimalSerializer(ModelSerializer):
    """Compact book representation used for the entries of ``duplicates``."""

    class Meta:
        model = Book
        fields = ("id", "title", "serie")

    serie = SerieAdminAuxSerializer()


class BookAdminSerializer(ModelSerializer):
    """Serialize a book together with its likely duplicates."""

    class Meta:
        model = Book
        fields = ("id", "title", "serie", "duplicates", )

    serie = SerieAdminAuxSerializer()
    duplicates = SerializerMethodField()

    def get_duplicates(self, book):
        """Retrieve duplicates for book.

        Allowed books yield an empty result. For any other book, up to 10
        allowed books whose serie shares the book's language are returned,
        ordered by decreasing trigram similarity of ``title`` (strictly
        above 0.2). This runs one query per serialized book.
        """
        if book.allowed:
            qs = []
        else:
            qs = (
                Book.objects.filter(
                    allowed=True, serie__language=book.serie.language)
                .annotate(similarity=TrigramSimilarity("title", book.title))
                .filter(similarity__gt=0.2)
                .order_by("-similarity")[:10]
            )
        return BookAdminMinimalSerializer(qs, many=True).data
I'm trying to find a way to prefetch related objects and duplicates so that I can get rid of the get_duplicates method in BookAdminSerializer, with the N+1 queries it causes, and have only a duplicates field in my BookAdminSerializer.
Regarding data, here would be an expected output:
[
{
"id": 2,
"title": "test2",
"serie": {
"id": 2,
"language": {
"id": 1,
"name": "English"
},
"title": "series title"
},
"duplicates": [
{
"id": 1,
"title": "test",
"serie": {
"id": 1,
"language": {
"id": 1,
"name": "English"
},
"title": "first series title"
}
}
]
},
{
"id": 3,
"title": "random",
"serie": {
"id": 3,
"language": {
"id": 1,
"name": "English"
},
"title": "random series title"
},
"duplicates": []
}
]
Related
I have a model where users can upvote other users for specific topics. Something like:
#models.py
class Topic(models.Model):
    """A topic that users can be upvoted for."""

    # Django has no ``StringField``; ``CharField`` is the short-text field.
    # NOTE(review): max_length=255 is a placeholder - set the real limit.
    name = models.CharField(max_length=255)

    def __str__(self):
        """Return the topic name as a string."""
        return str(self.name)
class UserUpvotes(models.Model):
    """Holds total upvotes by user and topic"""

    # ``on_delete`` is mandatory since Django 2.0; CASCADE matches the
    # implicit default of earlier versions.
    user = models.ForeignKey(User, on_delete=models.CASCADE)
    topic = models.ForeignKey(Topic, on_delete=models.CASCADE)
    upvotes = models.PositiveIntegerField(default=0)
Using DRF, I have an API that returns the following: topic_id, topic_name, and upvotes, which is the total upvotes for a given topic.
One of the project requirements is for the API to use these field names specifically: topic_id, topic_name, and upvotes
#serializers.py
class TopicUpvotesSerializer(serializers.ModelSerializer):
    """Expose ``topic_id``, ``topic_name`` and ``upvotes`` of ``UserUpvotes``."""

    class Meta:
        model = UserUpvotes
        fields = ["topic_id", "topic_name", "upvotes"]

    # Sourced from the related ``topic`` object.
    topic_name = serializers.StringRelatedField(source="topic")
My trouble is aggregating these fields. I'm filtering the UserUpvotes by user or team and then aggregating by topic.
Desired output
This is the result I want to get. When I don't perform any aggregations (and there are views where this will be the case), it works.
[
{
"topic_id": 3,
"topic_name": "Korean Studies",
"upvotes": 14
},
{
"topic_id": 12,
"topic_name": "Inflation",
"upvotes": 3
},
]
At first, I tried creating a TopicSerializer, and then assigning it to the topic field in TopicUpvotesSerializer. But then, the resulting json would have a nested "topic" field and the aggregation would fail.
Attempt 1
#views.py
def get_queryset(self):
    """Return upvote totals grouped by ``topic``, highest total first.

    NOTE(review): ``team`` is not defined in this snippet; presumably it is
    resolved elsewhere in the view.
    """
    team_votes = UserUpvotes.objects.filter(user__team=team)
    per_topic = team_votes.values("topic")
    totals = per_topic.annotate(upvotes=models.Sum("upvotes"))
    return totals.order_by("-upvotes")
My problem is that the topic_id and topic_name fields are not showing. I get something like:
[
{
"topic_name": "3",
"upvotes": 14
},
{
"topic_name": "12",
"upvotes": 3
},
]
Attempt 2
Another queryset attempt:
# views.py
def get_queryset(self):
    """Return upvote totals grouped by topic id and name, highest first.

    Each row is keyed by ``topic__id``, ``topic__name`` and ``upvotes``.
    NOTE(review): ``team`` is not defined in this snippet; presumably it is
    resolved elsewhere in the view.
    """
    team_votes = UserUpvotes.objects.filter(user__team=team)
    per_topic = team_votes.values("topic__id", "topic__name")
    totals = per_topic.annotate(upvotes=models.Sum("upvotes"))
    return totals.order_by("-upvotes")
Which yields:
[
{
"upvotes": 14
},
{
"upvotes": 3
},
]
The aggregation worked on the queryset level, but the serializer failed to find the correct fields.
Attempt 3
This was the closest I got:
# views.py
def get_queryset(self):
    """Return the top ``n`` per-topic upvote totals, highest total first.

    Rows are grouped by topic id and name, then re-projected onto
    ``topic_id``, ``topic`` and ``upvotes``.
    NOTE(review): ``team`` and ``n`` are not defined in this snippet;
    presumably they are resolved elsewhere in the view.
    """
    team_votes = UserUpvotes.objects.filter(user__team=team)
    per_topic = team_votes.values("topic__id", "topic__name")
    totals = per_topic.annotate(upvotes=models.Sum("upvotes"))
    projected = totals.values("topic_id", "topic", "upvotes")
    return projected.order_by("-upvotes")[:n]
[
{
"topic_id": 3,
"topic_name": "3",
"upvotes": 14
},
{
"topic_id": 12,
"topic_name": "12",
"upvotes": 3
},
]
I have no idea why "topic_name" is simply transforming the "topic_id" into a string, instead of calling the string method.
Work with a serializer for the topic:
class TopicSerializer(serializers.ModelSerializer):
    """Serialize a topic as ``id``, ``name`` and an ``upvotes`` integer."""

    class Meta:
        model = Topic
        fields = ['id', 'name', 'upvotes']

    # Declared explicitly and read-only; the value is expected to be
    # supplied by the queryset rather than stored on the model.
    upvotes = serializers.IntegerField(read_only=True)
then in the ModelViewSet, you annotate:
from django.db.models import Sum
from rest_framework.viewsets import ModelViewSet
class TopicViewSet(ModelViewSet):
    """Viewset over topics, each annotated with its summed upvotes."""

    # ``upvotes`` is the sum of ``upvotes`` over the related userupvotes rows.
    queryset = Topic.objects.annotate(
        upvotes=Sum('userupvotes__upvotes'),
    )
    serializer_class = TopicSerializer
Desired output
This is the result I want to get. When I don't perform any aggregations (and there are views where this will be the case), it works.
[
{
"topic_id": 3,
"topic_name": "Korean Studies",
"upvotes": 14
},
{
"topic_id": 12,
"topic_name": "Inflation",
"upvotes": 3
},
]
The serialized FK will always give you the ID of the related model. I am not sure why you name it topic_name if that is equal to an ID. Now, if you really want to get the name field of the Topic model
in the topic_name = serializers.StringRelatedField(source="topic") you should give it a source="topic.name"
However, if you are trying to get the ID of the relation, you can still use ModelSerializer:
class TopicUpvotesSerializer(serializers.ModelSerializer):
    """Serialize every field of ``UserUpvotes``."""

    class Meta:
        fields = "__all__"
        model = UserUpvotes
@willem-van-onsem's answer is the correct one for the problem as I had put it.
But... I had another use case (sorry! ◑﹏◐), for when the Users API used UserUpvotes serializer as a nested field. So I had to find another solution. This is what I eventually ended up with. I'm posting in case it helps anyone.
class UserUpvotesSerializer(serializers.ModelSerializer):
    """Serialize upvotes from either model instances or aggregated rows."""

    topic_name = serializers.SerializerMethodField()

    def get_topic_name(self, obj):
        """Return the topic name for ``obj``.

        ``obj`` is a ``UserUpvotes`` instance for plain querysets, but a dict
        after a ``values().annotate()`` aggregation. A dict has no ``topic``
        attribute (attribute access raises AttributeError, not TypeError),
        so the two shapes are told apart explicitly.

        Returns ``None`` when an aggregated row has no ``topic__name`` key.
        """
        if isinstance(obj, dict):
            # Key produced by ``.values("topic__id", "topic__name")``.
            return obj.get("topic__name", None)
        return obj.topic.name

    class Meta:
        model = UserUpvotes
        fields = ["topic_id", "topic_name", "upvotes"]
I still have no idea why the SerializerMethodField works and the StringRelatedField field doesn't. It feels like a bug?
Anyways, the rub here is that, after the values().annotate() aggregation, obj is no longer a model instance, but a dict. So accessing name directly will give you a 'UserUpvotes' object is not subscriptable error.
I don’t know if there are any other edge cases I should be aware of (this is when I REALLY miss type hints in Django), but it works so far
In Django, what is the most efficient way to create a nested dictionary of data from querying related and child models?
For example, if I have the following models:
Parent
Children
Pets
I've seen django's model_to_dict method, and that's pretty cool, so I imagine I could loop through each level's queryset and create a bunch of DB calls on each level, for each instance, but is there a better way?
For example, could "prefetch_related" be used to get all three tiers as it is used to get two tiers here?
It would be great to get the dictionary to look something like this:
[
{
"name": "Peter Parent",
"children": [
{
"name": "Chaden Child",
"pets": [
{
"name": "Fanny",
"type": "fish"
},
{
"name": "Buster",
"type": "bunny"
}
]
},
{
"name": "Charlete Child",
"pets": [
{
"name": "Dandy",
"type": "dog"
}
]
}
]
}
]
Edit:
By request this is what the models could look like:
class Pet(models.Model):
    """A pet with a name and a type, both short strings."""

    name = models.CharField(max_length=50)
    type = models.CharField(max_length=50)

    def __str__(self):
        """Return the pet's name."""
        return self.name
class Child(models.Model):
    """A child with a name and a many-to-many set of pets."""

    name = models.CharField(max_length=50)
    pets = models.ManyToManyField(Pet)

    def __str__(self):
        """Return the child's name."""
        return self.name
class Parent(models.Model):
    """A parent with a name and a many-to-many set of children."""

    name = models.CharField(max_length=50)
    children = models.ManyToManyField(Child)

    def __str__(self):
        """Return the parent's name."""
        return self.name
And this is what the raw sql would look like:
-- One row per parent/child/pet combination, reached through the two
-- many-to-many join tables.
SELECT
    pa.name,
    ch.name,
    pe.name,
    pe.type
FROM aarc_parent AS pa
INNER JOIN aarc_parent_children AS pc
    ON pc.parent_id = pa.id
INNER JOIN aarc_child AS ch
    ON ch.id = pc.child_id
INNER JOIN aarc_child_pets AS cp
    ON cp.child_id = ch.id
INNER JOIN aarc_pet AS pe
    ON pe.id = cp.pet_id
You can use prefetch_related along with list comprehensions. prefetch_related will help in avoiding extra queries every time related object is accessed.
# Prefetch children and their pets so the nested ``.all()`` calls below do
# not trigger extra queries each time a related object is accessed.
parents = (
    Parent.objects
    .all()
    .prefetch_related('children__pets')
)
[
    {
        'name': parent.name,
        'children': [
            {
                'name': child.name,
                'pets': [
                    {'name': pet.name, 'type': pet.type}
                    for pet in child.pets.all()
                ],
            }
            for child in parent.children.all()
        ],
    }
    for parent in parents
]
I have an app with lots of investors that invest in the same rounds, which belong to companies, as seen below. However when a user(investor) is logged in, i only want him to be able to see HIS investments.
{
"id": 1,
"name": "Technology Company",
"rounds": [
{
"id": 1,
"kind": "priced round",
"company": 1,
"investments": [
{
"id": 1,
"investor": 1,
"round": 1,
"size": 118000,
},
{
"id": 2,
"investor": 2,
"round": 1,
"size": 183000,
},
]
}
]
},
Currently, my viewsets extend get_queryset as so:
class CompanyViewSet(viewsets.ModelViewSet):
    """Viewset limited to companies the requesting user's investor invested in."""

    def get_queryset(self):
        """Return companies having an investment by the current user's investor."""
        investor = Investor.objects.get(user=self.request.user)
        return Company.objects.filter(rounds__investments__investor=investor)
It retrieves the investments that belong to the investor, but when it takes those investments again to retrieve the rounds, it grabs the round with ALL investors.
How can I write this such that it only displays the investments that belong to the Investor?
Here are my models:
class Company(models.Model):
    """A company, described only by its name."""

    name = models.CharField(
        max_length=100,
    )
class Round(PolymorphicModel):
    """A round, optionally attached to a company (reverse name ``rounds``)."""

    company = models.ForeignKey(
        Company,
        # ``on_delete`` is mandatory since Django 2.0; CASCADE matches the
        # implicit default of earlier versions.
        on_delete=models.CASCADE,
        related_name='rounds',
        blank=True,
        null=True,
    )
class Investment(PolymorphicModel):
    """An investment of a given size made by an investor."""

    # NOTE(review): related_name='investor' names the reverse accessor on
    # Investor; something like 'investments' may have been intended - confirm.
    investor = models.ForeignKey(
        Investor,
        # ``on_delete`` is mandatory since Django 2.0; CASCADE matches the
        # implicit default of earlier versions.
        on_delete=models.CASCADE,
        related_name='investor',
    )
    size = models.BigIntegerField(default=0)
Your description of what happens is pretty unclear. What does "when it takes those investments again" mean? Anyway, I'm guessing what you need to do is to use .prefetch_related and a Prefetch object.
from django.db.models import Prefetch
class CompanyViewSet(viewsets.ModelViewSet):
    """Viewset showing each company with only the current investor's investments."""

    def get_queryset(self):
        """Return the investor's companies with their own investments prefetched.

        Raises ``Investor.DoesNotExist`` when the requesting user has no
        investor.
        """
        investor = Investor.objects.get(user=self.request.user)
        # Restricts what ``round.investments`` contains once prefetched.
        own_investments = Investment.objects.filter(investor_id=investor.pk)
        return (
            Company.objects
            .filter(rounds__investments__investor_id=investor.pk)
            # The filter joins across rounds and investments, so a company
            # would otherwise be returned once per matching investment.
            .distinct()
            .prefetch_related(
                Prefetch('rounds__investments', queryset=own_investments)
            )
        )
I haven't tested this snippet but it should give you a pointer in the right direction. I also optimized the investor lookup to check the id only, this will save you an unnecessary indirection.
Due to the way my database is designed, images are not stored with the project.
This is because there is no set amount of images per product. Some may have 1 image, others may have 10.
I would like my API to return content nested within itself. Currently, my code simply repeats the entire object when additional images exist for the item.
I am using Django Rest Framework:
class ProductDetailView(APIView):
    """Return product rows filtered by the recognised GET parameters."""

    renderer_classes = (JSONRenderer, )

    def get(self, request, *args, **kwargs):
        """Build filters from the query string and return the matching rows.

        Parameter names are lower-cased and ignored unless present in
        ``productdetailmatch``, whose entries turn the lower-cased value
        into a ``(lookup, value)`` pair.
        """
        filters = {}
        for raw_key, raw_value in request.GET.items():
            param = raw_key.lower()
            if param not in productdetailmatch:
                continue
            lookup, val = productdetailmatch[param](raw_value.lower())
            filters[lookup] = val

        products = Product.objects.filter(**filters).values('pk', 'brand')
        rows = products.annotate(
            image=F('variation__image__image'),
            price=F('variation__price__price'),
            name=F('variation__name'),
        )
        return Response(rows)
Currently, an item with 3 images pointing to it will look like this:
[{
"name": "Amplitiue jet black",
"brand": "Allup",
"price": "$1248",
"vari": "917439",
"image": "url1",
},
{
"name": "Amplitiue jet black",
"brand": "Allup",
"price": "$1248",
"vari": "917439",
"image": "url",
},
{
"name": "Amplitiue jet black",
"brand": "Allup",
"price": "$1248",
"vari": "917439",
"image": "url",
},
]
Ideally, it should look like this, combining all the images within an array:
{
"name": "Amplitiue jet black",
"brand": "Allup",
"price": "$1248",
"vari": "917439",
"images": [
"url1",
"url2",
"url3"
],
}
You should use a ListAPIView together with a ModelSerializer. Don't put the filtering in the get method; the Django class-based view way is to use get_queryset for that.
from rest_framework import serializers, generics
class ImageSerializer(serializers.ModelSerializer):
    """Serialize an image as an object with a single ``url`` field."""

    class Meta:
        fields = ("url",)
        model = Image
class ProductSerializer(serializers.ModelSerializer):
    """Serialize a product with its images nested as a list."""

    class Meta:
        model = Product
        fields = ("name", "brand", "price", "vari", "images")

    # One nested object per related image.
    images = ImageSerializer(many=True)
class ProductListView(generics.ListAPIView):  # it is not a details view
    """List products filtered by the recognised GET parameters."""

    serializer_class = ProductSerializer

    def get_queryset(self):
        """Return the filtered products with their images prefetched.

        Parameter names are lower-cased and ignored unless present in
        ``productdetailmatch``, whose entries turn the lower-cased value
        into a ``(lookup, value)`` pair.
        """
        filters = {}
        for raw_key, raw_value in self.request.GET.items():
            param = raw_key.lower()
            if param not in productdetailmatch:
                continue
            lookup, val = productdetailmatch[param](raw_value.lower())
            filters[lookup] = val

        products = Product.objects.prefetch_related("images")
        return products.filter(**filters)
The image list in the JSON will be objects with one "url" element instead of just a list of urls, but this is more consistent with REST standards anyway.
So I have a Film model that holds a list of Actors model in a many to many field:
class Person(models.Model):
    """A person with a full name, a short name and a ``num`` value."""

    full = models.TextField()
    short = models.TextField()
    # At most 5 characters.
    num = models.CharField(max_length=5)
class Film(models.Model):
    """A film with a name, a year and a many-to-many set of actors."""

    name = models.TextField()
    year = models.SmallIntegerField(blank=True)
    # Referenced by model name rather than by class.
    actors = models.ManyToManyField('Person')
I'm trying to load some initial data from json fixtures, however the problem I have is loading the many to many actors field.
For example I get the error:
DeserializationError: [u"'Anna-Varney' value must be an integer."]
with these fixtures:
{
"pk": 1,
"model": "data.Film",
"fields": {
"actors": [
"Anna-Varney"
],
"name": "Like a Corpse Standing in Desperation (2005) (V)",
"year": "2005"
}
}
while my actors fixture looks like this:
{
"pk": 1,
"model": "data.Person",
"fields": {
"full": "Anna-Varney",
"num": "I",
"short": "Anna-Varney"
}
}
So the many to many fields must use the pk integer, but the problem is that the data isn't sorted and for a long list of actors I don't think it's practical to manually look up the pk of each one. I've been looking for solutions and it seems I have to use natural keys, but I'm not exactly sure how to apply those for my models.
EDIT: I've changed my models to be:
class PersonManager(models.Manager):
    """Manager able to look a person up by natural key."""

    def get_by_natural_key(self, full):
        """Return the person whose ``full`` field equals ``full``."""
        person = self.get(full=full)
        return person
class Person(models.Model):
    """A person with a full name, a short name and a ``num`` value."""

    objects = PersonManager()

    full = models.TextField()
    short = models.TextField()
    num = models.CharField(max_length=5)

    def natural_key(self):
        """Return ``self.full`` itself (a plain value, not a tuple)."""
        key = self.full
        return key
But I'm still getting the same error
There's a problem with both the input and the natural_key method.
Documentation: Serializing Django objects - natural keys states:
A natural key is a tuple of values that can be used to uniquely
identify an object instance without using the primary key value.
The Person natural_key method should return a tuple
def natural_key(self):
    """Return the natural key as a one-element tuple holding ``self.full``."""
    key = (self.full,)
    return key
The serialised input should also contain tuples/lists for the natural keys.
{
"pk": 1,
"model": "data.film",
"fields": {
"actors": [
[
"Matt Damon"
],
[
"Jodie Foster"
]
],
"name": "Elysium",
"year": 2013
}
}