Optimising number of queries within a DRF ModelSerializer

Optimising number of queries within a DRF ModelSerializer - django

Within Django Rest Framework's serialiser it is possible to add more data to the serialised object than in the original Model.
This is useful for when calculating statistical information, on the server-side, and adding this extra information when responding to an API call.
As I understand, adding extra data is done using a SerializerMethodField, where each field is implemented by a get_... function.
However, if you have a number of these SerializerMethodFields, each one can be querying the Model/database separately, for what might be essentially the same data.
Is it possible to query the database once, store the list/result as a data member of the ModelSerializer object, and use the result of the queryset in many functions?
Here's a very simple example, just for illustration:
############## Model
class Employee(Model):
SALARY_TYPE_CHOICES = (('HR', 'Hourly Rate'), ('YR', 'Annual Salary'))
salary_type = CharField(max_length=2, choices=SALARY_TYPE_CHOICES, blank=False)
salary = PositiveIntegerField(blank=True, null=True, default=0)
company = ForeignKey(Company, related_name='employees')
class Company(Model):
name = CharField(verbose_name='company name', max_length=100)
############## View
class CompanyView(RetrieveAPIView):
queryset = Company.objects.all()
lookup_field='id'
serializer_class = CompanySerialiser
class CompanyListView(ListAPIView):
queryset = Company.objects.all()
serializer_class = CompanySerialiser
############## Serializer
class CompanySerialiser(ModelSerializer):
number_employees = SerializerMethodField()
total_salaries_estimate = SerializerMethodField()
class Meta:
model = Company
fields = ['id', 'name',
'number_employees',
'total_salaries_estimate',
]
def get_number_employees(self, obj):
return obj.employees.count()
def get_total_salaries_estimate(self, obj):
employee_list = obj.employees.all()
salaries_estimate = 0
HOURS_PER_YEAR = 8*200 # 8hrs/day, 200days/year
for empl in employee_list:
if empl.salary_type == 'YR':
salaries_estimate += empl.salary
elif empl.salary_type == 'HR':
salaries_estimate += empl.salary * HOURS_PER_YEAR
return salaries_estimate
The Serialiser can be optimised to:
use an object data member to store the result from the query set,
only retrieve the queryset once,
re-use the result of the queryset for all extra information provided in SerializerMethodFields.
Example:
class CompanySerialiser(ModelSerializer):
def __init__(self, *args, **kwargs):
super(CompanySerialiser, self).__init__(*args, **kwargs)
self.employee_list = None
number_employees = SerializerMethodField()
total_salaries_estimate = SerializerMethodField()
class Meta:
model = Company
fields = ['id', 'name',
'number_employees',
'total_salaries_estimate',
]
def _populate_employee_list(self, obj):
if not self.employee_list: # Query the database only once.
self.employee_list = obj.employees.all()
def get_number_employees(self, obj):
self._populate_employee_list(obj)
return len(self.employee_list)
def get_total_salaries_estimate(self, obj):
self._populate_employee_list(obj)
salaries_estimate = 0
HOURS_PER_YEAR = 8*200 # 8hrs/day, 200days/year
for empl in self.employee_list:
if empl.salary_type == 'YR':
salaries_estimate += empl.salary
elif empl.salary_type == 'HR':
salaries_estimate += empl.salary * HOURS_PER_YEAR
return salaries_estimate
This works for the single retrieve CompanyView. And, in fact saves one query/context-switch/round-trip to the database; I've eliminated the "count" query.
However, it does not work for the list view CompanyListView, because it seems that the serialiser object is created once and reused for each Company. So, only the first Company's list of employees is stored in the object's "self.employee_list" data member, and thus all other companies are erroneously given the data from the first company.
Is there a best practice solution to this type of problem? Or am I just wrong to use the ListAPIView, and if so, is there an alternative?

I think this issue can be solved if you can pass the queryset to the CompanySerialiser with data already fetched.
You can do the following changes
class CompanyListView(ListAPIView):
    """List all companies, loading each company's employees up front."""

    # 'employees' is the reverse accessor declared through related_name on
    # Employee.company, so that is the name prefetch_related() must be given
    # ('employee_set' does not exist on Company). Prefetching lets the
    # serialiser read obj.employees.all() without a query per company.
    queryset = Company.objects.all().prefetch_related('employees')
    serializer_class = CompanySerialiser
And instead of count use len function because count does the query again.
class CompanySerialiser(ModelSerializer):
    """Serialise a Company plus two figures derived from its employees."""

    number_employees = SerializerMethodField()
    total_salaries_estimate = SerializerMethodField()

    class Meta:
        model = Company
        fields = [
            'id',
            'name',
            'number_employees',
            'total_salaries_estimate',
        ]

    def get_number_employees(self, obj):
        """Return how many employees belong to ``obj``."""
        # len() evaluates obj.employees.all() instead of asking for a COUNT.
        employees = obj.employees.all()
        return len(employees)

    def get_total_salaries_estimate(self, obj):
        """Return the estimated yearly salary bill for ``obj``'s employees.

        Annual salaries are added as-is; hourly rates are multiplied by the
        number of working hours per year. Other salary types add nothing.
        """
        hours_per_year = 8 * 200  # 8hrs/day, 200days/year
        total = 0
        for employee in obj.employees.all():
            kind = employee.salary_type
            if kind == 'HR':
                total += employee.salary * hours_per_year
            elif kind == 'YR':
                total += employee.salary
        return total
Since the data is prefetched, the serializer will not run any additional query for obj.employees.all(). But make sure you do not apply any kind of filter to it, because another query will be executed in that case.

As mentioned by @Ritesh Agrawal, you simply need to prefetch the data. However, I advise doing the aggregations directly inside the database instead of in Python:
class CompanySerializer(ModelSerializer):
    """Serialise a Company using aggregates computed by the database.

    ``number_employees`` and ``total_salaries_estimate`` are not columns of
    Company: the queryset handed to this serializer has to provide them as
    attributes (for example through ``annotate()``).
    """

    # These values are computed, output-only data, so they are declared
    # read-only and are never required or validated on incoming payloads.
    number_employees = IntegerField(read_only=True)
    total_salaries_estimate = FloatField(read_only=True)

    class Meta:
        model = Company
        fields = [
            'id',
            'name',
            'number_employees',
            'total_salaries_estimate',
            # Add any further Company fields here.
        ]
class CompanyListView(ListAPIView):
    """List companies with employee statistics aggregated by the database."""

    queryset = Company.objects.annotate(
        number_employees=Count('employees'),
        total_salaries_estimate=Sum(
            Case(
                # Hourly rates are converted to a yearly amount:
                # 8hrs/day, 200days/year.
                When(
                    employees__salary_type=Value('HR'),
                    # Relations are traversed with a double underscore:
                    # 'employees__salary', not 'employees_salary'.
                    then=F('employees__salary') * Value(8 * 200),
                ),
                # Every other salary type is summed unchanged.
                default=F('employees__salary'),
                # NOTE(review): this must be the model-level IntegerField
                # (django.db.models), not a serializer field of the same
                # name -- confirm against the file's imports.
                output_field=IntegerField(),
            )
        ),
    )
    serializer_class = CompanySerializer
Notes:
I haven't tested this code, but I'm using the same kind of syntax for my own projects. If you encounter errors (like 'cannot determine type of output' or similar), try wrapping F('employees__salary') * Value(8 * 200) inside an ExpressionWrapper(..., output_field=IntegerField()).
Using aggregation, you can apply filters on the queryset afterwards. However, if you're prefetching your related Employees, then you cannot filter the related objects anymore (as mentioned in the previous answer). BUT, if you already know you'll need the list of employees with an hourly rate, you can do .prefetch_related(Prefetch('employees', queryset=Employee.objects.filter(salary_type='HR'), to_attr="hourly_rate_employees")).
Relevant documentation:
Query optimization
Aggregation
Hope this will help you ;)

Related

Django queryset.update() seems to act lazy

I have a Django model with a field named order :
class Foo(models.Model):
user = models.ForeignKey(...)
order = models.PositiveIntegerField(null=True)
class Meta:
unique_together = [
['user', 'order'],
]
def set_order(self, order):
self.order = order
self.save()
I want to implement an update method for my serializer to be able to reorder an object. Let's consider our object's current order is 5 and we want to change it to 1. So the scenario is something like this:
make obj's current order null
increment order of objects that have an order between our obj's current_order and new_order
change obj's order to new_order
My first attempt at the code is this:
def update(self, instance, validated_data):
user = self.context['request'].user
current_order = instance.order
new_order = validated_data['order']
instance.set_order(None)
if new_order < current_order:
qs = PIN.objects.exclude(order__isnull=True).filter(
user_id=user.id, order__gte=new_order, order__lt=current_order
).update(order=F('order')+1)
else:
qs = PIN.objects.exclude(order__isnull=True).filter(
user_id=user.id, order__gt=current_order, order__lte=new_order
).update(order=F('order')-1)
But the problem is the order of updating objects in database and I get unique constraint error:
('user', 'order') = ('x', '2') already exist
I did read the django documentations for queryset.update method, so, order_by method is not the result (I'm using PostgreSQL).
I decided to change my code to this:
user = self.context['request'].user
current_order = instance.order
new_order = validated_data['order']
instance.set_order(None)
if new_order < current_order:
qs = Foo.objects.exclude(order__isnull=True).filter(
user_id=user.id, order__gte=new_order, order__lt=current_order
)
qs_orders = qs.values_list('order', flat=True)
qs_objs = list(qs)
qs.update(order=None)
for idx, qs_order in enumerate(qs_orders):
qs_objs[idx].order = qs_order + 1
Foo.objects.bulk_update(qs_objs, ['order'])
else:
qs = Foo.objects.exclude(order__isnull=True).filter(
user_id=user.id, order__gt=current_order, order__lte=new_order
).update(order=F('order')-1)
qs_orders = qs.values_list('order', flat=True)
qs_objs = list(qs)
qs.update(order=None)
for idx, qs_order in enumerate(qs_orders):
qs_objs[idx].order = qs_order - 1
Foo.objects.bulk_update(qs_objs, ['order'])
instance.set_order(new_order)
Now, when I debug the code with pycharm debug (line by line), the code works fine, but, when I run the project (like python manage.py runserver) and call the API, the unique_together constraint error appears again!
It seems that qs.update(order=None) is not working, or works lazy!
where is the catch?! any advice?

Django filter exact match for multi field: ManyToManyField using ModelMultipleChoiceFilter

I'm using Django filters (django-filter) in my project. I have the models below, where a composition (Work) has a many-to-many instrumentations field with a through model. Each instrumentation has several instruments within it.
models.py:
class Work(models.Model):
instrumentations = models.ManyToManyField(Instrument,
through='Instrumentation',
blank=True)
class Instrument(models.Model):
name = models.CharField(max_length=100)
class Instrumentation(models.Model):
players = models.IntegerField(validators=[MinValueValidator(1)])
work = models.ForeignKey(Work, on_delete=models.CASCADE)
instrument = models.ForeignKey(Instrument, on_delete=models.CASCADE)
views.py:
import django_filters
class WorkFilter(django_filters.FilterSet):
instrument = django_filters.ModelMultipleChoiceFilter(
field_name="instrumentation__instrument",
queryset=Instrument.objects.all())
My filter works fine: it grabs all the pieces where there is the instrument selected by the user in the filter form.
However, I'd like to add the possibility of filtering the compositions with those exact instruments. For instance, if a piece contains violin, horn and cello and nothing else, I'd like to get that, but not a piece written for violin, horn, cello, and percussion. Is it possible to achieve that?
I'd also like the user to choose, from the interface, whether to perform an exact search or not, but that's a secondary issue for now, I suppose.
Update: type_of_search using ChoiceFilter
I made some progress; with the code below, I can give the user a choice between the two kinds of search. Now, I need to find which query would grab only the compositions with that exact set of instruments.
class WorkFilter(django_filters.FilterSet):
# ...
CHOICES = {
('exact', 'exact'), ('not_exact', 'not_exact')
}
type_of_search = django_filters.ChoiceFilter(label="Exact match?", choices=CHOICES, method="filter_instruments")
def filter_instruments(self, queryset, name, value):
if value == 'exact':
return queryset.??
elif value == 'not_exact':
return queryset.??
I know that the query I want is something like:
Work.objects.filter(instrumentations__name='violin').filter(instrumentations__name='viola').filter(instrumentations__name='horn')
I just don't know how to 'translate' it into the django_filters language.
Update 2: 'exact' query using QuerySet.annotate
Thanks to this question, I think this is the query I'm looking for:
from django.db.models import Count
instrument_list = ['...'] # How do I grab them from the form?
instruments_query = Work.objects.annotate(count=Count('instrumentations__name')).filter(count=len(instrument_list))
for instrument in instrument_list:
instruments_query = instruments_query.filter(instrumentations__name=instrument_list)
I feel I'm close, I just don't know how to integrate this with django_filters.
Update 3: WorkFilter that returns empty if the search is exact
class WorkFilter(django_filters.FilterSet):
genre = django_filters.ModelChoiceFilter(
queryset=Genre.objects.all(),
label="Filter by genre")
instrument = django_filters.ModelMultipleChoiceFilter(
field_name="instrumentation__instrument",
queryset=Instrument.objects.all(),
label="Filter by instrument")
CHOICES = {
('exact', 'exact'), ('not_exact', 'not_exact')
}
type_of_search = django_filters.ChoiceFilter(label="Exact match?", choices=CHOICES, method="filter_instruments")
def filter_instruments(self, queryset, name, value):
instrument_list = self.data.getlist('instrumentation__instrument')
if value == 'exact':
queryset = queryset.annotate(count=Count('instrumentations__name')).filter(count=len(instrument_list))
for instrument in instrument_list:
queryset = queryset.filter(instrumentations__name=instrument)
elif value == 'not_exact':
pass # queryset = ...
return queryset
class Meta:
model = Work
fields = ['genre', 'title', 'instrument', 'instrumentation']

You can grab instrument_list with self.data.getlist('instrument').
This is how you would use instrument_list for the 'exact' query:
type_of_search = django_filters.ChoiceFilter(label="Exact match?", choices=CHOICES, method=lambda queryset, name, value: queryset)
instrument = django_filters.ModelMultipleChoiceFilter(
field_name="instrumentation__instrument",
queryset=Instrument.objects.all(),
label="Filter by instrument",
method="filter_instruments")
def filter_instruments(self, queryset, name, value):
    """Filter works by the selected instruments, exactly or loosely.

    ``value`` is the cleaned selection of Instrument objects. When the
    submitted ``type_of_search`` is ``'exact'``, only works whose set of
    instruments equals the selection are kept; otherwise any work that
    uses at least one selected instrument is kept.
    """
    if not value:
        return queryset
    # Take the primary keys from the cleaned value rather than from the raw
    # request data: this does not rely on self.data offering getlist(), and
    # a parameter repeated in the URL cannot skew the expected count below.
    instrument_pks = [instrument.pk for instrument in value]
    type_of_search = self.data.get('type_of_search')
    if type_of_search == 'exact':
        # distinct=True keeps the count at "number of different instruments"
        # even when the filters below add further joins to the query.
        queryset = queryset.annotate(
            count=Count('instrumentations', distinct=True)
        ).filter(count=len(instrument_pks))
        # One filter() per instrument: the work must contain each of them.
        for pk in instrument_pks:
            queryset = queryset.filter(instrumentations__pk=pk)
    else:
        queryset = queryset.filter(instrumentations__pk__in=instrument_pks).distinct()
    return queryset

Django-filter get all records when a specific value for a filter_field is passed

I am using django-filter to filter my Queryset on the basis of url params.
class WorklistViewSet(ModelViewSet):
serializer_class = MySerializer
queryset = MyModel.objects.all()
filter_backends = [DjangoFilterBackend, ]
filterset_fields = ['class', ]
# possible values of *class* which is allowed to be passed in the url params are ['first', 'second', 'ALL'].
class MyModel(BaseModel):
CLASS_CHOICES = (
(FIRST_CLASS, 'first'),
(SECOND_CLASS, 'second'),
)
class = models.CharField(choices=CLASS_CHOICES, max_length=3, )
URLs http://127.0.0.1:8000?class=first and http://127.0.0.1:8000?class=second are giving the expected results.
I want that when http://127.0.0.1:8000?class=ALL is called, all the records in my table should be listed i.e without filtering.
How can i do this while using django-filter ?

You may want to use Filter.method, as explained in the docs.
In your case, I would do as follows:
class F(django_filters.FilterSet):
    """FilterSet in which ``klass=ALL`` switches the ``klass`` filter off."""

    # Referenced through the django_filters module, the same way as the
    # FilterSet base class, so no separate CharFilter import is needed.
    klass = django_filters.CharFilter(method='my_custom_filter')

    class Meta:
        model = MyModel
        fields = ['klass']

    def my_custom_filter(self, queryset, name, value):
        """Filter ``queryset`` on ``name`` unless ``value`` is 'ALL'."""
        if value == 'ALL':
            # Sentinel value: return everything, unfiltered.
            return queryset
        return queryset.filter(**{name: value})
Be also reminded that class is a reserved word in Python and cannot be used as a variable name. I've used klass instead, although that's used as something else in many Python books and may be confusing.

Django- filtering of the filter object

I want to make a complex filtering on the page using the FilterSets. This is my Filterset, nicely showing me tuples from chosen time and with chosen parameters.
# filters.py
class workFilter(filters.FilterSet):
start__gt = filters.DateTimeFilter(name='time_start', lookup_expr='gte')
start__lt = filters.DateTimeFilter(name='time_end', lookup_expr='lte')
class Meta:
model = Work
fields = ('machine', 'program')
But I want to add charts explaining the queried data. For that I need information such as the overall sum of time. I am querying it like this:
#views.py
def search(request):
work_list = Work.objects.all()
work_filter = workFilter(request.GET, queryset=work_list)
filter_backends = (filters.DjangoFilterBackend,)
#some queries to add to context, such as
sum_work = Work.objects.aggregate(Sum('time'))['time__sum']
return render_to_response(
TEMPLATE_DIRS + 'index.html',
{
'filter': praca_filter,
'sum_work': sum_work,
}
)
But sadly, those queries run against the whole database, not against my filtered set of objects.
How can I make queries on filtered set work_filter?

Define work_sum as a property of your FilterSet.
class WorkFilter(filters.FilterSet):
    """Filter Work rows by time window, machine and program.

    Also exposes ``work_sum``, the total ``time`` of the filtered rows, so a
    template can render it as ``{{ filter.work_sum }}``.
    """

    # NOTE(review): newer django-filter releases spell this argument
    # field_name= instead of name= -- confirm against the installed version.
    start__gt = filters.DateTimeFilter(name='time_start', lookup_expr='gte')
    start__lt = filters.DateTimeFilter(name='time_end', lookup_expr='lte')

    class Meta:
        model = Work
        fields = ('machine', 'program')

    @property
    def work_sum(self):
        """Return the sum of ``time`` over the filtered queryset."""
        # Aggregate over the FilterSet's filtered queryset, not Work.objects.
        qs = super(WorkFilter, self).qs
        return qs.aggregate(Sum('time'))['time__sum']
Then when you pass your filter through to your view you just need {{ filter.work_sum }} in your template.

Django filter based on joined model

Let's say I have the following design
database table
A track has a song and a song has a singer.
I would like the track allows filtering based on singer name too.
So, I need the track model extract the singer name. I got stuck with the filters.
I receive the following error message:
File ".../lib/python3.6/site-packages/django_filters/filterset.py", line 352, in get_filters
"%s" % ', '.join(undefined)
TypeError: 'Meta.fields' contains fields that are not defined on this FilterSet: singer
I have heard from this to use __ but I have no idea how to apply that.
Here is the code
class TrackSerializer(MyModelSerializer):
singer = serializers.SerializerMethodField()
def get_singer(self, track): # Is there any shortcut?
song = Song.objects.get(id=track.song_id)
if song is not None:
return Channel.objects.get(id=song.singer_id).name
else:
return ''
class Meta:
model = Track
fields = (
'id',
'name',
'song',
'singer',
)
class TrackFilterSet(MyFilterSet):
singer = CharFilter(method='singer_filter')
song = RefineModelChoiceFilter(
queryset=Song.objects.all(),
refine_choices=lambda qs, keywords, request: qs.filter(name__icontains=keywords)
)
def singer_filter(self, queryset, name, value):
# print('queryset:', TrackSerializer(queryset, many=True))
return queryset.filter(**{
name: value, # ???????????
})
class Meta:
model = Track
fields = (
'singer',
'song',
)
class TrackViewSet(MyViewSet):
queryset = Track.objects.all()
serializer_class = TrackSerializer
filterset_fields = ('singer', 'song')
def filter_refine_choices_singer(self, qs, keywords, request):
return qs.filter(name__icontains=keywords)
def filter_refine_choices_song(self, qs, keywords, request):
return qs.filter(name__icontains=keywords)

Try putting filterset_fields as a list, between [] rather than ().
filterset_fields = ['singer', 'song']

I think in the method singer_filter you should do something like:
def singer_filter(self, queryset, name, value):
    """Keep the tracks whose song's singer has a name containing ``value``.

    The match is case-insensitive (``icontains``).
    """
    # Follow the relations by field name (track -> song -> singer) and put a
    # double underscore before the lookup: name__icontains, not name_icontains.
    return queryset.filter(song__singer__name__icontains=value)
I didn't test this but i think something like that should work unless third relation __ is not allowed. Take a look here:
https://django-filter.readthedocs.io/en/master/ref/filters.html?highlight=method

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

Optimising number of queries within a DRF ModelSerializer - django

Related

Django queryset.update() seems to act lazy

Django filter exact match for multi field: ManyToManyField using ModelMultipleChoiceFilter

Django-filter get all records when a specific value for a filter_field is passed

Django- filtering of the filter object

Django filter based on joined model

Categories

Resources