How to call instance methods on the nested Serializer with django rest? - django

I've got two models I'm trying to nest together. Timesheet and Invoice
My InvoiceSerializer looks something like this:
class InvoiceSerializer(serializers.ModelSerializer):
billable_timesheets = serializers.SerializerMethodField()
total_hours_and_cost = serializers.SerializerMethodField()
class Meta:
model = Invoice
fields = (
"hours",
"hour_cost",
"billable_timesheets",
"total_hours_and_cost",
...
)
def get_total_hours_and_cost(self, obj):
return obj.hours * obj.hour_cost
def get_billable_timesheets(self, obj):
"""Getting all timesheets for selected billable period"""
timesheets = obj.project.timesheets.filter(<queryset here>)
return TimesheetSerializer(timesheets, many=True).data
This works fine and all - I can define MethodFields and the correct JSON is returned, great. However, I got a method on my child model (in this case, the Timesheet model) that I need to access and run some calculations on. I'm getting the data necessary via get_billable_timesheets, and now I need to run a method on my Timesheet model called total_duration(). Whenever I try to do something along the lines of
timesheets = self.get_billable_timesheets(obj)
hours = 0
for timesheet in timesheets:
hours += timesheet.total_duration()
I get:
AttributeError: 'collections.OrderedDict' object has no attribute 'total_duration'
What I don't understand is that I'm actually serializing the data already through the get_billable_timesheets method - why am I still receiving this error?

The timesheet in your for loop is an ordered dict instance, since get_billable_timesheets returns serialized data.
A workaround can be,
# get_billable_timesheets() returns serialized dicts, so the ids are mapped
# back to model instances before calling the model method.
timesheets = self.get_billable_timesheets(obj)
# Assumes TimesheetSerializer exposes an 'id' field -- TODO confirm.
timesheet_ids = [timesheet.get('id') for timesheet in timesheets]
hours = 0
# One `id__in` query for all rows instead of one Timesheet.objects.get() per
# serialized item. Unlike get(), ids that no longer exist are skipped rather
# than raising Timesheet.DoesNotExist.
for timesheet_obj in Timesheet.objects.filter(id__in=timesheet_ids):
    hours += timesheet_obj.total_duration()

A working solution for my case was to not return a serialized version of the timesheets (i.e. instead of return TimesheetSerializer(timesheets, many=True).data
I just did return timesheets), then did the calculations I needed and only afterwards serialized the timesheets:
def get_billable_timesheets(self, obj):
"""Getting all timesheets for selected billable period"""
timesheets = obj.project.timesheets.filter(<queryset here>)
return timesheets # instead of a serialized version

Related

Django Rest Framework: optimize nested serializers performance

I have problem with my endpoint performance which returns around 40 items and response takes around 17 seconds.
I have model:
class GameTask(models.Model):
    """A game task; the remaining fields are elided in the original post."""

    name = models.CharField()
    description = RichTextUploadingField()
    ...
and another model like that:
class TaskLevel(models.Model):
    """A level that links one master GameTask to a set of sub-tasks.

    Reverse accessors on GameTask: ``sub_levels`` (levels where the task is
    the master) and ``master_levels`` (levels that list the task as a
    sub-task). The remaining fields are elided in the original post.
    """

    # `on_delete` must be passed as a keyword argument; the original
    # `on_delete-models.CASCADE` was a syntax error.
    master_task = models.ForeignKey(
        GameTask, related_name="sub_levels", on_delete=models.CASCADE
    )
    sub_tasks = models.ManyToManyField(GameTask, related_name="master_levels")
    ...
So basically I can have "normal" tasks, but when I create a TaskLevel object I can set master_task to the task that ties together the other tasks added to the sub_tasks field.
My serializers look like:
class TaskBaseSerializer(serializers.ModelSerializer):
fields created by serializers.SerializerMethodField()
...
class TaskLevelSerializer(serializers.ModelSerializer):
    """Serialize a TaskLevel with its sub-tasks computed per request user."""

    sub_tasks = serializers.SerializerMethodField()

    class Meta:
        model = TaskLevel

    def get_sub_tasks(self, obj: TaskLevel):
        """Return the serialized sub-tasks of ``obj`` for the requesting user."""
        current_user = self.context["request"].user
        # The module-level `get_sub_tasks` helper (from another module) is
        # called here, not this method.
        level_sub_tasks = get_sub_tasks(level=obj, user=current_user)
        serializer = TaskBaseSerializer(
            level_sub_tasks, many=True, context=self.context
        )
        return serializer.data
class TaskSerializer(TaskBaseSerializer):
    """Serialize a GameTask, adding level configuration for master tasks."""

    levels_config = serializers.SerializerMethodField()

    class Meta:
        model = GameTask

    def get_levels_config(self, obj: GameTask):
        """Return sub-level data and progress for a master task, else None."""
        if not is_mastertask(obj):
            return None

        # This ordering issues a query for each serialized task.
        ordered_levels = obj.sub_levels.all().order_by("number")
        sub_levels_data = TaskLevelSerializer(
            ordered_levels, many=True, context=self.context
        ).data
        progress = get_progress(
            master_task=obj, user=self.context["request"].user
        )
        return {"sub_levels": sub_levels_data, "progress": progress}
When I tried to measure time it turned out that get_levels_config method takes around 0.25 seconds for one multilevel-task (which contain 7 subtasks). Is there any way to improve this performance? If any more detailed methods are needed I will add them
Your code might be suffering from the N+1 problem. TaskSerializer.get_levels_config() performs database queries from obj.sub_levels.all().order_by("number").
What happens when serializing multiple instances like:
TaskSerializer(tasks, many=True)
each instance calls .get_levels_config()
You can use prefetch_related & select_related (more explanation here).
You will have to manually check for prefetched objects since you are using SerializerMethodField. There are also the functions get_progress & get_sub_tasks, which I assume each perform another query.
Here are some examples that can be used around your code:
Prefetching:
GameTask.objects.prefetch_related("sub_levels", "master_levels")
# Accessing deeper level
GameTask.objects.prefetch_related(
    "sub_levels",
    "master_levels",
    "sub_levels__sub_tasks",
    # `master_levels` is the reverse side of a many-to-many, which
    # select_related() cannot follow (it only joins foreign-key and
    # one-to-one relations), so the nested master_task is prefetched too.
    "master_levels__master_task",
)
Checking prefetch:
def get_sub_tasks(self, obj: TaskLevel):
    """Return the sub-tasks of ``obj``, using prefetched rows when present.

    If ``obj`` carries a ``_prefetched_objects_cache`` with a ``"sub_tasks"``
    entry, that entry is returned; otherwise ``obj.sub_tasks.all()`` is used.
    """
    prefetched = getattr(obj, "_prefetched_objects_cache", {})
    # Test for the key rather than truthiness, so that an empty prefetched
    # result is still served from the cache instead of falling through.
    if "sub_tasks" in prefetched:
        # Return the cached entry itself, not the whole cache dict.
        return prefetched["sub_tasks"]
    return obj.sub_tasks.all()

How does drf serialize manytomany fields

How does DRF by default handle serializing a manytomany?
I see it defaults to render the field as an array of ids ex: [1,2,3]
And only uses 2 queries when I prefetch the related model.
However, when I generate it myself with .values_list('id', flat=True) it makes an extra query for every row.
Models
class Fails(models.Model):
    """A failure record linked to any number of runs."""

    # Runs is declared after this class, so it is referenced lazily by name;
    # using the bare class here would raise NameError at import time.
    runs = models.ManyToManyField('Runs', related_name='fails')
class Runs(models.Model):
    """A run, described by a free-text name."""

    name = models.TextField()
View
class FailsViewSet(viewsets.ModelViewSet):
    """Viewset for Fails; parts of the class are elided in the original post."""

    ...

    def get_queryset(self):
        """Return the filtered Fails rows with their related runs prefetched."""
        ...
        # `params` is built in the elided part of this method.
        filtered_fails = Fails.objects.filter(**params)
        return filtered_fails.prefetch_related('runs')
Serializer
class FailsSerializer(QueryFieldsMixin, serializers.ModelSerializer):
    """Serializer from the question; it issues one extra query per row."""

    runs = serializers.SerializerMethodField()

    # NOTE(review): the method name does not follow the `get_<field_name>`
    # pattern for the `runs` field -- presumably renamed for the post; confirm.
    def get_failbin_regressions(self, obj):
        """Return the ids of the runs related to ``obj``."""
        runids = self.context.get('runids')
        # this creates an extra query for every row
        run_id_values = obj.runs.values_list('id', flat=True)
        return run_id_values
The end goal is to get runs to display a filtered list of runids.
return obj.runs.values_list('id', flat=True).filter(id__in=runids)
or
runs = obj.runs.values_list('id', flat=True)
return [x for x in runs if x in runids] #to avoid an extra query from the .filter
I know the filter creates more queries, I assume the prefetch model is lost in the serializerMethodField.
Is there a way of getting the list of ids like drf does it without the extra query cost when I do it manually?
I can't find any documentation on how they implement the manytomany render.
By calling:
obj.runs.values_list('id', flat=True)
you are performing a new DB query. Since it will be called for every instance, you'll have a lot of extra queries.
prefetch_related loads the associated instances. So you can interact with the Python objects without extra queries. You could fix your issue with:
def get_failbin_regressions(self, obj):
    """Return the ids of ``obj``'s runs that are listed in the serializer context.

    The wanted ids are read from ``self.context['runids']``. When that entry
    is missing or empty, an empty list is returned.
    """
    # Build the lookup set once. `or ()` keeps a missing 'runids' entry from
    # raising TypeError on the membership test below.
    wanted_ids = set(self.context.get('runids') or ())
    # Iterate obj.runs.all() and filter in Python rather than calling
    # values_list()/filter(), which would build a new query for every row.
    return [run.id for run in obj.runs.all() if run.id in wanted_ids]

Django API REST return all objects in a model

I am working with django-rest-framework and I have an API that returns me the info with a filter like this:
http://example.com/api/products?category=clothing&in_stock=True
--this returns me 10 items
But it also returns the whole Model data if I dont put the filters, this is the default way.
http://example.com/api/products/
--this returns me more than 100 (all the Model Table)
How can I disable this default behaviour? I mean, how can I make a filter necessary for this API to work? Or, even better, how can I make the last URL return an empty JSON response?
UPDATE
Here is some code:
serializers.py
class OEntradaDetalleSerializer(serializers.HyperlinkedModelSerializer):
    """Serialize an OEntradaDetalle, flattening three attributes of `producto`."""

    # These three fields are read through the related `producto` object.
    item = serializers.RelatedField(source='producto.item')
    descripcion = serializers.RelatedField(source='producto.descripcion')
    unidad = serializers.RelatedField(source='producto.unidad')

    class Meta:
        model = OEntradaDetalle
        fields = (
            'url',
            'item',
            'descripcion',
            'unidad',
            'cantidad_ordenada',
            'cantidad_recibida',
            'epc',
        )
views.py
class OEntradaDetalleViewSet(BulkUpdateModelMixin, viewsets.ModelViewSet):
    """Viewset over all OEntradaDetalle rows with optional field filtering."""

    queryset = OEntradaDetalle.objects.all()
    serializer_class = OEntradaDetalleSerializer

    # Query-string filters are optional: a request without any of them
    # receives the full, unfiltered queryset.
    filter_backends = (filters.DjangoFilterBackend,)
    filter_fields = (
        'cantidad_ordenada',
        'cantidad_recibida',
        'oentrada__codigo_proveedor',
        'oentrada__folio',
    )
urls.py
# Register the viewset on a bulk-update router under the "oentradadetalle" prefix.
router2 = BulkUpdateRouter()
router2.register(r'oentradadetalle', OEntradaDetalleViewSet)
# Mount every route registered on the router beneath "api/".
urlpatterns = patterns('',
url(r'^api/',include(router2.urls)),
)
URL EXAMPLE
http://localhost:8000/api/oentradadetalle/?oentrada__folio=E01
THIS RETURNS ONLY SOME FILTERED VALUES
http://localhost:8000/api/oentradadetalle/
THIS RETURNS EVERYTHING IN THE MODEL (I need to remove this or make it return some empty data)
I would highly recommend using pagination, to prevent anyone from being able to return all of the results (which likely takes a while).
If you can spare the extra queries being made, you can always check if the filtered and unfiltered querysets match, and just return an empty queryset if that is the case. This would be done in the filter_queryset method on your view.
def filter_queryset(self, queryset):
    """Return the filtered queryset, or an empty one if nothing was filtered out.

    The filter backends are applied through the parent implementation. When
    the filtered result has as many rows as the unfiltered queryset, the
    request is treated as unfiltered and an empty queryset is returned.
    """
    # `ViewSet` stands for the name of the view class this method lives on.
    filtered_queryset = super(ViewSet, self).filter_queryset(queryset)
    # Python compares with `==`; the original `===` was a syntax error.
    # len() evaluates the filtered queryset, count() asks for the row count
    # of the unfiltered one.
    if queryset.count() == len(filtered_queryset):
        return queryset.model.objects.none()
    return filtered_queryset
This will make one additional query for the count of the original queryset, and if it is the same as the filtered queryset, an empty queryset will be returned. If the queryset was actually filtered, it will be returned and the results will be what you are expecting.

DRF - How to get WritableField to not load entire database into memory?

I have a very large database (6 GB) that I would like to use Django-REST-Framework with. In particular, I have a model that has a ForeignKey relationship to the django.contrib.auth.models.User table (not so big) and a Foreign Key to a BIG table (lets call it Products). The model can be seen below:
class ShoppingBag(models.Model):
    """A quantity of one product held for one user."""

    # related_name='+' is set on both foreign keys.
    user = models.ForeignKey('auth.User', related_name='+')
    product = models.ForeignKey('myapp.Product', related_name='+')
    quantity = models.SmallIntegerField(default=1)
Again, there are 6GB of Products.
The serializer is as follows:
class ShoppingBagSerializer(serializers.ModelSerializer):
    """First version of the serializer, using RelatedField for both relations."""

    product = serializers.RelatedField(many=False)
    user = serializers.RelatedField(many=False)

    class Meta:
        model = ShoppingBag
        fields = ('product', 'user', 'quantity')
So far this is great- I can do a GET on the list and individual shopping bags, and everything is fine. For reference the queries (using a query logger) look something like this:
SELECT * FROM myapp_product WHERE product_id=1254
SELECT * FROM auth_user WHERE user_id=12
SELECT * FROM myapp_product WHERE product_id=1404
SELECT * FROM auth_user WHERE user_id=12
...
For as many shopping bags are getting returned.
But I would like to be able to POST to create new shopping bags, but serializers.RelatedField is read-only. Let's make it read-write:
class ShoppingBagSerializer(serializers.ModelSerializer):
    """Second version, switching both relations to PrimaryKeyRelatedField."""

    product = serializers.PrimaryKeyRelatedField(many=False)
    user = serializers.PrimaryKeyRelatedField(many=False)
    ...
Now things get bad... GET requests to the list action take > 5 minutes and I noticed that my server's memory jumps up to ~6GB; why?! Well, back to the SQL queries and now I see:
SELECT * FROM myapp_products;
SELECT * FROM auth_user;
Ok, so that's not good. Clearly we're doing "prefetch related" or "select_related" or something like that in order to get access to all the products; but this table is HUGE.
Further inspection reveals where this happens on Line 68 of relations.py in DRF:
def initialize(self, parent, field_name):
    # Quoted from Django REST Framework's relations.py (per the surrounding
    # text); reproduced unchanged apart from these comments.
    super(RelatedField, self).initialize(parent, field_name)
    # A writable field without an explicit queryset falls back to `.all()`
    # on the related model's default manager.
    if self.queryset is None and not self.read_only:
        manager = getattr(self.parent.opts.model, self.source or field_name)
        if hasattr(manager, 'related'): # Forward
            self.queryset = manager.related.model._default_manager.all()
        else: # Reverse
            self.queryset = manager.field.rel.to._default_manager.all()
If not readonly, self.queryset = ALL!!
So, I'm pretty sure that this is where my problem is; and I need to say, don't select_related here, but I'm not 100% if this is the issue or where to deal with this. It seems like all should be memory safe with pagination, but this is simply not the case. I'd appreciate any advice.
In the end, we had to simply create our own PrimaryKeyRelatedField class to override the default behavior in Django-Rest-Framework. Basically we ensured that the queryset was None until we wanted to lookup the object, then we performed the lookup. This was extremely annoying, and I hope the Django-Rest-Framework guys take note of this!
Our final solution:
class ProductField(serializers.PrimaryKeyRelatedField):
    """Primary-key field for Product that looks rows up one at a time.

    The field is constructed with an empty queryset and resolves incoming
    primary keys with a single ``Product.objects.get()`` in ``from_native``.
    """

    many = False

    def __init__(self, *args, **kwargs):
        """Force an empty queryset, then defer to the parent constructor."""
        # Hack to ensure ALL products are not loaded.
        # (The original assigned to the misspelled name `kwarsgs`, which
        # raised NameError.)
        kwargs['queryset'] = Product.objects.none()
        super(ProductField, self).__init__(*args, **kwargs)

    def field_to_native(self, obj, field_name):
        """Return the unicode representation of ``obj``."""
        # NOTE(review): `field_name` is unused and `obj` itself is rendered --
        # confirm this is the intended output.
        return unicode(obj)

    def from_native(self, data):
        """Return the Product whose primary key is ``data``.

        Raises:
            ValidationError: if no such Product exists, or if ``data`` is not
                usable as a primary key.
        """
        try:
            return Product.objects.get(pk=data)
        # Django models expose `DoesNotExist`; `Product.ObjectDoesNotExist`
        # is not an attribute of the model class.
        except Product.DoesNotExist:
            msg = self.error_messages['does_not_exist'] % smart_text(data)
            raise ValidationError(msg)
        except (TypeError, ValueError):
            msg = self.error_messages['incorrect_type'] % type(data)
            raise ValidationError(msg)
And then our serializer is as follows:
class ShoppingBagSerializer(serializers.ModelSerializer):
    """Final version, using the custom ProductField for the product relation."""

    product = ProductField()
    ...
This hack ensures the entire database isn't loaded into memory, but rather performs one-off selects based on the data. It's not as efficient computationally, but it also doesn't blast our server with 5 second database queries loaded into memory!

tastypie with django-simple-history - display model history as rest API

I would like to share django model history (created by django-simple-history) using tastypie.
Problem is, how to prepare ModelResource for this purpose.
Access to model history is by model.history manager. So access to all changes of model we can gain by model.history.all()
What i would like to obtain? For example. I have django model Task and the API endpoints:
http://127.0.0.1/api/v1/task - display all tasks list
http://127.0.0.1/api/v1/task/1 - display details for choosen task
http://127.0.0.1/api/v1/task/1/history - display history of task no. 1
First two links presents default behavior of ModelResource. what i have till now?
class TaskResource(ModelResource):
    # Question code: the resource as it stood before the answer.
    class Meta:
        # it displays all available history entries for all task objects
        queryset = Task.history.all()
        resource_name = 'task'
    def prepend_urls(self):
        # Adds a "<resource_name>/<pk>/history" route handled by get_history.
        return [
            url(r"^(?P<resource_name>%s)/(?P<pk>\w[\w/-]*)/history$" % (self._meta.resource_name,),
                self.wrap_view('get_history'),
                name="api_history"),
        ]
    def get_history(self, request, **kwargs):
        # Body not written yet; this is what the question asks about.
        #...
get_history should return bundle with history entries.. but how this method should look?
I guess, i need to create bundle with needed data, but don't know how exactly should i do that.
Does someone have experience with simple-history and tastypie and can present a simple example?
It seems the solution was simpler than I thought. Maybe someone will use this in the future:
class TaskHistoryResource(ModelResource):
    """Expose the Task history entries, filterable by ``id``."""

    class Meta:
        queryset = Task.history.all()
        # A dict literal maps key to value with `:`; the original
        # `{ 'id' = ALL }` was a syntax error.
        filtering = {'id': ALL}
class TaskResource(ModelResource):
    """Task resource with an extra ``<pk>/history`` endpoint."""

    # The history resource defined in this answer is TaskHistoryResource; the
    # original referenced `AssetTypeHistoryResource`, which is not defined.
    history = fields.ToManyField(TaskHistoryResource, 'history')

    class Meta:
        # it displays all available history entries for all task objects
        # NOTE(review): this is the history queryset, same as in
        # TaskHistoryResource -- confirm Task.objects.all() was not intended.
        queryset = Task.history.all()
        resource_name = 'task'

    def prepend_urls(self):
        """Add the ``<resource_name>/<pk>/history`` route."""
        return [
            url(r"^(?P<resource_name>%s)/(?P<pk>\w[\w/-]*)/history$" % (self._meta.resource_name,),
                self.wrap_view('get_history'),
                name="api_history"),
        ]

    def get_history(self, request, **kwargs):
        """Return the history list for the task identified by ``kwargs['pk']``.

        Responds with HttpGone when the task is not found and with
        HttpMultipleChoices when more than one object matches.
        """
        try:
            bundle = self.build_bundle(data={'pk': kwargs['pk']}, request=request)
            obj = self.cached_obj_get(bundle=bundle, **self.remove_api_resource_names(kwargs))
        except ObjectDoesNotExist:
            return HttpGone()
        except MultipleObjectsReturned:
            return HttpMultipleChoices("More than one resource is found at this URI.")

        # Delegate to the history resource, filtered on the task's id.
        history_resource = TaskHistoryResource()
        return history_resource.get_list(request, id=obj.pk)
A bit changed solution from:
http://django-tastypie.readthedocs.org/en/latest/cookbook.html#nested-resources
Basically, there was a need to create an additional resource with the history entries. The get_history method creates an instance of it and returns its list with an appropriate filter on the id field (in django-simple-history the id field contains the id of the main object; the revision's primary key is named history_id).
Hope, that will help someone.