Optimise django query when fetching information from multiple models

Optimise django query when fetching information from multiple models - django

I am new to Django and using Django 3.0.6.
With the following code, I have been able to achieve the desired results and display detailed book information onto the template. However, on average, ORM makes 8 to 9 database queries to get detailed information about the book. I am looking for expert help to optimize my database queries so that I could fetch book-related information with fewer queries.
I tried using select_related() and prefetch_related() but without any luck, maybe I did it improperly. Is there a scope of using Q object or union(), just my thought? How can I achieve the same results with minimum queries to the database?
Please help me with detailed code, if possible.
models.py
class Publisher(models.Model):
    """Publisher record, identified by its display name."""

    publisher_name = models.CharField(max_length=50)
class Author(models.Model):
    """Author record, identified by its display name."""

    author_name = models.CharField(max_length=50)
class Booktype(models.Model):
    """Format a book is sold in."""

    # Hard Cover, Soft Cover, Kindle Edition, Digital PDF etc.
    book_type = models.CharField(max_length=20)
class Book(models.Model):
    """Core book record.

    Links to one publisher and one author, and to its available book types
    through the BookPrice intermediate model.
    """

    book_title = models.TextField()
    slug = models.SlugField(max_length=50, unique=False)
    published_date = models.DateField(auto_now=False, auto_now_add=False)
    publisher = models.ForeignKey(Publisher, on_delete=models.CASCADE)
    author = models.ForeignKey(Author, on_delete=models.CASCADE)
    book_type = models.ManyToManyField(
        Booktype,
        through='BookPrice',
        through_fields=('book', 'book_type'),
    )
# Kept as a separate model because of its heavy content, so that the Book
# model stays light.
class BookDetail(models.Model):
    """Optional long-form text attached one-to-one to a Book."""

    a = models.TextField(null=True, blank=True)
    b = models.TextField(null=True, blank=True)
    c = models.TextField(null=True, blank=True)
    book = models.OneToOneField(Book, on_delete=models.CASCADE)
class BookPrice(models.Model):
    """Intermediate model of Book.book_type: the price of a book in one type."""

    book_type = models.ForeignKey(Booktype, on_delete=models.CASCADE)
    book = models.ForeignKey(Book, on_delete=models.CASCADE)
    price = models.DecimalField(max_digits=7, decimal_places=2)
view.py
def get_book_details(request, book_id, slug):
    """Render the detail page for the book matching both ``book_id`` and ``slug``."""
    # No related objects are loaded here; each relation the template touches
    # triggers its own query.
    book = Book.objects.get(id=book_id, slug=slug)
    return render(request, 'products/book_detail.html', {'book': book})
book_detail.html Template
{# 1st database query #}
{{ book.book_title }}
{{ book.id }}
{{ book.published_date }}
{# 2nd database query #}
{{ book.publisher.publisher_name }}
{# 3rd database query #}
{{ book.author.author_name }}
{# 4th database query #}
{{ book.bookdetail.a }}
{{ book.bookdetail.b }}
{{ book.bookdetail.c }}
{# 5th to 9th database query, depending on the available book types #}
{% for x in book.bookprice_set.all %}
  {{ x.book_type }} {{ x.price|floatformat }}
{% endfor %}

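You can use .select_related(…) [Django-doc] to fetch the publisher, author and bookdetail in the same query as the book. We can then use .prefetch_related(…) [Django-doc] to load the related BookPrice rows and their Booktypes with two extra queries, for three queries in total: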
def get_book_details(request, book_id, slug):
    """Render the book detail page using a fixed, small number of queries."""
    books = (
        Book.objects
        # Forward FK / one-to-one relations are JOINed into the main query.
        .select_related('publisher', 'author', 'bookdetail')
        # The nested lookup prefetches bookprice_set as well as each
        # price's book_type, so listing 'bookprice_set' separately is not needed.
        .prefetch_related('bookprice_set__book_type')
    )
    book = books.get(id=book_id, slug=slug)
    return render(request, 'products/book_detail.html', {'book': book})

Related

Django: want to loop through _set for pk values only

I'm stuck trying to figure out how to filter my template values with the detail view PK. I have a detail view for my employee. I wish to display my employee's subjects, where I then wish to filter the subjects with the evaluations that have been made for the subject.
I've gotten so far that I can show my subject names, and show all the evaluations for each subject. However, I don't want to show ALL of them I only want to show the ones that exist for the current employee (detailView PK). As you can see in my template, I'm using _set to make the relation, but I have no clue on how to filter the PK into that equation.
Example, what I want:
Subject 1:
Evaluationname - employee Johnny
Evaluationname - employee Johnny
Example, what I currently have:
Subject 1:
Evaluationname - employee Johnny
Evaluationname - employee Chris
I don't want Chris's evaluation, I only wish to filter the primary key, so in this case Johnny's evaluations.
Template
{# For every subject, list its evaluations together with the evaluated employee. #}
{% for subject in subject_list %}
  {{ subject.subjectname }}
  {% for evaluation in subject.evaluation_set.all %}
    <div>
      {# Evaluation.employee_num is the foreign key to Employee. #}
      <p>{{ evaluation.employee_num }} | {{ evaluation.employee_num.firstname }} | {{ evaluation.employee_num.lastname }}</p>
    </div>
  {% empty %}
    <p>No evaluations founds.</p>
  {% endfor %}
{% endfor %}
View
class EmployeeDetailView(DetailView):
    """Detail page for a single Employee that also exposes every Subject."""

    template_name = 'evalsys/employee/alle_employees_eval.html'
    model = Employee

    # Uses employee PK to make a detail view
    def view_employee_with_pk(self, pk=None):
        # NOTE(review): nothing in this snippet calls this method, it passes
        # the view itself to render() where a request is expected, and
        # self.employee is never assigned here -- confirm whether it is dead code.
        if pk:
            employee = Employee.objects.get(pk=pk)
        else:
            employee = self.employee
        args = {'employee': employee, }
        return render(self, 'evalsys/employee/alle_employees_eval.html', args)

    def get_context_data(self, **kwargs):
        """Add the full subject list to the default DetailView context."""
        # super() must name this class; the previous
        # "EmployeeDetailViewDetailView" does not exist and raised NameError.
        context = super(EmployeeDetailView, self).get_context_data(**kwargs)
        context['subject_list'] = Subject.objects.all()
        return context
Subject Model
class Subject(models.Model):
    """A subject, with a display name and a unique slug."""

    id = models.AutoField(primary_key=True)
    subjectname = models.CharField(
        max_length=255,
        help_text="Indtast navnet på faget.",
    )
    slug = models.SlugField(max_length=200, unique=True)
Evaluation model
class Evaluation(models.Model):
    """Links one Employee to one Subject; both links are nullable."""

    id = models.AutoField(primary_key=True)
    # Despite the field names, both of these are foreign keys.
    employee_num = models.ForeignKey(
        'Employee', on_delete=models.CASCADE, null=True
    )
    subjectname = models.ForeignKey(
        'Subject', on_delete=models.CASCADE, null=True
    )
Employee model
class Employee(models.Model):
    """An employee with an MA number, a name and at most one Subject."""

    id = models.AutoField(primary_key=True)
    slug = models.SlugField(max_length=200)
    employee_num = models.IntegerField(
        help_text="Indtast medarbejderens MA-nummer. (F.eks 123456)"
    )
    firstname = models.CharField(
        max_length=30, help_text="Indtast medarbejderens fornavn."
    )
    lastname = models.CharField(
        max_length=30, help_text="Indtast medarbejderens efternavn."
    )
    subjectname = models.ForeignKey(
        'Subject', on_delete=models.CASCADE, null=True
    )

Reverse relationships (subject.evaluation_set) can be prefetched. This is a technique for reducing the number of database queries made when you access the reverse relationship for many objects in a queryset. With the following queryset, accessing subject.evaluation_set.all will not perform an additional database query, because the result has already been cached on each instance:
# One extra query loads the evaluations for all subjects at once.
Subject.objects.prefetch_related('evaluation_set')
This cached result can be modified by using Prefetch objects. Using these you can limit the contents of subject.evaluation_set.all to only contain the result that you want
# Restrict each subject's cached evaluation_set to the employee shown by the
# DetailView. With the question's models the foreign key on Evaluation is
# named employee_num, and DetailView stores the current object on self.object.
Subject.objects.all().prefetch_related(
    Prefetch(
        'evaluation_set',
        queryset=Evaluation.objects.filter(employee_num=self.object),
    )
)

Your model structure is confusing. Are you able to detail the relationship between employee, subject and evaluation?? You have mentioned you wish to display an employee's subjects, but via your model structure, an employee can have only one subject, as employee is related to the subject by a foreign key.
Below I have suggested some changes to your model names and your model structure so it might make more sense to retrieve your evaluations in the template. Feel free to ask questions about your model design as that is crucial to design your views, templates, etc.
Also please refer here for model naming conventions
Django Foreign Key Reference
Django Model Coding Style (PEP8)
Subject Model
class Subject(models.Model):
    """A subject, with a display name and a unique slug."""

    id = models.AutoField(primary_key=True)
    # The help text must stay on a single line: a plain string literal
    # cannot span a line break.
    subject_name = models.CharField(
        max_length=255,
        help_text="Indtast navnet på faget.",
    )
    slug = models.SlugField(max_length=200, unique=True)
Employee Model
class Employee(models.Model):
    """An employee related to many Subjects through Evaluation."""

    id = models.AutoField(primary_key=True)
    slug = models.SlugField(max_length=200)
    employee_num = models.IntegerField(
        help_text="Indtast medarbejderens MA-nummer. (F.eks 123456)"
    )
    first_name = models.CharField(
        max_length=30, help_text="Indtast medarbejderens fornavn."
    )
    last_name = models.CharField(
        max_length=30, help_text="Indtast medarbejderens efternavn."
    )
    # Evaluation is the intermediate table of this many-to-many relation.
    subjects = models.ManyToManyField(
        Subject, related_name='employee', through='Evaluation'
    )
Evaluation Model
class Evaluation(models.Model):
    """Intermediate model of Employee.subjects: one evaluation per row."""

    name = models.CharField(blank=True, max_length=50)
    employee = models.ForeignKey('Employee', on_delete=models.CASCADE)
    subject = models.ForeignKey('Subject', on_delete=models.CASCADE)
So the assumption is that an employee can have several subjects, and that mapping is defined with a many-to-many relationship through the Evaluation model.
Your DetailView can then just be
class EmployeeDetailView(DetailView):
    """Detail page for one Employee together with that employee's evaluations."""

    template_name = 'evalsys/employee/alle_employees_eval.html'
    model = Employee

    def get_context_data(self, **kwargs):
        """Add only the evaluations that belong to the displayed employee."""
        # super() must name this class; "EmployeeDetailViewDetailView" does
        # not exist and raised NameError.
        context = super(EmployeeDetailView, self).get_context_data(**kwargs)
        context['evaluations'] = Evaluation.objects.filter(employee=self.object)
        return context
Template
{# Lists each entry of the `evaluations` context variable with its subject name and employee name. #}
{% for evaluation in evaluations %}
{{ evaluation.subject.subject_name }}
<p>{{ evaluation.name }} | {{ evaluation.employee.first_name }} |
{{evaluation.employee.last_name }}</p>
{% empty %}
<p>No evaluations founds.</p>
{% endfor %}

How to query in Django with best efficiency?

I recently found that my site has a serious SQL query problem: django-debug-toolbar reported hundreds of similar and duplicate queries. So I'm trying to figure out the most efficient way to use the Django ORM and avoid unnecessary QuerySet evaluation.
As you can see in the Store model below, Store has many ForeignKey and ManyToManyField relations. Because of that structure, many HTML template snippets do things like store.image_set.all or store.top_keywords.all, as shown below. Everything starts with store. On each store detail page, I simply pass a cached store object fetched with prefetch_related or select_related. Is this a bad approach? Should I cache and prefetch_related or select_related each ForeignKey or ManyToManyField separately in views.py?
HTML templates
{# Iterates three relations of `store`: image_set, top_keywords and sub_keywords. #}
{% for img in store.image_set.all %}
{{ img }}
{% endfor %}
{% for top_keyword in store.top_keywords.all %}
{{ top_keyword }}
{% endfor %}
{% for sub_keyword in store.sub_keywords.all %}
{{ sub_keyword }}
{% endfor %}
views.py
class StoreDetailView(View):
    """Render a store page from a cached, prefetched Store instance."""

    def get(self, request, *args, **kwargs):
        """Return the store page, querying the database only on a cache miss."""
        # NOTE(review): store_domainKey and template are not defined anywhere
        # in this snippet; presumably they come from the URL kwargs and the
        # view configuration -- confirm against the full view.
        cache_name_store = 'store-{0}'.format(store_domainKey)
        store = cache.get(cache_name_store)
        if store is None:
            # Prefetch every relation the template iterates, so the cached
            # instance carries the results with it.
            store = Store.objects.prefetch_related(
                'image_set', 'top_keywords', 'sub_keywords'
            ).get(domainKey=store_domainKey)
            # Use the freshly loaded object directly; reading it back from
            # the cache costs a round trip and can return None.
            cache.set(cache_name_store, store)
        context = {
            'store': store,
        }
        return render(request, template, context)
models.py
class Store(TimeStampedModel):
    """A store with its descriptive text and its many related collections."""

    # --- classification -------------------------------------------------
    categories = models.ManyToManyField(Category, blank=True)
    price_range = models.ManyToManyField(Price, blank=True)
    businessName = models.CharField(
        unique=True, max_length=40, verbose_name='Business Name'
    )
    origin = models.ForeignKey(
        Origin, null=True, on_delete=models.CASCADE, blank=True
    )
    ship_to = models.ManyToManyField(ShipTo, blank=True)
    top_keywords = models.ManyToManyField(
        Keyword, blank=True, related_name='store_top_keywords'
    )
    sub_keywords = models.ManyToManyField(
        SubKeyword, blank=True, related_name='store_sub_keywords'
    )

    # --- store-to-store relations (non-symmetrical, via through models) --
    sponsored_stores = models.ManyToManyField(
        'self',
        through='Sponsorship',
        symmetrical=False,
        related_name='sponsored_store_of_store',
    )
    similar_stores = models.ManyToManyField(
        'self',
        through='Similarity',
        symmetrical=False,
        related_name='similar_store_of_store',
    )

    # --- descriptive text -------------------------------------------------
    shortDesc = models.TextField(blank=True, verbose_name='Short Description')
    longDesc = models.TextField(blank=True, verbose_name='Long Description')
    returnPol = models.TextField(verbose_name='Return Policy', blank=True)
    returnUrl = models.CharField(
        max_length=255, null=True, blank=True, verbose_name='Return Policy URL'
    )

    # --- user links -------------------------------------------------------
    likes = models.ManyToManyField(
        settings.AUTH_USER_MODEL, blank=True, editable=False
    )
    created_by = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        editable=False,
        on_delete=models.CASCADE,
        related_name='stores_of_created_by',
        null=True,
        blank=True,
    )
    updated_by = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        editable=False,
        on_delete=models.CASCADE,
        related_name='stores_of_updated_by',
        null=True,
        blank=True,
    )

I really wouldn't advise custom caching/performance optimisation, unless it's a very last resort. Django has great docs on querysets and optimisation - if you follow those, it should be rare for you to experience major performance issues that require custom workarounds.
I think the issue here is that you're printing your objects in a template and hence calling their str() method. There's nothing wrong with this, but I'd check what variables you're using in your str() methods. I suspect you're referencing other models? I.e. the str() method in your image model (or whatever) is doing something like image.field.other_field. In this case, your query should look like:
queryset = Store.objects.prefetch_related('image_set__field')
Your final queryset may look like:
queryset = Store.objects.prefetch_related('image_set__field1', 'image_set__field2', 'top_keywords__field3', ...)
Note that you can still pass this into get_object_or_404 like so:
get_object_or_404(queryset, pk=<your_stores_id>)
Hope this helps.

Navigating many2many relationships in both directions

I'm trying to understand how Django returns columns from foreign keys, particularly the m2m situation, easy in SQL but I'm trying to get into Django.
In this example I have 3 models, Sample which has a m2m with Container
and Location which has a 1-to-many with Container.
Scenario 1a: From the Sample table get the Containers that sample is in(return sample_number and container_name).
Scenario 1b: From the Container get the related Samples (return container_number and sample_number).
Scenario 2a: From the Location model get the containers (location_name and container_names).
Scenario 2b: From the Container model get the location (Container_name and location_name).
Hopefully this will serve as a good overall reference for others.
# models.py
class Location(models.Model):
    """A named location; Container rows point at it."""

    location_id = models.AutoField(primary_key=True)
    location_name = models.CharField(max_length=100, blank=True, null=True)
class Sample(models.Model):
    """A numbered sample; reachable from Container through Container.samples."""

    sample_id = models.AutoField(primary_key=True)
    sample_number = models.IntegerField()
class Container(models.Model):  # like a friend
    """A container at one Location, holding Samples via ContainerSamples."""

    container_id = models.AutoField(primary_key=True)
    container_name = models.CharField(max_length=50, blank=True, null=True)
    # The reverse accessor on Location is named 'location' (the related_name).
    location_id = models.ForeignKey(
        Location,
        db_column='location_id',
        on_delete=models.PROTECT,
        related_name='location',
    )
    samples = models.ManyToManyField(
        'Sample', through='ContainerSamples', related_name='containers'
    )
# views.py - Implements a filter
def detailcontainer(request, container_id):
    """Render one container with its samples and the full sample list.

    Responds with 404 when no Container has the given primary key.
    """
    container = get_object_or_404(Container, pk=container_id)
    context = {
        'queryset': Sample.objects.all(),
        'container': container,
        'container_contents': container.samples.all(),
        # NOTE(review): despite the key name this is every Sample, not only
        # the ones without a container.
        'unassigned_samples': Sample.objects.all(),
    }
    return render(request, 'container/detailcontainer.html', context)
# templates
{# Template from the question: section 1a is filled in; the ____________ blanks in 1b, 2a and 2b mark the lookups being asked about. #}
{% for unassigned in unassigned_samples %}
# 1a [solved]
{% for unassigned in unassigned_samples %}
{{ unassigned.sample_number }}
{% for container in unassigned.containers.all %}
{{ container.location_id }}.{{ container.container_name }}
{% endfor %}
{% endfor %}
# 1b
{{ unassigned.____________ }} # the container_name
{{ unassigned.____________ }} # the related samples (sample_number)
# 2a
{{ unassigned.____________ }} # the location_name
{{ unassigned.____________ }} # the related container names (container_name)
# 2b
{{ unassigned.____________ }} # the container_name
{{ unassigned.____________ }} # the location_name
{% endfor %}

Scenario 1a: From the Sample table get the Containers that sample is in(return sample_number and container_name).
# Sample -> Containers, via the related_name 'containers' on Container.samples.
container_set = sample.containers.all()
for container in container_set:
    # Sample has no sample_name field; its number is sample_number.
    print([container.container_name, sample.sample_number])
Scenario 1b: From the Container get the related Samples (return container_number and sample_number).
# Container -> Samples, via the many-to-many field Container.samples.
sample_set = container.samples.all()
for sample in sample_set:
    # Container has no container_number field; container_name is used here.
    print([sample.sample_number, container.container_name])
Scenario 2a: From the Location model get the containers (location_name and container_names).
# Location -> Containers. The reverse accessor is called 'location' because
# Container.location_id declares related_name='location'.
for container in location.location.all():
    print([location.location_name, container.container_name])
Scenario 2b: From the Container model get the location (Container_name and location_name).
# Container -> Location: location_id is the ForeignKey, so it yields the
# related Location instance.
location = container.location_id
print([container.container_name, location.location_name])
Also your models should be written as follows:
# models.py
class Location(models.Model):
    """A named location; relies on Django's implicit primary key."""

    name = models.CharField(max_length=100, blank=True, null=True)
class Sample(models.Model):
    """A numbered sample; relies on Django's implicit primary key."""

    number = models.IntegerField()
class Container(models.Model):  # like a friend
    """A container at one Location, holding Samples via ContainerSamples."""

    # NOTE(review): the other two models in this listing dropped their
    # explicit AutoField; confirm whether this one is still wanted.
    container = models.AutoField(primary_key=True)
    name = models.CharField(max_length=50, blank=True, null=True)
    location = models.ForeignKey(
        Location, on_delete=models.PROTECT, related_name='containers'
    )
    samples = models.ManyToManyField(
        Sample, through='ContainerSamples', related_name='containers'
    )
If you're using the same definition for the AutoField on every model, there's no reason to add it. When you're defining properties on a model, such as a name, you shouldn't prefix them with the model name; it should be understood that it's the model's name. And finally, related_name is the name of the attribute on the referenced model that lets the relationship be traversed back to the current model.

How can I increase page load speed?

I need to have multilingual site. For this purpose I wrote django module, which collects lots of info about countries, cities and their translations to almost all languages.
Below is the short version of models of this module:
class LanguagesGroups(models.Model):
    """A group of languages (fields omitted in this short version)."""

    class Meta:
        verbose_name = 'Language Group'
class Languages(models.Model):
    """A language identified by ISO code, belonging to one LanguagesGroups."""

    iso_code = models.CharField("ISO Code", max_length=14, db_index=True)
    # related_name='group' is also the name used for reverse lookups from
    # LanguagesGroups.
    group = models.ForeignKey(
        LanguagesGroups,
        on_delete=models.CASCADE,
        verbose_name='Group of ISO',
        related_name='group',
        db_index=True,
    )
class Cities(models.Model):
    """Language-independent facts about a city."""

    population = models.IntegerField(null=True)
    territory_km2 = models.IntegerField(null=True)
class CitiesTranslations(models.Model):
    """The name of one city in one language group."""

    common_name = models.CharField(max_length=188, db_index=True)
    city = models.ForeignKey(
        Cities, on_delete=models.CASCADE, verbose_name='Details of City'
    )
    lang_group = models.ForeignKey(
        LanguagesGroups,
        on_delete=models.CASCADE,
        verbose_name='Language of city',
        null=True,
    )

    class Meta:
        # Composite indexes on the two column pairs listed here.
        index_together = (
            ['common_name', 'city'],
            ['city', 'lang_group'],
        )
I want to show to users some data about places which user requested with translated versions of cities (depending on user settings):
class Profile(models.Model):
    """A place profile with a title, free-text info and an optional city."""

    title = models.CharField(_('title'), max_length=120)
    info = models.TextField(_('information'), max_length=1500, blank=True)
    # on_delete is spelled out like on every other ForeignKey in this
    # listing; CASCADE is what Django applied implicitly before the argument
    # became mandatory.
    city = models.ForeignKey(
        Cities,
        verbose_name=_('city'),
        null=True,
        blank=True,
        on_delete=models.CASCADE,
    )

    def get_city(self):
        """Return this profile's city name in the active language.

        Runs two queries per call: one for the language group and one for
        the translation. Either ``.get()`` raises if no matching row exists.
        """
        user_lang = get_language()  # en
        lang_group = Languages.objects.get(iso_code=user_lang).group  # 1823
        return CitiesTranslations.objects.get(
            city=self.city, lang_group=lang_group
        ).common_name
template.html
{# item.get_city is a model method call, so it is evaluated once for every item in the list. #}
{% for item in object_list %}
{{ item.title }}
{{ item.get_city }}
{{ item.info }}
{% endfor %}
When I add {{ item.get_city }}, in case of pagination and just 25 items per page, the page load speed goes down up to 18 times and amount of queries (according to django-debug-tool) goes up from 2 to 102. django-debug-tool tells me about 25 duplications.
How can I fix this slowness?
EDIT:
My view
class ProfileListView(ListView):
    """Paginated list of Profile objects, 25 per page."""

    model = Profile
    paginate_by = 25
    context_object_name = 'places_list'
    template_name = 'profiles/profiles_list.html'

First of all, if you want speed - you should try caching.
You can also optimize your query.
def get_city(self):
    """Return the city's name in the active language using a single query."""
    # The language lookup is folded into the filter: it follows lang_group
    # and then the reverse relation named 'group' to match the ISO code.
    translation = CitiesTranslations.objects.get(
        city=self.city_id,
        lang_group__group__iso_code=get_language(),
    )
    return translation.common_name
What you probably also want is to get all your stuff in batches, not with individual method calls. Assuming we have your object_list:
# Resolve every translation on the page with one query instead of one per row.
user_lang = get_language()
# Profile.city is nullable, so skip rows without a city; the set also drops
# duplicate ids.
city_ids = {obj.city_id for obj in object_list if obj.city_id is not None}
city_translations = dict(
    CitiesTranslations.objects.filter(
        city__in=city_ids, lang_group__group__iso_code=user_lang
    ).values_list('city_id', 'common_name')
)
for obj in object_list:
    # Fall back to an empty name rather than raising KeyError when a city
    # has no translation in the active language.
    obj.city_name = city_translations.get(obj.city_id, '')
You can put this code somewhere in your view. You will also have to change {{ item.get_city }} to {{ item.city_name }} in the templates.

How to reduce DB queries?

Models:
class Technology(models.Model):
    """A technology with a unique name and a unique slug."""

    name = models.CharField(unique=True, max_length=100)
    slug = models.SlugField(unique=True, max_length=100)
class Site(models.Model):
    """A portfolio site and the technologies it was built with."""

    name = models.CharField(max_length=100, unique=True)
    slug = models.SlugField(max_length=100, unique=True)
    # null has no effect on a ManyToManyField, so only blank=True is kept.
    technology = models.ManyToManyField(Technology, blank=True)
Views:
def portfolio(request, page=1):
    """Render the portfolio with every site and its technologies.

    ``page`` is accepted for URL compatibility but is not used here.
    """
    # select_related() only follows foreign-key and one-to-one relations.
    # A many-to-many such as Site.technology needs prefetch_related(), which
    # loads the technologies for all sites in one additional query.
    sites_list = Site.objects.prefetch_related('technology').only('name', 'slug')
    return render_to_response(
        'portfolio.html',
        {'sites': sites_list},
        context_instance=RequestContext(request),
    )
Template:
{# Prints each site's name followed by the names of its technologies. #}
{% for site in sites %}
<div>
{{ site.name }},
{% for tech in site.technology.all %}
{{ tech.name }}
{% endfor %}
</div>
{% endfor %}
But in that example each site makes 1 additional query to get technology list. Is there any way to make it in 1 query somehow?

What you are looking for is an efficient way to do reverse foreign-key lookups. A generic approach is:
# Attach to every MyRelatedObject the list of MyObject rows that point at it.
qs = MyRelatedObject.objects.all()
obj_dict = {obj.id: obj for obj in qs}
objects = MyObject.objects.filter(myrelatedobj__in=qs)
# Group by the same foreign key the filter uses (myrelatedobj), so the keys
# line up with obj_dict.
relation_dict = {}
for obj in objects:
    relation_dict.setdefault(obj.myrelatedobj_id, []).append(obj)
for related_id, related_items in relation_dict.items():
    obj_dict[related_id].related_items = related_items
I wrote a blogpost about this a while ago, you can find more info here: http://bit.ly/ge59D2

How about:
Using Django's session framework; load list request.session['lstTechnology'] = listOfTechnology on startup. And use session in rest of the app.

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

Optimise django query when fetching information from multiple models - django

Related

Django: want to loop through _set for pk values only

How to query in Django with best efficiency?

Navigating many2many relationships in both directions

How can I increase page load speed?

How to reduce DB queries?

Categories

Resources