Django query set object query taking too much time?

Django query set object query taking too much time? - django

I am using a Job model to fetch related job data; the model also has relationships with other models. For one-to-one and foreign-key relations I have used select_related() and passed the related models to it, and for many-to-many relations I have used prefetch_related() and passed the related models to it. I use this queryset twice on the page, filtered by different conditions. Everything works fine, but the first job queryset takes a long time while the second one performs well. I can't understand what I have missed or how to resolve it. Please help if anyone understands.
Model queryset
jobs = Job.objects.filter(
(Q(job_status__job_status_description='Booked') |
Q(job_status__job_status_description='Allocated') |
Q(job_status__job_is_done=True)) &
Q(completed_date__gte=start.strftime("%Y-%m-%d"), completed_date__lte=week_end_date.strftime("%Y-%m-%d")) |
Q(allocated_date__gte=start.strftime("%Y-%m-%d"), allocated_date__lte=week_end_date.strftime("%Y-%m-%d")),
functools.reduce(operator.and_, jobs_q_condition)
).select_related(
'customer',
'job_status',
'customer__book_with',
'customer__frequency'
).prefetch_related(
'customer__booking_road__area__franchise',
'customer__booking_road__area'
).annotate(
job_id=F('id'),
job_window_cleaner=Concat(
'window_cleaner__user__first_name',
Value(' '),
'window_cleaner__user__last_name'
),
job_window_cleaner_booking_road=Concat(
'customer__booking_road__area__default_cleaner__user__first_name',
Value(' '),
'customer__booking_road__area__default_cleaner__user__last_name'),
job_window_cleaner_id1=F('window_cleaner__user__id'),
job_window_cleaner_id2=F('customer__booking_road__area__default_cleaner__user__id'),
address=F('customer__address_line_1'),
wc_wants=F('customer__booking_road__area__default_cleaner__requested_turnover'),
action_on_check_in_str=F('action_on_check_in'),
booking_info_str=F('customer__booking_info'),
book_with_str=F('customer__book_with__book_with'),
is_job_completed = F('job_status__job_is_done'),
job_status_str = F('job_status__job_status_description'),
area_str=F('customer__booking_road__area__area'),
booking_road_str=F('customer__booking_road__booking_road'),
price_str=Coalesce('price_on_day', 'set_price'),
frequency_text_str=F('customer__frequency__frequency_text'),
due_date_str=F('due_date'),
allocated_date_str=F('allocated_date'),
completed_date_str=F('completed_date')
).order_by(
'job_window_cleaner','area_str','booking_road_str','due_date'
)[:20]
due_jobs = Job.objects.filter(
functools.reduce(operator.and_, due_jobs_q_condition)
).select_related(
'customer',
'job_status',
'customer__book_with',
'customer__frequency'
).prefetch_related(
'customer__booking_road__area__franchise',
'customer__booking_road__area'
).annotate(
job_id=F('id'),
job_window_cleaner=Concat(
'customer__booking_road__area__default_cleaner__user__first_name',
Value(' '),
'customer__booking_road__area__default_cleaner__user__last_name'),
job_window_cleaner_id=F('customer__booking_road__area__default_cleaner__user__id'),
address=F('customer__address_line_1'),
wc_wants=F('customer__booking_road__area__default_cleaner__requested_turnover'),
action_on_check_in_str=F('action_on_check_in'),
booking_info_str=F('customer__booking_info'),
book_with_str=F('customer__book_with__book_with'),
is_job_completed = F('job_status__job_is_done'),
job_status_str = F('job_status__job_status_description'),
area_str=F('customer__booking_road__area__area'),
booking_road_str=F('customer__booking_road__booking_road'),
price_str=Coalesce('price_on_day', 'set_price'),
frequency_text_str=F('customer__frequency__frequency_text'),
due_date_str=F('due_date'),
).order_by(
'job_window_cleaner','area_str','booking_road_str','due_date'
)[:20]

Related

Django QS filter by nested annotated field

I have two Django models:
class User(models.Model):
first_name = ...
last_name = ...
class Book(models.Model):
authors = models.ManyToManyField(User)
Now I want to add filtering Books by their author's full name (first_name + last_name). I've added FilterSet with following logic:
qs_prefetched = queryset.prefetch_related(
Prefetch("authors", User.objects.annotate(full_name=Concat('first_name', Value(' '), 'last_name')))
).all().filter(authors__full_name__icontains=value)
And when I try to filter in that way I have following error:
django.core.exceptions.FieldError: Related Field got invalid lookup: full_name
But field full_name is in authors values:
(Pdb) qs_prefetched[1].authors.all()[0].full_name
'Ke Xu'
Can anybody tell what I'm missing?
Thanks in advance.

Your annotation is on the prefetched queryset. You need to annotate the base queryset as well in order to filter it as desired.
Something like this:
qs_prefetched = queryset.prefetch_related(
Prefetch("authors", User.objects.annotate(full_name=Concat('first_name', Value(' '), 'last_name')))
).annotate(
calc_full_name=Concat('authors__first_name', Value(' '), 'authors__last_name')
).filter(calc_full_name__icontains=value)
Though admittedly, I'm skeptical whether the above will work exactly as you want, because it's trying to annotate across a many-to-many relationship.
Another option would be to use a Subquery and an Exists filter, in which you look up the users who match the name and then filter on an exists check.
user_subquery = User.objects.annotate(full_name=Concat('first_name', Value(' '), 'last_name')).filter(
full_name__icontains=value,
id=OuterRef('author_id'), # This depends on your models.
)
qs_prefetched = queryset.filter(Exists(user_subquery)).prefetch_related(
Prefetch("authors", User.objects.annotate(full_name=Concat('first_name', Value(' '), 'last_name')))
)

Django ORM query with exclude not working properly

I have the Django ORM query below, which is meant to exclude products having a sale_price of 0 (zero).
selected_attr_values = ProductAttribValue.objects.filter(
product__status_id=1,
product_id__in=product_attributes_values.values_list('product_id', flat=True).distinct()
).exclude(
product__sale_price = 0,
ifield_value = '',
field_value__isnull=False
).distinct(
"field_value",
'field_id'
).values(
'field_value',
'product_id',
'field__caption',
'field_id',
'id'
)
The above query does not exclude products having a sale_price of 0.
But after updating the query as shown below:
selected_attr_values = ProductAttribValue.objects.filter(
product__status_id=1,
product_id__in=product_attributes_values.values_list('product_id', flat=True).distinct()
).exclude(
field_value='',
field_value__isnull=False
).distinct(
"field_value",
'field_id'
).exclude(
product__sale_price=0
).values(
'field_value',
'product_id',
'field__caption',
'field_id',
'id'
)
it works fine.
So my question is why do I need to call exclude 2 times to get desired output.
Thanks.

By default, Django joins multiple conditions with the AND operator. Your query will only exclude rows with product__sale_price=0 AND field_value='' AND field_value__isnull=False. If you want the OR operator between your conditions, you have to use Q.
from django.db.models import Q
...exclude(Q(product__sale_price=0) | Q(field_value='') | Q(field_value__isnull=False))

Filtering Django Simple-History by created/creator

I've created a simple Django data model that is using Django Simple-History for auditing:
from django.db import models
from simple_history.models import HistoricalRecords
class AuditedModel(models.Model):
history = HistoricalRecords(inherit=True)
In the interest of DRY, I'm trying to leverage Simple-History's history_date & history_user attributes in place of adding created_at and created_by attributes. I've added a couple of properties to simplify accessing the history for these attributes as follows:
@property
def created_date(self):
return self.history.earliest().history_date
@property
def created_by(self):
return self.history.earliest().history_user
This works fine when I'm working with an instance. Now, I'm running into issues when I try to filter querysets by history_date or history_user. For example, I can filter for objects created in the last 24hrs by doing this:
queryset = AuditedModel.objects.all()
queryset.filter(
uuid__in=AuditedModel.history.annotate(
created_date=Min('history_date'),
).filter(
created_date__gte=timezone.now() - relativedelta(days=1)
).values_list(
'uuid', flat=True
)
)
But, I'm unable to figure out how to filter the AuditedModel by more than one attribute. Ultimately, I'd like to get a queryset with new instances that were created by a specific user.
Something like:
queryset.filter(
uuid__in=AuditedModel.history.annotate(
created_date=Min('history_date'),
).filter(
created_date__gte=timezone.now() - relativedelta(days=1)
).values_list(
'uuid', flat=True
),
history__history_user=request.user
)
This doesn't work as history can't be resolved, but it illustrates (I hope) what I'm trying to get at.
Has anyone out there used Simple History in this way, and could they maybe give me a push in the right direction to resolve this? At this point, I'm debating replacing my created_by and created_at properties with proper fields as I can't see an alternative but that feels wrong.

Of course, I realised how simple it was shortly after posting my question to StackOverflow.
In case anyone wants to see what worked for me:
queryset.filter(
uuid__in=AuditedModel.history.annotate(
created_date=Min('history_date'),
).filter(
created_date__gte=timezone.now() - relativedelta(days=1),
history_user=request.user # Filter by user here!
).values_list(
'uuid', flat=True
)
)

QuerySet Optimisations in Django

I was just wondering, I have the following two pseudo-related queries:
organisation = Organisation.objects.get(pk=org_id)
employees = Employee.objects.filter(organisation=organisation).filter(is_active=True)
Each Employee has a ForeignKey relationship with Organisation.
I was wondering if there is anything I can leverage to do the above in one Query in the native Django ORM?
Also, would:
employees = Employee.objects.filter(organisation__id=organisation.id).filter(is_active=True)
Be a quicker way to fetch employees?
For Willem's reference, employees is then used as:
# Before constructing **parameters, it is necessary to filter out any superfluous key, value pairs that do not correspond to model attributes:
if len(request.GET.getlist('gender[]')) > 0:
parameters['gender__in'] = request.GET.getlist('gender[]')
employees = employees.filter(**parameters)
if len(request.GET.getlist('age_group[]')) > 0:
parameters['age_group__in'] = request.GET.getlist('age_group[]')
employees = employees.filter(**parameters)
results = SurveyResult.objects.filter(
user__in=employees,
created_date__range=date_range,
).annotate(
date=TruncDate('created_date'),
).values(
'survey',
'date',
).annotate(
score=Sum('normalized_score'),
participants=Count('user'),
).order_by(
'survey',
'date',
)
I omitted this as it seemed like unnecessary complications to my original goal.

Also, would:
employees = Employee.objects.filter(organisation__id=organisation.id).filter(is_active=True)
Be a quicker way to fetch employees?
No, or perhaps marginally, since that is in essence what the Django ORM will do itself: it will simply obtain the primary key of the organisation and then make a query like the one you describe.
If you do not need the organisation itself, you can query with:
employees = Employee.objects.filter(organisation_id=org_pk, is_active=True)
Furthermore, you can for example perform a .select_related(..) [Django-doc] on the organisation, to load the data of the organisation in the same query as that of the employee, although saving one extra query usually does not make that much of a difference. Performance is more of an issue if it results in N+1 queries.
We can for example "piggyback" fetching the Organisation details with fetching the employees, like:
employees = list(
Employee.objects.select_related('organization').filter(
organisation_id=org_pk, is_active=True
)
)
if employees: # at least one employee
organization = employees[0].organization
But anyway, as said before, the difference between one or two queries is not that much. It is usually more of a problem if you have N+1 queries. It is a bit of a pity that Django/Python does not seem to have a Haxl [GitHub] equivalent, to enable fast retrieval of (remote) resources through algebraic analysis.
In case you are interested in the Employee survey results, you can query with:
results = SurveyResult.objects.filter(
user__organization_id=org_pk,
created_date__range=date_range,
).annotate(
date=TruncDate('created_date'),
).values(
'survey',
'date',
).annotate(
score=Sum('normalized_score'),
participants=Count('user'),
).order_by(
'survey',
'date',
)
You can thus omit a separate querying of Employees if you do not need these anyway.
You can furthermore add the filters to your query, like:
emp_filter = {}
genders = request.GET.getlist('gender[]')
if genders:
emp_filter['user__gender__in'] = genders
age_groups = request.GET.getlist('age_group[]')
if age_groups:
emp_filter['user__age_group__in'] = age_groups
results = SurveyResult.objects.filter(
user__organization_id=org_pk,
created_date__range=date_range,
**emp_filter
).annotate(
date=TruncDate('created_date'),
).values(
'survey',
'date',
).annotate(
score=Sum('normalized_score'),
participants=Count('user'),
).order_by(
'survey',
'date',
)

If you have a foreign key relation between organisation and employees, then you can get the employees using select_related like this:
employees = Employee.objects.select_related('organisation').filter(is_active=True)
OR
organisation = Organisation.objects.get(pk=org_id)
employees =organisation.employee_set.all() #your_employee_model_name_set.all

Django filter by annotated field is too slow

I use DRF and I have model Motocycle, which has > 2000 objects in DB. Model has one brand. I want to search by full_name:
queryset = Motocycle.objects.prefetch_related(
"brand"
).annotate(
full_name=Concat(
'brand__title',
Value(' - '),
'title',
)
)
)
I want to filter by full_name, but query is running very slowly:
(1.156) SELECT "mp_api_motocycle"."id"...
Without filtering with pagination:
(3.980) SELECT "mp_api_motocycle"."id"...
Is there some possibility to make this query faster?

Keep your full_name annotation as a column in the database and add an index to it.
Otherwise, you are doing full table scan while calculating full_name and then sorting by it.

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

Django query set object query taking too much time? - django

Related

Django QS filter by nested annotated field

Django ORM query with exclude not working properly

Filtering Django Simple-History by created/creator

QuerySet Optimisations in Django

Django filter by annotated field is too slow

Categories

Resources