class Parent(models.Model):
# some fields
class Child(models.Model):
parent = models.ForeginKey(Parent)
species = models.ForeignKey(Species)
# other fields
I have a function like this:
1. def some_function(unique_id):
2. parent_object = Parent.objects.get(unique_id=unique_id)
3. new_child = Child.objects.create(name='Joe', parent=parent_object)
4. call_some_func(new_child.parent.name, new_child.species.name)
In the line 4, a db query is generated for Species. Is there any way, I can use select_related to prefetch the Species, so as to prevent extra query.
Can it be done while I use .create(). This is just an example , I am using many other fields too and they are querying the DB every time.
The only way I can think is after line 3 using this code:
child_obj = Child.objects.select_related('species').get(id=new_child.id)
The only parameter that create accepts is force_insert which is not related to what you're asking, so it seems it's not possible. Also, noticing that create performs an INSERT ... RETURNING ... statement, I don't think it would be possible anyway because you cannot return columns from other tables.
Possibly the best approach is what you already suggested: do a get() afterwards with the related fields you need.
Related
I have two models:
Model A is an AbstractUserModel and Model B
class ModelB:
user = ForeignKey(User, related_name='modelsb')
timestamp = DateTimeField(auto_now_add=True)
What I want to find is how many users have at least one ModelB object created at least in 3 of the 7 past days.
So far, I have found a way to do it but I know for sure there is a better one and that is why I am posting this question.
I basically split the query into 2 parts.
Part1:
I added a foo method inside the User Model that checks if a user meets the above conditions
def foo(self):
past_limit = starting_date - timedelta(days=7)
return self.modelsb.filter(timestamp__gte=past_limit).order_by('timestamp__day').distinct('timestamp__day').count() > 2
Part 2:
In the Custom User Manager, I find the users that have more than 2 modelsb objects in the last 7 days and iterate through them applying the foo method for each one of them.
By doing this I narrow down the iterations of the required for loop. (basically its a filter function but you get the point)
def boo(self):
past_limit = timezone.now() - timedelta(days=7)
candidates = super().get_queryset().annotate(rc=Count('modelsb', filter=Q(modelsb__timestamp__gte=past_limit))).filter(rc__gt=2)
return list(filter(lambda x: x.foo(), candidates))
However, I want to know if there is a more efficient way to do this, that is without the for loop.
You can use conditional annotation.
I haven't been able to test this query, but something like this should work:
from django.db.models import Q, Count
past_limit = starting_date - timedelta(days=7)
users = User.objects.annotate(
modelsb_in_last_seven_days=Count('modelsb__timestap__day',
filter=Q(modelsb__timestamp__gte=past_limit),
distinct=True))
.filter(modelsb_in_last_seven_days__gte = 3)
EDIT:
This solution did not work, because the distinct option does specify what field makes an entry distinct.
I did some experimenting on my own Django instance, and found a way to make this work using SubQuery. The way this works is that we generate a subquery where we make the distinction ourself.
counted_modelb = ModelB.objects
.filter(user=OuterRef('pk'), timestamp__gte=past_limit)
.values('timestamp__day')
.distinct()
.annotate(count=Count('timestamp__day'))
.values('count')
query = User.objects
.annotate(modelsb_in_last_seven_days=Subquery(counted_modelb, output_field=IntegerField()))
.filter(modelsb_in_last_seven_days__gt = 2)
This annotates each row in the queryset with the count of all distinct days in modelb for the user, with a date greater than the selected day.
In the subquery I use values('timestamp__day') to make sure I can do distinct() (Because a combination of distinct('timestamp__day') and annotate() is unsupported.)
I have a model that has arbitrary key/value pairs (attributes) associated with it. I'd like to have the option of sorting by those dynamic attributes. Here's what I came up with:
class Item(models.Model):
pass
class Attribute(models.Model):
item = models.ForeignKey(Item, related_name='attributes')
key = models.CharField()
value = models.CharField()
def get_sorted_items():
return Item.objects.all().annotate(
first=models.select_attribute('first'),
second=models.select_attribute('second'),
).order_by('first', 'second')
def select_attribute(attribute):
return expressions.RawSQL("""
select app_attribute.value from app_attribute
where app_attribute.item_id = app_item.id
and app_attribute.key = %s""", (attribute,))
This works, but it has a bit of raw SQL in it, so it makes my co-workers wary. Is it possible to do this without raw SQL? Can I make use of Django's ORM to simplify this?
I would expect something like this to work, but it doesn't:
def get_sorted_items():
return Item.objects.all().annotate(
first=Attribute.objects.filter(key='first').values('value'),
second=Attribute.objects.filter(key='second').values('value'),
).order_by('first', 'second')
Approach 1
Using Djagno 1.8+ Conditional Expressions
(see also Query Expressions)
items = Item.objects.all().annotate(
first=models.Case(models.When(attribute__key='first', then=models.F('attribute__value')), default=models.Value('')),
second=models.Case(models.When(attribute__key='second', then=models.F('attribute__value')), default=models.Value(''))
).distinct()
for item in items:
print item.first, item.second
Approach 2
Using prefetch_related with custom models.Prefetch object
keys = ['first', 'second']
items = Item.objects.all().prefetch_related(
models.Prefetch('attributes',
queryset=Attribute.objects.filter(key__in=keys),
to_attr='prefetched_attrs'),
)
This way every item from the queryset will contain a list under the .prefetched_attrs attribute.
This list will contains all filtered-item-related attributes.
Now, because you want to get the attribute.value, you can implement something like this:
class Item(models.Model):
#...
def get_attribute(self, key, default=None):
try:
return next((attr.value for attr in self.prefetched_attrs if attr.key == key), default)
except AttributeError:
raise AttributeError('You didnt prefetch any attributes')
#and the usage will be:
for item in items:
print item.get_attribute('first'), item.get_attribute('second')
Some notes about the differences in using both approaches.
you have a one idea better control over the filtering process using the approach with the custom Prefetch object. The conditional-expressions approach is one idea harder to be optimized IMHO.
with prefetch_related you get the whole attribute object, not just the value you are interested in.
Django executes prefetch_related after the queryset is being evaluated, which means a second query is being executed for each clause in the prefetch_related call. On one way this can be good, because it this keeps the main queryset untouched from the filters and thus not additional clauses like .distinct() are needed.
prefetch_related always put the returned objects into a list, its not very convenient to use when you have prefetchs returning 1 element per object. So additional model methods are required in order to use with pleasure.
I'd like to update multiple integer fields at once in following model.
class Foo(models.Model):
field_a = models.PositiveIntegerField()
field_b = models.PositiveIntegerField()
field_c = models.PositiveIntegerField()
Originally, it can be done like following code with two queries.
foo = Foo.objects.get(id=1)
foo.field_a += 1
foo.field_b -= 1
foo.field_c += 2
foo.save()
I'd like make it more simpler with update in one query.
However, following attempts raised error.
# 1st attempt
Foo.objects.filter(id=1).update(
field_a=F('field_a')+1,
field_b=F('field_a')-1,
field_c=F('field_a')+2)
# 2nd attempt
Foo.objects.filter(id=1).\
update(field_a=F('field_a')+1).\
update(field_b=F('field_b')-1) ).\
update(field_c=F('field_c')+2)
How can I solve this ?
Form the django docs:
Calls to update can also use F expressions to update one field based on the value of another field in the model. This is especially useful for incrementing counters based upon their current value. For example, to increment the pingback count for every entry in the blog:
>>> from django.db.models import F
>>> Entry.objects.all().update(n_pingbacks=F('n_pingbacks') + 1)
You have to have an instance of Foo or a queryset before you can update. You should do something like this:
Foo.objects.get(id=1)update(field_a=F('field_a')+1).\
update(field_b=F('field_b')-1) ).\
update(field_c=F('field_c')+2)
or
Foo.objects.filter(id__in=[1,3,6,7]).update(field_a=F('field_a')+1).\
update(field_b=F('field_b')-1) ).\
update(field_c=F('field_c')+2)
Reference: https://docs.djangoproject.com/en/1.8/topics/db/queries/#updating-multiple-objects-at-once
If save() is passed a list of field names in keyword argument update_fields, only the fields named in that list will be updated. This may be desirable if you want to update just one or a few fields on an object. There will be a slight performance benefit from preventing all of the model fields from being updated in the database. For example:
product.name = 'Name changed again'
product.save(update_fields=['name'])
see more docs [here]:https://docs.djangoproject.com/en/dev/ref/models/instances/#specifying-which-fields-to-save
I have a really strange problem with Django 1.4.4.
I have this model :
class LogQuarter(models.Model):
timestamp = models.DateTimeField()
domain = models.CharField(max_length=253)
attempts = models.IntegerField()
success = models.IntegerField()
queue = models.IntegerField()
...
I need to gather the first 20 domains with the higher sent property. The sent property is attempts - queue.
This is my request:
obj = LogQuarter.objects\
.aggregate(Sum(F('attempts')-F('queue')))\
.values('domain')\
.filter(**kwargs)\
.order_by('-sent')[:20]
I tried with extra too and it isn't working.
It's really basic SQL, I am surprised that Django can't do this.
Did someone has a solution ?
You can actually do this via subclassing some of the aggregation functionality. This requires digging in to the code to really understand, but here's what I coded up to do something similar for MAX and MIN. (Note: this code is based of Django 1.4 / MySQL).
Start by subclassing the underlying aggregation class and overriding the as_sql method. This method writes the actual SQL to the database query. We have to make sure to quote the field that gets passed in correctly and associate it with the proper table name.
from django.db.models.sql import aggregates
class SqlCalculatedSum(aggregates.Aggregate):
sql_function = 'SUM'
sql_template = '%(function)s(%(field)s - %(other_field)s)'
def as_sql(self, qn, connection):
# self.col is currently a tuple, where the first item is the table name and
# the second item is the primary column name. Assuming our calculation is
# on two fields in the same table, we can use that to our advantage. qn is
# underlying DB quoting object and quotes things appropriately. The column
# entry in the self.extra var is the actual database column name for the
# secondary column.
self.extra['other_field'] = '.'.join(
[qn(c) for c in (self.col[0], self.extra['column'])])
return super(SqlCalculatedSum, self).as_sql(qn, connection)
Next, subclass the general model aggregation class and override the add_to_query method. This method is what determines how the aggregate gets added to the underlying query object. We want to be able to pass in the field name (e.g. queue) but get the corresponding DB column name (in case it is something different).
from django.db import models
class CalculatedSum(models.Aggregate):
name = SqlCalculatedSum
def add_to_query(self, query, alias, col, source, is_summary):
# Utilize the fact that self.extra is set to all of the extra kwargs passed
# in on initialization. We want to get the corresponding database column
# name for whatever field we pass in to the "variable" kwarg.
self.extra['column'] = query.model._meta.get_field(
self.extra['variable']).db_column
query.aggregates[alias] = self.name(
col, source=source, is_summary=is_summary, **self.extra)
You can then use your new class in an annotation like this:
queryset.annotate(calc_attempts=CalculatedSum('attempts', variable='queue'))
Assuming your attempts and queue fields have those same db column names, this should generate SQL similar to the following:
SELECT SUM(`LogQuarter`.`attempts` - `LogQuarter`.`queue`) AS calc_attempts
And there you go.
I am not sure if you can do this Sum(F('attempts')-F('queue')). It should throw an error in the first place. I guess, easier approach would be to use extra.
result = LogQuarter.objects.extra(select={'sent':'(attempts-queue)'}, order_by=['-sent'])[:20]
Suppose I have something like this in my models.py:
class Hipster(models.Model):
name = CharField(max_length=50)
class Party(models.Model):
organiser = models.ForeignKey()
participants = models.ManyToManyField(Profile, related_name="participants")
Now in my views.py I would like to do a query which would fetch a party for the user where there are more than 0 participants.
Something like this maybe:
user = Hipster.get(pk=1)
hip_parties = Party.objects.filter(organiser=user, len(participants) > 0)
What's the best way of doing it?
If this works this is how I would do it.
Best way can mean a lot of things: best performance, most maintainable, etc. Therefore I will not say this is the best way, but I like to stick to the ORM features as much as possible since it seems more maintainable.
from django.db.models import Count
user = Hipster.objects.get(pk=1)
hip_parties = (Party.objects.annotate(num_participants=Count('participants'))
.filter(organiser=user, num_participants__gt=0))
Party.objects.filter(organizer=user, participants__isnull=False)
Party.objects.filter(organizer=user, participants=None)
Easier with exclude:
# organized by user and has more than 0 participants
Party.objects.filter(organizer=user).exclude(participants=None)
Also returns distinct results
Derived from #Yuji-'Tomita'-Tomita answer, I've also added .distinct('id') to exclude the duplitate records:
Party.objects.filter(organizer=user, participants__isnull=False).distinct('id')
Therefore, each party is listed only once.
I use the following method when trying to return a queryset having at least one object in a manytomany field:
First, return all the possible manytomany objects:
profiles = Profile.objects.all()
Next, filter the model by returning only the queryset containing at least one of the profiles:
hid_parties = Party.objects.filter(profiles__in=profiles)
To do the above in a single line:
hid_parties = Party.objects.filter(profiles__in=Profile.objects.all())
You can further refine individual querysets the normal way for more specific filtering.
NOTE:This may not be the most effective way, but at least it works for me.