Django ORM. Joining subquery - django

I have a table that contains a list of web sites and a table with their statistics.
class Site(models.Model):
    """A web site, identified by its unique domain name."""

    domain_name = models.CharField(
        max_length=256,
        unique=True,
    )


class Stats(models.Model):
    """Statistics recorded for a site on a given date."""

    date = models.DateField()
    # on_delete is a required argument from Django 2.0 onwards; CASCADE
    # matches the implicit default of earlier versions.
    site = models.ForeignKey('Site', on_delete=models.CASCADE)
    google_pr = models.PositiveIntegerField()

    class Meta:
        # At most one statistics row per (site, date) pair.
        unique_together = ('site', 'date')
I want to see all sites together with their statistics for a specific date. If no stats record exists for that date, the result must still contain the site, just without statistics.
If I use:
Site.objects.filter(stats__date=my_date)
I will not get sites which have no records for my_date in stats table. Because in this case the SQL query will be like the following:
SELECT *
FROM site
LEFT OUTER JOIN stats ON site.id = stats.site_id
WHERE stats.date = 'my_date'
The WHERE condition excludes rows whose date is NULL, so sites without stats will not be included in the selection.
In my case I need to join the stats table after it has already been filtered by date:
SELECT *
FROM site
LEFT OUTER JOIN
(SELECT *
FROM stats
WHERE stats.date = 'my-date') AS stats
ON site.id = stats.site_id
How can I translate this query to Django ORM?
Thanks.

In Django v2.0 use FilteredRelation
from django.db.models import FilteredRelation, Q

# annotate() has to be closed before .filter() is chained onto it.
Site.objects.annotate(
    t=FilteredRelation(
        'stats', condition=Q(stats__date='my-date')
    )
).filter(t__google_pr__in=[...])

I had a similar problem and wrote the following utility function for adding left outer join on a subqueryset using Django ORM.
The util is derived from a solution given to add custom left outer join to another table (not subquery) using Django ORM. Here is that solution: https://stackoverflow.com/a/37688104/2367394
Following is the util and all related code:
from django.db.models.fields.related import ForeignObject
from django.db.models.options import Options
from django.db.models.sql.where import ExtraWhere
from django.db.models.sql.datastructures import Join
class CustomJoin(Join):
    """Join whose "table" is the SQL text of a subquery, with its own params."""

    def __init__(self, subquery, subquery_params, parent_alias, table_alias, join_type, join_field, nullable):
        # Keep the subquery's bound parameters so as_sql() can return them.
        self.subquery_params = subquery_params
        super(CustomJoin, self).__init__(
            subquery, parent_alias, table_alias, join_type, join_field, nullable)

    def as_sql(self, compiler, connection):
        """
        Build the complete join clause

            LEFT OUTER JOIN (somequery) alias ON alias.somecol = othertable.othercol

        and return it together with its parameter list.
        """
        quote_alias = compiler.quote_name_unless_alias
        quote_column = connection.ops.quote_name

        if self.table_alias == self.table_name:
            alias_suffix = ''
        else:
            alias_suffix = ' %s' % self.table_alias

        # The subquery's parameters come first because its SQL comes first.
        params = list(self.subquery_params)
        fragments = ['%s (%s)%s ON (' % (self.join_type, self.table_name, alias_suffix)]

        for position, (lhs_col, rhs_col) in enumerate(self.join_cols):
            if position:
                fragments.append(' AND ')
            fragments.append('%s.%s = %s.%s' % (
                quote_alias(self.parent_alias),
                quote_column(lhs_col),
                quote_alias(self.table_alias),
                quote_column(rhs_col),
            ))

        extra_cond = self.join_field.get_extra_restriction(
            compiler.query.where_class, self.table_alias, self.parent_alias)
        if extra_cond:
            extra_sql, extra_params = compiler.compile(extra_cond)
            params.extend(extra_params)
            fragments.append('AND (%s)' % extra_sql)

        fragments.append(')')
        return ' '.join(fragments), params
def join_to(table, subquery, table_field, subquery_field, queryset, alias):
    """
    Left-join `subquery` onto `queryset` (whose table is `table`).

    The join condition pairs `table_field` on `table` with `subquery_field`
    on the subquery, which is exposed under `alias`. The join is added to
    `queryset.query` in place and `queryset` is returned.
    """
    # here you can set complex clause for join
    def extra_join_cond(where_class, alias, related_alias):
        sys_pair = ('[sys].[columns]', '[sys].[database_permissions]')
        if (alias, related_alias) != sys_pair:
            return None
        where = '[sys].[columns].[column_id] = ' \
                '[sys].[database_permissions].[minor_id]'
        return where_class([ExtraWhere([where], ())])

    def joining_columns():
        return ((table_field, subquery_field),)

    # A stand-in relation object that only supplies what the join needs.
    join_field = ForeignObject(to=subquery, from_fields=[None], to_fields=[None], rel=None)
    join_field.opts = Options(table._meta)
    join_field.opts.model = table
    join_field.get_joining_columns = joining_columns
    join_field.get_extra_restriction = extra_join_cond

    subquery_sql, subquery_params = subquery.query.sql_with_params()
    custom_join = CustomJoin(
        subquery_sql, subquery_params, table._meta.db_table,
        alias, "LEFT JOIN", join_field, True)

    query = queryset.query
    query.join(custom_join)
    # hook for set alias
    custom_join.table_alias = alias
    query.external_aliases.add(alias)
    return queryset
join_to is the utility function you want to use. For your query you can use it in as follows:
# Stats rows for the requested date become the joined subquery.
sq = Stats.objects.filter(date=my_date)
q = join_to(Site, sq, 'id', 'site_id', Site.objects.filter(), 'stats')
The following statement prints a query similar to your example query (with the subquery):
# Function-call form of print works on both Python 2 and Python 3.
print(q.query)

Look at it this way: you want to see statistics with accompanying site data for certain date, which translates to:
Stats.objects.filter(date=my_date).select_related('site')

Related

Django How To Query ManyToMany Relationship Where All Objects Match

I have the following models:
# Tag that can be attached to issues.
class issueTags(models.Model):
    name = models.CharField(max_length=400)


# Issue carrying an optional set of tags.
class issues(models.Model):
    tags = models.ManyToManyField(issueTags, blank=True)
In my view I get an array from some client side JavaScript i.e.
(Pdb) array_data = request.POST['arr']
(Pdb) array_data
'["2","3"]'
How should I filter my issues objects to find all issues that match all tags in the array? (The 2 and 3 are the ID values for tag__id.)
If there is a better way to arrange the objects that would also work so I can search in this fashion.
At the time of writing this, the existing answers are either incorrect (e.g. filtering matching all Issues that have any of the specified tags and the correct tag count) or inefficient (e.g. attaching filters in a loop).
For the following models:
class IssueTag(models.Model):
    """A named tag."""

    name = models.CharField(max_length=400, blank=True)


class Issue(models.Model):
    """A labelled issue; its tags are reachable from a tag via `issues`."""

    label = models.CharField(max_length=50, blank=True)
    tags = models.ManyToManyField(IssueTag, related_name='issues')
I suggest using Django Annotation in conjunction with a filter like so:
from django.db.models import Count, Q

tags_to_match = ['tag1', 'tag2']
# Derive the required count from the list itself (ignoring duplicates)
# instead of hard-coding it, so the list can change freely.
required_tag_count = len(set(tags_to_match))
issues_containing_all_tags = (
    Issue.objects
    .annotate(num_correct_tags=Count('tags',
                                     filter=Q(tags__name__in=tags_to_match)))
    .filter(num_correct_tags=required_tag_count)
)
to get all Issues that have all required tags (but may have additional tags, as is required in the question).
This will produce the following SQL query, that resolves all tag matching in a single IN clause:
SELECT "my_app_issue"."id", "my_app_issue"."label",
COUNT("my_app_issue_tags"."issuetag_id")
FILTER (WHERE "my_app_issuetag"."name" IN ('tag1', 'tag2'))
AS "num_correct_tags"
FROM "my_app_issue"
LEFT OUTER JOIN "my_app_issue_tags" ON ("my_app_issue"."id" = "my_app_issue_tags"."issue_id")
LEFT OUTER JOIN "my_app_issuetag" ON ("my_app_issue_tags"."issuetag_id" = "my_app_issuetag"."id")
GROUP BY "my_app_issue"."id", "my_app_issue"."label"
HAVING COUNT("my_app_issue_tags"."issuetag_id")
FILTER (WHERE ("my_app_issuetag"."name" IN ('tag1', 'tag2'))) = 2;
args=('tag1', 'tag2', 'tag1', 'tag2', 2)
I haven't tested this, but I think you could do the following:
import json

from django.db.models import Count, Q

# The client posts a JSON-encoded array such as '["2","3"]', so decode it
# instead of splitting the raw string on commas.
array_data = json.loads(array_data)
issues.objects.filter(
    tags__in=array_data,
).exclude(
    # Exclude any that aren't in array_data
    # NOTE(review): the effect of this double negation on a many-to-many
    # lookup is untested - confirm it does what is intended.
    ~Q(tags__in=array_data)
).annotate(
    # The relation is referenced by its name as a string.
    matches=Count('tags', distinct=True)
).filter(
    # Make sure the number found is right.
    matches=len(array_data)
)
FYI, you should be using Issue, IssueTag for your model names to follow Django's naming pattern.
It isn't the most elegant or Pythonic solution, but I ended up just looping and re-applying the filter.
def filter_on_category(issue_object, array_of_tags):
    """
    Narrow `issue_object` to the issues that carry every tag in `array_of_tags`.

    One `filter()` call is chained per tag id, which ANDs the conditions.
    With an empty `array_of_tags` the input is returned unchanged.
    """
    current_filter = issue_object
    for tag_id in array_of_tags:
        # Exact match on a single id. A string is itself iterable, so handing
        # it to an `__in` lookup would treat "12" as the two ids "1" and "2".
        current_filter = current_filter.filter(tags__id=tag_id)
    return current_filter
The Django `__in` field lookup needs a list argument. I create a one-element list for each element of the IssueTags array, pass it to the lookup, and it works as expected.
Suppose you have these models:
class IssueTags(models.Model):
    """A named tag."""

    name = models.CharField(max_length=400)


class Issues(models.Model):
    """An issue with an optional set of tags."""

    tags = models.ManyToManyField(IssueTags, blank=True)
You want to get Issues which contains all of these IssueTags = ["1","2","3"]
issue_tags_array = ["1", "2", "3"]
# First initialize queryset
queryset = Issues.objects.all()
for tag_id in issue_tags_array:
    # `__in` takes a list, so wrap each id in a one-element list and
    # filter again; chaining the filters ANDs the conditions.
    queryset = queryset.filter(tags__id__in=[tag_id])
# `queryset` now holds the result (`return` is only valid inside a function).

Django prefetch_related - filter with or-clause from different tables

I have a model with simple relation
class Tasks(models.Model):
    """A task, owned by the user who initiated it."""

    initiator = models.ForeignKey(User, on_delete=models.CASCADE)


class TaskResponsiblePeople(models.Model):
    """Links a task to a user responsible for it."""

    # on_delete is spelled out on every foreign key, matching
    # Tasks.initiator; it is a required argument from Django 2.0 onwards.
    task = models.ForeignKey('Tasks', on_delete=models.CASCADE)
    auth_user = models.ForeignKey(User, on_delete=models.CASCADE)
And I need to write an analogue of an SQL query as follows:
-- Once the tables are aliased as a and b, every reference must use the alias.
select a.initiator, b.auth_user
from Tasks a
inner join TaskResponsiblePeople b
on b.task_id = a.id
where a.initiator = 'value A' OR b.auth_user = 'value B'
The problem is that the OR condition spans two different tables, and I have no idea what the right Django syntax is to mimic the raw SQL query above. Please help me out!
UPDATE 1
According to the below-stated answer, I use the following code:
from django.db.models import Prefetch, Q

people = TaskResponsiblePeople.objects.filter(
    Q(task__initiator=request.user.id) | Q(auth_user=request.user.id)
).select_related('auth_user')
print(people.query)
# The result of the print copy-pasted from console
# SELECT * FROM `task_responsible_people`
# LEFT OUTER JOIN `tasks` ON (`task_responsible_people`.`task_id` = `tasks`.`id`)
# LEFT OUTER JOIN `auth_user` T4
# ON (`task_responsible_people`.`auth_user_id` = T4.`id`)
# WHERE (`tasks`.`initiator_id` = 7 OR
# `task_responsible_people`.`auth_user_id` = 7)
tasks = Tasks.objects.prefetch_related(
    Prefetch('task_responsible_people', queryset=people, to_attr='people'))
However, in the final resultset I can still see records where neither initiator nor auth_user are equal to request.user (equal to 7 in this case)
I avoid using ".values" because of the potential need to serialize and transform the queryset into json.
I think you can do it this way if you just want those specific columns:
from django.db.models import Q

# Either side of the OR may match; the second one reaches across the
# reverse relation to TaskResponsiblePeople.
is_initiator = Q(initiator=userA)
is_responsible = Q(taskresponsiblepeople__auth_user=userB)
qs = Tasks.objects.filter(is_initiator | is_responsible).values(
    'initiator', 'taskresponsiblepeople__auth_user')
To examine the generated query you can look at:
print(qs.query)
I don't have the models in my database but it should generate a query similar to following:
-- The foreign key field is named `task`, so its column is "task_id".
SELECT "tasks"."initiator_id", "taskresponsiblepeople"."auth_user_id"
FROM "tasks" LEFT OUTER JOIN "taskresponsiblepeople"
ON ( "tasks"."id" = "taskresponsiblepeople"."task_id" )
WHERE ("tasks"."initiator_id" = userA_id
OR "taskresponsiblepeople"."auth_user_id" = userB_id)

Advanced select with django ORM

I am using the following model:
class Topping(models.Model):
    """A pizza topping."""

    name = models.CharField(max_length=30)


class Pizza(models.Model):
    """A pizza with any number of toppings."""

    name = models.CharField(max_length=50)
    toppings = models.ManyToManyField(Topping)

    def __str__(self):  # __unicode__ on Python 2
        # Renders as "<pizza name> (<topping>, <topping>, ...)".
        topping_names = ", ".join(
            topping.name for topping in self.toppings.all())
        return "%s (%s)" % (self.name, topping_names)
And now I want only the elements for vegetarian menu, filtered by tomatoes
pizza_item = Pizza.objects.filter(toppings__name='tomatoes')
My select is:
SELECT `pizza`.`id`, `pizza`.`name`
FROM `pizza`
INNER JOIN `pizza_toppings` ON (
`pizza`.`id` = `pizza_toppings`.`pizza_id` )
INNER JOIN `topping` ON (
`pizza_toppings`.`topping_id` = `topping`.`id` )
WHERE `topping`.`name` = 'tomatoes'
but i want get:
SELECT `pizza`.`id`, `pizza`.`name`, `topping`.`name`
FROM `pizza`
INNER JOIN `pizza_toppings` ON (
`pizza`.`id` = `pizza_toppings`.`pizza_id` )
INNER JOIN `topping` ON (
`pizza_toppings`.`topping_id` = `topping`.`id` )
WHERE `topping`.`name` = 'tomatoes'
This last query works fine in MySQL, and it also works using Pizza.objects.raw, but I want to get it using the Django ORM.
I need a select that includes topping.name. I tried prefetch_related('toppings'), but I can't get the same select.
Have you tried using the values method for Queryset ?
Something like :
pizza_item = Pizza.objects.filter(toppings__name='tomatoes').values("id", "name", "toppings__name")
I am not sure if that's doable. Because when you use Pizza.objects... you are limited to the fields that are in the Pizza model. Since the Pizza model does not contain toppings' name field. You cannot retrieve it. You can only retrieve toppings' id field:
pizza_item = Pizza.objects.filter(toppings__name='tomatoes').values('id', 'name', 'toppings')
Which will provide "toppings"."topping_id" in SELECT.
Also, since you have specified toppings__name='tomatoes', every topping name in this queryset will be tomatoes, so what is the point of having topping.name in your result?

How to add a filter in a custom manager?

I'm stuck with adding a filter to a custom manager in Django. This is my current custom manager that is working:
class VoteAwareManager(models.Manager):
    """Manager that annotates rows with vote-based score columns."""

    def _get_score_annotation(self):
        """Return a queryset with extra `active`, `row_num` and `score` columns."""
        model_type = ContentType.objects.get_for_model(self.model)
        table_name = self.model._meta.db_table
        # Both vote-based expressions are formatted with the same three values.
        vote_args = (Vote._meta.db_table, int(model_type.id), table_name)

        active_sql = 'select active from %s mh where mh.main_id = %s.id and mh.active = true and mh.date_begin = (select max(date_begin) from euvoudebicicletaengine_mainhistoric where main_id = mh.main_id) and mh.date_end >= now()' % (MainHistoric._meta.db_table, table_name)
        # To know the position(#number) on the front page
        row_num_sql = '(row_number() over(order by (SELECT COALESCE(SUM(vote / ((extract(epoch from now() - time_stamp )/3600)+2)^1.5),0) FROM %s WHERE content_type_id=%d AND object_id=%s.id) DESC))' % vote_args
        score_sql = 'SELECT COALESCE(SUM(vote / ((extract(epoch from now() - time_stamp )/3600)+2)^1.5),0) FROM %s WHERE content_type_id=%d AND object_id=%s.id' % vote_args

        return self.extra(select={
            'active': active_sql,
            'row_num': row_num_sql,
            'score': score_sql,
        })

    def most_loved(self,):
        """Highest score first."""
        annotated = self._get_score_annotation()
        return annotated.order_by('-score')

    def most_hated(self):
        """Lowest score first."""
        annotated = self._get_score_annotation()
        return annotated.order_by('score')
I need to add a filter to the most_loved and most_hated to active=True that will be the SQL equivalent to where active=true in the main sql expression.
Any clues on how to do it?
I think you probably need to write a SQL view (to replace your extra() function) and create a new unmanaged model for the view (including active as a field in your model).
As in this question. Or this (possibly out of date) one.
Then use the view in your _get_score_annotation and add a filter to the queryset you're getting from that function.
def _get_score_annotation(self):
    """Return the base queryset read from the view-backed model."""
    # Add any filtering you need as arguments to filter(). The comment
    # lives on its own line so it cannot swallow the closing parenthesis.
    return ContentTypeView.objects.filter()


def most_loved(self,):
    """Active rows only, highest score first."""
    return self._get_score_annotation().filter(active=True).order_by('-score')

Assign table alias name in a QuerySet

How can I assign an alias name to the main table of a QuerySet in Django?
# The raw WHERE fragment refers to the main table through the alias p1.
latest_per_product = 'p1.created = (select max(p2.created) from products_price p2 where p2.product_id = p1.product_id)'
queryset = Price.objects.extra(where=[latest_per_product])
I would like to set the 'p1' alias name to the Price main table to use it in the subselect.
Edit: Note that there is a latest Price for each Product.
You can inspect the generated SQL query like this:
queryset = Price.objects.all()
# Function-call form of print works on both Python 2 and Python 3.
print(queryset.query)
Once you know the generated SQL, you can write the subquery more easily.
However, I would do the following:
# Take the newest row, then compare against its timestamp; filtering
# `created` against the Price instance itself would not match the column.
latest_price = Price.objects.all().order_by('-created')[0]
price_max = latest_price.created
queryset = Price.objects.filter(created=price_max)
Or the best:
https://docs.djangoproject.com/en/1.3/topics/db/aggregation/#generating-aggregates-over-a-queryset
from django.db.models import Max

# aggregate() returns a dict; the maximum is stored under 'created__max'.
price_max = Price.objects.aggregate(Max('created'))['created__max']
queryset = Price.objects.filter(created=price_max)