Django ORM calculate all Members of Group - django

I am trying to annotate all my groups with the number of Users that satisfy a certain condition. In this case I want the number of users that have an Offer related to a Task in the Group.
Users can create Offers to the Task. One Task can be related to one Group.
So as the result of annotating Groups I want something like this
| id | name | runners_num |
| -- | ------ | ----------- |
| 1 | name1 | 3 |
| 2 | name2 | 5 |
With a query below I can get this count as a dictionary
# Count the distinct users that have at least one offer on a task of group 1.
# The lookup path follows Offer.task -> Task.group.
runners = User.objects.filter(
    offer__task__group_id=1
).distinct(
).values(
    'id'
).aggregate(
    count=Count('id')
)
output: {'count': 5}
But I can't figure out how to do this with OuterRef clause
runners = User.objects.filter(
offer__task__group_id=OuterRef('id')
).distinct().values('id')
groups = Group.objects.annotate(
runners_num=Count(Subquery(runners))
)
It ended up with this wrong query
SELECT
"groups"."id",
"groups"."name",
COUNT(
(
SELECT
DISTINCT U0."id"
FROM
"users" U0
INNER JOIN "offers" U1 ON (U0."id" = U1."runner_id")
INNER JOIN "tasks" U2 ON (U1."task_id" = U2."id")
WHERE
U2."group_id" = ("groups"."id")
)
) AS "runners_num"
FROM
"groups"
GROUP BY
"groups"."id"
LIMIT
21
My models
class Task(models.Model):
    """A task that belongs to one group and references two users."""

    # Both user links are optional and are set to NULL when the user is deleted.
    tasker = models.ForeignKey(
        "user.User", null=True, blank=True, on_delete=models.SET_NULL, related_name="tasker",
    )
    runner = models.ForeignKey(
        "user.User", null=True, blank=True, on_delete=models.SET_NULL, related_name="runner",
    )
    # Deleting the group deletes its tasks.
    group = models.ForeignKey(
        "group.Group",
        on_delete=models.CASCADE
    )
class Offer(models.Model):
    """An offer made by a user (the ``runner``) on a task."""

    # Offers are removed together with their task.
    task = models.ForeignKey("task.Task", on_delete=models.CASCADE)
    # The offering user; set to NULL when that user is deleted.
    runner = models.ForeignKey("user.User", null=True, blank=True, on_delete=models.SET_NULL)
class Group(models.Model):
    """A named group that tasks and members are attached to."""

    name = models.CharField(max_length=100)
class GroupMembers(models.Model):
    """Membership row linking a user to a group."""

    # Membership rows are removed together with their group.
    group = models.ForeignKey("group.Group", on_delete=models.CASCADE)
    # The member; set to NULL when that user is deleted.
    member = models.ForeignKey("user.User", null=True, blank=True, on_delete=models.SET_NULL)
EDIT
I have conditions where my Users get filtered. In my case I want count only Users that have more than 3 Offers that apply to conditions. So probably I can't get rid of Subquery statement and OuterRef field.
runners = User.objects.filter(
offer__task__group_id=OuterRef('id')
).distinct(
).annotate(
offer_num=Count(
'offer',
filter=
Q(
offer__task__completion_time__isnull=False,
offer__task__completion_time__gte=timezone.now() - timedelta(hours=24),
) |
Q(
offer__task__status__in=(
TaskStatus.ACTIVE,
TaskStatus.ACCEPTED,
TaskStatus.IN_PROGRESS,
TaskStatus.WAITING_REVIEW
),
offer__task__is_active=True,
),
offer__runner_id=F('id'),
)
).filter(
offer_num__gte=3
).values('id')
It is working fine if I replace OuterRef('id') with just a int Group number. But I don't know the proper solution how to count on this QuerySet. Something like
runners_num = Count(Subquery(runners))
And after that I get
django.db.utils.ProgrammingError: more than one row returned by a subquery used as an expression

Annotate the number of users who have at least one offer for each group's task, correct?
I believe this should do:
# Offer references its user through the ``runner`` foreign key, so the
# lookup path from Group is task -> offer -> runner. ``distinct=True`` makes
# a user with several offers count once per group.
Group.objects.annotate(
    runners_count=Count(
        'task__offer__runner', distinct=True
    )
).values('id', 'runners_count')
Count() has some neat kwargs you can leverage like that: https://docs.djangoproject.com/en/3.2/ref/models/querysets/#django.db.models.Count

So my final solution is to carry out User's subquery and only then aggregate by Count function.
# Innermost query: ids of users with at least 3 qualifying offers on tasks of
# the group being annotated. This queryset is nested inside `runners`, which is
# itself nested inside the Group query, hence the double OuterRef to reach the
# Group's id two levels out.
users_runners = User.objects.filter(
offer__task__group_id=OuterRef(OuterRef('id'))
).annotate(
offer_num=Count(
'offer',
filter=
# An offer qualifies when its task was completed within the last 24 hours ...
Q(
offer__task__completion_time__isnull=False,
offer__task__completion_time__gte=timezone.now() - timedelta(hours=24),
) |
# ... or when its task is active and in one of the listed statuses.
Q(
offer__task__status__in=(
TaskStatus.ACTIVE,
TaskStatus.ACCEPTED,
TaskStatus.IN_PROGRESS,
TaskStatus.WAITING_REVIEW
),
offer__task__is_active=True,
)
)
).filter(
# Keep users with 3 or more qualifying offers.
offer_num__gte=3
).values(
'id'
).distinct(
)
# Middle query: collapse the matching users into one COUNT(id) value.
# NOTE(review): COUNT is emitted through a plain Func rather than the Count
# aggregate - presumably so the ORM does not add a GROUP BY and the subquery
# yields a single row; confirm against the generated SQL.
runners = User.objects.filter(
id__in=users_runners,
).annotate(
count=Func(
F('id'),
function='Count'
)
).values(
'count'
)
# Outer query: the `runners` queryset is passed directly as the annotation
# value for each group.
groups = Group.objects.annotate(
runners_num=runners
).order_by(
'id'
)

Related

The best way to do an efficient filter query in django

models.py file
I am not so good at this aspect in Django. Please can someone help me? I wish to know if there is a more efficient way for the class method already_voted
class Vote(TimeStamped):
    """A vote cast by a user for a contester."""

    voter = models.ForeignKey(get_user_model(), verbose_name=_("Vote"), on_delete=models.CASCADE)
    contester = models.ForeignKey(
        Contester,
        verbose_name=_("Contester"),
        on_delete=models.CASCADE,
        help_text=("The chosen contester"),
        related_name="votes",
    )
    # NOTE(review): the default is the literal string "None", not Python's
    # None, on a unique column - confirm this is intended.
    ip_address = models.GenericIPAddressField(
        _("Voter's IP"),
        protocol="both",
        unpack_ipv4=False,
        default="None",
        unique=True
    )
    num_vote = models.PositiveIntegerField(_("Vote"), default=0)

    class Meta:
        unique_together = ('voter', 'contester')
        verbose_name = _("Vote")
        verbose_name_plural = _("Votes")
        permissions = (
            ("vote_multiple_times", "can vote multiple times"),
        )

    # .... (further members were elided in the original listing)

    @classmethod
    def already_voted(cls, contester_id, voter_id=None, ip_addr=None):
        """Report whether a vote already exists for this contester and voter/IP.

        Returns True when some vote exists for ``contester_id`` and, in
        addition, some vote exists with ``ip_addr`` or some vote exists with
        ``voter_id``. The three existence checks are independent queries.
        """
        return cls.objects.filter(contester_id=contester_id).exists() and (
            cls.objects.filter(ip_address=ip_addr).exists()
            or cls.objects.filter(voter_id=voter_id).exists()
        )
The class method may be right, but your model needs one more index:
contester = models.ForeignKey( db_index= True #... )
Notice that:
voter doesn't need index because is on first place on unique_together constraint.
contester needs an index because, although it is part of unique_together, it is not in the first position of the constraint.
ip_address doesn't need index because has unique constraint.
Also:
unique_together is deprecated and should be a list of tuples (not just a tuple)
Edited
Edited 5 feb 2021 due to OP comment
You can get results in just one hit using Exists but it is less readable, also, I'm not sure if it is more efficient or the best way:
from django.db.models import Exists
q_ip=Vote.objects.filter(ip_address="1")
q_voter=Vote.objects.filter(voter=2)
already_voted=(
Vote
.objects
.filter(contester=3)
.filter(Exists(q_ip)|Exists(q_voter))
.exists())
The underlying sql, you can see this is just one query:
SELECT ( 1 ) AS "a"
FROM "a1_vote"
WHERE ( "a1_vote"."contester" = 3
AND ( EXISTS(SELECT U0."id",
U0."voter",
U0."contester",
U0."ip_address",
U0."num_vote"
FROM "a1_vote" U0
WHERE U0."ip_address" = '1')
OR EXISTS(SELECT U0."id",
U0."voter",
U0."contester",
U0."ip_address",
U0."num_vote"
FROM "a1_vote" U0
WHERE U0."voter" = 2) ) )
LIMIT 1

Filtering X most recent entries in each category of queryset

Question is regarding filtering X most recent entries in each category of queryset.
Goal is like this:
I have a incoming queryset based on the following model.
class UserStatusChoices(models.TextChoices):
    """Statuses a user can act under; used as choices for ``as_who``."""

    CREATOR = 'CREATOR'
    SLAVE = 'SLAVE'
    MASTER = 'MASTER'
    FRIEND = 'FRIEND'
    ADMIN = 'ADMIN'
    LEGACY = 'LEGACY'
class OperationTypeChoices(models.TextChoices):
    """Kinds of operation recorded in the change log."""

    CREATE = 'CREATE'
    UPDATE = 'UPDATE'
    DELETE = 'DELETE'
class EntriesChangeLog(models.Model):
    """Log row recording one operation by a user on an arbitrary object.

    The target object is addressed generically through ``content_type`` and
    ``object_id``.
    """

    content_type = models.ForeignKey(
        ContentType,
        on_delete=models.CASCADE,
    )
    object_id = models.PositiveIntegerField(
    )
    content_object = GenericForeignKey(
        'content_type',
        'object_id',
    )
    # The log row survives deletion of the user (the link is set to NULL).
    user = models.ForeignKey(
        get_user_model(),
        verbose_name='user',
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name='access_logs',
    )
    # Set once, when the row is created.
    access_time = models.DateTimeField(
        verbose_name='access_time',
        auto_now_add=True,
    )
    # max_length matches the longest UserStatusChoices value ('CREATOR').
    as_who = models.CharField(
        verbose_name='Status of the accessed user.',
        choices=UserStatusChoices.choices,
        max_length=7,
    )
    # Every OperationTypeChoices value is exactly 6 characters long.
    operation_type = models.CharField(
        verbose_name='Type of the access operation.',
        choices=OperationTypeChoices.choices,
        max_length=6,
    )
And I need to filter this incoming queryset in a such way to keep only 4 most recent objects (defined by access_time field) in each category. Categories are defined by ‘content_type_id’ field and there are 3 possible options.
Lets call it ‘option1’, ‘option2’ and ‘option3’
This incoming queryset might contain different amount of objects of 1,2 or all 3 categories. This is can’t be predicted beforehand.
DISTINCT is not possible to use as after filtering operation this queryset might be ordered.
I managed to get 1 most recent object in a following way:
# get one most recent operation in each category
# For the outer row's user, take the latest access_time per content type.
last_operation_time = Subquery(
    EntriesChangeLog.objects.filter(user=OuterRef('user'))
    .values('content_type_id')
    .annotate(last_access_time=Max('access_time'))
    .values_list('last_access_time', flat=True)
)
# Keep only the rows whose access_time is one of those per-category maxima.
queryset.filter(access_time__in=last_operation_time)
But I have a hard time to figure out how to get last 4 most recent objects instead of last one.
This is needed for Django-Filter and need to be done in one query.
DB-Postgres 12
Do you have any ideas how to do such filtration?
Thanks...
# NOTE(review): this is a fragment - `queryset` and `value` are not defined
# here and the `return` implies an enclosing function; presumably the body of
# a django-filter method where `value` is the number of rows to keep.
#
# First query: rank rows within each content_type_id, newest access_time first.
pk_to_rank = queryset.annotate(rank=Window(
expression=DenseRank(),
partition_by=('content_type_id',),
order_by=F('access_time').desc(),
)).values_list('pk', 'rank', named=True)
# The rank cut-off is applied in Python, over the (pk, rank) pairs.
pks_list = sorted(log.pk for log in pk_to_rank if log.rank <= value)
# Second query: restrict the original queryset to the selected primary keys.
return queryset.filter(pk__in=pks_list)
I only managed to do it this way, by splitting the work into 2 queries. An option with 3 unions is also possible, but what if we have 800 options instead of 3 - make 800 unions()??? I guess not...

Django: Aggregate (sum) of a field on two different set of data in a single query

I am using Django 1.6.
My Model looks something like:
class Transaction(models.Model):
    """A single transaction; ``type`` tells debits ('D') from credits ('C')."""

    type = models.CharField(max_length=255, db_index=True)
    amount = models.DecimalField(decimal_places=2, max_digits=10, default=0.00)
I have few transactions, few of which are credit and other are debit (determined by type column). I need to check the balance of all transaction i.e., (debit - credit)
Currently, I could do that using 2 queries as below:
# Two separate queries: one sum for debits, one for credits.
debit_amount = Transaction.objects.filter(type='D').aggregate(debit_amount=Sum('amount'))['debit_amount']
credit_amount = Transaction.objects.filter(type='C').aggregate(credit_amount=Sum('amount'))['credit_amount']
balance = debit_amount - credit_amount
I am looking something like:
Transaction.objects.aggregate(credit=Sum('amount', filter=Q(type='C')), debit=Sum('amount', filter=Q(type='D')))
You can use conditional expression
from django.db.models import Case, DecimalField, F, Q, Sum, When

# One query: each Sum only adds the rows whose type matches, because the Case
# yields the amount for matching rows and 0 for all others.
# `amount` is a DecimalField, so the Case must also produce a decimal;
# declaring an integer output would lose the fractional part.
result = Transaction.objects.aggregate(
    credit=Sum(Case(
        When(Q(type='C'), then=F('amount')),
        output_field=DecimalField(),
        default=0
    )),
    debit=Sum(Case(
        When(Q(type='D'), then=F('amount')),
        output_field=DecimalField(),
        default=0
    )),
)
balance = result['debit'] - result['credit']
This should be possible in django 2.0 (https://docs.djangoproject.com/en/2.0/ref/models/conditional-expressions/#case)
# aggregate() returns a plain dict keyed by the keyword names used below.
totals = Transaction.objects.aggregate(
    credit=Sum('amount', filter=Q(type='C')),
    debit=Sum('amount', filter=Q(type='D'))
)
# Balance as defined in the question: debit minus credit.
total = totals['debit'] - totals['credit']

Django query with annotated conditional expression uses INNER JOIN. How do I get it to use OUTER JOIN?

I have a "Meal" model with a foreign key to "Food". Each meal has a rating: good, bad, or indifferent. I want to query a list of all foods and annotate the count of each type of meal rating, but some foods have no meals yet, so I want the query to use a LEFT OUTER JOIN and in that case the counts should be zero.
I am using Conditional Expressions in Django 1.8, and it always switches the relationship to an INNER JOIN between "Food" and "Meal". For example:
Meal model:
class Meal(models.Model):
    """A meal of some food, optionally rated good, bad or indifferent."""

    GOOD = 1
    BAD = 2
    INDIFFERENT = 3
    RATING_CHOICES = (
        (GOOD, 'Good'),
        (BAD, 'Bad'),
        (INDIFFERENT, 'Indifferent')
    )
    meal_time = models.DateTimeField()
    food = models.ForeignKey("Food")
    # The rating may be absent (NULL).
    rating = models.IntegerField(blank=True, null=True, choices=RATING_CHOICES)
When I query Food.objects.annotate(total_meals=Count('meal')), Django generates a query like
SELECT ... FROM "Food"
LEFT OUTER JOIN "Meal" ON ...
GROUP BY "Food"
However, when I add these conditional annotations:
class FoodQuerySet(models.QuerySet):
    """QuerySet helpers for Food."""

    def with_meal_rating_frequency(self):
        """Annotate each food with its total meals and a count per rating.

        Each per-rating count sums 1 for meals with that rating and 0 otherwise.
        """
        return self.annotate(
            total_meals=Count('meal'),
            good_meals=Sum(
                Case(When(meal__rating=Meal.GOOD, then=1),
                     output_field=models.IntegerField(), default=0)
            ),
            bad_meals=Sum(
                Case(When(meal__rating=Meal.BAD, then=1),
                     output_field=models.IntegerField(), default=0)
            ),
            indifferent_meals=Sum(
                Case(When(meal__rating=Meal.INDIFFERENT, then=1),
                     output_field=models.IntegerField(), default=0)
            )
        )
Django uses an INNER JOIN instead.
SELECT ... FROM "Food"
INNER JOIN "Meal" ON ...
GROUP BY "Food"
I know this question is very similar to this one but Its not clear to me how to apply the accepted solution to my case. How can I get Django to use a LEFT OUTER JOIN? Your help is appreciated, thanks!
I have found a solution that seems to be working so far, using Count() instead of Sum() and having the conditions check for NULL meals, which won't be included in the count:
class FoodQuerySet(models.QuerySet):
    """QuerySet helpers for Food."""

    def with_meal_rating_frequency(self):
        """Annotate each food with its total meals and a count per rating.

        Each condition also accepts ``meal__isnull=True`` so that foods
        without meals stay in the result. For those rows ``meal__rating`` is
        NULL, and Count() skips NULLs, so their counts come out as zero.
        """
        return self.annotate(
            total_meals=Count('meal'),
            good_meals=Count(
                Case(When(Q(meal__isnull=True) | Q(meal__rating=Meal.GOOD), then='meal__rating'),
                     output_field=models.IntegerField(), default=None)
            ),
            bad_meals=Count(
                Case(When(Q(meal__isnull=True) | Q(meal__rating=Meal.BAD), then='meal__rating'),
                     output_field=models.IntegerField(), default=None)
            ),
            indifferent_meals=Count(
                Case(When(Q(meal__isnull=True) | Q(meal__rating=Meal.INDIFFERENT), then='meal__rating'),
                     output_field=models.IntegerField(), default=None)
            )
        )

Django selecting a count

I'm fairly new to Django and I want to get the total clients a user has (my users will be selling something through my website) so I have a table called orders where I keep the user_id of the user who purchased and the product_id being purchased. Each product id is related to a product that has a user (the one I'm doing the query for):
select COUNT(distinct(o.user_id)) as total_clients from `order` o
inner join product p on p.id=o.product_id where p.user_id=32;
User id 32 is logged in and I want to show him how many clients purchased his products.
I want to do this in a get instead of filter as it makes more sense.
Here's what my logic tells me to write:
clients = Order.objects.get(
status = Order.COMPLETED,
product__user = self.user
).annotate(
total_clients = Count( 'user', distinct = True )
)
return clients.total_clients
And here's what it returns:
Exception Type: MultipleObjectsReturned
Exception Value: get() returned more than one Order -- it returned 2!
I could probably be running a query instead of using the orm but I don't want that. This is a rather simple query that I'm sure Django is handling very easily and I want to keep away from writing strings in my code.
Here's my model setup:
class UserProfile(models.Model):
    """Profile attached one-to-one to a User."""

    user = models.OneToOneField(User, related_name='profile')

    # .... (further members were elided in the original listing)

    def get_total_clients(self):
        """Attempt to count the distinct clients who bought this user's products.

        NOTE: this is the version under discussion and does not work as
        written: ``get()`` raises MultipleObjectsReturned as soon as more than
        one completed order matches.
        """
        clients = Order.objects.get(
            status=Order.COMPLETED,
            product__user=self.user
        ).annotate(
            total_clients=Count('user', distinct=True)
        )
        return clients.total_clients
class Order(models.Model):
    """A purchase of one product by one user."""

    PENDING = 0
    COMPLETED = 1
    REFUNDED = 2
    STATUS_CHOICES = (
        (PENDING, "Pending"),
        (COMPLETED, "Completed"),
        (REFUNDED, "Refunded"),
    )
    # The buyer.
    user = models.ForeignKey(User, related_name="orders")
    product = models.ForeignKey(Product, related_name="orders")
    amount = models.DecimalField(max_digits=6, decimal_places=2, default=0.00)
    status = models.SmallIntegerField(
        choices=STATUS_CHOICES, default=PENDING, db_index=True
    )
class Product(models.Model):
    """A product offered for sale by a user."""

    # The seller; reachable from User through the "unlocks" reverse name.
    user = models.ForeignKey(User, related_name="unlocks")
    name = models.CharField(max_length=255)
Django queryset have a count method:
clients = Order.objects.filter(
status = Order.COMPLETED,
product__user = self.user
)
return clients.count()
If I got you right, you are interested in the number of consumers who ordered a product from a user. Some docs that may be helpful.
My suggestion is:
result = Product.objects.distinct().filter(
# or any query:
orders__status=Order.COMPLETED,
user=default_user
).aggregate( # see the difference between aggregate() and annotate()
total_users=Count('orders__user', distinct=True)
)
I expect the result to be: {'total_users': NUM_OF_USERS}
In raw SQL it will be something like:
SELECT DISTINCT COUNT(DISTINCT "main_order"."user_id") AS
"total_users" FROM "main_product" INNER JOIN "main_order" ON (
"main_product"."id" = "main_order"."product_id" ) WHERE
("main_order"."status" = STATUS AND "main_product"."user_id" = USER_ID)
Is that what you wanted?