that's the weirdest bug I've ever faced since I started using Django
I have a queryset containing some corrupt data that needs to be excluded. Printing the queryset's length gives 97, and printing the length of the corrupt-data queryset gives 2, so the result should be 95 — but what I get is only 70!
here's my code
# Report queryset: filter the rows, pick the grouping columns with values(),
# add the subquery/aggregate annotations, then drop the rows that are
# considered corrupt.
qs = model.objects.filter(
query,
**sku_filter
).values(
'sku',
'sku__asin',
'sku__title',
).order_by(
'-report_date',
'sku',
).annotate(
# Coalesce substitutes 0 when the inbound subquery evaluates to NULL.
in_inbound=Coalesce(
LedgerQuery.in_inbound_subquery,
Value(0)
)
).annotate(
fnsku=F('fnsku'),
action=F('sku__reconciliation_activity__action'),
case_number=F('sku__reconciliation_activity__case_number'),
is_being_manually_reconciled=F('sku__reconciliation_activity__is_being_manually_reconciled'),
missing_subquery_count = Count(missing_subquery_count),
# NOTE(review): unlike in_inbound this subquery is not wrapped in Coalesce,
# so `missing` (and total_units, which adds it) is presumably NULL when the
# subquery returns no row — confirm.
missing=Subquery(
queryset=missing_subquery
),
# Sum of ending_warehouse_balance over SELLABLE rows only.
available=Sum(
'ending_warehouse_balance',
filter=Q(disposition='SELLABLE')
),
total_units=Sum(
F('ending_warehouse_balance') + # Available + Unsellable units in all the countries warehouses
Abs('in_transit_between_warehouses') + # Reserved Component 1
Abs('customer_shipments') # Reserved Component 2
) + F('missing') + F('in_inbound'),
# it's important to put out the missing from the Sum, otherwise
# the result will be missing * grouped by rows
main_image_link=Subquery(
queryset=main_image_link_subquery
)
).filter(
# this is a hacky way to exclude "corrupt" data
# Drops rows with total_units == 0, and rows whose action is NEEDED or
# TICKET while missing == 0.
~Q(total_units=0),
~Q(
Q(action='NEEDED') | Q(action='TICKET'),
missing=0
),
)
# Total number of rows in the filtered queryset.
##prints 97
print(qs.count())
# Rows remaining after excluding those that are NO_ACTION *and* missing >= 1.
# NOTE(review): ~Q(a, b) compiles to NOT (a AND b). In SQL, when `action` or
# `missing` is NULL that expression can evaluate to NULL instead of TRUE, and
# such rows are dropped too — presumably why this is 70 rather than 95.
# Confirm by counting rows where action/missing IS NULL.
##prints 70
print("c1", qs.filter(
~Q(
action=ReconciliationActivity.ActionChoices.NO_ACTION, missing__gte=1
)
).count()
)
# Rows that are NO_ACTION and have missing >= 1 (the ones meant to be excluded).
##prints 2
print(
qs.filter(
action=ReconciliationActivity.ActionChoices.NO_ACTION, missing__gte=1
).count()
)
I tried converting it to SQL to make debugging easier, but it was more than 45k words long and in the end I couldn't find anything wrong with it.
And no, it's not because of the Q filter: I tried printing the objects that only have missing >= 1, or that only have no action, and found a lot of them.
The issue is with the 2nd query in which you get count of 70. Negation on the query set with multiple parameters doesn't work the way you think.
To make this easier to understand, let me briefly explain De Morgan's Law that is in effect here.
not (A and B) = not (A) or not (B)
In your 2nd query, the count that you get includes all the objects that either don't have ReconciliationActivity.ActionChoices.NO_ACTION as their action or whose missing value does not satisfy missing__gte=1 (i.e. is less than 1). If either case is true, the object contributes to the count.
The correct query that you should write is:
# The original snippet had unbalanced parentheses: `.count()` was applied to
# the result of print() and a stray `)` was left over. Here count() is called
# on the filtered queryset and its value is passed to print().
# NOTE(review): ~Q(a) & ~Q(b) keeps only rows where NEITHER condition holds,
# which is stricter than excluding rows where both hold — confirm that this
# is the intended filter.
##prints 95
print(
    "c1",
    qs.filter(
        ~Q(action=ReconciliationActivity.ActionChoices.NO_ACTION)
        & ~Q(missing__gte=1)
    ).count(),
)
And P.S. this is not a bug. This is how conditions work everywhere.
Related
I'm working on a project and want to limit the data I fetch. Is there a function, or any other way, to apply a limit to the records being fetched?
Given the values (2019, 12, 27) and (2019, 6, 30), I expect the output to be only (2019, 12, 27), but it is fetching all records.
def maintenancefunction(request):  # maintenance page function
    """Render the maintenance page with the computed due date for the user.

    Redirects to ``/login`` when the session has no ``user`` key. Otherwise
    the due date is the (currently hard-coded) start date plus 180 days, and
    a further 180 days when the user has at least one maintenance row with
    status ``PAID``.

    Args:
        request: the incoming request; only ``request.session`` is read.

    Returns:
        The redirect response, or the rendered ``maintenance.html`` with
        ``abc`` (the session user) and ``new_date`` (``YYYY-MM-DD`` string).
    """
    # Guard clause: past this point 'user' is always in the session, so the
    # original second session check and its login.html branch could never run.
    if 'user' not in request.session:
        return redirect('/login')

    abc = request.session['user']
    today = date(2019, 1, 1)  # today= date.today.().strftime('%d/%m/%Y')
    lastdate = today + timedelta(days=180)

    # Due dates for this user, newest first; only printed for debugging.
    duedate = (
        maintanance_table.objects
        .values_list('maintanance_todate')
        .filter(user_email=abc)
        .order_by('maintanance_todate')
        .reverse()
    )
    print("DueDate:", duedate)

    checkstatus = (
        maintanance_table.objects
        .filter(user_email=abc)
        .filter(maintanance_status="PAID")
        .order_by('maintanance_todate')
        .reverse()
    )
    # exists() answers the same yes/no question without loading every row.
    if checkstatus.exists():
        lastdate = lastdate + timedelta(days=180)

    new_date = lastdate.strftime('%Y-%m-%d')
    return render(request, "maintenance.html", {'abc': abc, 'new_date': new_date})
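You can limit the results by slicing the queryset: append a slice such as [:10] to get the first ten records (it must be a slice with a colon — [1-10] is evaluated by Python as the single index -9, and Django querysets do not support negative indexing). If you want only the first record, put [0] at the end of the query; for another specific record use its index, e.g. [5] or [3].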
# Slice with [:10] to fetch at most ten rows (translated to SQL LIMIT).
# `[1-10]` is not a range: Python evaluates it to the index -9, and Django
# querysets reject negative indexing.
duedate = maintanance_table.objects.values_list('maintanance_todate').filter(user_email=abc).order_by('maintanance_todate').reverse()[:10]
checkstatus = maintanance_table.objects.filter(user_email=abc).filter(maintanance_status="PAID").order_by('maintanance_todate').reverse()[:10]
I'm retrieving all records, and I would like to display the record's age for those records that are older than 5 minutes.
The output should be something like this (in this example, two records: 1.8.9.1 and 2.7.3.1 are older than 5 minutes) :
ip ... status
---------------------
1.8.9.1 ... 3 hours
2.7.3.1 ... 7 minutes
1.1.1.1 ... up
1.1.1.2 ... up
1.1.1.3 ... up
1.1.1.4 ... up
1.1.1.5 ... up
Here's my current code:
# Annotate every interface with its age and order by it.
# The chain is wrapped in parentheses: continuation lines that start with
# `.` are a SyntaxError outside brackets.
(
    Interfaces.objects.all()
    .annotate(
        age=(datetime.utcnow() - F('timestamp')),  # 0:00:08.535704
        age2=Epoch(datetime.utcnow() - F('timestamp')),  # 8.535704
        # age3 = int(Epoch(datetime.utcnow() - F('timestamp'))/300),
        current_time=Value(str(datetime.utcnow()),
                           output_field=null_char_field),
    )
    .order_by('age', 'ip')
)
age and age2 both work, but the problem is that I want the records that are older than 5 minutes sorted by age, and the rest by ip
So I'm trying to set age to 0, if it's less than 5 minutes.
If I would do it directly in postgresql, I'd use this query:
-- Yields the record age in seconds when it is more than 300 s (5 minutes),
-- and 0 otherwise.
select ip, <other fields>,
case when extract('epoch' from now() - "timestamp") > 300
then extract('epoch' from now() - "timestamp")
else 0
end
Is there a way to do it in django?
I figured it out:
# Take the current time once so the 5-minute cutoff and the computed age are
# measured from the same instant.
now = datetime.utcnow()
# age = seconds since `timestamp` for records older than 5 minutes, else 0,
# so old records sort by age and the rest fall back to ordering by ip.
# Wrapped in parentheses: leading-dot continuation lines are a SyntaxError
# outside brackets.
(
    Interfaces.objects.all()
    .annotate(
        age=Case(
            When(
                timestamp__lt=now - timedelta(minutes=5),
                then=Cast(Epoch(now - F('timestamp')), NullIntegerField),
            ),
            default=0,
            output_field=NullIntegerField,
        ),
    )
    .order_by('age', 'ip')
)
By the way, my imports and relevant settings:
from django.db.models import F, Func, Case, When, IntegerField
from django.db.models.functions import Coalesce, Cast
# Nullable integer field instance, passed as the Cast target / output_field
# of the age annotation.
NullIntegerField = IntegerField(null=True)


class Epoch(Func):
    """Query expression rendering ``EXTRACT('epoch' from <expressions>)``."""

    function = 'EXTRACT'
    # %(function)s and %(expressions)s are filled in by Func when compiling.
    template = "%(function)s('epoch' from %(expressions)s)"
This website ended up being the most helpful: https://micropyramid.com/blog/django-conditional-expression-in-queries/
You can do it in other way also which will be faster.
Get current time, subtract from that 5 minutes, after that search all the Interfaces
where age is less or equal than the subtracted date.
example:
# `datetime.now()` and `datetime.timedelta` cannot both resolve under a single
# import style; import the two names explicitly and use them consistently.
from datetime import datetime, timedelta

current_time = datetime.now()
older_than_five = current_time - timedelta(minutes=5)
# NOTE(review): `age__lt` compares `age` with a datetime, and F('age') needs
# `age` to already exist as a field or annotation. With only a `timestamp`
# column the condition would presumably be `timestamp__gt=older_than_five` —
# confirm against the model before using this.
# Wrapped in parentheses: leading-dot continuation lines are a SyntaxError
# outside brackets.
(
    Interfaces.objects.all()
    .annotate(
        age=Case(
            When(age__lt=older_than_five, then=Value(0)),
            default=F('age'),
        )
    )
    .order_by('age', 'ip')
)
I am looking for fast method to count model's objects created within past 30 days, for each day separately. For example:
27.07.2013 (today) - 3 objects created
26.07.2013 - 0 objects created
25.07.2013 - 2 objects created
...
27.06.2013 - 1 objects created
I am going to use this data in google charts API. Have you any idea how to get this data efficiently?
# Read the clock once so both bounds of the 30-day window are derived from
# the same instant.
now = datetime.datetime.today()
# Count Foo rows per createdate within the last 30 days: values() selects the
# grouping column and annotate() adds the per-group count.
items = (
    Foo.objects
    .filter(createdate__lte=now, createdate__gt=now - datetime.timedelta(days=30))
    .values('createdate')
    .annotate(count=Count('id'))
)
This will (1) filter results to contain the last 30 days, (2) select just the createdate field and (3) count the id's, grouping by all selected fields (i.e. createdate). This will return a list of dictionaries of the format:
[
{'createdate': <datetime.date object>, 'count': <int>},
{'createdate': <datetime.date object>, 'count': <int>},
...
]
EDIT:
I don't believe there's a way to get all dates, even those with count == 0, with just SQL. You'll have to insert each missing date through python code, e.g.:
import datetime

# needed to use .append() later on
items = list(items)
# A set gives constant-time membership tests for the loop below.
dates = {x.get('createdate') for x in items}
# Use date.today(), not datetime.today(): the rows hold datetime.date objects,
# and a datetime (with a time part) never compares equal to a date, so every
# day would otherwise be appended as a zero row.
today = datetime.date.today()
for d in (today - datetime.timedelta(days=x) for x in range(0, 30)):
    if d not in dates:
        items.append({'createdate': d, 'count': 0})
I think this can be a somewhat more optimized version of @knbk's solution in Python. It needs fewer iterations, and set operations are highly optimized in Python (both in processing time and in CPU cycles).
today = datetime.date.today()
from_date = today - datetime.timedelta(days=7)
# Range lookup: the original exact match (created_at=from_date) only returned
# orders from that single day.
# NOTE(review): assumes created_at is a DateField; a DateTimeField would need
# truncating to a date before grouping — confirm against the model.
orders = Order.objects.filter(created_at__gte=from_date, dealer__executive__branch__user=user)
# values() must come before annotate() so the count is grouped per created_at
# rather than computed per order row.
orders = orders.values('created_at').annotate(count=Count('id')).order_by('created_at')
orders_list = list(orders)

# Every calendar day from from_date through today, inclusive.
window_days = (today - from_date).days + 1
expected_dates = {from_date + datetime.timedelta(days=i) for i in range(window_days)}
present_dates = {row['created_at'] for row in orders_list}
# Days in the window without any order get an explicit zero row. The original
# computed the difference the other way round, which never yields them.
missing_dates = expected_dates - present_dates
if missing_dates:
    orders_list.extend({'created_at': dt, 'count': 0} for dt in missing_dates)
    orders_list.sort(key=lambda item: item['created_at'])
I am trying to do something pretty simple and trivial but with no luck.
I am using django-rating to rate specific objects on my site.
On my model which I wanted to rate I have a field :
rating = RatingField(range=5)
Now, all I want is to filter all of the objects which have a rating of 2 and above, for example.
If rating was IntegerField for example, I would only need to do :
objects.filter( rating__gte = 2)
how can I do the same using django-rating ?
Reading django-rate documentation I found this trick to sort by rate:
# Here ``rating`` is the attribute name of the model's ``RatingField``.
# Build the SQL expression first, then expose it as an extra ``rating``
# column and sort on it, highest first.
rating_sql = '((100/%s*rating_score/(rating_votes+%s))+100)/2' % (
    MyModel.rating.range,
    MyModel.rating.weight,
)
qs = qs.extra(select={'rating': rating_sql}).order_by('-rating')
Perhaps you can modify this code sample and use extra where to get your results:
# Raw WHERE fragment comparing the computed rating expression against 2.
# NOTE(review): the expression is (percentage + 100) / 2, so for non-negative
# scores it is presumably never below 50 and `>= 2` would match every row —
# confirm the intended threshold scale.
rating_condition = '((100/%s*rating_score/(rating_votes+%s))+100)/2 >= 2 ' % (
    MyModel.rating.range,
    MyModel.rating.weight,
)
qs = qs.extra(where=[rating_condition])
When I use extra in a certain way on a Django queryset (call it qs), the result of qs.count() is different than len(qs.all()). To reproduce:
Make an empty Django project and app, then add a trivial model:
class Baz(models.Model):
    """Trivial model that declares no fields of its own."""

    pass
Now make a few objects:
>>> Baz(id=1).save()
>>> Baz(id=2).save()
>>> Baz(id=3).save()
>>> Baz(id=4).save()
Using the extra method to select only some of them produces the expected count:
>>> Baz.objects.extra(where=['id > 2']).count()
2
>>> Baz.objects.extra(where=['-id < -2']).count()
2
But add a select clause to the extra and refer to it in the where clause, and the count is suddenly wrong, even though the result of all() is correct:
>>> Baz.objects.extra(select={'negid': '0 - id'}, where=['"negid" < -2']).all()
[<Baz: Baz object>, <Baz: Baz object>] # As expected
>>> Baz.objects.extra(select={'negid': '0 - id'}, where=['"negid" < -2']).count()
0 # Should be 2
I think the problem has to do with django.db.models.sql.query.BaseQuery.get_count(). It checks whether the BaseQuery's select or aggregate_select attributes have been set; if so, it uses a subquery. But django.db.models.sql.query.BaseQuery.add_extra adds only to the BaseQuery's extra attribute, not select or aggregate_select.
How can I fix the problem? I know I could just use len(qs.all()), but it would be nice to be able to pass the extra'ed queryset to other parts of the code, and those parts may call count() without knowing that it's broken.
Redefining get_count() and monkeypatching appears to fix the problem:
def get_count(self):
    """
    Performs a COUNT() query using the current filter constraints.

    Differs from the stock method only in the subquery condition, which also
    triggers when ``self.extra`` is set, so that WHERE clauses referring to
    extra(select=...) aliases still resolve.

    Returns:
        The row count, adjusted for ``low_mark`` / ``high_mark`` and never
        negative.
    """
    obj = self.clone()
    if len(self.select) > 1 or self.aggregate_select or self.extra:
        # If a select clause exists, then the query has already started to
        # specify the columns that are to be returned.
        # In this case, we need to use a subquery to evaluate the count.
        from django.db.models.sql.subqueries import AggregateQuery
        subquery = obj
        subquery.clear_ordering(True)
        subquery.clear_limits()

        obj = AggregateQuery(obj.model, obj.connection)
        obj.add_subquery(subquery)

    obj.add_count_column()
    number = obj.get_aggregation()[None]

    # Apply offset and limit constraints manually, since using LIMIT/OFFSET
    # in SQL (in variants that provide them) doesn't change the COUNT
    # output.
    number = max(0, number - self.low_mark)
    if self.high_mark is not None:
        number = min(number, self.high_mark - self.low_mark)

    return number
# Monkeypatch: replace BaseQuery.get_count with the redefined version.
# NOTE(review): `quuux` is presumably the module that holds the get_count
# function shown above — verify.
django.db.models.sql.query.BaseQuery.get_count = quuux.get_count
Testing:
>>> Baz.objects.extra(select={'negid': '0 - id'}, where=['"negid" < -2']).count()
2
Updated to work with Django 1.2.1:
def basequery_get_count(self, using):
    """
    Performs a COUNT() query using the current filter constraints.

    Variant for the multi-database API: ``using`` is forwarded to
    ``add_subquery`` and ``get_aggregation``. As in the earlier version, the
    subquery path is also taken when ``self.extra`` is set.

    Args:
        using: database alias passed through to the aggregation calls.

    Returns:
        The row count, adjusted for ``low_mark`` / ``high_mark`` and never
        negative.
    """
    obj = self.clone()
    if len(self.select) > 1 or self.aggregate_select or self.extra:
        # If a select clause exists, then the query has already started to
        # specify the columns that are to be returned.
        # In this case, we need to use a subquery to evaluate the count.
        from django.db.models.sql.subqueries import AggregateQuery
        subquery = obj
        subquery.clear_ordering(True)
        subquery.clear_limits()

        obj = AggregateQuery(obj.model)
        obj.add_subquery(subquery, using=using)

    obj.add_count_column()
    number = obj.get_aggregation(using=using)[None]

    # Apply offset and limit constraints manually, since using LIMIT/OFFSET
    # in SQL (in variants that provide them) doesn't change the COUNT
    # output.
    number = max(0, number - self.low_mark)
    if self.high_mark is not None:
        number = min(number, self.high_mark - self.low_mark)

    return number
# Monkeypatch: install basequery_get_count as Query.get_count.
models.sql.query.Query.get_count = basequery_get_count
I'm not sure if this fix will have other unintended consequences, however.