Django annotate on BooleanField - django

I have the following models:
class Foo(models.Model):
pass
class Bar(models.Model):
foo = models.ForeignKey(Foo)
is_successful = models.BooleanField()
I would like to get all Foo objects, annotated with whether all of the Bar objects associated with each Foo object have is_successful set to True.
So far my queryset is:
foos = Foo.objects.all().annotate(all_successful=Min('bar__is_successful'))
The idea for the all_successful annotation is that if the minimum value of all is_successful rows is 1, then all of them must be True (assuming 0 is False and 1 is True). So knowing that I can use the queryset like so:
foo = foos[0]
if foo.all_successful == 1:
print 'All bars are successful'
else:
print 'Not all bars are successful'
This works great in sqlite however it fails in PostgreSQL because PostgreSQL can't execute MIN aggregate on a boolean column. I guess this works in sqlite because sqlite treats bools as integers hence it can execute the aggregate.
My question is how can I make this queryset work in PostgreSQL without converting my is_successful field to an IntegerField?
Thanx

I know this is an old question, but I ran up against this recently. Django v1.8 now has built in support for case/when, so you can use the ORM instead of hacking around with custom SQL.
https://docs.djangoproject.com/en/1.8/ref/models/conditional-expressions/#case
Foo.objects.annotate(
all_successful=Case(
When(bar__is_successful=False, then=False),
When(bar__is_successful=True, then=True),
default=False,
output_field=BooleanField()
))
I haven't tried this out, but something similar worked for me on a recent project.

FOR DJANGO <= 1.7: to get an annotation I think you can simply use Extra
# extra() wraps every ``select`` value in parentheses, so the value must be a
# complete scalar subquery and therefore has to start with SELECT.  The column
# backing the ``foo`` ForeignKey is named ``foo_id``.  The subquery yields 1
# when the Foo row has no unsuccessful Bar row and 0 otherwise.
foos = Foo.objects.extra(select={
    'all_successful': (
        'SELECT CASE WHEN COUNT(*) > 0 THEN 0 ELSE 1 END '
        'FROM yourapp_bar AS b '
        'WHERE b.is_successful = false AND b.foo_id = yourapp_foo.id'
    ),
})
If your system is running Django 1.8+, please follow Dav3xor's answer.

Inspired by https://docs.djangoproject.com/en/dev/topics/db/managers/, I suggest using a custom manager for the Bar class instead of an annotation:
class BarManager(models.Manager):
    """Manager for ``Bar`` with a raw-SQL helper for per-Foo success checks."""

    def get_all_successful_foos_ids(self):
        """Return the ids of the Foo rows whose Bar rows are all successful.

        Foo rows without any Bar row never appear in the result, because the
        query only reads the bar table.

        Returns:
            list: one ``foo_id`` value per qualifying Foo.
        """
        from django.db import connection

        cursor = connection.cursor()
        # Group by the ForeignKey column (``foo_id``) and keep only the groups
        # in which no row is unsuccessful: the minimum of the 0/1 flag is 1
        # exactly when every row in the group is successful.  A filtering
        # WHERE clause would have to come *before* GROUP BY; HAVING is used
        # here because the condition applies to the whole group.
        cursor.execute("""
            SELECT foo_id
            FROM yourapp_bar
            GROUP BY foo_id
            HAVING MIN(CASE WHEN is_successful THEN 1 ELSE 0 END) = 1
        """)  # <-- you have to write the correct table name here
        return [row[0] for row in cursor.fetchall()]
class Bar(models.Model):
foo = models.ForeignKey(Foo)
is_successful = models.BooleanField()
objects = BarManager() # here I'm changing the default manager
then, in your code:
# The manager lives on the model class ``Foo``, not on an instance named ``foo``.
foos = Foo.objects.filter(id__in=Bar.objects.get_all_successful_foos_ids())

Related

How can I filter a Django queryset by the latest of a related model?

Imagine I have the following 2 models in a contrived example:
class User(models.Model):
name = models.CharField()
class Login(models.Model):
user = models.ForeignKey(User, related_name='logins')
success = models.BooleanField()
datetime = models.DateTimeField()
class Meta:
get_latest_by = 'datetime'
How can I get a queryset of Users which only contains users whose last login was not successful?
I know the following does not work, but it illustrates what I want to get:
User.objects.filter(login__latest__success=False)
I'm guessing I can do it with Q objects, and/or Case When, and/or some other form of annotation and filtering, but I can't suss it out.
We can use a Subquery here:
from django.db.models import OuterRef, Subquery
latest_login = Subquery(Login.objects.filter(
user=OuterRef('pk')
).order_by('-datetime').values('success')[:1])
User.objects.annotate(
latest_login=latest_login
).filter(latest_login=False)
This will generate a query that looks like:
SELECT auth_user.*, (
SELECT U0.success
FROM login U0
WHERE U0.user_id = auth_user.id
ORDER BY U0.datetime DESC
LIMIT 1
) AS latest_login
FROM auth_user
WHERE (
SELECT U0.success
FROM login U0
WHERE U0.user_id = auth_user.id
ORDER BY U0.datetime
DESC LIMIT 1
) = False
So the outcome of the Subquery is the success of the latest Login object, and if that is False, we add the related User to the QuerySet.
You can first annotate the max dates, and then filter based on success and the max date using F expressions:
User.objects.annotate(max_date=Max('logins__datetime'))\
.filter(logins__datetime=F('max_date'), logins__success=False)
To check the boolean, use success=False, and to get the latest object, use latest().
Your filter should look like this:
User.objects.filter(success=False).latest()

django update_or_create gets "duplicate key value violates unique constraint "

Maybe I misunderstand the purpose of Django's update_or_create Model method.
Here is my Model:
from django.db import models
import datetime
from vc.models import Cluster
class Vmt(models.Model):
added = models.DateField(default=datetime.date.today, blank=True, null=True)
creation_time = models.TextField(blank=True, null=True)
current_pm_active = models.TextField(blank=True, null=True)
current_pm_total = models.TextField(blank=True, null=True)
... more simple fields ...
cluster = models.ForeignKey(Cluster, null=True)
class Meta:
unique_together = (("cluster", "added"),)
Here is my test:
from django.test import TestCase
from .models import *
from vc.models import Cluster
from django.db import transaction
# Create your tests here.
class VmtModelTests(TestCase):
def test_insert_into_VmtModel(self):
count = Vmt.objects.count()
self.assertEqual(count, 0)
# create a Cluster
c = Cluster.objects.create(name='test-cluster')
Vmt.objects.create(
cluster=c,
creation_time='test creaetion time',
current_pm_active=5,
current_pm_total=5,
... more simple fields ...
)
count = Vmt.objects.count()
self.assertEqual(count, 1)
self.assertEqual('5', c.vmt_set.all()[0].current_pm_active)
# let's test that we cannot add that same record again
try:
with transaction.atomic():
Vmt.objects.create(
cluster=c,
creation_time='test creaetion time',
current_pm_active=5,
current_pm_total=5,
... more simple fields ...
)
self.fail(msg="Should violated integrity constraint!")
except Exception as ex:
template = "An exception of type {0} occurred. Arguments:\n{1!r}"
message = template.format(type(ex).__name__, ex.args)
self.assertEqual("An exception of type IntegrityError occurred.", message[:45])
Vmt.objects.update_or_create(
cluster=c,
creation_time='test creaetion time',
# notice we are updating current_pm_active to 6
current_pm_active=6,
current_pm_total=5,
... more simple fields ...
)
count = Vmt.objects.count()
self.assertEqual(count, 1)
On the last update_or_create call I get this error:
IntegrityError: duplicate key value violates unique constraint "vmt_vmt_cluster_id_added_c2052322_uniq"
DETAIL: Key (cluster_id, added)=(1, 2018-06-18) already exists.
Why wasn't the model updated? Why did Django try to create a new record that violated the unique constraint?
The update_or_create(defaults=None, **kwargs) has basically two parts:
the **kwargs which specify the "filter" criteria to determine if such object is already present; and
the defaults which is a dictionary that contains the fields mapped to values that should be used when we create a new row (in case the filtering fails to find a row), or which values should be updated (in case we find such row).
The problem here is that you make your filters too restrictive: you add several filters, and as a result the database does not find such a row. So what happens? Django then tries to create a row with these filter values (and since defaults is missing, no extra values are added). But then it turns out that the combination of cluster and added in the new row already exists. Hence the database refuses to add this row.
So this line:
Model.objects.update_or_create(field1=val1,
field2=val2,
defaults={
'field3': val3,
'field4': val4
})
is semantically approximately equal to:
try:
    item = Model.objects.get(field1=val1, field2=val2)
except Model.DoesNotExist:
    # No row matches the lookup kwargs: create one from lookup + defaults.
    item = Model.objects.create(field1=val1, field2=val2, field3=val3, field4=val4)
else:
    # A row matches: only the fields listed in ``defaults`` are written.
    # QuerySet.update() returns the number of affected rows, not the object,
    # so its result is deliberately not assigned to ``item``.
    Model.objects.filter(
        field1=val1,
        field2=val2,
    ).update(
        field3=val3,
        field4=val4,
    )
(but the original call is typically done in a single query).
You probably thus should write:
Vmt.objects.update_or_create(
cluster=c,
creation_time='test creaetion time',
defaults = {
'current_pm_active': 6,
'current_pm_total': 5,
}
)
(or something similar)
You should separate your fields into two groups:
Fields that should be searched for
Fields that should be updated
for example:
If I have the model:
class User(models.Model):
username = models.CharField(max_length=200)
nickname = models.CharField(max_length=200)
And I want to search for username = 'Nikolas' and update this instance nickname to 'Nik'(if no User with username 'Nikolas' I need to create it) I should write this code:
User.objects.update_or_create(
username='Nikolas',
defaults={'nickname': 'Nik'},
)
see in https://docs.djangoproject.com/en/3.1/ref/models/querysets/
This is already answered well in the above.
To be more clear the update_or_create() method should have **kwargs as those parameters on which you want to check if that data already exists in DB by filtering.
select some_column from table_name where column1='' and column2='';
Filtering by **kwargs will give you objects. Now if you wish to update any data/column of those filtered objects, you should pass them in defaults param in update_or_create() method.
so lets say you found an object based on a filter now the default param values are expected to be picked and updated.
and if there's no matching object found based on the filter then it goes ahead and creates an entry with filters and the default param passed.

django get using _id cannot resolve keyword into field

class Foo(models.Model):
name = models.CharField(max_length=20)
bar = models.ForeignKey(Branch, null=True)
class Bar(models.Model):
name = models.CharField(max_length=20)
When I do:
x = Foo.objects.get(bar_id=2)
I get:
*** FieldError: Cannot resolve keyword 'bar_id' into field. Choices are: bar, id, name
Shouldn't bar_id use the foreign key id directly to look for the matching Foo?
I understand I can query the db first to get the Bar object and then use it in the get statement, but should I waste a query on this if the id is already in the Foo table?
(I'm using an old django and python for a legacy project: django 1.4 python 2.7.2)
It's x = Foo.objects.get(bar__id=2) with double underscore.
django doc.
x = Foo.objects.get(bar__id=2)
Even though the SQL table is using bar_id as the name of the column for the ForeignKey, you need to use bar__id to traverse the FK.
Hopefully, Django will optimize this and not do a real join to execute the query:
>>> print MyModel.objects.filter(bar__id=1).values('pk').query
SELECT "myapp_mymodel"."id" FROM "myapp_mymodel" WHERE "myapp_mymodel"."bar_id" = 1
# Let's compare this with a real joining... (assuming Bar has a `name` field)
>>> print MyModel.objects.filter(bar__name='Thing').values('pk').query
SELECT "myapp_mymodel"."id" FROM "myapp_mymodel" INNER JOIN "myapp_bar" ON (
"myapp_mymodel"."bar_id" = "myapp_bar"."id"
) WHERE "myapp_bar"."name" = Thing

Django Tests: setUpTestData on Postgres throws: "Duplicate key value violates unique constraint"

I am running into a database issue in my unit tests. I think it has something to do with the way I am using TestCase and setUpTestData.
When I try to set up my test data with certain values, the tests throw the following error:
django.db.utils.IntegrityError: duplicate key value violates unique constraint
...
psycopg2.IntegrityError: duplicate key value violates unique constraint "InventoryLogs_productgroup_product_name_48ec6f8d_uniq"
DETAIL: Key (product_name)=(Almonds) already exists.
I changed all of my primary keys and it seems to be running fine. It doesn't seem to affect any of the tests.
However, I'm concerned that I am doing something wrong. When it first happened, I reversed about an hour's worth of work on my app (not that much code for a noob), which corrected the problem.
Then when I wrote the changes back in, the same issue presented itself again. TestCase is pasted below. The issue seems to occur after I add the sortrecord items, but corresponds with the items above it.
I don't want to keep going through and changing primary keys and urls in my tests, so if anyone sees something wrong with the way I am using this, please help me out. Thanks!
TestCase
class DetailsPageTest(TestCase):
    """Test case whose fixture rows are created once for the whole class."""

    # setUpTestData must be a classmethod: the test framework calls it on the
    # class itself, once, before any test method runs.
    @classmethod
    def setUpTestData(cls):
        """Create one product, variety and supplier, three purchases and
        three sorting records (all attached to the first purchase)."""
        cls.product1 = ProductGroup.objects.create(
            product_name="Almonds"
        )
        cls.variety1 = Variety.objects.create(
            product_group=cls.product1,
            variety_name="non pareil",
            husked=False,
            finished=False,
        )
        cls.supplier1 = Supplier.objects.create(
            company_name="Acme",
            company_location="Acme Acres",
            contact_info="Call me!"
        )
        cls.shipment1 = Purchase.objects.create(
            tag=9,
            shipment_id=9999,
            supplier_id=cls.supplier1,
            purchase_date='2015-01-09',
            purchase_price=9.99,
            product_name=cls.variety1,
            pieces=99,
            kgs=999,
            crackout_estimate=99.9
        )
        cls.shipment2 = Purchase.objects.create(
            tag=8,
            shipment_id=8888,
            supplier_id=cls.supplier1,
            purchase_date='2015-01-08',
            purchase_price=8.88,
            product_name=cls.variety1,
            pieces=88,
            kgs=888,
            crackout_estimate=88.8
        )
        cls.shipment3 = Purchase.objects.create(
            tag=7,
            shipment_id=7777,
            supplier_id=cls.supplier1,
            purchase_date='2014-01-07',
            purchase_price=7.77,
            product_name=cls.variety1,
            pieces=77,
            kgs=777,
            crackout_estimate=77.7
        )
        cls.sortrecord1 = SortingRecords.objects.create(
            tag=cls.shipment1,
            date="2015-02-05",
            bags_sorted=20,
            turnout=199,
        )
        cls.sortrecord2 = SortingRecords.objects.create(
            tag=cls.shipment1,
            date="2015-02-07",
            bags_sorted=40,
            turnout=399,
        )
        cls.sortrecord3 = SortingRecords.objects.create(
            tag=cls.shipment1,
            date='2015-02-09',
            bags_sorted=30,
            turnout=299,
        )
Models
from datetime import datetime
from django.db import models
from django.db.models import Q
class ProductGroup(models.Model):
product_name = models.CharField(max_length=140, primary_key=True)
def __str__(self):
return self.product_name
class Meta:
verbose_name = "Product"
class Supplier(models.Model):
company_name = models.CharField(max_length=45)
company_location = models.CharField(max_length=45)
contact_info = models.CharField(max_length=256)
class Meta:
ordering = ["company_name"]
def __str__(self):
return self.company_name
class Variety(models.Model):
product_group = models.ForeignKey(ProductGroup)
variety_name = models.CharField(max_length=140)
husked = models.BooleanField()
finished = models.BooleanField()
description = models.CharField(max_length=500, blank=True)
class Meta:
ordering = ["product_group_id"]
verbose_name_plural = "Varieties"
def __str__(self):
return self.variety_name
class PurchaseYears(models.Manager):
    """Manager that lists the years in which purchases were made."""

    def purchase_years_list(self):
        """Return the ``year`` of every date produced by
        ``Purchase.objects.dates('purchase_date', 'year')``, as a list."""
        yearly_dates = Purchase.objects.dates('purchase_date', 'year')
        return [entry.year for entry in yearly_dates]
class Purchase(models.Model):
tag = models.IntegerField(primary_key=True)
product_name = models.ForeignKey(Variety, related_name='purchases')
shipment_id = models.CharField(max_length=24)
supplier_id = models.ForeignKey(Supplier)
purchase_date = models.DateField()
estimated_delivery = models.DateField(null=True, blank=True)
purchase_price = models.DecimalField(max_digits=6, decimal_places=3)
pieces = models.IntegerField()
kgs = models.IntegerField()
crackout_estimate = models.DecimalField(max_digits=6,decimal_places=3, null=True)
crackout_actual = models.DecimalField(max_digits=6,decimal_places=3, null=True)
objects = models.Manager()
purchase_years = PurchaseYears()
# Keep manager as "objects" in case admin, etc. needs it. Filter can be called like so:
# Purchase.objects.purchase_years_list()
# Managers in docs: https://docs.djangoproject.com/en/1.8/intro/tutorial01/
class Meta:
ordering = ["purchase_date"]
def __str__(self):
return self.shipment_id
def _weight_conversion(self):
return round(self.kgs * 2.20462)
lbs = property(_weight_conversion)
class SortingModelsBagsCalulator(models.Manager):
    """Manager offering a running total of sorted bags."""

    def total_sorted(self, record_date, current_set):
        """Sum ``bags_sorted`` over the entries of ``current_set`` whose
        ``date`` is on or before ``record_date``.

        Each entry is read by key (``entry['date']``, ``entry['bags_sorted']``).
        Returns 0 when no entry qualifies.
        """
        running_total = 0
        for entry in current_set:
            if entry['date'] <= record_date:
                running_total += entry['bags_sorted']
        return running_total
class SortingRecords(models.Model):
tag = models.ForeignKey(Purchase, related_name='sorting_record')
date = models.DateField()
bags_sorted = models.IntegerField()
turnout = models.IntegerField()
objects = models.Manager()
def __str__(self):
return "%s [%s]" % (self.date, self.tag.tag)
class Meta:
ordering = ["date"]
verbose_name_plural = "Sorting Records"
def _calculate_kgs_sorted(self):
kg_per_bag = self.tag.kgs / self.tag.pieces
kgs_sorted = kg_per_bag * self.bags_sorted
return (round(kgs_sorted, 2))
kgs_sorted = property(_calculate_kgs_sorted)
def _byproduct(self):
waste = self.kgs_sorted - self.turnout
return (round(waste, 2))
byproduct = property(_byproduct)
def _bags_remaining(self):
current_set = SortingRecords.objects.values().filter(~Q(id=self.id), tag=self.tag)
sorted = [SortingRecords['bags_sorted'] for SortingRecords in current_set if
SortingRecords['date'] <= self.date]
remaining = self.tag.pieces - sum(sorted) - self.bags_sorted
return remaining
bags_remaining = property(_bags_remaining)
EDIT
It also fails with integers, like so.
django.db.utils.IntegrityError: duplicate key value violates unique constraint "InventoryLogs_purchase_pkey"
DETAIL: Key (tag)=(9) already exists.
UPDATE
So I should have mentioned this earlier, but I completely forgot. I have two unit test files that use the same data. Just for kicks, I matched a primary key in both instances of setUpTestData() to a different value and sure enough, I got the same error.
These two setups were working fine side-by-side before I added more data to one of them. Now, it appears that they need different values. I guess you can only get away with using repeat data for so long.
I continued to get this error without having any duplicate data but I was able to resolve the issue by initializing the object and calling the save() method rather than creating the object via Model.objects.create()
In other words, I did this:
@classmethod
def setUpTestData(cls):
    # Build the instance first, then persist it with an explicit save().
    cls.person = Person(first_name="Jane", last_name="Doe")
    cls.person.save()
Instead of this:
@classmethod
def setUpTestData(cls):
    # Create and persist the row in a single manager call.
    cls.person = Person.objects.create(first_name="Jane", last_name="Doe")
I've been running into this issue sporadically for months now. I believe I just figured out the root cause and a couple solutions.
Summary
For whatever reason, it seems like the Django test case base classes aren't removing the database records created by, let's just call it, TestCase1 before running TestCase2. As a result, when TestCase2 tries to create records in the database using the same IDs as TestCase1, the database raises a duplicate-key exception because those IDs already exist in the database. And even saying the magic word "please" won't help with database duplicate key errors.
Good news is, there are multiple ways to solve this problem! Here are a couple...
Solution 1
Make sure that, if you are overriding the class method tearDownClass, you call super().tearDownClass(). If you override tearDownClass() without calling its super, it will in turn never call TransactionTestCase._post_teardown() nor TransactionTestCase._fixture_teardown(). Quoting from the docstring of TransactionTestCase._post_teardown():
def _post_teardown(self):
"""
Perform post-test things:
* Flush the contents of the database to leave a clean slate. If the
class has an 'available_apps' attribute, don't fire post_migrate.
* Force-close the connection so the next test gets a clean cursor.
"""
If TestCase.tearDownClass() is not called via super() then the database is not reset in between test cases and you will get the dreaded duplicate key exception.
Solution 2
Override TransactionTestCase and set the class variable serialized_rollback = True, like this:
class MyTestCase(TransactionTestCase):
fixtures = ['test-data.json']
serialized_rollback = True
def test_name_goes_here(self):
pass
Quoting from the source:
class TransactionTestCase(SimpleTestCase):
...
# If transactions aren't available, Django will serialize the database
# contents into a fixture during setup and flush and reload them
# during teardown (as flush does not restore data from migrations).
# This can be slow; this flag allows enabling on a per-case basis.
serialized_rollback = False
When serialized_rollback is set to True, the Django test runner rolls back any transactions inserted into the database between test cases. And batta bing, batta bang... no more duplicate key errors!
Conclusion
There are probably many more ways to implement a solution for the OP's issue, but these two should work nicely. Would definitely love to have more solutions added by others for clarity sake and a deeper understanding of the underlying Django test case base classes. Phew, say that last line real fast three times and you could win a pony!
The log you provided states DETAIL: Key (product_name)=(Almonds) already exists. Did you verify in your db?
To prevent such errors in the future, you should prefix all your test data strings with test_
I discovered the issue, as noted at the bottom of the question.
From what I can tell, the database didn't like me using duplicate data in the setUpTestData() methods of two different tests. Changing the primary key values in the second test corrected the problem.
I think the problem here is that you had a tearDownClass method in your TestCase without the call to super method.
In this way the django TestCase lost the transactional functionalities behind the setUpTestData so it doesn't clean your test db after a TestCase is finished.
Check warning in django docs here:
https://docs.djangoproject.com/en/1.10/topics/testing/tools/#django.test.SimpleTestCase.allow_database_queries
I had similar problem that had been caused by providing the primary key value to a test case explicitly.
As discussed in the Django documentation, manually assigning a value to an auto-incrementing field doesn’t update the field’s sequence, which might later cause a conflict.
I have solved it by altering the sequence manually:
from django.db import connection
class MyTestCase(TestCase):
    """Test case that inserts a row with an explicit primary key."""

    @classmethod
    def setUpTestData(cls):
        Model.objects.create(id=1)
        # Creating a row with an explicit id does not advance the sequence
        # that generates ids, so move it past the value just used.
        with connection.cursor() as c:
            c.execute(
                """
                ALTER SEQUENCE "app_model_id_seq" RESTART WITH 2;
                """
            )

Django, query filtering from model method

I have these models:
# A model is declared with ``class`` and derives from ``models.Model``.
class Foo(models.Model):
    size = models.IntegerField()
    # other fields

    def is_active(self):
        """Return True when ``check_condition`` holds, False otherwise."""
        if check_condition:
            return True
        else:
            return False
# A model is declared with ``class`` and derives from ``models.Model``.
class Bar(models.Model):
    foo = models.ForeignKey("Foo")
    # other fields
Now I want to query Bars that are having active Foo's as such:
Bar.objects.filter(foo.is_active())
I am getting error such as
SyntaxError at /
('non-keyword arg after keyword arg'
How can I achieve this?
You cannot query against model methods or properties. Either use the criteria within it in the query, or filter in Python using a list comprehension or generator expression.
You could also use a custom manager. Then you could run something like this:
Bar.objects.foo_active()
And all you have to do is:
class BarManager(models.Manager):
def foo_active(self):
# use your method to filter results
return you_custom_queryset
Check out the docs.
I had similar problem: I am using class-based view object_list and I had to filter by model's method. (storing the information in database wasn't an option because the property was based on time and I would have to create a cronjob and/or... no way)
My answer is inefficient and I don't know how it will scale on larger data, but it works:
q = Model.objects.filter(...)...
# here is the trick
q_ids = [o.id for o in q if o.method()]
q = q.filter(id__in=q_ids)
You can't filter on methods, however if the is_active method on Foo checks an attribute on Foo, you can use the double-underscore syntax like Bar.objects.filter(foo__is_active_attribute=True)