I'm trying to check if files reported in one system exist in a different system. The models belong to different tables in different databases. They have no relationship other than the name in common.
I'm using Django REST framework to serialize the values of one table, and I'd like to include the values of the other table in an efficient way. The way I'm currently doing it makes too many queries! My question is: is there a way to fix this performance issue?
Here is an example of what I have
# model1
class Product(models.Model):
    # Only shared attribute with ``Files``: rows are matched by this name.
    name = models.CharField(
        max_length=50,
    )
# model2 (different database)
class Files(models.Model):
    # ``name`` is compared against ``Product.name``; there is no foreign key
    # because the two tables live in different databases.
    name = models.CharField(
        max_length=50,
    )
    filename = models.CharField(
        max_length=50,
    )
And my view set is
class ProductViewSet(viewsets.ModelViewSet):
    # The serializer is bound to ``Product`` (see its Meta.model), so the
    # queryset has to yield Product rows too. The snippet used to query
    # ``Inventory``, a model that is not defined anywhere in this example.
    queryset = Product.objects.all()
    serializer_class = ProductSerializer
I managed to get the result as I said (not efficiently, though) in two different ways:
1) Including the fields in the serializer
class ProductSerializer(serializers.ModelSerializer):
    """Serialize a ``Product`` plus flags for matching ``Files`` rows.

    ``has_png`` / ``has_jpg`` are computed per product by looking up ``Files``
    rows with the same ``name``. Each flag still costs one query per product;
    this class only fixes the wiring and removes the duplicated lookup code.
    """

    # No explicit method name: DRF resolves these to ``get_has_png`` and
    # ``get_has_jpg``. Naming a method exactly like its field would rebind the
    # class attribute and silently drop the field.
    has_png = serializers.SerializerMethodField()
    has_jpg = serializers.SerializerMethodField()

    class Meta:
        model = Product
        fields = ('name', 'has_png', 'has_jpg')

    @staticmethod
    def _has_file_with_prefix(product, prefix):
        """Return True if a ``Files`` row for ``product`` has a filename starting with ``prefix``.

        The match is case-insensitive for both flags (the PNG check used a
        case-sensitive ``startswith`` while the JPG check used ``istartswith``).
        """
        return Files.objects.filter(
            name=product.name,
            filename__istartswith=prefix,
        ).exists()

    def get_has_png(self, product):
        """Tell whether a PNG file is registered for ``product``."""
        return self._has_file_with_prefix(product, 'png')

    def get_has_jpg(self, product):
        """Tell whether a JPG file is registered for ``product``."""
        return self._has_file_with_prefix(product, 'jpg')
2) Including properties in my model1 which is serialized
class Product(models.Model):
    """Product whose ``has_png`` / ``has_jpg`` properties query the ``Files`` table."""

    name = models.CharField(max_length=50)

    def _has_file_with_prefix(self, prefix):
        """Return True if a ``Files`` row with this product's name has a filename starting with ``prefix``.

        Case-insensitive; issues one ``EXISTS`` query per call.
        """
        return Files.objects.filter(
            name=self.name,
            filename__istartswith=prefix,
        ).exists()

    def _get_png(self):
        return self._has_file_with_prefix('png')

    def _get_jpg(self):
        return self._has_file_with_prefix('jpg')

    has_png = property(_get_png)
    has_jpg = property(_get_jpg)
and then I add in the serializer:
class ProductSerializer(serializers.ModelSerializer):
    """Expose the model's ``has_png`` / ``has_jpg`` properties as read-only fields."""

    # Field names must match the property names defined on Product; the
    # snippet previously declared ``has_fastq`` / ``has_bam``, which the model
    # does not provide.
    has_png = serializers.ReadOnlyField()
    has_jpg = serializers.ReadOnlyField()

    class Meta:
        model = Product
        fields = ('name', 'has_png', 'has_jpg')
you can try to use cached_property
from django.db.models.functions import Lower
from django.utils.functional import cached_property
class Product(models.Model):
    """Product that looks up its file formats once per instance."""

    name = models.CharField(max_length=50)

    @cached_property
    def file_formats(self):
        """Return the lower-cased formats of all ``Files`` rows named like this product.

        Evaluated on first access and then cached on the instance, so
        ``has_png`` and ``has_jpg`` share a single query.
        """
        # NOTE(review): this relies on a ``file_format`` column on Files; the
        # model shown in the question only has ``name`` and ``filename``.
        files = Files.objects.filter(name=self.name)
        files = files.annotate(lower_format=Lower('file_format'))
        # Materialize into a set so the cached value is plain data rather
        # than a lazy queryset.
        return set(files.values_list('lower_format', flat=True))

    def _get_png(self):
        return 'png' in self.file_formats

    def _get_jpg(self):
        return 'jpg' in self.file_formats

    has_png = property(_get_png)
    has_jpg = property(_get_jpg)
Related
I want to create a queryset that references three related models, and allows me to filter. The SQL might look like this:
SELECT th.id, th.customer, ft.filename, fva.path
FROM TransactionHistory th
LEFT JOIN FileTrack ft
ON th.InboundFileTrackID = ft.id
LEFT JOIN FileViewArchive fva
ON fva.FileTrackId = ft.id
WHERE th.customer = 'ACME, Inc.'
-- AND ft.filename like '%storage%' --currently don't need to do this, but seeing placeholder logic would be nice
I have three models in Django, shown below. It's a bit tricky, because the TransactionHistory model has two foreign keys to the same model (FileTrack). And FileViewArchive has a foreign key to FileTrack.
class FileTrack(models.Model):
    # Maps the existing "FileTrack" table; Django does not manage its schema.
    id = models.BigIntegerField(primary_key=True, db_column="id")
    filename = models.CharField(max_length=128, db_column="filename")

    class Meta:
        db_table = "FileTrack"
        managed = False
class TransactionHistory(models.Model):
    # Maps the existing "TransactionHistory" table. It points at FileTrack
    # twice (inbound and outbound), hence the two distinct related names.
    id = models.BigIntegerField(primary_key=True, db_column="id")
    customer = models.CharField(max_length=128, db_column="Customer")
    inbound_file_track = models.ForeignKey(
        FileTrack,
        null=True,
        on_delete=models.DO_NOTHING,
        db_column="InboundFileTrackId",
        related_name="inbound_file_track_id",
    )
    outbound_file_track = models.ForeignKey(
        FileTrack,
        null=True,
        on_delete=models.DO_NOTHING,
        db_column="OutboundFileTrackId",
        related_name="outbound_file_track_id",
    )

    class Meta:
        db_table = "TransactionHistory"
        managed = False
class FileViewArchive(models.Model):
    # Maps the existing "FileViewArchive" table. The reverse accessor from
    # FileTrack to these rows is ``file_track_id`` (the related_name below).
    id = models.BigIntegerField(primary_key=True, db_column="id")
    file_track = models.ForeignKey(
        FileTrack,
        null=True,
        on_delete=models.DO_NOTHING,
        db_column="FileTrackId",
        related_name="file_track_id",
    )
    path = models.CharField(max_length=256, db_column="Path")

    class Meta:
        db_table = "FileViewArchive"
        managed = False
One thing I tried:
qs1 = TransactionHistory.objects.select_related('inbound_file_track').filter(customer='ACME, Inc.')
qs2 = FileViewArchive.objects.select_related('file_track').all()
qs = qs1 & qs2 # doesn't work b/c they are different base models
And this idea to use chain doesn't work either, because it sends two separate queries and I'm not altogether sure if/how it merges them. I'm looking for a single query in order to be more performant. Also, it returns an iterable, so I'm not sure I can use it in my view (Django REST Framework). Lastly, x below is a TransactionHistory object, so I can't even access the fields from the other two models.
from itertools import chain
c = chain(qs1 | qs2) # great that his this lazy and doesn't evaluate until used!
type(c) # this returns <class 'itertools.chain'> and it doesn't consolidate
x = list(c)[0] # runs two separate queries
type(x) # a TransactionHistory object -> so no access to the Filetrack or FileViewArchive fields
Any ideas how I can join three models together? Something like this?:
qs = TransactionHistory.objects.select_related('inbound_file_track').select_related('file_track').filter(customer='ACME, Inc.', file_track__filename__contains='storage')
More info: this is part of a view that will look like the one below. It returns a queryset that is used as part of a Django REST Framework view.
class Transaction(generics.ListAPIView):
    # Lists TransactionHistory rows, optionally narrowed by two query
    # parameters:
    #   company  - exact match on TransactionHistory.customer
    #   filename - substring match on the inbound FileTrack.filename
    serializer_class = TransactionSerializer
    # select_related fetches the inbound FileTrack row in the same SQL query.
    queryset = TransactionHistory.objects.select_related("inbound_file_track")

    def filter_queryset(self, queryset):
        """Apply the optional ``company`` / ``filename`` filters and order by id.

        Both parameters are optional. The previous ``pop(..., [])[0]`` raised
        IndexError whenever one was missing, and the returned ``qs`` was
        never defined.
        """
        params = self.request.query_params
        company = params.get("company")
        filename = params.get("filename")

        if company:
            queryset = queryset.filter(customer=company)
        if filename:
            # Follow the foreign key by its field name, then the column.
            queryset = queryset.filter(
                inbound_file_track__filename__contains=filename,
            )
        return queryset.order_by("id")
Based on the SQL query you shared, you are filtering on the inbound_file_track file name. So something like this should work:
# select_related joins the inbound FileTrack row into the main query;
# prefetch_related then loads the FileViewArchive rows hanging off it
# (reverse accessor "file_track_id") in one additional query.
TransactionHistory.objects.select_related(
    'inbound_file_track',
).prefetch_related(
    'inbound_file_track__file_track_id',
).filter(
    customer='ACME, Inc.',
    # Lookups are separated by exactly two underscores; a third one makes
    # Django look for a field named "_filename" and raise FieldError.
    inbound_file_track__filename__contains='storage',
)
I have a problem that looks very simple at first glance. Case: a product can be sold in several places (shops), and within a single shop every product can be listed under different categories and subcategories (that is why Category is linked to Assortment via ForeignKey twice).
So here is My Assortment model, with several FKs.
class Assortment(models.Model):
    # One row places a product in a shop under a (parent category, category)
    # pair; both category links are optional.
    category = models.ForeignKey(
        'category.Category',
        null=True,
        blank=True,
        default=None,
        related_name='assortment_child',
    )
    parent_category = models.ForeignKey(
        'category.Category',
        null=True,
        blank=True,
        default=None,
        related_name='assortment_parent',
    )
    product = models.ForeignKey(Product)
    shop = models.ForeignKey(Shop)
View, based on rest_framework.generics.ListAPIView
class InstitutionTreeCategories(generics.ListAPIView):
    """Resource to get shop's tree of categories."""

    serializer_class = serializers.InstitutionCategoriesSerializer

    def get_queryset(self):
        # Parent categories are those referenced as ``parent_category`` by at
        # least one assortment row of the current shop.
        current_shop = self.get_shop()
        parents = Category.objects.filter(assortment_parent__shop=current_shop)
        return parents.distinct()
And finally, serializers
class CategoryListSerializer(serializers.ModelSerializer):
    # Flat representation of a category (no children).

    class Meta:
        """Meta class."""

        fields = ('id', 'name', 'image')
        model = Category
class CategoriesTreeSerializer(CategoryListSerializer):
    # Parent category with its child categories nested under ``childs``.

    # childs = CategoryListSerializer(many=True, source='assortment_child__parent_category')
    childs = serializers.SerializerMethodField()

    class Meta(CategoryListSerializer.Meta):
        """Meta class."""

        fields = ('id', 'name', 'image', 'childs')

    def get_childs(self, obj):
        # Runs one query per serialized parent category.
        child_categories = Category.objects.filter(
            assortment_child__parent_category=obj.id,
        ).distinct()
        nested = CategoryListSerializer(
            child_categories,
            many=True,
            context=self.context,
        )
        return nested.data
And i need to show Category Tree for a one single shop with my API.
But the problem is: if I use serializers.SerializerMethodField it works, but it makes too many queries (one for every parent category). I tried to avoid that by using the 'source' option with my 'CategoryListSerializer', but I can't make it work. Every time, I get: 'Category' object has no attribute assortment_child__parent_category. In a shell I've tried:
In [8]: cat.assortment_parent.values('category').distinct()
Out[8]: (0.003) SELECT DISTINCT "marketplace_assortment"."category_id" FROM "marketplace_assortment" WHERE "marketplace_assortment"."parent_category_id" = 4 LIMIT 21; args=(4,)
<AssortmentQuerySet [{'category': 3}]>
So - category object has this attributes, of course it does, i used it a get_childs method. So question is - how i can use it with serializer.ModelSerializer and it's source option? ( Of course using select_related method with queryset, to avoid excess queries).
With the source option you should use . instead of __:
childs = CategoryListSerializer(many=True, source='assortment_child.parent_category')
But you will still have many queries; to fix that you should use prefetch_related:
def get_queryset(self):
    # Parent categories of the current shop, with their ``assortment_child``
    # rows loaded up front in one extra query.
    current_shop = self.get_shop()
    return (
        Category.objects
        .filter(assortment_parent__shop=current_shop)
        .prefetch_related('assortment_child')
        .distinct()
    )
more detail you can read in the how-can-i-optimize-queries-django-rest-framework
I had the similar problem and the best solution I have found is to do some manual processing in order to receive desired tree representation.
So firstly we fetch all Assortment for shop and then build the tree manually.
Let's look at the example.
def get_categories_tree(assortments, context):
    """Build the parent -> children category tree from assortment rows.

    Args:
        assortments: queryset of ``Assortment`` rows (typically one shop's).
        context: serializer context passed to every ``CategoryListSerializer``.

    Returns:
        A list with one serialized parent category per distinct parent, in
        order of first appearance, each carrying a ``'childs'`` list of its
        distinct serialized child categories.
    """
    from collections import OrderedDict

    # Both categories are joined in, so the loop below issues no extra queries.
    assortments = assortments.select_related('category', 'parent_category')

    parents_by_pk = OrderedDict()  # parent pk -> serialized parent
    seen_children = {}             # parent pk -> pks of children already added

    for assortment in assortments:
        parent = assortment.parent_category
        if parent is None:
            # parent_category is nullable; such rows have no place in the tree.
            continue

        if parent.pk not in parents_by_pk:
            parent_data = CategoryListSerializer(instance=parent, context=context).data
            parent_data['childs'] = []
            parents_by_pk[parent.pk] = parent_data
            seen_children[parent.pk] = set()

        child = assortment.category
        # There is one assortment row per product, so the same
        # (parent, child) pair usually shows up many times; emit it once.
        if child is None or child.pk in seen_children[parent.pk]:
            continue
        seen_children[parent.pk].add(child.pk)
        child_data = CategoryListSerializer(instance=child, context=context).data
        parents_by_pk[parent.pk]['childs'].append(child_data)

    # The keys were only needed for grouping.
    return list(parents_by_pk.values())
class InstitutionTreeCategories(generics.ListAPIView):

    def list(self, request, *args, **kwargs):
        # Fetch the shop's assortment rows and let get_categories_tree group
        # them into the nested response payload.
        shop_assortments = Assortment.objects.filter(shop=self.get_shop())
        serializer_context = self.get_serializer_context()
        tree = get_categories_tree(shop_assortments, serializer_context)
        return Response(tree)
All in single DB query.
The problem here is that there is no explicit relation between category and parent_category. If you define ManyToManyField in Category using Assortment as through intermediate model, you will get an access which Django can understand, so you would just use attribute childs on Category for example. However this will still return all children (the same would happen if your source example works) categories, ignoring shop, so some clever Prefetch would have to be done to achieve correct results. But I believe manual "join" is simpler.
you need to use prefetch_related along with serializer method field
serializer:
class CategoriesTreeSerializer(CategoryListSerializer):
    """Parent category with its distinct child categories under ``children``.

    Meant to be used with a queryset that prefetches ``assortment_parent``
    (with ``category`` joined in), so ``get_children`` issues no queries.
    """

    children = serializers.SerializerMethodField()

    class Meta(CategoryListSerializer.Meta):
        fields = (
            'id',
            'name',
            'image',
            'children',
        )

    def get_children(self, obj):
        """Return the serialized distinct child categories of ``obj``."""
        # Key by primary key: removes duplicates (one assortment row per
        # product) while keeping first-seen order, unlike a set.
        children = {}
        for assortment in obj.assortment_parent.all():
            child = assortment.category
            if child is not None:  # Assortment.category is nullable
                children.setdefault(child.pk, child)
        # Forward the context so nested fields see the request, as the
        # parent serializer does.
        serializer = CategoryListSerializer(
            list(children.values()),
            many=True,
            context=self.context,
        )
        return serializer.data
your get queryset method:
def get_queryset(self):
    """Return the shop's parent categories with their assortment rows prefetched."""
    shop = self.get_shop()
    # Limit the prefetched rows to this shop; otherwise children that other
    # shops file under the same parent would leak into the tree.
    shop_assortments = Assortment.objects.filter(shop=shop).select_related('category')
    return (
        Category.objects
        .filter(assortment_parent__shop=shop)
        .prefetch_related(Prefetch('assortment_parent', queryset=shop_assortments))
        # distinct() belongs on the Category queryset: the join on
        # assortment rows repeats each parent once per matching row.
        .distinct()
    )
I'm trying to fetch some different attribute from reverse foreign key object and show it on django admin list_display. But this current method will call the db queries multiple times
models.py:
class Author(models.Model):
name = models.CharField()
...
def get_all_book(self):
return self.book_set.all()
class Book(models.Model):
author = models.ForeignKey(Author)
aaa = some field type
bbb = some field type
...
admin.py:
class AuthorAdmin(admin.ModelAdmin):
    """Author change list with one column per aggregated Book attribute."""

    # These are plain functions (no ``self``): list_display below holds the
    # function objects themselves, and the admin calls them with the Author.
    def book_aaa(obj):
        """Return the ``aaa`` values of the author's books, comma-separated."""
        # get_all_book is a method and has to be called; iterating the bound
        # method itself raises TypeError.
        booklist = obj.get_all_book()
        return ",".join(str(book.aaa) for book in booklist)

    def book_bbb(obj):
        """Return the ``bbb`` values of the author's books, comma-separated."""
        booklist = obj.get_all_book()
        return ",".join(str(book.bbb) for book in booklist)

    list_display = ('name', book_aaa, book_bbb)
admin.site.register(Author, AuthorAdmin)
I need to show that book information in separate columns, but with this method the "book_set.all()" queryset may be evaluated twice, which is very bad for performance. Is there a correct way to implement this?
By storing an extra attribute on the object and checking whether the attribute already exists:
def get_booklist(self, obj):
    """Attach the author's books to ``obj`` once and return ``obj``.

    The books are stored on ``obj.booklist`` the first time this is called
    for a given object; later calls reuse that list, so every column
    rendered for the same row shares a single lookup.
    """
    if not hasattr(obj, 'booklist'):
        # Call the method and materialize the result so the cached value is
        # a concrete list rather than a bound method or lazy queryset.
        obj.booklist = list(obj.get_all_book())
    return obj
def book_aaa(self, obj):
    """Return the ``aaa`` values of the author's books, comma-separated."""
    booklist = self.get_booklist(obj).booklist
    # str() keeps join() from raising when the field is not a text field.
    return ",".join(str(book.aaa) for book in booklist)
def book_bbb(self, obj):
    """Return the ``bbb`` values of the author's books, comma-separated."""
    booklist = self.get_booklist(obj).booklist
    # str() keeps join() from raising when the field is not a text field.
    return ",".join(str(book.bbb) for book in booklist)
list_display = ('name', 'book_aaa', 'book_bbb')
Maybe this is not the best solution, but at least can prevent the queryset called multiple times.
I have 2 models:
1: KW (individual keywords)
2: Project (many keywords can belong to many different projects)
class KW(models.Model):
...
project = models.ManyToManyField('KWproject', blank=True)
class KWproject(models.Model):
ProjectKW = models.CharField('Main Keyword', max_length=1000)
author = models.ForeignKey(User, editable=False)
Now when user is in Admin for KWproject they should be able to see all keywords belonging to selected project in list_display. I achieved this but it doesn't feel like proper way.
class ProjectAdmin(admin.ModelAdmin):
    """Project change list whose first column links to the project's keywords."""

    form = ProjectForm
    list_display = ('Keywordd', 'author')

    def Keywordd(self, obj):
        """Render the project name as a link to its keywords in the admin.

        The old ``'%s' % (obj.id, obj.ProjectKW)`` passed two values to a
        single placeholder and raised TypeError.
        NOTE(review): the link target is reconstructed (KW change list
        filtered by this project) - confirm it matches the intended URL.
        """
        from django.urls import reverse
        from django.utils.html import format_html

        # The keyword model is reached through the default reverse accessor
        # of KW.project, so nothing extra has to be imported here.
        kw_opts = obj.kw_set.model._meta
        changelist_url = reverse(
            'admin:%s_%s_changelist' % (kw_opts.app_label, kw_opts.model_name)
        )
        # format_html escapes the interpolated values and marks the result
        # safe, which is what allow_tags used to be needed for.
        return format_html(
            '<a href="{}?project__id__exact={}">{}</a>',
            changelist_url,
            obj.id,
            obj.ProjectKW,
        )

    Keywordd.admin_order_field = 'ProjectKW'
    Keywordd.short_description = 'ProjectKW'
Is there better way to link and then list_display all items that have reverse relationship to the model? (via "project" field in my example)
As per the Django Admin docs:
ManyToManyField fields aren’t supported, because that would entail
executing a separate SQL statement for each row in the table. If you
want to do this nonetheless, give your model a custom method, and add
that method’s name to list_display. (See below for more on custom
methods in list_display.)
So, you may opt to implement a custom model method like so:
# models.py
class KW(models.Model):
...
project = models.ManyToManyField('KWproject', blank=True)
class KWproject(models.Model):
    """A project that groups keywords (``KW`` rows point here via ``KW.project``)."""

    ProjectKW = models.CharField('Main Keyword', max_length=1000)
    author = models.ForeignKey(User, editable=False)

    def all_keywords(self):
        """Return this project's keywords as one comma-separated string.

        Runs one query per call, i.e. one per row when used in list_display.
        """
        # The default reverse accessor is the lower-cased model name plus
        # "_set", so for model KW it is ``kw_set`` (``KW_set`` raises
        # AttributeError). Set related_name on KW.project to choose another.
        keywords = self.kw_set.values_list('desired_fieldname', flat=True)
        # str() keeps join() working if the chosen field is not a text field.
        return ','.join(str(keyword) for keyword in keywords)
And then, add that model method to your list_display tuple in your ModelAdmin.
# admin.py
class ProjectAdmin(admin.ModelAdmin):
    """Project change list: linked name, author, and the project's keywords."""

    form = ProjectForm
    # 'all_keywords' is the model method; the admin calls it once per row.
    list_display = ('Keywordd', 'author', 'all_keywords')

    def Keywordd(self, obj):
        """Render the project name as a link to its keywords in the admin.

        The old ``'%s' % (obj.id, obj.ProjectKW)`` passed two values to a
        single placeholder and raised TypeError.
        NOTE(review): the link target is reconstructed (KW change list
        filtered by this project) - confirm it matches the intended URL.
        """
        from django.urls import reverse
        from django.utils.html import format_html

        # The keyword model is reached through the default reverse accessor
        # of KW.project, so nothing extra has to be imported here.
        kw_opts = obj.kw_set.model._meta
        changelist_url = reverse(
            'admin:%s_%s_changelist' % (kw_opts.app_label, kw_opts.model_name)
        )
        # format_html escapes the interpolated values and marks the result
        # safe, which is what allow_tags used to be needed for.
        return format_html(
            '<a href="{}?project__id__exact={}">{}</a>',
            changelist_url,
            obj.id,
            obj.ProjectKW,
        )

    Keywordd.admin_order_field = 'ProjectKW'
    Keywordd.short_description = 'ProjectKW'
Do take note: This can potentially be a VERY EXPENSIVE operation. If you are showing 200 rows in the list, then a request to the page will execute 200 additional SQL queries.
I have two legacy models listed below. The Library.libtype_id is effectively a foreign key to LibraryType when libtype_id > 0. I want to represent this as a ForeignKey Resource in TastyPie when that condition is met.
Can someone help me out? I have seen this but I'm not sure it's the same thing? Thanks much!!
# models.py
class LibraryType(models.Model):
    # Lookup table referenced (by plain integer) from Library.libtype_id.
    id = models.AutoField(
        primary_key=True,
    )
    name = models.CharField(
        max_length=96,
    )
class Library(models.Model):
    id = models.AutoField(
        primary_key=True,
    )
    name = models.CharField(
        max_length=255,
    )
    project = models.ForeignKey(
        'project.Project',
        db_column='parent',
    )
    # Plain integer column, not a ForeignKey.
    libtype_id = models.IntegerField(
        db_column='libTypeId',
    )
Here is my api.py
class LibraryTypeResource(ModelResource):
    # Exposes every LibraryType row under the "library_type" resource name.

    class Meta:
        resource_name = 'library_type'
        queryset = LibraryType.objects.all()
class LibraryResource(ModelResource):
project = fields.ForeignKey(ProjectResource, 'project')
libtype = fields.ForeignKey(LibraryTypeResource, 'libtype_id' )
class Meta:
queryset = Library.objects.all()
resource_name = 'library'
exclude = ['libtype_id']
def dehydrate_libtype(self, bundle):
if getattr(bundle.obj, 'libtype_id', None) != 0:
return LibraryTypeResource.get_detail(id=bundle.obj.libtype_id)
When I do this however I'm getting the following error on http://0.0.0.0:8001/api/v1/library/?format=json
"error_message": "'long' object has no attribute 'pk'",
Shouldn't
libtype = fields.ForeignKey(LibraryTypeResource, 'libtype_id' )
be
libtype = fields.ForeignKey(LibraryTypeResource, 'libtype' )
(without the '_id')
I believe that as it is you are handing the field an int and it is attempting to get the pk from it.
UPDATE:
Missed that libtype_id is an IntegerField, not a ForeignKey (whole point of the question)
Personally I would add a method to the Library to retrieve the LibraryType object. This way you have access to the LibraryType from the Library and you don't have to override any dehydrate methods.
class Library(models.Model):
    # ... other fields
    libtype_id = models.IntegerField(db_column='libTypeId')

    def get_libtype(self):
        """Return the ``LibraryType`` this library points at, or ``None``.

        ``libtype_id`` only refers to a ``LibraryType`` row when it is
        greater than 0, so non-positive values and dangling ids yield
        ``None`` instead of raising ``LibraryType.DoesNotExist``.
        """
        if not self.libtype_id or self.libtype_id <= 0:
            return None
        try:
            return LibraryType.objects.get(id=self.libtype_id)
        except LibraryType.DoesNotExist:
            return None

    # Attribute-style access for the API resource below: a property is read
    # with plain getattr, so it does not depend on the field calling a method.
    libtype = property(get_libtype)


class LibraryResource(ModelResource):
    # null=True: libraries without a type serialize as null rather than error.
    # readonly=True: the property has no setter, so the field cannot be
    # written back during hydration.
    libtype = fields.ForeignKey(LibraryTypeResource, 'libtype', null=True, readonly=True)