Django ORM distinct on only a subset of the queryset - django

Working in Django Rest Framework (DRF), django-filter, and PostgreSQL, and having an issue with one of our endpoints.
Assume the following:
# models.py
class Company(models.Model):
name = models.CharField(max_length=50)
class Venue(models.Model):
company = models.ForeignKey(to="Company", on_delete=models.CASCADE)
name = models.CharField(max_length=50)
# create some data
company1 = Company.objects.create(name="Proper Ltd")
company2 = Company.objects.create(name="MyCompany Ltd")
Venue.objects.create(name="Venue #1", company=company1)
Venue.objects.create(name="Venue #2", company=company1)
Venue.objects.create(name="Property #1", company=company2)
Venue.objects.create(name="Property #2", company=company2)
# viewset
class CompanyViewSet(viewsets.ReadOnlyModelViewSet):
serializer_class = CompanyVenueSearchSerializer
queryset = (
Venue.objects.all()
.select_related("company")
.order_by("company__name")
)
permission_classes = (ReadOnly,)
http_method_names = ["get"]
filter_backends = (filters.DjangoFilterBackend,)
filterset_class = CompanyVenueListFilter
pagination_class = None
# filterset
class CompanyVenueListFilter(filters.FilterSet):
    # Exposes a free-text ``?text=`` filter on top of the Meta-declared fields.
    text = filters.CharFilter(method="name_search")

    def name_search(self, qs, name, value):
        """Keep venues whose own name, or whose company's name, contains ``value``.

        Both comparisons use ``icontains`` and are OR-ed together, so a venue
        is returned as soon as either name matches.
        """
        venue_name_hit = Q(name__icontains=value)
        company_name_hit = Q(company__name__icontains=value)
        return qs.filter(venue_name_hit | company_name_hit)

    class Meta:
        model = Venue
        fields = ["name", "company__name"]
# serializer
class CompanyVenueSearchSerializer(serializers.ModelSerializer):
company_id = serializers.IntegerField(source="company.pk")
company_name = serializers.CharField(source="company.name")
venue_id = serializers.IntegerField(source="pk")
venue_name = serializers.CharField(source="name")
class Meta:
model = Venue
fields = (
"company_id",
"company_name",
"venue_id",
"venue_name",
)
We now want to allow the user to filter the results by sending a query in the request, e.g. curl -X GET https://example.com/api/company/?text=pr.
The serializer result will look something like:
[
{
"company_id":1,
"company_name":"Proper Ltd",
"venue_id":1,
"venue_name":"Venue #1"
},
{ // update ORM to exclude this dict
"company_id":1,
"company_name":"Proper Ltd",
"venue_id":2,
"venue_name":"Venue #2"
},
{
"company_id":2,
"company_name":"MyCompany Ltd",
"venue_id":3,
"venue_name":"Property #1"
},
{
"company_id":2,
"company_name":"MyCompany Ltd",
"venue_id":4,
"venue_name":"Property #2"
}
]
Expected result:
Want to rewrite the ORM query so that if the filter ("pr") matches the venue__name, return all venues. But if the filter matches the company__name, only return it once, thus in the example above the second dict in the list would be excluded/removed.
Is this possible?

What you can do is filter the Company rows whose name matches, annotate each of them with its first related Venue, and then combine that result with the second requirement: venues whose own name contains the value.
from django.db.models import OuterRef, Q, Subquery
value = "pr"
first_venue = Venue.objects.filter(company__in=OuterRef("id")).order_by("id")
company_qs = Company.objects.filter(name__icontains=value).annotate(
first_venue_id=Subquery(first_venue.values("id")[:1])
)
venue_qs = Venue.objects.filter(
Q(name__icontains=value)
| Q(id__in=company_qs.values_list("first_venue_id", flat=True))
)
The query executed when accessing values of venue_qs looks like
SELECT
"venues_venue"."id",
"venues_venue"."company_id",
"venues_venue"."name"
FROM
"venues_venue"
WHERE
(
UPPER("venues_venue"."name"::TEXT) LIKE UPPER(% pr %)
OR "venues_venue"."id" IN (
SELECT
(
SELECT
U0."id"
FROM
"venues_venue" U0
WHERE
U0."company_id" IN (V0."id")
ORDER BY
U0."id" ASC
LIMIT
1
) AS "first_venue_id"
FROM
"venues_company" V0
WHERE
UPPER(V0."name"::TEXT) LIKE UPPER(% pr %)
)
)
This is what the filter should look like:
class CompanyVenueListFilter(filters.FilterSet):
    # Free-text ``?text=`` filter that returns a company-name match only once.
    text = filters.CharFilter(method="name_search")

    def name_search(self, qs, name, value):
        """Return venues matching ``value`` by name, plus one venue per matching company.

        A venue is kept when its own name contains ``value``, or when it is the
        lowest-id venue of a company whose name contains ``value``.
        """
        # Correlated subquery: venues of the outer Company row, lowest id first.
        # NOTE(review): on Django 3.2.16 this ``__in`` lookup was reported to
        # render as ``IN V0."id"`` (no parentheses), which PostgreSQL rejects.
        venues_of_company = Venue.objects.filter(
            company__in=OuterRef("id")
        ).order_by("id")
        lowest_venue_id = Subquery(venues_of_company.values("id")[:1])

        matching_companies = Company.objects.filter(
            name__icontains=value
        ).annotate(first_venue_id=lowest_venue_id)
        representative_ids = matching_companies.values_list(
            "first_venue_id", flat=True
        )

        venue_name_hit = Q(name__icontains=value)
        representative_hit = Q(id__in=representative_ids)
        return qs.filter(venue_name_hit | representative_hit)

    class Meta:
        model = Venue
        fields = ["name", "company__name"]
Update for Django 3.2.16
It seems the query above does not work on this version, because Django generates the WHERE clause without parentheses around V0."id". The relevant chunk of the query looks like
WHERE
U0."company_id" IN V0."id"
and it makes PostgreSQL complain with error
ERROR: syntax error at or near "V0"
LINE 17: U0."company_id" IN V0."id"
For Django==3.2.16 the filtering method in CompanyVenueListFilter could look like the following:
def name_search(self, qs, name, value):
    """Return venues matching ``value`` by name, plus one venue per matching company.

    Relies on ``distinct("company_id")`` (``DISTINCT ON``, PostgreSQL only):
    with the ``("company_id", "id")`` ordering it keeps the lowest-id venue
    of every company whose name contains ``value``.
    """
    matching_companies = Company.objects.filter(name__icontains=value)
    one_venue_per_company = (
        Venue.objects.filter(company__in=matching_companies)
        .order_by("company_id", "id")
        .distinct("company_id")
    )
    venue_name_hit = Q(name__icontains=value)
    representative_hit = Q(id__in=one_venue_per_company.values_list("id"))
    return qs.filter(venue_name_hit | representative_hit)
This answer is based on another Stack Overflow answer and the Django docs:
Django manager annotate first element of m2m as fk
Subquery() expressions

We have a temporary solution, which we're a bit wary about but it seems to do its job. Won't tag this answer as accepted as we're still hoping that someone has a more pythonic/djangoistic solution to the problem.
# viewset
class CompanyViewSet(viewsets.ReadOnlyModelViewSet):
    # Lists venues (with their company) and post-filters the serialized rows
    # so that a company matched only by its *company* name appears once.
    serializer_class = CompanyVenueSearchSerializer
    queryset = (
        Venue.objects.all()
        .select_related("company")
        .order_by("company__name")
    )
    permission_classes = (ReadOnly,)
    http_method_names = ["get"]
    filter_backends = (filters.DjangoFilterBackend,)
    filterset_class = CompanyVenueListFilter
    pagination_class = None

    def list(self, request, *args, **kwargs):
        # Serialize the filtered venues, then drop every row after the first
        # one per company when the search text matches the company name but
        # not the venue name. Rows whose venue name matches are always kept.
        queryset = self.filter_queryset(self.get_queryset())
        rows = self.get_serializer(queryset, many=True).data

        # Without a search text there is nothing to de-duplicate; this also
        # avoids calling .lower() on None when ?text= is absent.
        text = (request.GET.get("text") or "").lower()
        if not text:
            return Response(rows, status=status.HTTP_200_OK)

        # Companies that already contributed their single "company-only" row.
        # A set keeps the check O(1) and does not depend on rows of the same
        # company being adjacent in the result.
        seen_company_ids = set()
        kept = []
        for row in rows:
            # Keys are the field names emitted by CompanyVenueSearchSerializer.
            matched_by_company_only = (
                text in row["company_name"].lower()
                and text not in row["venue_name"].lower()
            )
            if matched_by_company_only:
                if row["company_id"] in seen_company_ids:
                    continue
                seen_company_ids.add(row["company_id"])
            kept.append(row)

        return Response(kept, status=status.HTTP_200_OK)

Related

insert nested relationships in DB django

Need help: I am trying to push nested relations into the DB and don't know where I am going wrong. Is there something wrong with validated_data, which is a list of dicts here? Thanks in advance.
# Serializer for Catalog with a nested list of CatalogProducts, read from the
# 'catalogproducts_set' attribute of the Catalog instance.
class CatalogSerializer(serializers.ModelSerializer):
# Declared without required=False, so the incoming payload must contain a
# 'catalog_products' key.
catalog_products = CatalogProductsSerializer(source = 'catalogproducts_set',many=True)
class Meta:
model = Catalog
fields = ['created_by','client','catalog_products','created_datetime','is_active']
# Creates a Catalog for the given client, then one CatalogProducts row per
# remaining item in the validated data.
def create(self,validate_data):
# NOTE(review): 'id' is not listed in Meta.fields, so it is presumably absent
# from the validated data -- confirm.
client_id = validate_data.pop('id')
# NOTE(review): the trailing comma makes `client` a one-element tuple rather
# than a User instance.
client = User.objects.get(id=client_id),
catalog_obj = Catalog.objects.create(
client = client,
created_by = self.context['user'],
is_active =True,
)
# NOTE(review): if validate_data is a dict, this loop iterates over its keys,
# not over product dicts -- verify what is actually passed in.
for pricelist_ins in validate_data:
CatalogProducts.objects.create(
catalog = catalog_obj,**pricelist_ins)
return catalog_obj
Basic Viewset
class CatalogViewset(viewsets.ModelViewSet):
queryset = Catalog.objects.all()
serializer_class = CatalogSerializer
permission_classes = []
authentication_classes = []
def create(self, request, *args, **kwargs):
if request.data:
try:
serialized_data = self.get_serializer(data = request.data)
if serialized_data.is_valid(raise_exception=True):
serialized_data.save()
return Response(serialized_data.data,status=200)
except Exception as e:
return Response({'error':str(e)},status=400)
return Response({'status':'invalid request'},status=400)
the error I am getting in Postman
{
"error": "{'catalog_products': [ErrorDetail(string='This field is required.', code='required')]}"
}
data i am posting
{
"id":"2",
"pricing_list":[
{
"from_quantity":"101",
"to_quantiy":"34",
"price":"1000"
},
{
"from_quantity":"10",
"to_quantiy":"501",
"price":"2000"
}
]
}
You have catalog_products in the fields, and it is required by default, but you are not posting any catalog_products. You need to post data based on the fields of the serializer. validated_data will not contain any other data, only valid data for the fields that were declared in the serializer.
To make it optional, you may try adding required=False in the serializer like this:
class CatalogSerializer(serializers.ModelSerializer):
    # required=False lets a payload omit 'catalog_products' without
    # triggering the "This field is required." validation error.
    catalog_products = CatalogProductsSerializer(
        source='catalogproducts_set',
        many=True,
        required=False,
    )

    class Meta:
        model = Catalog
        fields = [
            'created_by',
            'client',
            'catalog_products',
            'created_datetime',
            'is_active',
        ]

Custom FilterSet doesn't filter by two fields at the same time

I wrote custom FilterSet to filter queryset by two fields but it doesn't work properly when it's filtering on two fields at the same time.
my FilterSet:
# FilterSet for Event: filters by category and by a named time interval.
class EventFilter(filters.FilterSet):
values = None
default = None
category = filters.ModelMultipleChoiceFilter(
queryset=EventCategory.objects.all(),
)
interval = filters.CharFilter(
method='filter_interval'
)
class Meta:
model = Event
fields = ('category', 'interval')
# Applied only when the request carries a 'current_time' query parameter;
# otherwise the incoming queryset is returned unchanged.
def filter_interval(self, queryset, name, value):
if self.request.query_params.get('current_time'):
try:
# Dispatches to a method named get_<value>_interval (not shown in this
# snippet) and converts each returned date with self.to_utc.
interval = getattr(self, f'get_{value}_interval')()
interval = list(map(lambda date: self.to_utc(date), interval))
# NOTE(review): this filters self.queryset rather than the `queryset`
# argument, so filters applied earlier in the chain are not carried over.
return self.queryset.filter(Q(status=Event.STARTED) | (Q(status=Event.NOT_STARTED, start_at__range=interval)))
# Any failure above is swallowed silently and the unmodified queryset is
# returned by the final statement.
except Exception as e:
pass
return queryset
APIView:
class ListEventsAPIView(generics.ListAPIView):
serializer_class = ListEventsSerializer
filter_class = EventFilter
search_fields = 'title',
filter_backends = filters.SearchFilter, DjangoFilterBackend
def get_queryset(self):
return Event.objects.filter(Q(status=Event.STARTED) | (Q(status=Event.NOT_STARTED) & Q(start_at__gte=date)))
Here is generated SQL when I'm trying to filter only by category:
SELECT "*" FROM "events" WHERE (("events"."status" = 'started'
OR ("events"."status" = 'not_started'
AND "events"."start_at" >= '2019-06-19T13:24:26.444183+00:00'::timestamptz))
AND "events"."category_id" = 'JNPIZF54n5q')
When I'm filtering on both:
SELECT "*" FROM "events" WHERE (("events"."status" = 'started'
OR ("events"."status" = 'not_started' AND "events"."start_at" >= '2019-06-19T13:24:26.444183+00:00'::timestamptz))
AND ("events"."status" = 'started' OR ("events"."start_at" BETWEEN '2019-06-19T07:16:48.549000+00:00'::timestamptz AND '2019-06-30T20:59:59.000059+00:00'::timestamptz AND "events"."status" = 'not_started')))
Your issue is in this line:
return self.queryset.filter(Q(status=Event.STARTED) | (Q(status=Event.NOT_STARTED, start_at__range=interval)))
You're using queryset from FilterSet class itself. This queryset doesn't have any previous filters applied, so by using it you're cancelling another filter. Just remove self. from this line to use queryset that is passed to this function as a parameter and everything will work fine.

Django Rest Framework serialize relations

How do I serialize fields from related models?
I have these models:
class Order(models.Model):
order_id = models.BigIntegerField(verbose_name='Order ID', unique=True)
order_name = models.CharField(verbose_name='Order name', max_length=255)
order_type = models.IntegerField(verbose_name='Campaign type')
class Types(models.Model):
delimiter = models.CharField(verbose_name='Delimiter', max_length=255)
status = models.BooleanField(verbose_name='Status', default=True)
title = models.CharField(verbose_name='Title', max_length=255)
class User(models.Model):
name = models.CharField(verbose_name='User name', max_length=200, unique=True)
class Report(models.Model):
order = models.ForeignKey(Order, to_field='order_id', verbose_name='Order ID')
user = models.ForeignKey(User, verbose_name='User ID')
ad_type = models.ForeignKey(Types, verbose_name='Type')
imp = models.IntegerField(verbose_name='Total imp')
month = models.DateField(verbose_name='Month', default=datetime.datetime.today)
View:
class ReportLisAPIView(ListAPIView):
serializer_class = ReportSerializer
def get_queryset(self):
month = parse_date(self.kwargs['month']) - relativedelta(day=1)
queryset = (
Report.objects.filter(month=month)
.values_list(
'user', 'user__name', 'order__order_id',
'order__order_name', 'order__order_type'
).all().annotate(Sum('imp'))
)
return queryset
Serializer:
class ReportSerializer(ModelSerializer):
class Meta:
model = Report
depth = 1
I need to get all the fields as they appear in 'queryset' in get_queryset(),
but I got an error:
Got AttributeError when attempting to get a value for field imp on
serializer ReportSerializer. The serializer field might be named
incorrectly and not match any attribute or key on the tuple
instance. Original exception text was: 'tuple' object has no attribute
'imp'.
But if I return in get_queryset() just Report.objects.filter(month=month).all() I'll get all objects and related object with all field, without aggregate of imp and not grouping.
So the question is how to make serializer return structure that set in queryset?
The get_queryset method is expected to return a queryset of model instances, but because of values_list yours yields tuples. Either drop values_list so that model instances are returned, or go with a more generic view like APIView.
I found a way how to do it.
As I use .values_list(), the queryset yields plain tuples instead of model instances. So that the serializer understands what is inside each tuple, I declared all the fields explicitly in the serializer, and in to_representation() I convert each tuple into a dictionary, as it should be.
Serializer:
class ReportSerializer(serializers.ModelSerializer):
    # Every output field is declared explicitly because the rows being
    # serialized are positional tuples, not Report instances.
    user = serializers.IntegerField()
    user_name = serializers.CharField()
    order_id = serializers.IntegerField()
    order_name = serializers.CharField()
    order_type = serializers.IntegerField()
    imp = serializers.IntegerField()

    class Meta:
        model = Report
        fields = [
            'user',
            'user_name',
            'order_id',
            'order_name',
            'order_type',
            'imp',
        ]
        depth = 1

    def to_representation(self, instance):
        """Name the positional values of ``instance`` and serialize the mapping.

        ``instance`` is unpacked into a six-field named tuple (same order as
        the declared fields); its dict form is handed to the parent
        implementation.
        """
        column_names = (
            'user', 'user_name', 'order_id',
            'order_name', 'order_type', 'imp',
        )
        Reports = namedtuple('Reports', column_names)
        row = Reports(*instance)
        return super(ReportSerializer, self).to_representation(row._asdict())
View:
class ReportLisAPIView(ListAPIView):
    # Lists per-user, per-order impression totals for the month given in the URL.
    serializer_class = ReportSerializer

    def get_queryset(self):
        # relativedelta(day=1) pins the day-of-month to 1 for the lookup.
        requested = parse_date(self.kwargs['month'])
        month = requested - relativedelta(day=1)
        grouped = Report.objects.filter(month=month).values_list(
            'user', 'user__name', 'order__order_id',
            'order__order_name', 'order__order_type'
        )
        return grouped.all().annotate(Sum('imp'))

    def list(self, *args, **kwargs):
        queryset = self.get_queryset()
        serializer = self.serializer_class(queryset, many=True)
        # Returning Response(serializer.data) here would already be enough;
        # everything below only regroups the flat rows by user.
        result = {
            'month': parse_date(self.kwargs['month']).strftime('%Y-%m'),
            'reports': []
        }
        inflcr = {}
        for item in serializer.data:
            # One entry per user, created the first time that user is seen.
            user_entry = inflcr.setdefault(item['user'], {
                'id': item['user'],
                'name': item['user_name'],
                'campaigns': []
            })
            user_entry['campaigns'].append({
                'id': item['order_id'],
                'name': item['order_name'],
                'type': item['order_type'],
                'impressions': item['imp'],
            })
        result['reports'] = inflcr.values()
        return Response(result)

DRF - Format Serializer's output from QuerySet

I am working with Django Rest Framework for the first time and now I'm trying to get an output like this:
{
"qty": 5,
"total": 20,
"items": [
{
"id": 1,
"name": "name_1"
},
{
"id": 2,
"name": "name_2"
}
]
}
from a Serializer. The result data in the output above came from a queryset. I'd like to work with the queryset inside the serializer class. I've not been able to get the results I want without making queries inside the serializer:
class ResSerializer(serializers.Serializer):
qty = serializers.SerializerMethodField()
items = serializers.SerializerMethodField()
total = serializers.SerializerMethodField()
def get_qty(self, obj):
try:
return Model.objects.filter(...)\
.aggregate(qty=Sum('job__long'))\
.get('qty')
except KeyError:
return 0
def get_items(self, obj):
print 'testing'
def get_total(self, obj):
return 0
class Meta:
fields = ('qty', 'items', 'total')
I'm calling Serializer like this:
queryset = Model.objects.filter(...)
serialized = ResSerializer(queryset, many=False, context={'current_user': request.user})
But this is not working as I want. Any suggestions? Thanks.
UPDATE
This is the model I query to:
class Intermediate(models.Model):
partner = models.ForeignKey('partner.Partner')
job = models.ForeignKey(Job)
joined_at = models.DateTimeField(auto_now_add=True)
modified = models.DateTimeField(auto_now=True)
status = models.SmallIntegerField(default=STATUS_ACCEPTED)
reason_index = models.SmallIntegerField('Cancel reason', default=REASON_3)
start_time = models.TimeField(null=True)
end_time = models.TimeField(null=True)
start_date = models.DateField(null=True)
end_date = models.DateField(null=True)
And here's the view:
class ResView(CustomAPIView):
authentication_classes = (CustomTokenAuthentication, )
# permission_classes = (PartnerAuthenticatedOnly, ) # Uncomment this on server
def post(self, request, *args, **kwargs):
try:
queryset = JobPartner.objects.filter(...)
serialized = ResSerializer(queryset, many=False, context={'current_user': request.user})
response_success_object(self.response_dic, serialized.data)
return Response(self.response_dic)
except Exception, e:
print e
To get the items representation, you can use ItemsSerializer which will give the serialized data having id and name.
class ItemsSerializer(serializers.ModelSerializer):
class Meta:
model = MyModel # specify your model
fields = ('id', 'name') # return these 2 fields in the representation
This serializer when dealing with multiple instances will return the serialized data in below fashion.
[
{
"id": 1,
"name": "name_1"
},
{
"id": 2,
"name": "name_2"
}
]
Now, the qty and total fields depend on the queryset as a whole and not on a particular object in it, so it would be better to compute them separately in your view. Then create a dictionary containing the fields items, qty and total and return it as the response.
class ResView(CustomAPIView):
authentication_classes = (CustomTokenAuthentication, )
# permission_classes = (PartnerAuthenticatedOnly, ) # Uncomment this on server
def post(self, request, *args, **kwargs):
try:
queryset = JobPartner.objects.filter(...)
qty = self.get_qty() # compute the value of qty
total = self.get_total() # compute the value of total
items_serializer = ItemsSerializer(queryset, many=True)
items = items_serializer.data # compute the value of items
return_dict = { # prepare response data
'qty' : qty,
'total': total,
'items': items
}
return Response(return_dict) # return the response
except Exception, e:
print e

tastypie: filter many to many tables with multiple, ANDed values

I have two tables (Movie and Genre) that are connected with a many to many relation using a crosstable (MovieGenre).
My models.py file looks like this:
class Genre( models.Model ):
sName = models.CharField( max_length=176)
[ .. ]
class Movie( models.Model ):
sTitle = models.CharField( max_length=176)
genre = models.ManyToManyField( Genre )
[ .. ]
class MovieGenre( models.Model ):
idMovie = models.ForeignKey( Movie )
idGenre = models.ForeignKey( Genre )
I want to use tastypie to filter all movies of certain genres. E.g. show me all movies that are of genre Action, Thriller and SciFi.
My api.py looks like this:
class GenreResource(ModelResource):
class Meta:
queryset = Genre.objects.all()
resource_name = 'genre'
always_return_data = True
include_resource_uri = False
excludes = ['dtCreated', 'dtModified' ]
authorization= Authorization()
authentication = SessionAuthentication()
filtering = {
"id" : ALL,
}
class MovieResource(ModelResource):
genre = fields.ManyToManyField( 'app.api.GenreResource', 'genre', full=True )
class Meta:
queryset = Movie.objects.all()
resource_name = 'movie'
authorization= Authorization()
authentication = SessionAuthentication()
always_return_data = True
include_resource_uri = False
excludes = ['dtCreated', 'dtModified' ]
filtering = {
"sTitle" : ALL,
"genre" : ALL_WITH_RELATIONS,
}
My test data:
Two movies (with genre ids)
Matrix (1 & 3 )
Blade Runner (1 & 2 )
First I make a query on the title, as expected below query returns 1 result (namely Matrix):
http://localhost:8000/api/v1/movie/?format=json&sTitle__icontains=a&sTitle__icontains=x
However, I get three results with the URL that should query the related genre table (two times Matrix and once Blade Runner) with this query:
http://localhost:8000/api/v1/movie/?format=json&genre__id__in=3&genre__id__in=1
I would expect to get back only Matrix
I also tried to override apply_filters like so:
def apply_filters(self, request, applicable_filters):
oList = super(ModelResource, self).apply_filters(request, applicable_filters)
loQ = [Q(**{'sTitle__icontains': 'a'}), Q(**{'sTitle__icontains': 'x'})]
# works as intended: one result
loQ = [Q(**{'genre__id__in': '3'}) ]
# results in one result (Matrix)
loQ = [Q(**{'genre__id__in': '1'}), Q(**{'genre__id__in': '3'}) ]
# results in no results!
loQ = [Q(**{'genre__id__in': [ 1, 3]}) ]
# results in two results Matrix and Blade Runner which is OK since obviously ORed
oFilter = reduce( operator.and_, loQ )
oList = oList.filter( oFilter ).distinct()
return oList
Any idea to make this work?
Thanks for any idea...
Have you tried http://localhost:8000/api/v1/movie/?format=json&genre__id=3&genre__id=1
If I understand correctly, using __in that way would be like saying genre__id__in=[1, 3].