Querying data from Django - django

Here's what my model structure looks like:
class Visitor(models.Model):
id = models.AutoField(primary_key=True)
class Session(models.Model):
id = models.AutoField(primary_key=True)
visit = models.ForeignKey(Visitor)
sequence_no = models.IntegerField(null=False)
class Track(models.Model):
id = models.AutoField(primary_key=True)
session = models.ForeignKey(Session)
action = models.ForeignKey(Action)
when = models.DateTimeField(null=False, auto_now_add=True)
sequence_no = models.IntegerField(null = False)
class Action(models.Model):
id = models.AutoField(primary_key=True)
url = models.CharField(max_length=65535, null=False)
host = models.IntegerField(null=False)
As you can see, each Visitor has multiple Sessions; each Session has multiple Tracks and each Track has one Action. Tracks are always ordered ascendingly by the session and the sequence_no. A Visitors average time on an site (i.e. a particular Action.host) is the difference in Track.when (time) between the highest and lowest Track.sequence_no divided by the number of Sessions of that Visitor.
I need to calculate the average time of visitors on the site which be the sum of the time for each visitor on the Action.site divided by the number of visitors.
I could query this using SQL but I'd like to keep my query as Djangonic as possible and I'm still very lost with complex queries.

For a specific Action object you can gather interesting data about Sessions:
from django.db.models import Min, Max
from yourapp.models import *
host = 1 # I suppose you want to calculate for each site
sessions = list(Session.objects.filter(
track__action__host=host,
).annotate(
start=Min('track__when'),
end=Max('track__when'),
).values('visit_id', 'start', 'end'))
You will get something in the line of:
[
{ 'visit_id': 1, 'start': datetime(...), 'end': datetime(...) },
{ 'visit_id': 1, 'start': datetime(...), 'end': datetime(...) },
{ 'visit_id': 2, 'start': datetime(...), 'end': datetime(...) },
....
]
Now it's only a matter of getting the desired result from the data:
number_of_visitors = len(set(s['visit_id'] for s in sessions))
total_time = sum((s['end'] - s['start']).total_seconds() for s in sessions)
average_time_spent = total_time / number_of_visitors
Another way is to use two queries instead of one, and avoid the len(set(...)) snippet:
sessions = Session.objects.filter(
track__action__host=host,
).annotate(
start=Min('track__when'),
end=Max('track__when'),
)
number_of_visitors = sessions.values('visit_id').distict().count()
total_time = sum((s['end'] - s['start']).total_seconds()
for s in sessions.values('start', 'end'))
There is NO WAY to do actual calculated fields barring the provided aggregations, so either you do it in raw SQL or you do in code like this.
At least the proposed solution uses Django's ORM as far as possible.

Related

how to call back unique together constraints as a field django

i'm trying to call back unique constraints field , in my project i have to count number of M2M selected
class Booking(models.Model):
room_no = models.ForeignKey(Room,on_delete=models.CASCADE,blank=True,related_name='rooms')
takes_by = models.ManyToManyField(Vistor)
#property
def no_persons(self):
qnt = Booking.objects.filter(takes_by__full_information=self).count()#but this doesnt work
return qnt
Cannot query "some room information": Must be "Vistor" instance.
class Vistor(models.Model):
full_name = models.CharField(max_length=150)
dob = models.DateField(max_length=14)
city = models.ForeignKey(City,on_delete=models.CASCADE)
class Meta:
constraints = [
models.UniqueConstraint(fields=['full_name','dob','city'],name='full_information')
]
def __str__(self):
return f'{self.full_name} - {self.city} - {self.dob}'
it it possible to access full_information through Booking model ? thank you ..
If you want to count the number of Visitors related to that booking, you can count these with:
#property
def no_persons(self):
self.taken_by.count()
This will make an extra query to the database, therefore it is often better to let the database count these in the query. You can thus remove the property, and query with:
from django.db.models import Count
Booking.objects.annotate(
no_persons=Count('takes_by')
)
The Bookings that arise from this QuerySet will have an extra attribute no_persons with the number of related Visitors.

django rest datatables, filter experts by meeting objectives

I need to filter all Experts by past objectives.
I have a minimal runnable example at https://github.com/morenoh149/django-rest-datatables-relations-example (btw there are fixtures you can load with test data).
my models are
class Expert(models.Model):
name = models.CharField(blank=True, max_length=300)
class Meeting(models.Model):
user = models.ForeignKey(User, on_delete=models.SET_NULL, blank=True, null=True)
expert = models.ForeignKey(Expert, on_delete=models.SET_NULL, blank=True, null=True)
objective = models.TextField(null=True, blank=True)
my datatables javascript
$("#table-analyst-search").DataTable({
serverSide: true,
ajax: "/api/experts/?format=datatables",
ordering: false,
pagingType: "full_numbers",
responsive: true,
columns: [
{
data: "objectives",
name: "objectives",
visible: false,
searchable: true,
render: (objectives, type, row, meta) => {
return objectives;
}
},
],
});
My serializer
class ExpertSerializer(serializers.ModelSerializer):
id = serializers.IntegerField(read_only=True)
objectives = serializers.SerializerMethodField()
class Meta:
model = Expert
fields = (
"id",
"objectives",
)
def get_objectives(self, obj):
request = self.context["request"]
request = self.context["request"]
meetings = Meeting.objects.filter(
analyst_id=request.user.id, expert_id=obj.id
).distinct('objective')
if len(meetings) > 0:
objectives = meetings.values_list("objective", flat=True)
objectives = [x for x in objectives if x]
else:
objectives = []
return objectives
When I begin to type in the datatables.js searchbar I get an error like
FieldError at /api/experts/
Cannot resolve keyword 'objectives' into field. Choices are: bio, company, company_id, created_at, description, email, favoriteexpert, first_name, id, is_blocked, last_name, meeting, middle_name, network, network_id, position, updated_at
Request Method: GET
Request URL: http://localhost:8000/api/experts/?format=datatables&draw=3&columns%5B0%5D%5Bdata%5D=tags&columns%5B0%5D%5Bname%5D=favoriteexpert.tags.name&columns%5B0%5D%5Bsearchable%5D=true&columns%5B0%5D%5Borderable%5D=false&columns%5B0%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B0%5D%5Bsearch%5D%5Bregex%5D=false&columns%5B1%5D%5Bdata%5D=desc&columns%5B1%5D%5Bname%5D=&columns%5B1%5D%5Bsearchable%5D=false&columns%5B1%5D%5Borderable%5D=false&columns%5B1%5D%5Bsearch%5D%5Bvalue%5D=&columns%
fwiw, in pure django orm what I want to accomplish would be something like
Expert.objects.filter(
pk__in=Meeting.objects.filter(
objective__icontains='Plastics', user=request.user
).values('expert')
)
How can I filter experts by historical meeting objectives?
The reason for the error is that django-rest-framework-datatables is trying to translate the request into a query which can be run against the Expert table.
In your JS, you're asking for a field called 'objectives' to be returned, but there is no such field on the Expert model.
You could probably achieve what you are trying to do using the django-filter integration. In this case, you could set up a filter on the FK reference to the Meeting table. The example app demonstrates how to do this.
I think the best way to understand what's going on is to get the example application running, and if possible, set breakpoints and step through.
Incidentally, if you want to get the search box to work correctly, then you need to define a global_q() method. This is also covered in the example app.
I ended up authoring a custom django-filter
class AssociatedMeetingCharFilter(filters.CharFilter):
def global_q(self):
"""
Uses the global filter to search a meeting field of meetings owned by the logged in user
"""
if not self._global_search_value:
return Q()
kw = "meeting__{}__{}".format(self.field_name, self.lookup_expr)
return Q(**{
kw: self._global_search_value,
"meeting__user_id": self.parent.request.user.id or -1,
})
class ExpertGlobalFilterSet(DatatablesFilterSet):
name = GlobalCharFilter(lookup_expr='icontains')
objectives = AssociatedMeetingCharFilter(field_name='objective', lookup_expr='icontains')
full example at https://github.com/morenoh149/django-rest-datatables-relations-example

Django annotation on compoundish primary key with filter ignoring primary key resutling in too many annotated items

Please see EDIT1 below, as well.
Using Django 3.0.6 and python3.8, given following models
class Plants(models.Model):
plantid = models.TextField(primary_key=True, unique=True)
class Pollutions(models.Model):
pollutionsid = models.IntegerField(unique=True, primary_key=True)
year = models.IntegerField()
plantid = models.ForeignKey(Plants, models.DO_NOTHING, db_column='plantid')
pollutant = models.TextField()
releasesto = models.TextField(blank=True, null=True)
amount = models.FloatField(db_column="amount", blank=True, null=True)
class Meta:
managed = False
db_table = 'pollutions'
unique_together = (('plantid', 'releasesto', 'pollutant', 'year'))
class Monthp(models.Model):
monthpid = models.IntegerField(unique=True, primary_key=True)
year = models.IntegerField()
month = models.IntegerField()
plantid = models.ForeignKey(Plants, models.DO_NOTHING, db_column='plantid')
power = models.IntegerField(null=False)
class Meta:
managed = False
db_table = 'monthp'
unique_together = ('plantid', 'year', 'month')
I'd like to annotate - based on a foreign key relationship and a fiter a value, particulary - to each plant the amount of co2 and the Sum of its power for a given year. For sake of debugging having replaced Sum by Count using the following query:
annotated = tmp.all().annotate(
energy=Count('monthp__power', filter=Q(monthp__year=YEAR)),
co2=Count('pollutions__amount', filter=Q(pollutions__year=YEAR, pollutions__pollutant="CO2", pollutions__releasesto="Air")))
However this returns too many items (a wrong number using Sum, respectively)
annotated.first().co2 # 60, but it should be 1
annotated.first().energy # 252, but it should be 1
although my database guarantees - as denoted, that (plantid, year, month) and (plantid, releasesto, pollutant, year) are unique together, which can easily be demonstrated:
pl = annotated.first().plantid
testplant = Plants.objects.get(pk=pl) # plant object
pco2 = Pollutions.objects.filter(plantid=testplant, year=YEAR, pollutant="CO2", releasesto="Air")
len(pco2) # 1, as expected
Why does django return to many results and how can I tell django to limit the elements to annotate to the 'current primary key' in other words to only annotate the elements where the foreign key matches the primary key?
I can achieve what I intend to do by using distinct and Max:
energy=Sum('yearly__power', distinct=True, filter=Q(yearly__year=YEAR)),
co2=Max('pollutions__amount', ...
However the performance is inacceptable.
I have tested to use model_to_dict and appending the wanted values "by hand" to the dict, which works for the values itself, but not for sorting the resulted dict (e.g. by energy) and it is acutally faster than the workaround directly above.
It conceptually strikes to me that the manual approach is faster than letting the database do, what it is intended to do.
Is this a feature limitation of django's orm or am I missing something?
EDIT1:
The behaviour is known as bug since 11 years.
Even others "spent a whole day on this".
I am now trying it with subqueries. However the forein key I am using is not a primary key of its table. So the kind of "usual" approach to use "pk=''" does not work. More clearly, trying:
tmp = Plants.objects.filter(somefilter)
subq1 = Subquery(Yearly.objects.filter(pk=OuterRef('plantid'), year=YEAR)) tmp1 = tmp.all().annotate(
energy=Count(Subquery(subq1))
)
returns
OperationalError at /xyz
no such column: U0.yid
Which definitely makes sense because Plants has no clue what a yid is, it only knows plantids. How do I adjust the subquery to that?

django querset filter foreign key select first record

I have a History model like below
class History(models.Model):
class Meta:
app_label = 'subscription'
ordering = ['-start_datetime']
subscription = models.ForeignKey(Subscription, related_name='history')
FREE = 'free'
Premium = 'premium'
SUBSCRIPTION_TYPE_CHOICES = ((FREE, 'Free'), (Premium, 'Premium'),)
name = models.CharField(max_length=32, choices=SUBSCRIPTION_TYPE_CHOICES, default=FREE)
start_datetime = models.DateTimeField(db_index=True)
end_datetime = models.DateTimeField(db_index=True, blank=True, null=True)
cancelled_datetime = models.DateTimeField(blank=True, null=True)
Now i have a queryset filtering like below
users = get_user_model().objects.all()
queryset = users.exclude(subscription__history__end_datetime__lt=timezone.now())
The issue is that in the exclude above it is checking end_datetime for all the rows for a particular history object. But i only want to compare it with first row of history object.
Below is how a particular history object looks like. So i want to write a queryset filter which can do datetime comparison on first row only.
You could use a Model Manager method for this. The documentation isn't all that descriptive, but you could do something along the lines of:
class SubscriptionManager(models.Manager):
def my_filter(self):
# You'd want to make this a smaller query most likely
subscriptions = Subscription.objects.all()
results = []
for subscription in subscriptions:
sub_history = subscription.history_set.first()
if sub_history.end_datetime > timezone.now:
results.append(subscription)
return results
class History(models.Model):
subscription = models.ForeignKey(Subscription)
end_datetime = models.DateTimeField(db_index=True, blank=True, null=True)
objects = SubscriptionManager()
Then: queryset = Subscription.objects().my_filter()
Not a copy-pastable answer, but shows the use of Managers. Given the specificity of what you're looking for, I don't think there's a way to get it just via the plain filter() and exclude().
Without knowing what your end goal here is, it's hard to say whether this is feasible, but have you considered adding a property to the subscription model that indicates whatever you're looking for? For example, if you're trying to get everyone who has a subscription that's ending:
class Subscription(models.Model):
#property
def ending(self):
if self.end_datetime > timezone.now:
return True
else:
return False
Then in your code: queryset = users.filter(subscription_ending=True)
I have tried django's all king of expressions(aggregate, query, conditional) but was unable to solve the problem so i went with RawSQL and it solved the problem.
I have used the below SQL to select the first row and then compare the end_datetime
SELECT (end_datetime > %s OR end_datetime IS NULL) AS result
FROM subscription_history
ORDER BY start_datetime DESC
LIMIT 1;
I will select my answer as accepted if not found a solution with queryset filter chaining in next 2 days.

Django - Return JSON, Error

I got a View.py function that looks like this:
def GetAllCities(request):
cities = list(City.objects.all())
return HttpResponse(json.dumps(cities))
My City model looks like this
class City(models.Model):
city = models.CharField()
loc = models.CharField()
population = models.IntegerField()
state = models.CharField()
_id = models.CharField()
class MongoMeta:
db_table = "cities"
def __unicode__(self):
return self.city
I am using a MongoDB that looks like this
{
"_id" : ObjectId("5179837cbd7fe491c1f23227"),
"city" : "ACMAR",
"loc" : "[-86.51557, 33.584132]",
"state" : "AL",
"population" : 6055
}
I get the following error when trying to return the JSON from my GetAllCities function:
City ACMAR is not JSON serializable
So I tried this Instead:
def GetAllCities(request):
cities = serializers.serialize("json", City.objects.all())
return HttpResponse(cities)
And this works but It's very slow, it takes about 9 seconds(My database contains 30000 rows)
Should it take this long or am I doing something wrong?
I've built the same app in PHP, Rails and NodeJS.
In PHP it takes on average 2000ms, NodeJS = 800ms, Rails = 5882ms and Django 9395ms. Im trying to benchmark here so I wonder if there is a way to optimize my Django code or is this as fast as it gets?
For sure you do not need to return ALL cities, as you probably won't display all 30000 rows anyway (at least in user-friendly way). Consider a solution where you return only cities within some range from requested location. Mongo supports geospatial indexes, so there should be no problem in doing that. There are also many tutorials over the internet how to perform spatial filtering in Django/MongoDB.
def GetAllCities(request, lon, lat):
#Pseudo-code
cities = City.objects.filterWithingXkmFromLonLat(lon, lat).all()
cities = serializers.serialize("json", cities)
return HttpResponse(cities)
If you really, really need all cities, consider caching the response. Location, name and population of cities are not things which change dynamically, in a matter of let's say seconds. Cache the result and recalculate only every hour, day or more. Django supports cache out of the box
#cache_page(60 * 60)
def GetAllCities(request):
(...)
Another thing you can try to get a little more of speed is to get from db just the values you need and get the QuerySet to build the dictionary.
A simple query like this would work:
City.objects.all().values('id', 'city', 'loc', 'population', 'state')
Or you can put it in a manager:
class CitiesManager(models.Manager):
class as_dict(self):
return self.all().values('id', 'city', 'loc', 'population', 'state')
class City(models.Model):
.... your fields here...
objects = CitiesManager()
And then use it in your view as:
City.objects.as_dict()
FOUND A SOLUTION
I am benchmarking with different methods, one method is to see how fast one language/framework is to select ALL rows in a database and return it as JSON. I found a solution now that speeds it up by half the time!
My new views.py
def GetAllCities(request):
dictionaries = [ obj.as_dict() for obj in City.objects.all() ]
return HttpResponse(json.dumps({"Cities": dictionaries}), content_type='application/json')
And my new model
class City(models.Model):
city = models.CharField()
loc = models.CharField()
population = models.IntegerField()
state = models.CharField()
_id = models.CharField()
def as_dict(self):
return {
"id": self.id,
"city": self.city,
"loc": self.loc,
"population": self.population,
"state": self.state
# other stuff
}
class MongoMeta:
db_table = "cities"
def __unicode__(self):
return self.city
Found the solution here