Joining 2 Models in Django - django

I'm trying to join 2 tables in a Django view. I currently have two models.
The join could be done on ticker and date. As both models have them.
SSALiReport.objects.all()
NELiReport.objects.all()
NELI looks like this
{
"clout": 40000,
"date": "2018-05-15",
"sentiment": 500,
"ticker": "AAPL"
},
SSALI looks like this
{
"date": "2018-05-15",
"market": "NYSE",
"prediction": 0.6,
"price": 300,
"ticker": "AAPL"
},
I want a third view that looks like this, lets call it FullReport
{
"date": "2018-05-15",
"market": "NYSE",
"prediction": 0.6,
"price": 300,
"ticker": "AAPL",
"clout": 40000,
"sentiment": 500
},
How do I pull this in a view? Also note that I want to be able to fetch many rows by date, so I'll need to join the two tables.
for reference see the model
from django.contrib.postgres.fields import JSONField
from django.db import models
import datetime
# Create your models here.
class SSALiReport(models.Model):
    """Per-ticker, per-date report holding market, price and prediction."""

    date = models.DateField()
    ticker = models.CharField(max_length=10, default='NA')
    market = models.CharField(max_length=250, default='NA')
    price = models.FloatField()
    prediction = models.FloatField()

    def __str__(self):
        """Return the report label as "<date> - <ticker>"."""
        return f"{self.date} - {self.ticker}"
class NELiReport(models.Model):
    """Per-ticker, per-date report holding sentiment, clout and top_news."""

    date = models.DateField()
    ticker = models.CharField(max_length=10, default='NA')
    # Numeric defaults: a string default such as '0.0' would leave a str on
    # unsaved instances instead of a float.
    sentiment = models.FloatField(default=0.0)
    clout = models.FloatField(default=0.0)
    # A callable default gives every row its own empty dict; the string '{}'
    # would be stored as a JSON string rather than an empty JSON object.
    top_news = JSONField(default=dict)

    def __str__(self):
        """Return the report label as "<date> - <ticker>"."""
        return str(self.date) + " - " + str(self.ticker)
this SQL script works
SELECT *
FROM api_neli_report, api_ssali_report
WHERE api_neli_report.ticker = api_ssali_report.ticker
AND api_neli_report.date = api_ssali_report.date
what is the django equivalent?

You could simply do a raw query, if you don't want to change your models.
See the documentation: Performing raw SQL queries

Related

How to annotate sum over Django JSONField (Array of objects) data?

I have models sth like this
# models.py
class MyModel(models.Model):
    """Model whose ``orders`` column stores a JSON list of order objects."""

    # The field class is spelled JSONField; models.JsonField does not exist
    # and raises AttributeError when the module is imported.
    orders = models.JSONField(null=True, blank=True, default=list)
    category = models.ForeignKey(Category, on_delete=models.CASCADE)
I stored json data in this structure.
[
{
"order_name": "first order",
"price": 200
},
{
"order_name": "second order",
"price": 800
},
{
"order_name": "third order",
"price": 100
}
]
I want to sum price of all json objects ie 200+800+100
One way will be to use jsonb_array_elements to break each value into rows and then use the normal aggregate function.
For eg:
from django.db import models
Model.objects.annotate(
    # Wrap the JSON array in the SQL function jsonb_array_elements, which
    # breaks the items into multiple rows (one row per array element).
    annotate_field_1=models.Func(models.F('array_field__items'), function='jsonb_array_elements'),
).aggregate(
    # Then aggregate over the expanded rows. Count yields the number of
    # elements, not a sum of a key inside them.
    # NOTE(review): summing a key such as "price" presumably needs a Sum over
    # the extracted value instead -- confirm against the actual schema.
    total=models.Count('annotate_field_1'),
)['total']
I haven't worked with JSONArrayField but I did a little bit of research and found that the following example can give you a clue:
# Annotate each row with the sum of the "price" key of ``orders`` cast to float.
# NOTE(review): ``orders`` holds a JSON array in the question; extracting the
# key "price" straight from an array may yield NULL -- verify before relying on it.
MyModel.objects.annotate(
    order_price_sum=Sum(
        Cast(
            # Extract "price" as text so it can be cast to a float.
            KeyTextTransform("price", "orders"), models.FloatField()
        )
    ),
)
I tried to adapt it to your specific question. You can find more helpful information at the following link: https://dev.to/saschalalala/aggregation-in-django-jsonfields-4kg5
Workaround:
I was trying to figure out how to manage JSONArray using annotate in django but it seems to not be well-documented so I share this workaround to achieve the goal:
# Add up the "price" of every order across all rows that have orders.
rows_with_orders = MyModel.objects.exclude(orders__isnull=True).values('orders')
total = 0
for row in rows_with_orders:
    # Sum one row at a time; None entries are skipped and a missing price counts as 0.
    total += sum(
        order.get('price', 0)
        for order in row.get('orders')
        if order is not None
    )

Django: How to retrieve all attributes from related models for GeoJSON serialization?

I have two Models Site and Cell, every site has multiple Cells.
from django.db import models
from django.contrib.gis.db.models import PointField
class SiteManager(models.Manager):
    """Manager that looks a Site up by its natural key.

    It must derive from ``models.Manager`` (not ``models.Model``): it is
    assigned as ``objects`` on Site and relies on the manager's ``get``.
    """

    def get_by_natural_key(self, name, state_code, location):
        """Return the single object matching the three natural-key values."""
        return self.get(name=name, state_code=state_code, location=location)
class Site(models.Model):
    """A site described by its name, state code and point location."""

    name = models.CharField(max_length=10)
    state_code = models.PositiveSmallIntegerField()
    location = PointField()
    # Custom manager that provides get_by_natural_key().
    objects = SiteManager()

    class Meta:
        # The three natural-key fields must be unique in combination.
        unique_together = [['name', 'state_code', 'location']]

    def natural_key(self):
        """Return the (name, state_code, location) tuple."""
        return (self.name, self.state_code, self.location)
class Cell(models.Model):
    """A cell attached to one Site through the ``site`` foreign key."""

    tech = models.CharField(max_length=5)
    azimuth = models.IntegerField()
    sector_id = models.CharField(max_length=10)
    frequency_band = models.CharField(max_length=15)
    power = models.DecimalField(decimal_places=2, max_digits=4)
    # Deleting a Site also deletes its cells (CASCADE).
    site = models.ForeignKey(Site, on_delete=models.CASCADE)

    def natural_key(self):
        """Return (tech, azimuth) followed by the parent site's natural key."""
        return (self.tech, self.azimuth,) + self.site.natural_key()
    # Serialization-order hint for natural keys.
    # NOTE(review): the label 'astmaps.site' must match the real app label -- confirm.
    natural_key.dependencies = ['astmaps.site']
I want to retrieve the complete Cell attributes together with the related attributes from the Site model, so that I can serialize the resulting Cells into GeoJSON data. I can easily serialize the Site model like this:
from django.core.serializers import serialize # GeoJSON Serializer
sites = Site.objects.all()
sitesData = serialize('geojson', sites, geometry_field='location',
fields=('name', 'state_code'))
which gives me a GeoJson featureCollection object like:
{
"type":"FeatureCollection",
"crs":{
"type":"name",
"properties":{
"name":"EPSG:4326"
}
},
"features":[
{
"type":"Feature",
"properties": {
"name":"02101",
"state_code":2
},
"geometry":{
"type":"Point",
"coordinates":[
1.34944,
36.1586
]
}
}
]
}
But when it comes to the Cell model, I can't get the geometry field from the related model; it is always null.
Since the Cell model has the Site model as a related model, I've used the function select_related() to get the related attributes:
cells = Cell.objects.select_related('site').all()
cellsData = serialize('geojson', cells, geometry_field='site_location',
fields=('azimuth', ...))
But the GeoJson Serialize function could not identify the Site model attributes from the cells QuerySet:
{
"type":"FeatureCollection",
"crs":{
"type":"name",
"properties":{
"name":"EPSG:4326"
}
},
"features":[
{
"type":"Feature",
"properties":{
"azimuth":340
},
"geometry":null
},
{
"type":"Feature",
"properties":{
"azimuth":340
},
"geometry":null
},
{
"type":"Feature",
"properties":{
"azimuth":240
},
"geometry":null
}
]
}
I've tested the query returned by Django ORM equivalent directly on the database:
cells = Cell.objects.select_related('site').all()
>>> print(cells.query)
SELECT "app_cell"."id", "app_cell"."tech", "app_cell"."azimuth", "app_cell"."sector_id", "app_cell"."frequency_band", "app_cell"."power", "app_cell"."site_id", "app_cell"."id", "app_site"."name", "app_site"."state_code", "app_site"."location"::bytea FROM "app_cell" INNER JOIN "app_site" ON ("app_cell"."site_id" = "app_site"."id")
Which gives me a correct results (all the attributes or columns of the two models):
I've also used Natural Keys, which is the serialization strategy for foreign keys and other relations (as I've read in the documentation and changed the models accordingly):
cellsData = serialize('geojson', cells, geometry_field='site_location',
fields=('azimuth', ...), use_natural_foreign_keys=True)
But the same result, the Serialize method couldn't identify the Site model attributes.
How can I get all the attributes of multiple related models to get serialized using the GeoJSON Serializer?
I managed to get what I want by using a raw SQL query plus the json_build_object and ST_AsGeoJSON functions of the PostGIS extension:
from django.db import connection
def sites_list(request):
    """Return every cell joined with its site as a GeoJSON FeatureCollection.

    The FeatureCollection is assembled in the database with
    ``json_build_object`` and ``ST_AsGeoJSON``, so the query produces a
    single row holding a single column.
    """
    # The geometry comes from app_site: the Cell table has no location column.
    query = """select json_build_object(
'type', 'FeatureCollection',
'features', json_agg(ST_AsGeoJSON(t.*)::json)
)
from ( SELECT "app_cell"."id", "app_cell"."tech", "app_cell"."azimuth", "app_cell"."sector_id",
"app_cell"."frequency_band", "app_cell"."power", "app_cell"."site_id", "app_site"."name"
AS "site_name", "app_site"."state_code", "app_site"."location"::bytea::geometry AS "site_location"
FROM "app_cell" INNER JOIN "app_site" ON ("app_cell"."site_id" = "app_site"."id")
) as t(id, tech, azimuth, sector_id, frequency_band, power, site_id, site_name, state_code, geom);"""
    # The context manager closes the cursor even when the query raises.
    with connection.cursor() as cursor:
        cursor.execute(query)
        geojson = cursor.fetchone()
    # fetchone() returns one row tuple; index 0 is the FeatureCollection.
    return JsonResponse(geojson[0], safe=False)
I had some problems in the JavaScript side, when using:
cursor.fetchall()
return JsonResponse(geojson, safe=False)
the returned GeoJSON object was surrounded by [[]] double brackets, because cursor.fetchall() returns a list of row tuples. So I've used cursor.fetchone() to get the GeoJSON object surrounded only by single brackets [], and used index 0 of the resulting tuple to get the tuple's only content as a string, and finally the JsonResponse function returns that result as a JSON object.

Django Complicated Query

I am using django restframework with my Django app and I need to create quite specific query.
Here is the models.py:
class TaskHours(models.Model):
    """Hours recorded under a name for one task on one date."""

    name = models.CharField(max_length=100)
    hours = models.FloatField()
    task = models.CharField(max_length=100)
    date = models.DateField()
views.py:
class TaskHoursView(generics.ListAPIView):
    """List TaskHours rows whose date lies in the requested range."""

    serializer_class = TaskHoursSerializer
    queryset = TaskHours.objects.all()

    def get_queryset(self):
        """Filter by the ``start_date`` and ``end_date`` query parameters."""
        start_date = self.request.query_params.get('start_date')
        end_date = self.request.query_params.get('end_date')
        # Queries go through the manager: the model class itself has no filter().
        return TaskHours.objects.filter(date__range=[start_date, end_date])
and serializer is default one with class Meta with all fields.
This query is working fine but I need to alter it. In the data there are entries which have same name and same date, but different tasks. What I would need is get all the tasks and hours worked with the same name and date to one object like this:
{
"name": "John",
"date": "2021-04-14",
"task": "cleaning",
"hours": "4.5",
"task": "hoovering",
"hours": "2.0"
}
Now I am receiving it like this:
{
"name": "John",
"date": "2021-04-14",
"task": "cleaning",
"hours": "4.5",
},
{
"name": "John",
"date": "2021-04-14",
"task": "hoovering",
"hours": "2.0"
}
Is there any way how to merge the two objects into one?
You need to slightly modify your serializer in order to create a subquery for each object you're going to serialize.
class TaskHoursSerializer(serializers.ModelSerializer):
    """Serialize one (name, date) group together with all of its tasks."""

    tasks = serializers.SerializerMethodField()

    class Meta:
        model = TaskHours
        # The model field is called "hours"; excluding a non-existent "hour"
        # is rejected by ModelSerializer.
        exclude = ['task', 'hours']

    def get_tasks(self, obj):
        """Return the (task, hours) pairs that share obj's name and date.

        ``obj`` may be a TaskHours instance or a dict produced by
        ``.values('name', 'date')``.
        """
        if isinstance(obj, dict):
            name, date = obj['name'], obj['date']
        else:
            name, date = obj.name, obj.date
        tasks = TaskHours.objects.filter(name=name, date=date).values_list("task", "hours")
        return list(tasks)
And also, you need to change the queryset in your view in order to not have object duplicates.
class TaskHoursView(generics.ListAPIView):
    """List one entry per distinct (name, date) pair in the requested range."""

    serializer_class = TaskHoursSerializer
    queryset = TaskHours.objects.all()

    def get_queryset(self):
        """Return distinct name/date pairs between ``start_date`` and ``end_date``."""
        start_date = self.request.query_params.get('start_date')
        end_date = self.request.query_params.get('end_date')
        # Queries go through the manager: the model class itself has no filter().
        in_range = TaskHours.objects.filter(date__range=[start_date, end_date])
        # values() + distinct() collapses rows sharing the same name and date.
        return in_range.values('name', 'date').distinct()
That should output a field called "tasks" containing each task with the matching hour.

Prefetching indirectly related items using Django ORM

I'm trying to optimize the queries for my moderation system, built with Django and DRF.
I'm currently stuck with the duplicates retrieval: currently, I have something like
class AdminSerializer(ModelSerializer):
    """Serializer exposing the likely duplicates of a not-yet-allowed item."""

    duplicates = SerializerMethodField()

    def get_duplicates(self, item):
        """Serialize up to ten allowed items whose name resembles item's name.

        Items that are already allowed get an empty list.
        """
        candidates = []
        if not item.allowed:
            # Only allowed items in the same language are considered.
            same_language = Item.objects.filter(
                allowed=True,
                related_stuff__language=item.related_stuff.language
            )
            scored = same_language.annotate(
                similarity=TrigramSimilarity('name', item.name)
            )
            # Keep the ten best matches above the 0.2 similarity threshold.
            candidates = scored.filter(similarity__gt=0.2).order_by('-similarity')[:10]
        return AdminMinimalSerializer(candidates, many=True).data
which works fine, but does at least one additional query for each item to display. In addition, if there are duplicates, I'll do additional queries to fill the AdminMinimalSerializer, which contains fields and related objects of the duplicated item. I can probably reduce the overhead by using a prefetch_related inside the serializer, but that doesn't prevent me from making several queries per item (assuming I have only one related item to prefetch in AdminMinimalSerializer, I'd still have ~2N + 1 queries: 1 for the items, N for the duplicates, N for the related items of the duplicates).
I've already looked at Subquery, but I can't retrieve an object, only an id, and this is not enough in my case. I tried to use it in both a Prefetch object and a .annotate.
I also tried something like Item.filter(allowed=False).prefetch(Prefetch("related_stuff__language__related_stuff_set__items", queryset=Items.filter..., to_attr="duplicates")), but the duplicates property is added to "related_stuff__language__related_stuff_set", so I can't really use it...
I'll welcome any idea ;)
Edit: the real code lives here. Toy example below:
# models.py
from django.db.models import Model, CharField, ForeignKey, CASCADE, BooleanField
class Book(Model):
    """A book belonging to one Serie; ``allowed`` defaults to False."""

    title = CharField(max_length=250)
    # Lazy string reference: Serie is declared further down the module, so
    # naming the class object here would raise NameError at import time.
    serie = ForeignKey("Serie", on_delete=CASCADE, related_name="books")
    allowed = BooleanField(default=False)
class Serie(Model):
    """A series of books written in one Language."""

    title = CharField(max_length=250)
    # Lazy string reference: Language is declared further down the module, so
    # naming the class object here would raise NameError at import time.
    language = ForeignKey("Language", on_delete=CASCADE, related_name="series")
class Language(Model):
    """A language, identified by its name."""

    name = CharField(max_length=100)
# serializers.py
from django.contrib.postgres.search import TrigramSimilarity
from rest_framework.serializers import ModelSerializer, SerializerMethodField
from .models import Book, Language, Serie
# Serializers are declared dependencies-first: a nested serializer is
# instantiated while the enclosing class body runs, so it must already exist.
class LanguageSerializer(ModelSerializer):
    """Serialize a Language as its id and name."""

    class Meta:
        model = Language
        fields = ('id', 'name')


class SerieAdminAuxSerializer(ModelSerializer):
    """Serialize a Serie with its nested language."""

    class Meta:
        model = Serie
        fields = ("id", "language", "title")

    language = LanguageSerializer()


class BookAdminMinimalSerializer(ModelSerializer):
    """Serialize a Book with its nested serie, without duplicates."""

    class Meta:
        model = Book
        fields = ("id", "title", "serie")

    serie = SerieAdminAuxSerializer()


class BookAdminSerializer(ModelSerializer):
    """Serialize a Book with its nested serie and its likely duplicates."""

    class Meta:
        model = Book
        fields = ("id", "title", "serie", "duplicates", )

    serie = SerieAdminAuxSerializer()
    duplicates = SerializerMethodField()

    def get_duplicates(self, book):
        """Retrieve duplicates for book"""
        if book.allowed:
            qs = []
        else:
            # Up to ten allowed books in the same language whose title has a
            # trigram similarity above 0.2, best match first.
            qs = (
                Book.objects.filter(
                    allowed=True, serie__language=book.serie.language)
                .annotate(similarity=TrigramSimilarity("title", book.title))
                .filter(similarity__gt=0.2)
                .order_by("-similarity")[:10]
            )
        return BookAdminMinimalSerializer(qs, many=True).data
I'm trying to find a way to prefetch related objects and duplicates so that I can get rid of the get_duplicates method in BookSerializer, with the N+1 queries it causes, and have only a duplicates field in my BookSerializer.
Regarding data, here would be an expected output:
[
{
"id": 2,
"title": "test2",
"serie": {
"id": 2,
"language": {
"id": 1,
"name": "English"
},
"title": "series title"
},
"duplicates": [
{
"id": 1,
"title": "test",
"serie": {
"id": 1,
"language": {
"id": 1,
"name": "English"
},
"title": "first series title"
}
}
]
},
{
"id": 3,
"title": "random",
"serie": {
"id": 3,
"language": {
"id": 1,
"name": "English"
},
"title": "random series title"
},
"duplicates": []
}
]

How create django model with timestamptz field

I have response data from an API; it looks like this:
{
"api": {
"results": 1,
"fixtures": {
"65": {
"fixture_id": "65",
"event_timestamp": "1533927600",
"event_date": "2018-08-10T19:00:00+00:00",
"league_id": "2",
"round": "Premier League - 1",
"homeTeam_id": "33",
"awayTeam_id": "46",
"homeTeam": "Manchester United",
"awayTeam": "Leicester",
"status": "Match Finished",
"statusShort": "FT",
"goalsHomeTeam": "2",
"goalsAwayTeam": "1",
"halftime_score": "1 - 0",
"final_score": "2 - 1",
"penalty": null,
"elapsed": "95",
"firstHalfStart": "1533927660",
"secondHalfStart": "1533931380"
}
}
}
}
Now I am trying to build a fixture model to store the above data in a PostgreSQL database. I didn't find any example of a model built with a timestamptz field. I need to store the event_date key as timestamptz. Can anyone show me how I should create this field?
Django does not have a default timestamp field. However, you can add one by having the following model field:
event_date = models.DateTimeField(auto_now_add=True)
EDIT
Or alternatively, something a little more up to date:
from django.utils import timezone
....
event_date = models.DateTimeField(default=timezone.now)
Make sure it's timezone.now and not timezone.now()