Django all related data - django

class Docs(models.Model):
    """Unmanaged model bound to the ``docs`` table."""

    doc_id = models.BigIntegerField(primary_key=True)
    journal = models.CharField(max_length=50, null=True, blank=True)
    year = models.IntegerField(null=True, blank=True)

    class Meta:
        # managed = False: Django does not create or migrate this table.
        db_table = 'docs'
        managed = False
class Assays(models.Model):
    """Unmanaged model bound to the ``assays`` table; each assay points at a doc."""

    assay_id = models.BigIntegerField(primary_key=True)
    # Docs is referenced lazily by name.
    doc = models.ForeignKey('Docs', on_delete=models.DO_NOTHING)
    description = models.CharField(max_length=4000, null=True, blank=True)

    class Meta:
        # managed = False: Django does not create or migrate this table.
        db_table = 'assays'
        managed = False
class Activities(models.Model):
    """Unmanaged model bound to the ``activities`` table.

    Every activity references one assay and one compound record, and
    optionally one doc.
    """

    activity_id = models.BigIntegerField(primary_key=True)
    assay = models.ForeignKey(Assays, on_delete=models.DO_NOTHING)
    doc = models.ForeignKey(
        Docs,
        on_delete=models.DO_NOTHING,
        null=True,
        blank=True,
    )
    # CompoundRecords is not shown in this excerpt; it is referenced by name.
    record = models.ForeignKey('CompoundRecords', on_delete=models.DO_NOTHING)

    class Meta:
        # managed = False: Django does not create or migrate this table.
        db_table = 'activities'
        managed = False
I apologize in advance if this answer is easily found elsewhere. I have searched all over and do not see a simple way to query my data as intuitively as I feel should be possible.
These are classes for 3 tables. The actual dataset is closer to 100 tables. Each doc_id can have one or many associated activity_ids. Each activity_id is associated with one assay_id.
My goal is to obtain all of the related data for each of the activities in a single doc. For instance:
# All Activities rows that reference doc 5535, each as a dict of column values.
query_activities_values = (
    Docs.objects
    .get(doc_id=5535)
    .activities_set
    .values()
)
# Print only the first row.
for y in query_activities_values:
    print(y)
    break
>>> {'activity_id': 753688, 'assay_id': 158542, 'doc_id': 5535, .....
This returns 32 dictionaries (only part of the first is shown) for columns in the Activities table that have doc_id=5535. I would like to go one step further and also automatically pull in all of the data from the Assays table that is associated with the corresponding assay_id for each dictionary.
I can access that Assay data through a similar query, but only by stating each field explicitly:
# Same rows as before, but restricted to the assay key plus two columns pulled
# across the relation with the double-underscore syntax.
query_activities_values = (
    Docs.objects
    .get(doc_id=5535)
    .activities_set
    .values('assay', 'assay__assay_type', 'assay__description')
)
# Print only the first row.
for y in query_activities_values:
    print(y)
    break
I would like a single query that finds not only the assay and associated assay data for one activity_id, but finds all data and associated data for the 90+ other tables associated in the model
Thank you
Update 1
I did find this code that works surprisingly well for my needs, however, I was curious if this is the best method:
from django.forms.models import model_to_dict
def serial_model(modelobj):
    """Serialise a model instance to a dict, expanding related objects recursively.

    Starts from ``model_to_dict(modelobj)`` and, for every relation field in
    ``modelobj._meta.fields`` whose related object is truthy, replaces the
    entry with the recursively serialised related object.

    Args:
        modelobj: Model instance to serialise.

    Returns:
        dict: The ``model_to_dict`` output with relation entries replaced by
        nested dicts wherever the related object could be serialised.
    """
    modeldict = model_to_dict(modelobj)
    for field in modelobj._meta.fields:
        if not field.is_relation:
            continue
        related = getattr(modelobj, field.name)
        if not related:
            continue
        try:
            modeldict[field.name] = serial_model(related)
        except Exception:
            # Best effort: if the related object cannot be serialised, keep
            # whatever model_to_dict produced for this field. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            continue
    return modeldict
That's not too much code, but I thought there may be a more built-in way to do this.

What you need is prefetch_related:
Django 2.2 Prefetch Related Docs
query_activities_values = Docs.objects.get(doc_id=5535).activities_set.values()
Would become:
# Fetch doc 5535 and prefetch its reverse relations, storing the results on the
# instance as ``activities`` and ``assays``.
query_activities_values = (
    Docs.objects
    .prefetch_related(
        models.Prefetch("activities_set", to_attr="activities"),
        models.Prefetch("assays_set", to_attr="assays"),
    )
    .get(doc_id=5535)
)
Two new attributes, "activities" and "assays", will be created on the returned object; you can use them to retrieve the related data.
One more thing. This isn't actually 1 query. It's 3. However, if you're getting more than just one object from Docs, it's still going to be 3.
Also, is there a reason why you're using BigIntegerField?

Related

how to build query with several manyTomany relationships - Django

I really don't understand all the ways to build the right query.
I have the following models in the code i'm working on. I can't change models.
models/FollowUp:
class FollowUp(BaseModel):
    """A named follow-up holding an optional set of questions."""

    name = models.CharField(max_length=256)
    questions = models.ManyToManyField(Question, blank=True)
models/Survey:
class Survey(BaseModel):
    """A named survey linked to follow-ups and, through SurveyStatus, to users."""

    name = models.CharField(max_length=256)
    followup = models.ManyToManyField(
        FollowUp,
        help_text='questionnaires',
        blank=True,
    )
    # The user link goes through SurveyStatus rather than an implicit join table.
    user = models.ManyToManyField(User, through='SurveyStatus', blank=True)
models/SurveyStatus:
class SurveyStatus(models.Model):
    """Links one user to one survey and stores the status of that pairing."""

    user = models.ForeignKey(User, on_delete=models.CASCADE)
    survey = models.ForeignKey(Survey, on_delete=models.CASCADE)
    survey_status = models.CharField(
        choices=STATUS_SURVEY_CHOICES,
        max_length=10,
        null=True,
        blank=True,
    )
models/UserSurvey:
# Collects, for one user, the surveys, follow-ups and questions gathered by the
# helper methods below (see create()).
class UserSurvey(BaseModel):
user = models.ForeignKey(User, null=True, blank=True,
on_delete=models.DO_NOTHING)
followups = models.ManyToManyField(FollowUp, blank=True)
surveys = models.ManyToManyField(Survey, blank=True)
questions = models.ManyToManyField(Question, blank=True)
# NOTE(review): as written this line is only a comment; it looks like an
# "@classmethod" decorator whose "@" was turned into "#" -- confirm.
#classmethod
def create(cls, user_id):
# Looks up the user by primary key (None if absent), saves a new instance for
# that user, then fills its surveys, follow-ups and questions.
user = User.objects.filter(pk=user_id).first()
cu_quest = cls(user=user)
cu_quest.save()
# NOTE(review): no call parentheses here. This line only does work if
# _get_all_active_surveys really is a property; otherwise it is a no-op
# attribute lookup.
cu_quest._get_all_active_surveys
cu_quest._get_all_followups()
cu_quest._get_all_questions()
return cu_quest
def _get_all_questions(self):
# Adds every question of every follow-up in self.followups to self.questions.
# The nested list comprehension is used only for its side effects.
[[self.questions.add(ques) for ques in qstnr.questions.all()]
for qstnr in self.followups.all()]
return
def _get_all_followups(self):
# Adds every FollowUp matched by the query below to self.followups.
# NOTE(review): "surveystatus_survey_status" has a single underscore; Django
# spans relations with "__", so "surveystatus__survey_status" was presumably
# intended.
queryset = FollowUp.objects.filter(survey__user=self.user).filter(survey__user__surveystatus_survey_status='active')
# queryset = self._get_all_active_surveys()
[self.followups.add(quest) for quest in queryset]
return
# NOTE(review): as with "#classmethod" above, this is presumably a mangled
# "@property" decorator -- confirm.
#property
def _get_all_active_surveys(self):
# Adds to self.surveys every Survey linked to self.user whose SurveyStatus row
# has survey_status 'active'. Returns None.
queryset = Survey.objects.filter(user=self.user,
surveystatus__survey_status='active')
[self.surveys.add(quest) for quest in queryset]
return
Now my questions:
My view calls the create method of the UserSurvey model in order to build a questionnaire.
I need to get all the questions of the followup of the surveys with a survey_status = 'active' for the user (the one who clicks on a button)...
I tried several things:
I wrote the _get_all_active_surveys() function and there I get all the surveys that are with a survey_status = 'active' and then the _get_all_followups() function needs to call it to use the result to build its own one. I have an issue telling me that
a list is not a callable object.
I tried to write directly the right query in _get_all_followups() with
queryset = FollowUp.objects.filter(survey__user=self.user).filter(survey__user__surveystatus_survey_status='active')
but I can't manage to handle all the M2M relationships. The query above also fails, with this error:
Related Field got invalid lookup: surveystatus_survey_status
i read that a related_name can help to build reverse query but i don't understand why?
it's the first time i see return empty and what it needs to return above. Why this notation?
If you have clear explanations (clearer than the docs), I would really appreciate it.
thanks
Quite a few things to answer here, I've put them into a list:
Your _get_all_active_surveys has the @property decorator (shown as "#property" in the listing), but neither of the other two methods does. It isn't actually a property, so I would remove the decorator.
You are using a list comprehension to add your queryset objects to the m2m field, this is unnecessary as you don't actually want a list object and can be rewritten as e.g. self.surveys.add(*queryset)
You can comma-separate filter expressions as .filter(expression1, expression2) rather than .filter(expression1).filter(expression2).
You are missing an underscore in surveystatus_survey_status it should be surveystatus__survey_status.
Related name is just another way of reverse-accessing relationships; it doesn't actually change how the relationship exists. By default Django will do something like ModelA.modelb_set.all() - you can set related_name="my_model_bs" on the relation field and then use ModelA.my_model_bs.all()

Django-import-export doesn't skip unchanged when skip_unchanged==True

I'm building an app using Django, and I want to import data from an Excel file using django-import-export.
When importing data I want to skip unchanged rows, for this, I'm using skip_unchanged = True in the resource class (like below) but I get unexpected behavior. In my model, I have an attribute updated_at which is a DateTimeField with auto_now=True attribute, it takes a new value each time I upload the Excel file even if the values of rows have not changed in the file.
Below are portions of my code.
models.py
class HREmployee(models.Model):
    """Employee record; most columns are filled from the imported spreadsheet."""

    code = models.IntegerField()
    name_en = models.CharField(max_length=55)
    status = models.CharField(max_length=75)
    termination_date = models.DateField(null=True)
    hiring_date = models.DateField()
    birth_date = models.DateField()
    # other fields to be imported from the file ...

    # Fields used for other purposes (not imported from the file).
    comment = models.TextField()
    # auto_now=True: refreshed on every save.
    updated_at = models.DateTimeField(auto_now=True)
resources.py
class HREmployeeResource(ModelResource):
    """Import resource mapping spreadsheet column headers onto HREmployee fields."""

    code = Field(column_name='Employee Code', attribute='code')
    name_en = Field(column_name='Employee Name - English', attribute='name_en')
    status = Field(column_name='Employee Status', attribute='status')
    termination_date = Field(
        column_name='Termination Date',
        attribute='termination_date',
    )
    hiring_date = Field(column_name='Hiring Date', attribute='hiring_date')
    birth_date = Field(column_name='Birth Date', attribute='birth_date')
    # other fields to be imported ...

    class Meta:
        model = HREmployee
        # Rows are matched to existing instances by employee code.
        import_id_fields = ('code', )
        skip_unchanged = True
How can I fix this unexpected behavior?
Edit
After few tries, I've found that columns with date values are causing this problem.
In the Excel file, I have three columns that have date values like in the picture below, when I comment the corresponding attributes in the resource class and do the import, I get the expected behavior (if no changes in the file the import_type equals skip and no changes are made in the DB).
I've edited the code of the model and resource classes (please check above).
This should be easy to fix, simply use the fields parameter to define only the fields you wish to import (docs):
class Meta:
    ...
    # Whitelist of fields to import and compare. Entries are model attribute
    # names (HREmployee has `name_en`, not `name`), not spreadsheet headers.
    fields = ('code', 'name_en',)
If skip_unchanged is True, then only these fields will be compared for changes, and the instance will be updated if any one of them has changed, otherwise it will be skipped.
The field name has to be the model attribute name, not the name of the column in the import.
Sorry for reviving this post, but I was stuck on the same issue and this was the only post I found that is exactly related to it, so I am posting my answer.
In my model I've defined a DateField and not a DateTimeField.
But the value is imported as a datetime, so the comparison fails.
To compare carrots with carrots, I defined a field class to convert values if needed :
import datetime
class DateField(Field):
    """Field whose value is reduced to a plain date when it is a datetime."""

    def get_value(self, obj):
        """Return the parent's value, converted with ``.date()`` if it is a datetime."""
        value = super().get_value(obj)
        return value.date() if isinstance(value, datetime.datetime) else value
and then in my resource
class HREmployeeResource(ModelResource):
    # Date columns use the custom DateField instead of the generic Field.
    hiring_date = DateField(column_name='Hiring Date', attribute='hiring_date')
    birth_date = DateField(column_name='Birth Date', attribute='birth_date')
    # ....

Django REST: Dynamically add Model Fields

I'm working on a Django Rest project where I'm given two MySQL tables:
metrics: Contain a row for each potential metric
daily_data: Contains a row for each data entry where the column names refer to metrics from the 'metrics' table
What I want to do now, is creating new entries in 'metrics' which should be automatically added to existing 'daily_data' entries (with a default value) and displayed on the website.
Here is what the current models look like:
class Metrics(models.Model):
    """One row per potential metric, stored in the ``metrics`` table."""

    metric_id = models.CharField(max_length=255, primary_key=True)
    is_main_metric = models.BooleanField(default=False)
    name = models.CharField(max_length=255, blank=False, null=False)
    description = models.CharField(max_length=255, blank=False, null=False)
    lower_bound = models.FloatField(default=0.0, null=False)
    upper_bound = models.FloatField(default=0.0, null=False)

    class Meta:
        verbose_name_plural = "Metrics"
        db_table = "metrics"
class DailyData(models.Model):
    """One data entry per location and date, with one float column per metric."""

    location = models.CharField(max_length=255, null=False, blank=False)
    date = models.DateField(null=False, blank=False)

    # One static field per metric; each field name corresponds to a
    # 'metric_id' in the 'metrics' table.
    metric_01 = models.FloatField(null=False, default=0.0)
    metric_02 = models.FloatField(null=False, default=0.0)
    metric_03 = models.FloatField(null=False, default=0.0)
    ...

    class Meta:
        db_table = "daily_data"
        verbose_name_plural = "Daily Data"
Later on, the Javascript code iterates over all 'metrics' to display them with the corresponding values from a requested 'daily_data' entry. Here is a small example:
// Fetch the daily_data entry for one location and date, then render one row
// per metric using the value stored under that metric's id.
let resp = await axios.get(`${API_URL}/daily_data/?location=berlin&date=2021-01-07`);
let data = resp.data[0];
METRICS.forEach(metric => {
    let name = metric.name;
    let description = metric.description;
    // The daily_data entry is keyed by metric_id.
    let value = data[metric.metric_id];
    // Template lines are kept flush-left so the emitted markup is unchanged
    // apart from the removed stray quote after the description.
    $content.append(
` <div class="row">
<span>${name}:</span>
<span>${value}</span>
<span>${description}</span>
</div> `
    );
    // ...
});
For the case that all metrics are pre-defined, the application is running fine. If I want to add a new metric, I create a new row in the database table 'metrics', then add the field manually to the 'DailyData' model from above, and finally restart the server.
However, my problem now is that I need the possibility to add new metrics dynamically. I.e. if a user adds a new metric (for example with a POST request), the metric should be added as a column to all existing 'daily_data' entries and should be displayed as an additional field on the website.
The intention is basically something like this (I know that this won't work, but just to get the idea):
# Illustrative pseudo-code only: sketches the wished-for behaviour of attaching
# a new FloatField to DailyData when a metric is created. DailyData.appendField
# is not defined anywhere in the code shown.
def onNewMetricCreation(newMetric):
metric_id = newMetric.metric_id
new_field = models.FloatField(default=0.0, null=False)
DailyData.appendField(metric_id, new_field)
Is there a way to achieve this and add these model fields dynamically? Or is my whole data structure faulty for this case?
Edit: To solve the problem I've actually changed my data structure a bit. I've added a MetricsData model that connects the DailyData with the Metrics and contains the corresponding values. This allows each DailyData object to have a different number of metrics and new ones can be added easily.
The new models look like this:
class DailyData(models.Model):
    """A data entry for one location on one date; values live in MetricsData."""

    location = models.ForeignKey(
        "Locations",
        on_delete=models.CASCADE,
        null=False,
        blank=False,
    )
    date = models.DateField(null=False, blank=False)
class MetricsData(models.Model):
    """The value of one metric for one DailyData entry."""

    data_entry = models.ForeignKey(
        "DailyData",
        related_name="data_entry",
        on_delete=models.CASCADE,
    )
    metric = models.ForeignKey("Metrics", on_delete=models.CASCADE)
    value = models.FloatField(null=False, default=0.0)
class Metrics(models.Model):
    # Primary key is the metric's string id; remaining fields are elided.
    metric_id = models.CharField(primary_key=True, max_length=255)
    ...
If I understood you correctly, I believe you're looking for a ForeignKey(). You would add this to your model:
class DailyData(models.Model):
    # Each entry references a single Metrics row; deleting that row cascades.
    metrics = models.ForeignKey(
        Metrics,
        on_delete=models.CASCADE,
    )
Go into the Django admin and I think you'll understand how ForeignKeys work. It's a reference to the Metrics instance. P.S. Don't add this field dynamically; that's probably impossible. But with this you can simply add another row.
So if you reference an instance of Metrics and then change it, all daily_data rows that reference it will be "changed", since they're still referencing the same instance.
If you need to reference more than one metric, use a ManyToManyField.
I strongly recommend that you add a Foreign Key for DailyData to Metrics model.
class Metrics(models.Model):
    ...
    # Optional link to a DailyData entry; the reverse accessor and the query
    # name on DailyData are both "metrics".
    related_day = models.ForeignKey(
        DailyData,
        on_delete=models.CASCADE,
        related_name="metrics",
        related_query_name="metrics",
        null=True,
    )
Now you also need to add a signal to trigger after creating a metric to connect that metric to its related data.
@receiver(post_save, sender=Metrics)
def add_to_daily_data(sender, instance, created, **kwargs):
    """post_save handler for Metrics; acts only when a new row was created.

    Args:
        sender: The model class that sent the signal (Metrics).
        instance: The Metrics instance that was saved.
        created: True if the save inserted a new row.
        **kwargs: Remaining signal arguments.
    """
    if created:
        # Put your logic to add a specific metric to a daily data
        pass
Also, this way you can access all metrics data related to specific DailyData objects hassle-free.
daily_data.metrics.all()

multiple joins on django queryset

For the below sample schema
# sample schema
class A(models.Model):
    # N and D are declared further down, so they are referenced lazily by name;
    # using the bare class names here would raise NameError at import time.
    n = models.ForeignKey('N', on_delete=models.CASCADE)
    d = models.ForeignKey('D', on_delete=models.PROTECT)
class N(models.Model):
    id = models.AutoField(primary_key=True, editable=False)
    # D is declared after this class, so it is referenced lazily by name.
    d = models.ForeignKey('D', on_delete=models.PROTECT)
class D(models.Model):
    # String primary key.
    dsid = models.CharField(primary_key=True, max_length=255)
class P(models.Model):
    id = models.AutoField(editable=False, primary_key=True)
    name = models.CharField(max_length=255)
    # Deleting the referenced N row cascades to this row.
    n = models.ForeignKey(N, on_delete=models.CASCADE)
# raw query for the result I want
# SELECT P.name
# FROM P, N, A
# WHERE (P.n_id = N.id
# AND A.n_id = N.id
# AND A.d_id = \'MY_DSID\'
# AND P.name = \'MY_NAME\')
What am I trying to achieve?
Well, I’m trying to find a way somehow be able to write a single queryset which does the same as what the above raw query does. So far I was able to do it by writing two queryset, and use the result from one queryset and then using that queryset I wrote the second one, to get the final DB records. However that’s 2 hits to the DB, and I want to optimize it by just doing everything in one DB hit.
What will be the queryset for this kinda raw query ? or is there a better way to do it ?
Above code is here https://dpaste.org/DZg2
You can achieve it using the related_name attribute and functions like select_related and prefetch_related.
Assume the related name for each model is the model's name followed by _items, although it is better to have proper model names and then provide meaningful related names. The related name is how you access a model from the reverse side of a relation.
This way, you can use this query to get all models in a single DB hit:
# Join n, d and n.d into the main query; fetch the objects behind n.p_items
# with one additional query.
(
    A.objects
    .all()
    .select_related("n", "d", "n__d")
    .prefetch_related("n__p_items")
)
I edited the code in the pasted site, however, it will expire soon.

Django: Filter in multiple models linked via ForeignKey?

I'd like to create a filter-sort mixin for following values and models:
class Course(models.Model):
    """A course taught by one teacher, with student-count limits and status flags."""

    title = models.CharField(max_length=70)
    description = models.TextField()
    max_students = models.IntegerField()
    min_students = models.IntegerField()
    is_live = models.BooleanField(default=False)
    is_deleted = models.BooleanField(default=False)
    # on_delete is mandatory since Django 2.0; CASCADE was the implicit default
    # before that, so behaviour is unchanged.
    teacher = models.ForeignKey(User, on_delete=models.CASCADE)
class Session(models.Model):
    """One dated, timed class meeting belonging to a Course."""

    # on_delete is mandatory since Django 2.0; CASCADE was the implicit default
    # before that, so behaviour is unchanged.
    course = models.ForeignKey(Course, on_delete=models.CASCADE)
    title = models.CharField(max_length=50)
    description = models.TextField(max_length=1000, default='')
    date_from = models.DateField()
    date_to = models.DateField()
    time_from = models.TimeField()
    time_to = models.TimeField()
class CourseSignup(models.Model):
    """Enrollment of one student in one course."""

    # on_delete is mandatory since Django 2.0; CASCADE was the implicit default
    # before that, so behaviour is unchanged.
    course = models.ForeignKey(Course, on_delete=models.CASCADE)
    student = models.ForeignKey(User, on_delete=models.CASCADE)
    # NOTE(review): auto_now=True refreshes this value on every save; if it is
    # meant to record the original enrollment time, auto_now_add=True is
    # probably intended -- confirm.
    enrollment_date = models.DateTimeField(auto_now=True)
class TeacherRating(models.Model):
    """A rating (with optional comment) given to a teacher for one course."""

    # on_delete is mandatory since Django 2.0; CASCADE was the implicit default
    # before that.
    course = models.ForeignKey(Course, on_delete=models.CASCADE)
    # Two foreign keys to User would both claim the reverse accessor
    # User.teacherrating_set, which Django's system checks reject; distinct
    # related_name values resolve the clash.
    teacher = models.ForeignKey(
        User,
        on_delete=models.CASCADE,
        related_name='ratings_received',
    )
    rated_by = models.ForeignKey(
        User,
        on_delete=models.CASCADE,
        related_name='ratings_given',
    )
    rating = models.IntegerField(default=0)
    comment = models.CharField(max_length=300, default='')
A Course could be 'Discrete mathematics 1'
Session are individual classes related to a Course (e.g. 1. Introduction, 2. Chapter I, 3 Final Exam etc.) combined with a date/time
CourseSignup is the "enrollment" of a student
TeacherRating keeps track of a student's rating for a teacher (after course completion)
I'd like to implement following functions
Sort (asc, desc) by Date (earliest Session.date_from), Course.Name
Filter by: Date (earliest Session.date_from and last Session.date_to), Average TeacherRating (e.g. minimum value = 3), CourseSignups (e.g. minimum 5 users signed up)
(these options are passed via a GET parameters, e.g. sort=date_ascending&f_min_date=10.10.12&...)
How would you create a function for that?
I've tried using
denormalization (just added a field to Course for the required filter/sort criterias and updated it whenever changes happened), but I'm not very satisfied with it (e.g. needs lots of update after each TeacherRating).
ForeignKey Queries (Course.objects.filter(session__date_from=xxx)), but I might run into performance issues later on..
Thanks for any tips!
In addition to using the Q object for advanced AND/OR queries, get familiar with reverse lookups.
Django creates reverse lookups for foreign key relationships. In your case you can get all Sessions belonging to a Course in one of two ways, each of which can be filtered.
c = Course.objects.get(id=1)

# First way, forward lookup.
sessions = Session.objects.filter(course__id=c.id)

# Second way using the reverse lookup session_set added to Course object.
sessions = c.session_set.all()
You'll also want to familiarize yourself with annotate() and aggregate(); these allow you to calculate fields and order/filter on the results. For example, Count, Sum, Avg, Min, Max, etc.
# Courses with at least five sign-ups, most popular first.
# Inside annotate()/filter(), a reverse ForeignKey is followed by the
# lower-cased model name ('coursesignup'). The `<model>_set` spelling exists
# only as an attribute on instances, not as a lookup path.
courses_with_at_least_five_students = (
    Course.objects
    .annotate(num_students=Count('coursesignup'))
    .filter(num_students__gte=5)
    .order_by('-num_students')
)
from datetime import date, timedelta

# Courses whose earliest session starts within the last 240 days and whose
# average teacher rating is at most 4, ordered by earliest session date and
# then by rating (highest first).
# Aggregates must name a concrete field reached through the reverse relation
# ('session__date_from', 'teacherrating__rating'). Session.date_from is a
# DateField, so the cut-off is a date rather than a datetime.
course_earliest_session_within_last_240_days_with_avg_teacher_rating_below_4 = (
    Course.objects
    .annotate(
        min_session_date_from=Min('session__date_from'),
        avg_teacher_rating=Avg('teacherrating__rating'),
    )
    .filter(
        min_session_date_from__gte=date.today() - timedelta(days=240),
        avg_teacher_rating__lte=4,
    )
    .order_by('min_session_date_from', '-avg_teacher_rating')
)
The Q is used to allow you to make logical AND and logical OR in the queries.
I recommend you take a look at complex lookups: https://docs.djangoproject.com/en/1.5/topics/db/queries/#complex-lookups-with-q-objects
The following query might not work in your case (what does the teacher model look like?), but I hope it serves as an indication of how to use the complex lookup.
from django.db.models import Q
# Combines two conditions with a logical AND using Q objects.
# NOTE(review): the Session model shown has date_from/date_to but no `date`
# field, and Course.teacher points at User, which shows no `rating` field
# here -- adjust the lookup paths to the real models. `start` and `end` must be
# defined by the caller.
Course.objects.filter(Q(session__date__range=(start,end)) &
Q(teacher__rating__gt=3))
Unless absolutely necessary I'd indeed steer away from denormalization.
Your sort question wasn't entirely clear to me. Would you like to display Courses, filtered by date_from, and sort it by Date, Name?