Querying django models to include and exclude items - django

I currently have 2 models as such
class Recipe(models.Model):
    """A recipe, optionally owned by a user account."""

    # Optional owner; deleting the user also deletes the recipe (CASCADE).
    account = models.ForeignKey(CustomUser, on_delete=models.CASCADE, null=True, blank=True)
    name = models.TextField(null=True, blank=True)
    # Must be unique; blank=True lets forms submit it empty.
    slug = models.SlugField(null=False, blank=True, unique=True)
    # NOTE(review): upload_to is normally a path relative to MEDIA_ROOT;
    # passing MEDIA_URL here looks unintended -- confirm.
    image_path = models.ImageField(upload_to=MEDIA_URL, null=True, blank=True)
    description = models.TextField(null=True, blank=True)
    # Filled in automatically when the row is first created.
    date_added = models.DateField(auto_now_add=True)
class RecipeIngredients(models.Model):
    """One ingredient line of a recipe (free-text name, quantity and type)."""

    # Reverse accessor on Recipe is the default `recipeingredients_set`.
    recipe = models.ForeignKey(Recipe, on_delete=models.CASCADE, null=True)
    # Free text, so matching against a list of names is a string comparison.
    ingredient = models.TextField(null=True, blank=True)
    quantity = models.CharField(max_length=10, null=True, blank=True)
    type = models.CharField(max_length=50, null=True, blank=True)
I am trying to do a query where if I have a list of say 2 or more items, say
ingredients = ["egg", "bacon", "rice"]
That it returns to me only the recipes that have exactly egg, bacon, and rice, or less.
I was able to do this in a hacky way, but it is really slow and not using the ORM correctly I feel.
# Asker's original approach. The nesting below is reconstructed from a paste
# that lost its indentation -- NOTE(review): confirm against the original post.
ingredients = ["egg", "bacon", "rice"]
results = []
recipes = []
# One query per ingredient: every ingredient row whose text contains it.
for i in ingredients:
    r = RecipeIngredients.objects.filter(ingredient__icontains=i)
    results.append(r)
# Collect the recipe behind every matching row (the same recipe can be
# appended more than once).
for result in results:
    for r in result:
        recipes.append(r.recipe)
# Drop every recipe that has an ingredient outside the wanted list.
# NOTE(review): this removes items from `recipes` while iterating over it,
# which makes the loop skip elements; it also costs one query per recipe.
for r in recipes:
    recipeingredients = r.recipeingredients_set.all()
    for ri in recipeingredients:
        ingredient = ri.ingredient
        if ingredient not in ingredients:
            try:
                recipes.remove(r)
            # NOTE(review): bare except; presumably meant to catch the
            # ValueError raised when `r` was already removed.
            except:
                print(r)
    print("end of recipe")
Any help on how to make this a more correct query would be appreciated.

You can use raw sql, something like this:
# Return the recipes whose ingredients ALL come from `ingredients`
# (i.e. "exactly these, or fewer"): group each recipe's ingredient rows and
# keep the recipe only if none of them falls outside the list.
# `ingredients` must be non-empty, otherwise the IN (...) clause is invalid.
ingredients = ["egg", "bacon", "rice"]
# One "%s" per value so the database driver quotes each ingredient itself
# instead of the values being pasted into the SQL text.
placeholders = ", ".join(["%s"] * len(ingredients))
recipe_list = Recipe.objects.raw(
    f"""
    SELECT a.*
    FROM app_recipe a
    INNER JOIN app_recipeingredients b ON a.id = b.recipe_id
    GROUP BY a.id
    HAVING SUM(CASE WHEN b.ingredient IN ({placeholders}) THEN 0 ELSE 1 END) = 0
    """,
    ingredients,
)
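Replace app_ with your app's label (by default Django names a table <app_label>_<model name in lowercase>) and, if your database requires it, replace a.* with an explicit list of column names.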

Related

Joining more than 2 tables for reports in django and extract all the fields from the joined table

I am joining the ClientDetails, AssignmentTable and CallDetails table to get a view as to which telecaller a particular client has been assigned to and get the latest call details as well. However I am unable to accomplish that using django ORM.
ISSUE:
I am trying to access the fields inside the assignment table and call table but I am getting only the ids and not the other fields.
Question:
How do I extract all the columns from the assignment and call details table which has the client id as 1?
This is the SQL Query that I am trying to come up with:
SELECT t1.uid, t1.phone_number, t1.client_name, t1.base, t1.location, t2.assigner, t2.bpo_agent, t2.cro_agent, t3.bpo_status_id, t3.cro_status_id, t3.agent_id_id
FROM public.bpo_app_clientdetails t1
LEFT JOIN public.bpo_app_assignmentdetails t2 ON t1.uid = t2.client_id_id
LEFT JOIN public.bpo_app_calldetails t3 ON t1.uid = t3.client_id_id;
Below is the model file:
class ClientDetails(models.Model):
    """Contact record for a client, keyed by an auto-incrementing ``uid``."""

    uid = models.AutoField(primary_key=True)
    # Each phone number may belong to only one client (unique=True).
    phone_number = PhoneNumberField(unique=True)
    client_name = models.CharField(max_length=50, blank=True, null=True)
    base = models.CharField(max_length=50, blank=True, null=True)
    location = models.CharField(max_length=50, blank=True, null=True)

    class Meta:
        verbose_name_plural = "Client Contact Detail Table"

    def __str__(self):
        return f"{self.phone_number}, {self.client_name}"
class AssignmentDetails(models.Model):
    """Links a client to the users involved in its assignment."""

    uid = models.AutoField(primary_key=True)
    # The field is named client_id, so the database column is client_id_id.
    # Reverse accessor on ClientDetails: `assignment_details`.
    client_id = models.ForeignKey(
        ClientDetails,
        on_delete=models.PROTECT,
        related_name='assignment_details'
    )
    # Set automatically when the row is created.
    date_and_time = models.DateTimeField(auto_now_add=True, blank=True)
    # The three user links below use db_column, so their columns keep the
    # bare field name (no _id suffix).
    assigner = models.ForeignKey(
        User,on_delete=models.PROTECT,
        related_name='AssignerAgent',
        db_column='assigner',
    )
    bpo_agent = models.ForeignKey(
        User,on_delete=models.PROTECT,
        related_name='bpoAgent',
        db_column='bpo_agent',
    )
    cro_agent = models.ForeignKey(
        User,on_delete=models.PROTECT,
        related_name='croAgent',
        db_column='cro_agent',
    )

    class Meta:
        verbose_name_plural = "Client Assignment Detail Table"

    def __str__(self):
        return f"{self.uid}"
class CallDetails(models.Model):
    """One call made to a client, with its status and loan details."""

    uid = models.AutoField(primary_key=True)
    # Set automatically when the row is created.
    date_and_time = models.DateTimeField(auto_now_add=True, blank=True)
    # The field is named client_id, so the database column is client_id_id.
    # Reverse accessor on ClientDetails: `call_details`.
    client_id = models.ForeignKey(
        ClientDetails,
        on_delete=models.PROTECT,
        related_name='call_details'
    )
    agent_id = models.ForeignKey(EmployeeDetails_lk,on_delete=models.PROTECT)
    # Both statuses are optional lookups.
    bpo_status = models.ForeignKey(BpoStatus_lk,on_delete=models.PROTECT, blank=True, null=True)
    cro_status = models.ForeignKey(CroStatus_lk,on_delete=models.PROTECT, blank=True, null=True)
    # Stored as text, not as a number.
    required_loan_amt = models.CharField(max_length=50, blank=True, null=True)
    remarks = models.CharField(max_length=500, blank=True, null=True)
    loan_program = models.ForeignKey(LoanProgram_lk, on_delete=models.PROTECT, blank=True, null=True)
    # limit_choices_to restricts form/admin choices to banks flagged with
    # loan_disbursement_status=True.
    disbursement_bank = models.ForeignKey(Banks_lk, on_delete=models.PROTECT, limit_choices_to={'loan_disbursement_status': True}, blank=True, null=True)

    class Meta:
        verbose_name_plural = "Client Call Detail Table"

    def __str__(self):
        return f"{self.uid}"
>>> qry=ClientDetails.objects.values('assignment_details','call_details').filter(uid=1)
>>> qry
<QuerySet [{'assignment_details': 1, 'call_details': None}]>
>>> print(qry.query)
SELECT "bpo_app_assignmentdetails"."uid", "bpo_app_calldetails"."uid" FROM "bpo_app_clientdetails" LEFT OUTER JOIN "bpo_app_assignmentdetails" ON ("bpo_app_clientdetails"."uid" = "bpo_app_assignmentdetails"."client_id_id") LEFT OUTER JOIN "bpo_app_calldetails" ON ("bpo_app_clientdetails"."uid" = "bpo_app_calldetails"."client_id_id") WHERE "bpo_app_clientdetails"."uid" = 1
You can use prefetch_related() to achieve this. I just use some sample models here for better understanding.
class Company(models.Model):
    """Sample parent model; Project and Employee both point at it."""

    name = models.CharField(null=True, blank=True, max_length=100)
class Project(models.Model):
    """Sample child model belonging to one Company."""

    name = models.CharField(null=True, blank=True, max_length=100)
    # No related_name, so the reverse accessor on Company is `project_set`.
    company = models.ForeignKey(Company, on_delete=models.CASCADE)
class Employee(models.Model):
    """Sample child model belonging to one Company."""

    name = models.CharField(null=True, blank=True, max_length=100)
    # No related_name, so the reverse accessor on Company is `employee_set`.
    company = models.ForeignKey(Company, on_delete=models.CASCADE)
In your views.py function write the below lines to get the desired results
# Load company 1 and ask Django to prefetch both reverse relations.
companies = Company.objects.filter(id=1).prefetch_related(
    'project_set',
    'employee_set',
)
for company in companies:
    # .values() yields one dict per related row, holding all of its columns.
    # NOTE(review): chaining .values() builds a new query, so it presumably
    # does not reuse the prefetched rows -- verify if query count matters.
    project_rows = company.project_set.values()
    print(project_rows)  # This will print this company projects
    employee_rows = company.employee_set.values()
    print(employee_rows)  # This will print this company employees
Note: If you use related_name in your ForeignKey relationship, make sure that you access with that name instead of model_set inside prefetch_related()

django ORM join statements

I'm learning django queryset API and it's so overwhelming. I'm used to sql statement and I just want a basic join statement where 2 tables join together
How can i get this result in shell?
SELECT e.emp_lastname,e.emp_firstname,o.job_description
FROM hs_hr_employee e
INNER JOIN ohrm_job_title o ON e.job_title_code = o.id
WHERE e.work_station='101';
hs_hr_employee
from django.db import models
class HsHrEmployee(models.Model):
    """Employee row with optional links to a job title and a work station."""

    emp_number = models.AutoField(primary_key=True)
    employee_id = models.CharField(max_length=50, blank=True, null=True)
    emp_lastname = models.CharField(max_length=100)
    emp_firstname = models.CharField(max_length=100)
    # db_column keeps the column named job_title_code (no _id suffix).
    # Nullable, so an employee may have no job title.
    job_title_code = models.ForeignKey('OhrmJobTitle', models.DO_NOTHING,
                                       db_column='job_title_code', blank=True, null=True)
    # Same pattern: the column is work_station; the attribute holding the raw
    # key is work_station_id.
    work_station = models.ForeignKey('OhrmSubunit', models.DO_NOTHING,
                                     db_column='work_station', blank=True, null=True)
ohrm_job_title
class OhrmJobTitle(models.Model):
    """Job title lookup referenced by HsHrEmployee.job_title_code."""

    job_title = models.CharField(max_length=100)
    # Optional longer description of the title.
    job_description = models.CharField(max_length=400, blank=True,
                                       null=True)
i added the models
You can filter with:
# Employees at work station 101 that actually have a job title.
# job_title_code is nullable, so without the isnull filter the queryset would
# also return employees with no title (LEFT JOIN semantics) and
# `item.job_title_code.<field>` would fail on None. Filtering them out matches
# the INNER JOIN in the SQL being translated.
# select_related fetches the job title in the same query.
qs = HsHrEmployee.objects.filter(
    work_station_id=101,
    job_title_code__isnull=False,
).select_related('job_title_code')
For the HsHrEmployee model objects that arise from this queryset, you can then determine the job_title for example with:
# Walk the employees and print the title of each one's related job.
for employee in qs:
    job = employee.job_title_code
    print(job.job_title)

How to create multiple inner joins using Django ORM?

I'm struggling to find documentation and examples to solve the following dilemma. I have two tables, Results (which contain results of a race) and Photos (where each photo contain runners in that race). Both tables contain entrant_numbers, i.e. the number of the runner in the race and the number of the runner in the photo. In SQL I joined them like so to get the total number of photo captures per runner.
SELECT * FROM Photo p
INNER JOIN Result r ON p.entrant_number=r.entrant_number AND p.race_year=r.race_year
WHERE r.user_id=123
My models are structured like so;
class Photo(models.Model):
    """A race photo, stored as URL prefix, filename and extension."""

    photo_id = models.AutoField(primary_key=True)
    # on_delete is a required argument from Django 2.0 onwards; CASCADE was
    # the implicit default before that, so behaviour is unchanged.
    # The field is named race_id, so the database column is race_id_id.
    race_id = models.ForeignKey('Race', on_delete=models.CASCADE)
    url_pre_string = models.CharField("URL pre-string", max_length=255)
    filename = models.CharField("Filename", max_length=100)
    extension = models.CharField("File extensions", max_length=4)
    # Set on creation / refreshed on every save, respectively.
    date_created = models.DateTimeField(auto_now_add=True)
    date_updated = models.DateTimeField(auto_now=True)
class Race(models.Model):
    """A race, identified by ``race_id`` and carrying the year it was run."""

    race_id = models.AutoField(primary_key=True)
    race_year = models.PositiveIntegerField("Race year", null=False, blank=False)
    # Set on creation / refreshed on every save, respectively.
    date_created = models.DateTimeField(auto_now_add=True)
    date_updated = models.DateTimeField(auto_now=True)
class Result(models.Model):
    """A runner's result in a race (finishing position and entrant number)."""

    result_id = models.AutoField(primary_key=True)
    # on_delete is a required argument from Django 2.0 onwards; CASCADE was
    # the implicit default before that, so behaviour is unchanged.
    # The fields are named race_id / user_id, so the database columns are
    # race_id_id / user_id_id.
    race_id = models.ForeignKey('Race', on_delete=models.CASCADE)
    position = models.PositiveIntegerField("Position", default=0)
    entrant_number = models.PositiveIntegerField("Entrant number", default=0, null=True, blank=True)
    user_id = models.ForeignKey(User, null=True, on_delete=models.CASCADE)
    # Set on creation / refreshed on every save, respectively.
    date_created = models.DateTimeField(auto_now_add=True)
    date_updated = models.DateTimeField(auto_now=True)
The most recent query I tried was;
# NOTE(review): Photo declares no relation to Result (both only point at
# Race), so the `result__...` lookup has nothing to traverse from Photo.
photo_captures = Photo.objects.filter(result__user_id_id=current_user.id)
I was hoping this would traverse back through result and filter on the current user id.
You can join two tables by performing a raw SQL query.
You can check how to do this here
from django.db import connection
def my_custom_sql(self, user_id=123):
    """Return the first photo/result row for one user, or None.

    Joins photos to results on entrant number and race year and keeps the
    rows belonging to ``user_id``.

    Args:
        user_id: id of the user whose results are matched. Defaults to 123,
            the value that used to be hard-coded in the query.

    Returns:
        The first matching row as a tuple, or None when nothing matches.
    """
    # Triple-quoted so the statement can span several lines; the user id is
    # passed as a parameter so the driver quotes it instead of it being
    # formatted into the SQL text.
    query = """
        SELECT *
        FROM myapp_photo p
        INNER JOIN myapp_result r
            ON p.entrant_number = r.entrant_number
            AND p.race_year = r.race_year
        WHERE r.user_id = %s
    """
    # The context manager closes the cursor even if execute() raises.
    with connection.cursor() as cursor:
        cursor.execute(query, [user_id])
        row = cursor.fetchone()
    return row
Alternate way:
You can set foreign key in Photo model to Result model, following this you will have unique photos of the different results. Something like this :
class Result(models.Model):
    """A runner's result; carries race_year directly instead of a Race link."""

    results_id = models.AutoField(primary_key=True)
    position = models.PositiveIntegerField("Position", default=0)
    entrant_number = models.PositiveIntegerField("Entrant number", default=0, null=True, blank=True)
    race_year = models.PositiveIntegerField("Race year", default=0)
    # on_delete is a required argument from Django 2.0 onwards; CASCADE was
    # the implicit default before that, so behaviour is unchanged.
    user_id = models.ForeignKey(User, null=True, on_delete=models.CASCADE)
    # Set on creation / refreshed on every save, respectively.
    date_created = models.DateTimeField(auto_now_add=True)
    date_updated = models.DateTimeField(auto_now=True)
class Photo(models.Model):
    """A photo attached to one Result, so it can be filtered via ``result__``."""

    # on_delete is a required argument from Django 2.0 onwards; CASCADE was
    # the implicit default before that, so behaviour is unchanged.
    result = models.ForeignKey(Result, on_delete=models.CASCADE)
    photo_id = models.AutoField(primary_key=True)
    url_pre_string = models.CharField("URL pre-string", max_length=255)
    filename = models.CharField("Filename", max_length=100)
    extension = models.CharField("File extensions", max_length=4)
    entrant_number = models.PositiveIntegerField("Entrant number", default=0, null=True, blank=True)
    race_year = models.PositiveIntegerField("Race year", default=0)
    # Set on creation / refreshed on every save, respectively.
    date_created = models.DateTimeField(auto_now_add=True)
    date_updated = models.DateTimeField(auto_now=True)
and then to fetch the photos for a particular result of year for a single user you can do
from django.db.models import F

# Photos of user 123's results whose entrant number and race year match the
# result they are attached to. Comparing one column with another (here a
# column on the related Result) needs an F() expression; a bare name such as
# result__entrant_number is just an undefined Python variable.
photos = Photo.objects.filter(
    entrant_number=F('result__entrant_number'),
    race_year=F('result__race_year'),
    result__user_id=123,
)
Of course, I haven't tested this, but it should do the trick. Let me know if this works or if you need any help.

Manipulate Excel Data Before Adding to Database with Django - Order of Operations?

I receive 6 weekly excel reports that I've been manually compiling into a very large monthly report. Each report has between 5-30 columns, and 4000 to 130,000 rows.
I'm putting together a simple Django app that allows you to upload each report, and the data ends up in the database.
Here's my models.py:
#UPEXCEL models
from django.db import models
############## LISTS ###############
class TransactionTypeList(models.Model):
    """Lookup list of transaction type names."""

    name = models.CharField(max_length=100)

    def __str__(self):
        # Shown wherever the object is rendered as text.
        return self.name
class TransactionAppTypeList(models.Model):
    """Lookup list of transaction application type names."""

    name = models.CharField(max_length=100)

    def __str__(self):
        # Shown wherever the object is rendered as text.
        return self.name
class CrmCaseOriginList(models.Model):
    """Lookup list of CRM case origin names."""

    name = models.CharField(max_length=100)

    def __str__(self):
        # Shown wherever the object is rendered as text.
        return self.name
############## CLIENTS AND STAFF ###############
class Staff(models.Model):
    """A staff member, with contact details and an optional boss."""

    name = models.CharField(max_length=40)
    employee_id = models.CharField(max_length=40)
    # NOTE(review): start_date is a TimeField while end_date is a DateField;
    # this looks like a typo for DateField -- confirm before migrating.
    start_date = models.TimeField(blank=True, null=True)
    end_date = models.DateField(blank=True, null=True)
    first_name = models.CharField(blank=True, null=True, max_length=40)
    last_name = models.CharField(blank=True, null=True, max_length=40)
    email = models.EmailField(blank=True, null=True)
    phone = models.CharField(blank=True, null=True, max_length=20)
    street = models.CharField(blank=True, null=True, max_length=100)
    city = models.CharField(blank=True, null=True, max_length=100)
    state = models.CharField(blank=True, null=True, max_length=2)
    zipcode = models.CharField(blank=True, null=True, max_length=10)
    is_team_lead = models.BooleanField(default=False)
    # Self-reference to another Staff row. on_delete is a required argument
    # from Django 2.0 onwards; CASCADE was the implicit default before that,
    # so behaviour is unchanged.
    # NOTE(review): CASCADE deletes subordinates together with their boss;
    # since the field is nullable, SET_NULL may be what is wanted.
    boss = models.ForeignKey('Staff', related_name='Boss', null=True, blank=True,
                             on_delete=models.CASCADE)

    def __str__(self):
        return self.name

    class Meta:
        app_label="upexcel"
class Client(models.Model):
    """A client account led by one staff member."""

    name = models.CharField(max_length=40)
    short_name = models.CharField(max_length=20, blank=True, null=True)
    # NOTE(review): `timezone` is not imported in the visible part of this
    # module (only `models` is); presumably
    # `from django.utils import timezone` is needed -- confirm.
    start_date = models.DateField(default=timezone.now, blank=True, null=True)
    end_date = models.DateField(blank=True, null=True)
    # on_delete is a required argument from Django 2.0 onwards; CASCADE was
    # the implicit default before that, so behaviour is unchanged.
    team_lead = models.ForeignKey(Staff, related_name='client_team_lead',
                                  on_delete=models.CASCADE)

    def __str__(self):
        return self.name
class ClientNameChart(models.Model):
    """Maps a client-name spelling found in a report to the real Client."""

    # on_delete is a required argument from Django 2.0 onwards; CASCADE was
    # the implicit default before that, so behaviour is unchanged.
    client_name = models.ForeignKey(Client, related_name='client_corrected_name',
                                    on_delete=models.CASCADE)
    # The string exactly as it appears in a report.
    name_variation = models.CharField(max_length=100)
    date_added = models.DateTimeField(auto_now_add=True)

    def __str__(self):
        return '%s becomes %s' % (self.name_variation, self.client_name)
class StaffNameChart(models.Model):
    """Maps a staff-name spelling found in a report to the real Staff row."""

    # on_delete is a required argument from Django 2.0 onwards; CASCADE was
    # the implicit default before that, so behaviour is unchanged.
    staff_name = models.ForeignKey(Staff, related_name='staff_corrected_name',
                                   on_delete=models.CASCADE)
    # The string exactly as it appears in a report.
    name_variation = models.CharField(max_length=100)
    date_added = models.DateTimeField(auto_now_add=True)

    def __str__(self):
        return '%s becomes %s' % (self.name_variation, self.staff_name)
############## DATA FROM REPORTS ###############
class CrmNotes(models.Model):
    """A CRM note row: who created it, for which client, and when."""

    # on_delete is a required argument from Django 2.0 onwards; CASCADE was
    # the implicit default before that, so behaviour is unchanged.
    created_by = models.ForeignKey(Staff, related_name='note_creator',
                                   on_delete=models.CASCADE)
    case_origin = models.CharField(max_length=20)
    client_regarding = models.ForeignKey(Client, related_name='note_client_regarding',
                                         on_delete=models.CASCADE)
    created_on = models.DateTimeField()
    case_number = models.CharField(max_length=40)
class Transactions(models.Model):
    """A transaction row linking a client, its creator and two type lookups."""

    # on_delete is a required argument from Django 2.0 onwards; CASCADE was
    # the implicit default before that, so behaviour is unchanged.
    client_regarding = models.ForeignKey(Client, related_name='transaction_client',
                                         on_delete=models.CASCADE)
    created_by = models.ForeignKey(Staff, related_name='transaction_creator',
                                   on_delete=models.CASCADE)
    type = models.ForeignKey(TransactionTypeList, related_name='transaction_type',
                             on_delete=models.CASCADE)
    app_type = models.ForeignKey(TransactionAppTypeList, related_name='transaction_app_type',
                                 on_delete=models.CASCADE)

    class Meta:
        app_label="upexcel"
class Timesheets(models.Model):
    """One clocked shift for a staff member."""

    # on_delete is a required argument from Django 2.0 onwards; CASCADE was
    # the implicit default before that, so behaviour is unchanged.
    staff = models.ForeignKey(Staff, related_name='staff_clocked_in',
                              on_delete=models.CASCADE)
    workdate = models.DateField()
    start_time = models.DateTimeField()
    end_time = models.DateTimeField()
    # Stored as given; nothing here derives it from start_time/end_time.
    total_hours = models.DecimalField(decimal_places=2, max_digits=8)
class Provider(models.Model):
    """A provider's name and postal address."""

    name = models.CharField(max_length=40)
    street = models.CharField(max_length=100)
    city = models.CharField(max_length=40)
    state = models.CharField(max_length=11)
    zip = models.CharField(max_length=10)
class StudentsApplication(models.Model):
    """A student's application for an amount, tied to one Provider."""

    app_number = models.CharField(max_length=40)
    program = models.CharField(max_length=40)
    benefit_period = models.CharField(max_length=40)
    student_name = models.CharField(max_length=40)
    student_empl_id = models.CharField(max_length=40)
    requested_amount = models.DecimalField(max_digits=8, decimal_places=2)
    # on_delete is a required argument from Django 2.0 onwards; CASCADE was
    # the implicit default before that, so behaviour is unchanged.
    provider = models.ForeignKey(Provider, related_name='app_provider',
                                 on_delete=models.CASCADE)
    provider_code = models.CharField(max_length=40)
class AuditReport(models.Model):
    """One audited application: defect flags, adjustment amounts and links."""

    was_audited = models.BooleanField(default=False)
    # on_delete is a required argument from Django 2.0 onwards; CASCADE was
    # the implicit default before that, so behaviour is unchanged. The same
    # applies to the three foreign keys at the end of the class.
    auditor = models.ForeignKey('upexcel.Staff', related_name='auditor',
                                on_delete=models.CASCADE)
    payment_defect = models.BooleanField(default=False)
    grant_discount_error = models.BooleanField(default=False)
    math_error = models.BooleanField(default=False)
    fees_book_error = models.BooleanField(default=False)
    other_error = models.BooleanField(default=False)
    overpayment_amount = models.DecimalField(max_digits=8, decimal_places=2)
    underpayment_amount = models.DecimalField(max_digits=8, decimal_places=2)
    doc_defect = models.BooleanField(default=False)
    status_change = models.BooleanField(default=False)
    admin_savings_defect = models.BooleanField(default=False)
    network_savings_defect = models.BooleanField(default=False)
    admin_adjustments = models.DecimalField(max_digits=8, decimal_places=2)
    network_adjustments = models.DecimalField(max_digits=8, decimal_places=2)
    error_corrected = models.BooleanField(default=False)
    # NOTE(review): max_length on a TextField only affects the form widget;
    # it is not enforced by the model or the database.
    comments = models.TextField(max_length=500)
    client = models.ForeignKey(Client, related_name='audited_client',
                               on_delete=models.CASCADE)
    staff = models.ForeignKey(Staff, related_name='processor_audited',
                              on_delete=models.CASCADE)
    application = models.ForeignKey(StudentsApplication, related_name='app_audited',
                                    on_delete=models.CASCADE)

    class Meta:
        app_label="upexcel"
However the excel reports I'm taking in need some work done to them, and I'm trying figure out exactly how I should go about processing them and routing them.
The first challenge is that each report references the associated Staff and Client with different data. For example, if the Staff.name is "Bob Dole", one report has it as "Dole, Bob". Another has it as "Dole, Robert". Still another has "Robert Dole" then "103948210", which is his employee ID number.
Also, these change and new ones sprout up, which is why I made ClientNameChart and StaffNameChart, to where a user can input the string as it shows up in a report, and attach it to a Client or Staff. Then when processing, we can lookup StaffNameChart.name_variation, and return the associated StaffNameChart.Staff.employee_id, which should work great as a foreign key within the respective report's table (ie. AuditReport.staff)
The second challenge is to take a report, and route some of the columns to one database table, and others to another. For example, the big one is the Audit Report sheet. Many of the columns just transpose directly into the AuditReport(models.Model). However, it also has data for each StudentsApplication and Provider, where I need to take several columns, store them as a new record in their destination table, and replace the columns with one column containing a foreign key for that item within that destination table.
So that is my quest.
Here's the order of operations I have in my head - I will use the most complex Audit_Report_Wk_1.xlsx report to address all challenges in one upload:
Upload File
Using openpyxl, load read-only data:
# load_workbook is the name actually called below, so that is what has to be
# imported; the ReadOnlyWorksheet class imported before was never used.
from openpyxl import load_workbook

# The uploaded file object can be handed to openpyxl directly.
myexcelfile = request.FILES['file']
# read_only=True opens the workbook in openpyxl's read-only mode, intended
# for large files.
myworkbook = load_workbook(myexcelfile, read_only=True)
mysheet = myworkbook['Sheet1']
Write a script that matches the names strings of the staff, auditor, and client columns with StaffNameChart.name_variation, and replace it with StaffNameChart.Staff.name.
Part B: If the client or staff columns are blank, or contain strings not found in the name charts, all of those rows get saved in a new excel document. Edit: I suppose I could also create a new model class called IncompleteAuditReport that just have fields that match up with each column and store it there, then if someone adds a new NameChart variation, it could trigger a quick look-up to see if that could allow this process to complete and the record to be properly added?)
Check the columns in mysheet that will be replaced by foreign keys for the Provider and StudentsApplication tables. If their respective data doesn't yet exist in their respective tables, add the new record. Either way, then replace their columns with the foreign key that points to the resulting record.
Is this the correct order of operations? Any advice on what specific tools to use from openpyxl etc. to manipulate the data in the most efficient ways, so I can use the fewest resources possible to look-up and then change several hundred thousand fields?
Thank you so much if you've read this far. I'm currently a bit intimidated by the more complex data types, so it's not crystal clear to me the best way to store the data in memory and to manipulate it while it's there.

django many to many field - programmatically retrieve relations

I have been working on a model for tags and am trying to avoid using contenttypes. I have couple questions related to ManyToManyField in django.
I have the following model
taggables/models.py
class Tag(models.Model):
    """A tag (slug plus approval status) created by a user for one site."""

    # Stored code -> human-readable label.
    tag_statuses = (
        (u'P', _('Pending approval')),
        (u'A', _('Approved')),
    )
    slug = models.SlugField()
    created_at = models.DateTimeField(null=True, blank=True)
    # on_delete is a required argument from Django 2.0 onwards; CASCADE was
    # the implicit default before that, so behaviour is unchanged.
    created_by = models.ForeignKey(User, related_name='tagged_item_created_by',
                                   on_delete=models.CASCADE)
    status = models.CharField(max_length=20, choices=tag_statuses)
    site = models.ForeignKey(Site, default=settings.SITE_ID, related_name='tagged_item_site',
                             on_delete=models.CASCADE)

    def __unicode__(self):
        return self.slug
class TagI18n(models.Model):
    """Translated text of a Tag; at most one row per (language, tag)."""

    tag = models.CharField(max_length=100)
    descriptor = models.TextField(null=True, blank=True)
    # i18n properties
    # on_delete is a required argument from Django 2.0 onwards; CASCADE was
    # the implicit default before that, so behaviour is unchanged.
    item = models.ForeignKey(Tag, on_delete=models.CASCADE)
    language = models.CharField(max_length=6, choices=settings.LANGUAGES, default=settings.LANGUAGE_CODE)

    class Meta:
        # A tuple of tuples: the trailing comma makes the outer tuple real.
        # Without it the doubled parentheses collapse to a flat pair of
        # strings, which Django tolerates but which reads misleadingly.
        unique_together = (("language", "item"),)

    def __unicode__(self):
        return self.tag
I also have different apps around my project that uses tag model as many to many field. such as events for example
events/models.py
class Item(models.Model):
    """An event with its schedule, moderation status and related content."""

    # Stored code -> human-readable label.
    event_status_list = (
        (u'P', _('Pending approval')),
        (u'A', _('Approved')),
        (u'R', _('Rejected')),
        (u'S', _('Spam')),
    )
    published_at = models.DateTimeField(null=True, blank=True)
    # on_delete is a required argument from Django 2.0 onwards; CASCADE was
    # the implicit default before that, so behaviour is unchanged.
    published_by = models.ForeignKey(User, null=True, blank=True, related_name='item_published_by',
                                     on_delete=models.CASCADE)
    updated_by = models.ForeignKey(User, null=True, blank=True, related_name='item_updated_by',
                                   on_delete=models.CASCADE)
    updated_at = models.DateTimeField(null=True, blank=True)
    site = models.ForeignKey(Site, default=settings.SITE_ID, related_name='events_item_site',
                             on_delete=models.CASCADE)
    event_slug = models.SlugField(null=True, blank=True)
    # event timing
    event_start_date = models.DateField()
    event_start_time = models.TimeField(null=True, blank=True)
    event_end_date = models.DateField()
    event_end_time = models.TimeField(null=True, blank=True)
    event_recurrent = models.BooleanField(default=False)
    event_status = models.CharField(max_length=20, choices=event_status_list, default=u'P')
    # relations
    # null has no effect on ManyToManyField (the link lives in a separate
    # join table), so it is dropped; blank=True keeps the relation optional.
    media = models.ManyToManyField(ImageFile, blank=True)
    comments = models.ManyToManyField(Comment, blank=True)
    votes = models.ManyToManyField(Vote, blank=True)
    tags = models.ManyToManyField(Tag, blank=True)
    audience = models.ManyToManyField(Audience, blank=True)
Now what am trying to do here is run a query to programmatically retrieve all the related models to Tag and then count how many a times a tag was used. Am sure I can do that with contenttypes (generic types) but I don't know how it will perform under heavy usage that's why I wanted to do the many to many fields.
If you are interested in the total number of usage ( aka reference count ) of a tag very often, I think you should store it in the database, example put one extra field to the Tag model, like
referencecount = models.IntegerField( default=0 )
Then, in the appropriate places (for example the models' .save() methods), you can increment or decrement its value.
For your use case, the performance of generic wouldn't matter, because you need anyway to do N queries over 2N tables (one for each "taggable" model and one for each m2m join table, at least).
With the m2m approach, you should have the list of 'taggable' models stored somewhere, at least as a list of ('app_name', 'model') pairs. Then use ContentType (it's very performant) to get the actual model class or query directly from there:
# Count, per taggable model, how many distinct objects carry `yourtag`.
# `taggable_models` is the list of (app_label, model) pairs described above.
counts = {}
for m in taggable_models:
    # get_by_natural_key is defined on the manager, so it must be reached
    # through ContentType.objects rather than on the model class itself.
    ct = ContentType.objects.get_by_natural_key(*m)
    # distinct() guards against rows repeated by the m2m join.
    c = ct.model_class().objects.filter(tags=yourtag).distinct().count()
    counts[ct.name] = c