slow process during saving model in database - django

I want to save a list of model instances to a database table, but it is very slow.
When I call the save() method on each item, it takes nearly 20 minutes.
Is this the best way to save objects to the table?
models.py
class Part(models.Model):
    """A part described by five optional short text attributes."""

    # Every column is a nullable CharField; only the length limits differ.
    block = models.CharField(max_length=2, null=True)
    phase = models.CharField(max_length=3, null=True)
    department = models.CharField(max_length=20, null=True)
    type = models.CharField(max_length=10, null=True)
    mark = models.CharField(max_length=20, null=True)
class Task(models.Model):
    """A task identified by a short name."""

    name = models.CharField(max_length=20)
class ProjectTask(models.Model):
    """Associates a Task with a Project and stores its weight percentage."""

    # Both links are removed together with the row they point to (CASCADE).
    project = models.ForeignKey('Project', on_delete=models.CASCADE)
    task = models.ForeignKey('Task', on_delete=models.CASCADE)
    weight_percent = models.FloatField()
class PartTask(models.Model):
    """Associates a Part with a ProjectTask and records optional progress."""

    part = models.ForeignKey('Part', on_delete=models.CASCADE)
    project_task = models.ForeignKey('ProjectTask', on_delete=models.CASCADE)
    # Left NULL until a progress value is recorded.
    progress = models.FloatField(null=True)
views.py
def import_part_task(_project_id):
    """Create one PartTask per (Part, ProjectTask) pair, saving row by row.

    The project's tasks are selected using the first element of
    ``_project_id`` converted with ``int()``. Every PartTask is persisted
    with its own ``save()`` call.
    """
    wanted_project = int(_project_id[0])
    tasks_of_project = ProjectTask.objects.filter(project=wanted_project)

    pending = []
    for part in Part.objects.all():
        for task in tasks_of_project:
            pending.append(PartTask(part=part, project_task=task))

    # This action takes a very long time: each object is saved on its own.
    for part_task in pending:
        part_task.save()

That makes perfect sense: every call to save() sends its own query to the database, and all of those round trips together take significant time.
You can however boost performance by inserting with bulk_create(..) [Django-doc]:
def import_part_task(_project_id, batch_size=None):
    """Bulk-create one PartTask for every (Part, ProjectTask) pair of a project.

    Args:
        _project_id: Indexable value whose first element identifies the
            project; that element is passed through ``int()``.
            NOTE(review): indexing a plain string such as ``"12"`` yields
            only its first character -- confirm callers pass a list/tuple.
        batch_size: Optional maximum number of objects per INSERT statement,
            forwarded to ``bulk_create``. The default ``None`` leaves the
            batching to Django, exactly as before.

    Returns:
        The list of created PartTask objects, as returned by ``bulk_create``.
    """
    project_tasks = ProjectTask.objects.filter(project=int(_project_id[0]))
    part_tasks = [
        PartTask(part=part, project_task=task)
        for part in Part.objects.all()
        for task in project_tasks
    ]
    # A bounded batch size keeps each statement small when parts x tasks is large.
    return PartTask.objects.bulk_create(part_tasks, batch_size=batch_size)
By inserting in bulk, Django will create a query to insert a large amount of objects with a single query, instead of each time making a query for each individual PartTask object. The amount of "round trips" to the database is thus reduced significantly.

Related

Failed to create a seller instance using django signals

I'm building a website with 2 user types and still new to django.
And I want to add the functionality to add the seller of the product whenever a product is sold.
I'm sorry that I couldn't explain it better.
Here's the code of models.py:
class Ordered(models.Model):
    """An order linking a product, its seller and the buying customer."""

    # Product and seller links are nulled (not cascaded) when their target goes away.
    products = models.ForeignKey(Products, on_delete=models.SET_NULL, null=True)
    seller = models.ForeignKey(SellerProfile, on_delete=models.SET_NULL, null=True)
    buyer = models.ForeignKey(CustomerProfile, on_delete=models.CASCADE)
    ordered_on = models.DateTimeField(auto_now_add=True)
product/models.py
class Products(models.Model):
    """A product offered by a seller, with category, description and file."""

    seller = models.ForeignKey(SellerProfile, on_delete=models.CASCADE)
    title = models.CharField(max_length=255)
    product_category = models.CharField(
        choices=CATEGORY_CHOICES,
        max_length=100,
        default='eBooks',
    )
    description = models.TextField()
    files = models.FileField(upload_to='media/product_files/', null=True)
    slug = models.SlugField(max_length=255, unique=True, null=True, blank=True)
And this is the signal code:
# NOTE(review): the next line reads like an ``@receiver(...)`` decorator whose
# ``@`` was lost; as written it is only a comment, so nothing connects this
# function to the signal.
#receiver(post_save, sender = Ordered)
def new_order_for_seller(sender, instance, created, *args, **kwargs):
    """Handler meant (per the surrounding text) to record the seller of an order."""
    # NOTE(review): ``seller`` is read from the Ordered class, not from ``instance``.
    seller = Ordered.seller.sellerprofile
    if created:
        # Creates a further Ordered row instead of updating ``instance``.
        Ordered.objects.create(seller = seller)
Any suggestion or correction of the code will be really helpful.
Thank you
You can set the seller attribute from instance.products.seller (the foreign key on Ordered is named products):
@receiver(pre_save, sender=Ordered)
def new_order_for_seller(sender, instance, created=None, *args, **kwargs):
    """Copy the product's seller onto an Ordered instance before its first save.

    Args:
        sender: The model class sending the signal (``Ordered``).
        instance: The Ordered object about to be saved.
        created: Optional override. ``pre_save`` does not send this argument,
            so it defaults to ``None`` and the "is this a new row" decision
            falls back to ``instance._state.adding``.
    """
    is_new = instance._state.adding if created is None else created
    # The foreign key on Ordered is called ``products``; checking the raw
    # ``products_id`` avoids a query when no product is attached.
    if is_new and instance.products_id is not None:
        instance.seller_id = instance.products.seller_id
We can do this in a pre_save signal to prevent saving the new Ordered object a second time.
That being said, since the seller is already determined by the Product, it does not make much sense to duplicate this, since it can eventually lead to inconsistencies where the Seller of a Product changes later, and the Ordered is still pointing to the "old" Seller.

Archive records and re-inserting new records in Django?

I've got a Stock table and a StockArchive table.
My Stock table consists of roughly 10,000 stocks that I update daily. The reason I have a StockArchive table is that I still want to keep some historical data rather than just overwrite the existing records. My question is: is this a proper way of doing it?
First, my models:
class Stock(models.Model):
    """Current record for a stock, stored in the ``stock`` table."""

    # Manager built from the BulkUpdateOrCreateQuerySet queryset class.
    objects = BulkUpdateOrCreateQuerySet.as_manager()
    stock = models.CharField(max_length=200)
    ticker = models.CharField(max_length=200)
    # DO_NOTHING: Django itself takes no action here when the Exchange is deleted.
    exchange = models.ForeignKey(Exchange, on_delete=models.DO_NOTHING)
    eod_price = models.DecimalField(max_digits=12, decimal_places=4)
    currency = models.CharField(max_length=20, blank=True, null=True)
    last_modified = models.DateTimeField(blank=True, null=True)

    class Meta:
        db_table = "stock"
class StockArchive(models.Model):
    """Dated price snapshot of a Stock, stored in the ``stock_archive`` table."""

    # Manager built from the BulkUpdateOrCreateQuerySet queryset class.
    objects = BulkUpdateOrCreateQuerySet.as_manager()
    # DO_NOTHING: Django itself takes no action here when the Stock is deleted.
    stock = models.ForeignKey(Stock, on_delete=models.DO_NOTHING)
    eod_price = models.DecimalField(max_digits=12, decimal_places=4)
    archive_date = models.DateField()

    class Meta:
        db_table = "stock_archive"
I proceed on doing the following:
@transaction.atomic
def my_func():
    """Archive every current Stock row, then clear the Stock table.

    Both steps run inside one transaction, so a failure in either rolls back
    the other. Archive rows are inserted in batches of 100.
    """
    batch_size = 100
    # Resolve the date once so every row of this run shares the same value.
    today = date.today()
    archive_stocks = [
        StockArchive(
            stock=stock,  # the ForeignKey takes the Stock instance, not its name
            eod_price=stock.eod_price,
            archive_date=today,
        )
        for stock in Stock.objects.all()
    ]
    # insert into stock archive table
    StockArchive.objects.bulk_create(archive_stocks, batch_size=batch_size)
    # NOTE(review): StockArchive.stock uses on_delete=DO_NOTHING, so the rows
    # written above still reference the Stock rows deleted below; a database
    # that enforces the foreign key may reject this delete -- confirm.
    # delete stock table
    Stock.objects.all().delete()
    # proceed to bulk_insert new stocks
# proceed to bulk_insert new stocks
I also wrapped the function with @transaction.atomic so that either all of the operations are committed or none of them are.
Is my thought process correct, or should I do something differently? Perhaps more efficient?

Django raw query giving same result on all models

I have 3 models: Product, Photo, and ProductLikeDislike. I am performing a left outer join across all 3 models. First I join Product with Photo, and then I join the resulting table (temp) with ProductLikeDislike. Below is the raw SQL.
Note: olx is the name of django app.
# Left-join every product to its cover photo (or to no photo at all), then
# left-join that result to the like/dislike row of user 2.
# A triple-quoted literal lets the SQL span several lines, and the two values
# are passed as query parameters instead of being written into the SQL text.
data = Product.objects.raw(
    """
    select * from (
        select olx_product.id, olx_product.name,
               olx_photo.file, olx_photo.cover_photo_flag
        from olx_product
        left outer join olx_photo
            on (olx_product.id = olx_photo.reference_id_id)
        where olx_photo.cover_photo_flag = %s
           or olx_photo.cover_photo_flag is null
    ) as temp
    left outer join olx_productlikedislike
        on (temp.id = olx_productlikedislike.product_id_id
            and olx_productlikedislike.product_liked_by_id_id = %s)
    """,
    params=['yes', 2],
)
for x in data:
    print(x.name)
What I want to understand is why I get the same result whichever of the 3 models above I use to run the raw SQL, i.e.:
When I do
# Same query (text abbreviated), issued through the Product manager.
data = Product.objects.raw('select *.....')
for x in data:
    print(x.name)
or
# Same query (text abbreviated), issued through the Photo manager.
data = Photo.objects.raw('select *......')
for x in data:
    print(x.name)
or
# Same query (text abbreviated), issued through the ProductLikeDislike manager;
# raw() lives on the manager (``.objects``), not on the model class itself.
data = ProductLikeDislike.objects.raw('select *.....')
for x in data:
    print(x.name)
I am getting the same result. Why?
Please help me to understand this.
Below is the models.py file
from django.db import models
from django.urls import reverse
from django.dispatch import receiver
from django.contrib.auth.models import User
class Product(models.Model):
    """A listed product with pricing, contact details and status flags."""

    category = models.ForeignKey(Category, on_delete=models.CASCADE)
    name = models.CharField(max_length=200, db_index=True)
    slug = models.SlugField(max_length=200, db_index=True)
    description = models.TextField(blank=True)
    # DecimalField rather than FloatField to avoid rounding issues.
    price = models.DecimalField(max_digits=10, decimal_places=2)
    created = models.DateTimeField(auto_now_add=True)
    updated = models.DateTimeField(auto_now=True)
    contact = models.BigIntegerField(default=None, blank=True, null=True)
    created_by = models.CharField(max_length=200, default=None, blank=True, null=True)
    uploaded_by_id = models.IntegerField(default=0)
    status = models.IntegerField(default=0)  # 0 --> active, 1 --> inactive
    mark_as_sold = models.IntegerField(default=0)  # 0 --> not sold, 1 --> sold

    class Meta:
        ordering = ('-created',)
        # Products are queried by id and slug together; the combined index
        # is there to speed that lookup up.
        index_together = (('id', 'slug'),)

    def __str__(self):
        return self.name

    def get_absolute_url(self):
        """Return the URL of this product's edit page."""
        return reverse('olx:edit_product', kwargs={'pk': self.pk})
class Photo(models.Model):
    """An uploaded file attached to a Product, newest first by default."""

    reference_id = models.ForeignKey(Product, null=True, on_delete=models.CASCADE)
    photo_type = models.CharField(max_length=70, db_index=True)
    file = models.FileField(upload_to='photos/', default='NoImage.jpg')
    cover_photo_flag = models.CharField(default=0, max_length=5, db_index=True)
    uploaded_at = models.DateTimeField(auto_now_add=True)
    uploaded_by_id = models.IntegerField(default=0)
    status = models.IntegerField(default=0)  # 0 --> active, 1 --> inactive

    class Meta:
        ordering = ('-uploaded_at',)
class ProductLikeDislike(models.Model):
    """A per-user boolean status recorded against a product."""

    # on_delete is spelled out by keyword; both links fall back to pk 0.
    product_id = models.ForeignKey(Product, on_delete=models.SET_DEFAULT, default=0)
    product_liked_by_id = models.ForeignKey(User, on_delete=models.SET_DEFAULT, default=0)
    status = models.BooleanField(default=False)
And Please also show me how to write it in pure Django way if possible?
I am getting the same result. Why? Please help me to understand this.
Because .raw(..) [Django-doc] just takes a raw query and executes it. The model from which the raw is performed is irrelevant.
We can generate a query that looks like:
from django.db.models import Q

# Products whose related photo has a NULL flag (which includes products with
# no photo at all) or the flag 'yes', and which have a like/dislike row from
# the user with primary key 2.
# Lookups follow the models shown above: the Photo field is
# ``cover_photo_flag`` and the reverse name of ProductLikeDislike is
# ``productlikedislike``.
Product.objects.filter(
    Q(photo__cover_photo_flag__isnull=True) | Q(photo__cover_photo_flag='yes'),
    productlikedislike__product_liked_by_id=2,
)
So here we accept all Products for which a related Photo object has a cover-photo flag that is NULL (this also happens when the JOIN does not yield any Photo), or for which that flag is 'yes'. Furthermore, there should be a ProductLikeDislike object whose liking user has the primary key 2.
Note that usually a ForeignKey [Django-doc] has no _id suffix, or id_ prefix. It is also a bit "odd" that you set a default=0 for this, especially since most databases only assign strictly positive values as primary keys, and it makes no sense to inherently prefer 0 over another object anyway.
Something like this:
# Build the product queryset for one user, prefetching photos and that
# user's like/dislike rows.
user_i_care_about = User.objects.get(username='user2')

productlikedislike_set = models.Prefetch(
    'productlikedislike_set',
    (
        ProductLikeDislike.objects
        .select_related('product_liked_by')
        .filter(product_liked_by=user_i_care_about)
        .order_by('id')
    ),
)
# Explicit Prefetch kept in case a select_related() is needed here later.
photo_set = models.Prefetch('photo_set', Photo.objects.all())

products = (
    Product.objects
    .prefetch_related(photo_set, productlikedislike_set)
    .filter(models.Q(photo__cover_photo_flag='yes') | models.Q(photo__isnull=True))
    .filter(productlikedislike__product_liked_by=user_i_care_about)
)
Then you can use:
# Print the file name of every photo of every selected product.
for product in products:
    for pic in product.photo_set.all():
        # ``pic`` is the loop variable; no ``x`` is defined in this snippet.
        print(pic.file.name)
    # every product here WILL be liked by the user
if your models look something like this:
class Product(models.Model):
    """Suggested remodelling of Product; TODO notes mark further clean-ups."""

    # category = models.ForeignKey(Category, on_delete=models.CASCADE) # TODO: uncomment; Category was not modelled in this example
    name = models.CharField(max_length=200, db_index=True)
    slug = models.SlugField(max_length=200, db_index=True)
    description = models.TextField(blank=True)
    price = models.DecimalField(max_digits=10, decimal_places=2) # DecimalField rather than FloatField to avoid rounding issues (correct as is)
    created = models.DateTimeField(auto_now_add=True)
    updated = models.DateTimeField(auto_now=True)
    contact = models.BigIntegerField(default=None,blank=True, null=True)
    created_by = models.CharField(max_length=200, default=None, blank=True, null=True)
    uploaded_by_id = models.IntegerField(default=0) # TODO: use ForeignKey(User) here
    status = models.IntegerField(default=0) # 0-->Active,1-->Inactive # TODO: use `choices`
    mark_as_sold = models.IntegerField(default=0) # 0-->not sold,1-->sold # TODO: use a BooleanField

    class Meta:
        ordering = ('-created',)
        index_together = (('id', 'slug'),) # products are queried by id and slug together; the combined index is meant to speed that up

    def get_absolute_url(self):
        """Return the URL of this product's edit page."""
        return reverse('olx:edit_product', kwargs={'pk': self.pk})

    def __str__(self):
        return self.name
class Photo(models.Model):
    """Suggested remodelling of Photo with the foreign key renamed to ``product``."""

    # NOTE(review): the raw SQL in the question joins on
    # olx_photo.reference_id_id, so db_column='reference_id' may not match the
    # existing column -- confirm before migrating.
    product = models.ForeignKey(Product, null=True,on_delete=models.CASCADE, db_column='reference_id')
    photo_type = models.CharField(max_length=70, db_index=True)
    file = models.FileField(upload_to='photos/', default='NoImage.jpg')
    cover_photo_flag = models.CharField(default=0, max_length=5, db_index=True) # TODO: use `choices`; the values used are "yes" / "no" but the default is 0 -- fix this
    uploaded_at = models.DateTimeField(auto_now_add=True)
    uploaded_by_id = models.IntegerField(default=0) # TODO: use ForeignKey(User) here
    status = models.IntegerField(default=0) # 0-->Active,1-->Inactive # TODO: use `choices` -- or call this "is_active" and make it a bool

    class Meta:
        ordering = ('-uploaded_at',)
class ProductLikeDislike(models.Model):
    """Suggested remodelling of ProductLikeDislike with conventional FK names."""

    product = models.ForeignKey(Product, models.SET_DEFAULT, default=0) # TODO: default=0 is a poor choice; models.ForeignKey(Product, models.SET_NULL, null=True) is much better
    product_liked_by = models.ForeignKey(User, models.SET_DEFAULT, default=0, db_column='product_liked_by_id') # TODO: default=0 is a poor choice; models.ForeignKey(User, models.SET_NULL, null=True) is much better
    status = models.BooleanField(default=False) # TODO: rename; try "liked" / "disliked", or use IntegerField(choices=((0, 'Liked'), (1, 'Disliked'))) if there are more than 2 values
A full example WITH tests can be seen here: https://gist.github.com/kingbuzzman/05ed095d8f48c3904e217e56235af54a

Django aggregate on .extra values

Model , with abstract base class:
class MapObject(models.Model):
    """Abstract base for located, dated map entries with audit fields."""

    # Defaults are fixed points in time: 1940-09-01 and 1941-07-01, midnight.
    start_date = models.DateTimeField(default=datetime(1940, 9, 1, 0, 0, 0))
    end_date = models.DateTimeField(default=datetime(1941, 7, 1, 0, 0, 0))
    description = models.TextField(blank=True)
    location = models.PointField()
    objects = models.GeoManager()
    user = models.ForeignKey(User)
    created = models.DateTimeField(auto_now_add=True)
    last_modified = models.DateTimeField(auto_now=True)
    source = models.ForeignKey(Source)
    address = models.TextField(blank=True, null=True)
    address_road = models.TextField(blank=True, null=True)

    class Meta:
        abstract = True
class Bomb(MapObject, BombExtraManager):
    """A bomb entry: the MapObject fields plus type, night flag and map sheet."""

    # Bomb attributes
    type = models.CharField(choices=Type_CHOICES, max_length=10)
    night_bombing = models.BooleanField(blank=True)
    map_sheet = models.ForeignKey(MapSheet, blank=True, null=True)

    def __unicode__(self):
        # The type value doubles as the text representation.
        return self.type
Now, I want to get the equivalent result using Django ORM as this query:
-- Count the rows of "Mapper_bomb" for source 1 per (day, hour) of start_date,
-- ordered by day and then by hour.
SELECT
    date_part('day', "start_date") AS "day",
    date_part('hour', "start_date") AS "hour",
    Count('id')
FROM "Mapper_bomb"
WHERE "source_id" = 1
GROUP BY
    date_part('hour', "start_date"),
    date_part('day', "start_date")
ORDER BY
    date_part('day', "start_date") ASC,
    date_part('hour', "start_date") ASC
Which would give me a table with the count of bombs per day and hour.
Using Django ORM, I have come to the following at the moment (first_day is just a custom manager I defined that returns a subset of the data, same as source_id = 1):
# Attempt: select day and hour through extra(), then annotate counts of those
# two extra columns. This is the statement reported below to raise FieldError.
Bomb.first_day.extra(select={'date': "date_part(\'day\', \"start_date\")", 'hour': "date_part(\'hour\', \"start_date\")"}).values('date', 'hour').order_by().annotate(Count('date'), Count('hour'))
but Django complains FieldError: Cannot resolve keyword 'date' into field. Is there a way using Django ORM to get the desired result or do I need to fallback on raw sql?
Does this work?
# Group by the two extra() columns and count rows by primary key.
Bomb.first_day.extra(
    select={
        'date': "date_part('day', \"start_date\")",
        'hour': "date_part('hour', \"start_date\")",
    },
).values('date', 'hour').order_by('date', 'hour').annotate(Count('id'))

Handling recurring events in a Django calendar app

I'm developing a calendaring application in Django.
The relevant model structure is as follows:
class Lesson(models.Model):
    """A student's lesson with a recurrence frequency and a date range."""

    # Stored value is the number of days between occurrences (0 = no recurrence).
    RECURRENCE_CHOICES = (
        (0, 'None'),
        (1, 'Daily'),
        (7, 'Weekly'),
        (14, 'Biweekly')
    )
    frequency = models.IntegerField(choices=RECURRENCE_CHOICES)
    lessonTime = models.TimeField('Lesson Time')
    startDate = models.DateField('Start Date')
    endDate = models.DateField('End Date')
    student = models.ForeignKey(Student)
class CancelledLesson(models.Model):
    """Records that one occurrence of a Lesson was cancelled for a student."""

    lesson = models.ForeignKey(Lesson)
    student = models.ForeignKey(Student)
    cancelledLessonDate = models.DateField() # Actual date lesson has been cancelled, this is startDate + Frequency
class PaidLesson(models.Model):
    """Records a payment for one occurrence of a Lesson."""

    lesson = models.ForeignKey(Lesson)
    student = models.ForeignKey(Student)
    actualDate = models.DateField() # Actual date lesson took place
    paidAmt = models.DecimalField('Amount Paid', max_digits=5, decimal_places=2)
    paidDate = models.DateField('date paid')
class CompositeLesson(models.Model):
    """Combined view of a lesson occurrence with its cancellation and payment."""

    # only used to aggregate lessons for individual lesson management
    lesson = models.ForeignKey(Lesson)
    student = models.ForeignKey(Student)
    actualDate = models.DateTimeField()
    isCancelled = models.BooleanField()
    # Optional links to the matching cancellation and payment records.
    canLesson = models.ForeignKey(CancelledLesson, blank=True, null=True)
    payLesson = models.ForeignKey(PaidLesson, blank=True, null=True)
Apparently this is all causing issues with displaying the lessons that belong to a particular student. What I am attempting to do is display a table that shows the Student name plus all instances of scheduled lessons. I am calculating the recurrence dynamically to avoid blowing up my database. Exceptions to the recurrences (i.e. lesson cancellations) are stored in their own tables. Recurrences are checked against the cancelled lesson table when the recurrences are generated.
See my code to generate recurrences (as well as a small catalog of what issues this is causing) here: Can't get key to display in Django template
I'm relatively inexperienced with Python, and am using this project as a way to get my head around a lot of the concepts, so if I'm missing something that's inherently "Pythonic", I apologize.
The key part of your problem is that you're using a handful of models to track just one concept, so you're introducing a lot of duplication and complexity. Each of the additional models is a "type" of Lesson, so you should be using inheritance here. Additionally, most of the additional models are merely tracking a particular characteristic of a Lesson, and as a result should not actually be models themselves. This is how I would have set it up:
class Lesson(models.Model):
    """A lesson that carries its own cancellation and payment fields."""

    # Stored value is the number of days between occurrences (0 = no recurrence).
    RECURRENCE_CHOICES = (
        (0, 'None'),
        (1, 'Daily'),
        (7, 'Weekly'),
        (14, 'Biweekly')
    )
    student = models.ForeignKey(Student)
    frequency = models.IntegerField(choices=RECURRENCE_CHOICES)
    lessonTime = models.TimeField('Lesson Time')
    startDate = models.DateField('Start Date')
    endDate = models.DateField('End Date')
    # The three fields below are optional; NULL means not cancelled / not paid.
    cancelledDate = models.DateField('Cancelled Date', blank=True, null=True)
    paidAmt = models.DecimalField('Amount Paid', max_digits=5, decimal_places=2, blank=True, null=True)
    paidDate = models.DateField('Date Paid', blank=True, null=True)
class CancelledLessonManager(models.Manager):
    """Manager whose base queryset contains only cancelled lessons."""

    def get_query_set(self):
        """Return the parent queryset narrowed to rows with a cancelledDate."""
        # Start from the parent class's queryset: calling self.filter() here
        # would call back into this method and recurse without end.
        parent = super(CancelledLessonManager, self)
        # Django 1.6 renamed the hook to get_queryset; use whichever exists.
        base = getattr(parent, 'get_queryset', None) or parent.get_query_set
        return base().filter(cancelledDate__isnull=False)

    # Expose the newer spelling too so the filter applies on either version.
    get_queryset = get_query_set
class CancelledLesson(Lesson):
    """Proxy of Lesson whose default manager is CancelledLessonManager."""

    class Meta:
        # Proxy model: shares Lesson's table instead of getting its own.
        proxy = True

    objects = CancelledLessonManager()
class PaidLessonManager(models.Manager):
    """Manager whose base queryset contains only paid lessons."""

    def get_query_set(self):
        """Return the parent queryset narrowed to rows with a paidDate."""
        # Start from the parent class's queryset: calling self.filter() here
        # would call back into this method and recurse without end.
        parent = super(PaidLessonManager, self)
        # Django 1.6 renamed the hook to get_queryset; use whichever exists.
        base = getattr(parent, 'get_queryset', None) or parent.get_query_set
        return base().filter(paidDate__isnull=False)

    # Expose the newer spelling too so the filter applies on either version.
    get_queryset = get_query_set
class PaidLesson(Lesson):
    """Proxy of Lesson whose default manager is PaidLessonManager."""

    class Meta:
        # Proxy model: shares Lesson's table instead of getting its own.
        proxy = True

    objects = PaidLessonManager()
You'll notice that I moved all the attributes onto Lesson. This is the way it should be. For example, Lesson has a cancelledDate field. If that field is NULL then it's not cancelled. If it's an actual date, then it is cancelled. There's no need for another model.
However, I have left both CancelledLesson and PaidLesson for instructive purposes. These are now what's called in Django "proxy models". They don't get their own database table (so no nasty data duplication). They're purely for convenience. Each has a custom manager to return the appropriate matching Lessons, so you can do CancelledLesson.objects.all() and get only those Lessons that are cancelled, for example. You can also use proxy models to create unique views in the admin. If you wanted to have an administration area only for CancelledLessons you can, while all the data still goes into the one table for Lesson.
CompositeLesson is gone, and good riddance. This was a product of trying to compose these three other models into one cohesive thing. That's no longer necessary, and your queries will be dramatically easier as a result.
EDIT
I neglected to mention that you can and should add utility methods to the Lesson model. For example, while tracking cancelled/not by whether the field is NULL or not makes sense from a database perspective, from programming perspective it's not as intuitive as it could be. As a result, you might want to do things like:
@property
def is_cancelled(self):
    """True when a cancellation date has been recorded for this lesson."""
    return self.cancelledDate is not None
...
if lesson.is_cancelled:
    # Parenthesised form: a call on Python 3, and still prints the same
    # string on Python 2.
    print('This lesson is cancelled')
Or:
import datetime
...
def cancel(self, date=None, commit=True):
    """Record a cancellation date on this lesson.

    ``date`` is the cancellation date; any falsy value (including the default
    ``None``) means today. When ``commit`` is false the change is left unsaved
    so the caller can do further work before calling ``save()``.
    """
    if not date:
        date = datetime.date.today()
    self.cancelledDate = date
    if not commit:
        return
    self.save()
Then, you can cancel a lesson simply by calling lesson.cancel(), and it will default to cancelling it today. If you want to cancel it on a future date, you can pass that date: lesson.cancel(date=tomorrow) (where tomorrow is a date object). If you want to do other processing before saving, pass commit=False, and it won't actually save the object to the database yet. Then, call lesson.save() when you're ready.
This is what I came up with. I feel that lessons_in_range() may not be as "Pythonic" as I could get it, but this does what I need it to do.
class Lesson(models.Model):
    """A lesson for a student, optionally recurring every ``frequency`` days.

    Exceptions to a recurring lesson -- a cancelled or paid occurrence, or an
    occurrence materialised by ``lessons_in_range`` -- are stored as one-day
    child Lesson rows whose ``relatedLesson`` points back at the parent.
    """

    # Stored value is the number of days between occurrences (0 = no recurrence).
    RECURRENCE_CHOICES = (
        (0, 'None'),
        (1, 'Daily'),
        (7, 'Weekly'),
        (14, 'Biweekly')
    )
    relatedLesson = models.ForeignKey('self', null=True, blank=True)
    student = models.ForeignKey(Student)
    frequency = models.IntegerField(choices=RECURRENCE_CHOICES, null=True, blank=True)
    lessonTime = models.TimeField('Lesson Time', null=True, blank=True)
    startDate = models.DateField('Start Date')
    endDate = models.DateField('End Date', null=True, blank=True)
    isCancelled = models.BooleanField(default=False)
    amtBilled = models.DecimalField(max_digits=5, decimal_places=2, null=True, blank=True)
    amtPaid = models.DecimalField(max_digits=5, decimal_places=2, null=True, blank=True)

    def get_exceptions(self):
        """Return the queryset of child lessons recorded against this lesson."""
        return Lesson.objects.filter(relatedLesson=self.id)

    def _new_occurrence(self, date, isCancelled=False, amtPaid=None):
        """Build an unsaved one-day child lesson of this lesson on ``date``."""
        return Lesson(
            relatedLesson=self,
            student=self.student,
            frequency=0,  # a single occurrence never recurs itself
            lessonTime=self.lessonTime,
            startDate=date,
            endDate=date,
            isCancelled=isCancelled,
            amtBilled=self.amtBilled,
            amtPaid=amtPaid,
        )

    def cancel(self, date=None):
        """Cancel one occurrence, or end the whole lesson today.

        With ``date`` a cancelled child lesson is created for that date and
        this lesson is left untouched. Without it, ``endDate`` is set to today
        and this lesson is saved.
        """
        if date:
            # A separate object is required: aliasing ``self`` and clearing its
            # pk would overwrite this lesson and lose the link to the parent.
            self._new_occurrence(date, isCancelled=True).save()
        else:
            self.endDate = datetime.date.today()
            self.save()

    def pay_lesson(self, date, amount):
        """Record ``amount`` as paid for the occurrence on ``date``.

        The payment is stored on a new child lesson; this lesson is left
        untouched.
        """
        self._new_occurrence(date, amtPaid=amount).save()

    def lessons_in_range(self, startDate, endDate):
        """Return this lesson's occurrences between ``startDate`` and ``endDate``.

        Returns:
            ``None`` when the lesson lies entirely outside the range. For a
            non-recurring lesson, the queryset of its exceptions if there are
            any, otherwise the lesson itself. For a recurring lesson, a list
            of Lesson objects, one or more per occurrence date.

        Side effect: occurrences of a recurring lesson that have no row yet
        are created and saved.
        """
        own_end = self.endDate  # nullable; treated here as "no upper bound"
        if self.startDate > endDate or (own_end is not None and own_end < startDate):
            return None
        if own_end is not None and own_end < endDate:
            endDate = own_end
        ex = self.get_exceptions()
        if not self.frequency:
            # 0, or unset (the field is nullable): nothing to step through.
            return ex if ex else self
        # next_date() is a helper defined outside this snippet.
        sd = next_date(self.startDate, self.frequency, startDate)
        step = timedelta(self.frequency)
        lessonList = []
        while sd <= endDate:
            exf = ex.filter(startDate=sd)
            if exf:
                # lesson(s) already exist in the database: add the Lesson
                # objects themselves, not the queryset wrapping them
                lessonList.extend(exf)
            elif sd == self.startDate:
                # lesson is the original lesson, add that
                lessonList.append(self)
            else:
                # lesson does not exist, create it in the database then add it to the list
                x = self._new_occurrence(sd)
                x.save()
                lessonList.append(x)
            sd += step
        return lessonList