I have a question reg. comparing big set of model instances with data in DB.
for example I have a model with 5 fields:
class Foo(models.Model):
    field1 = models.IntegerField()
    field2 = models.IntegerField()
    field3 = models.IntegerField()
    field4 = models.IntegerField()
    field5 = models.IntegerField()
class Meta:
    unique_together = ('field1', 'field2',)
and I have 400.000 model Foo entries saved in DB.
Also I have 400.000 +/- few instances in memory(in python)of the same model generated from internet CSV file (without pk set).
Question is – what is the most efficient way to do following:
1)If instance in python equal to same instance in DB – keep instance in DB.
2)If instance in python not equal to instance in DB – update instance in DB.
3) If an instance in python does not coincide with any instance in DB – write it to DB.
4) If no instance in python coincides with a particular instance in DB – delete that instance from DB.
It should be bulk operations or raw SQL, as the sample size is quite big.
Thank you.
PS. What I do now: delete everything, reset the counter, and write everything to the DB again.
I hope you are using Django >= 2.2 (bulk_update requires it). Try this script:
import csv, os
from your_app.models import Foo
def get_db_object_from_file_object(file_object):
    """Map a positional CSV row onto a new, unsaved Foo model instance."""
    field_names = ("field1", "field2", "field3", "field4", "field5")
    kwargs = {name: file_object[index] for index, name in enumerate(field_names)}
    return Foo(**kwargs)
def update_db_object_from_file_object(db_object, file_object):
    """Copy the five CSV columns onto an existing instance (does not save)."""
    for offset, attr in enumerate(("field1", "field2", "field3", "field4", "field5")):
        setattr(db_object, attr, file_object[offset])
# Sync the Foo table with the CSV: rows present in both are updated in
# place, file-only rows are inserted, DB-only rows are deleted.
with open("<filename.csv>") as f:
    reader = csv.reader(f)
    # Key every CSV row by its natural key (field1, field2).  csv yields
    # strings while the model fields are integers, so cast here -- otherwise
    # no file key could ever equal a DB key and every row would be recreated.
    file_objects = {(int(row[0]), int(row[1])): row for row in reader}

# Requirement 4 ("delete DB rows with no file counterpart") means every DB
# row must be examined, so fetch the whole table.  The original
# field1__in/field2__in filter both missed rows that needed deleting and
# referenced an undefined name (`next_lines`).
db_objects = {
    (db_obj.field1, db_obj.field2): db_obj
    for db_obj in Foo.objects.all()
}

to_create = []
to_update = []
for pair, row in file_objects.items():
    if pair not in db_objects:
        # File-only: build a real model instance -- the raw CSV row itself
        # cannot be passed to bulk_create.
        to_create.append(get_db_object_from_file_object(row))
    else:
        # Present in both: copy the CSV values onto the fetched instance.
        update_db_object_from_file_object(db_objects[pair], row)
        to_update.append(db_objects[pair])
        # Whatever remains in db_objects afterwards exists only in the DB.
        del db_objects[pair]

Foo.objects.bulk_create(to_create)
Foo.objects.bulk_update(to_update, ["field1", "field2", "field3", "field4", "field5"])
# DB-only rows: the original built this queryset but never called .delete().
Foo.objects.filter(pk__in=[db_obj.pk for db_obj in db_objects.values()]).delete()
Related
I have two tables, Jobs and Clients.
class Jobs(models.Model):
    # Use the lazy string reference: Clients is declared *after* this class
    # in the file, so a bare `Clients` name here raises NameError at import
    # time.  Django resolves the string once all models are loaded.
    client_id = models.ForeignKey('Clients', db_column='client_id', on_delete=models.CASCADE)
class Clients(models.Model):
    # Set explicitly by callers (no auto_now_add on this field).
    created = models.DateTimeField()
    # Refreshed automatically on every save().
    modified = models.DateTimeField(auto_now=True)
I have Jobs data in json format.
jobs_data = {
'client_id':'10000',
....
}
When I want to save this data into Jobs table I get the ValueError: Cannot assign "'10000'": "Jobs.client_id" must be a "Clients" instance.
To save the table I have tried
jobs_obj = Jobs.objects.create(**v['Job'])
I also tried -
client_obj = Clients.objects.get(id=v['Job']['client_id'])
jobs_obj = Jobs.objects.create(**v['Job'],Clients=client_obj)
How can I fix this problem ?
class PurchaseOrder(models.Model):
    # Explicit surrogate key; its value is only assigned on the first save().
    purchase_order_id = models.AutoField(primary_key=True)
    # NOTE(review): CharField normally requires max_length — TODO confirm
    # this fragment was abbreviated for the question.
    purchase_order_number = models.CharField(unique=True)
    # NOTE(review): modern Django requires on_delete on ForeignKey;
    # presumably omitted here for brevity.
    vendor = models.ForeignKey(Vendor)
I am creating a Purchase Order (PO) table. When a PO is created I have to set purchase_order_number to "PO" + purchase_order_id, e.g. PO123 (where 123 is the primary key). So I am overriding def save in the model to accomplish this:
def save(self):
    # BUG (the subject of the answer below): on a brand-new row
    # purchase_order_id is still None at this point, so this branch is
    # skipped and purchase_order_number is left unset; concurrent creates
    # then collide on the unique constraint.
    if self.purchase_order_id is not None:
        self.purchase_order_number = "PO"+str(self.purchase_order_id)
    return super(PurchaseOrder, self).save()
It is working fine with single creation, but when I try to create a bulk of data using locust (a testing tool) it gives a duplicate-entry error for purchase_order_number. Can we modify the field value in the model itself, something like this:
purchase_order_number = models.CharField(unique=True,default=("PO"+self.purchase_order_id )
To be honest, I don't think it should work when you create multiple instances. Because as I can see from the code:
if self.purchase_order_id is not None:
self.purchase_order_number = "PO"+str(self.purchase_order_id)
Here purchase_order_id will be None when you are creating new instance. Also, until you call super(PurchaseOrder, self).save(), it will not generate purchase_order_id, meaning purchase_order_number will be empty.
So, what I would recommend is to not store this information in DB. Its basically the same as purchase_order_id with PO in front of it. Instead you can use a property method to get the same value. Like this:
class PurchaseOrder(models.Model):
    """Purchase order whose display number is derived from the PK, not stored."""
    purchase_order_id = models.AutoField(primary_key=True)
    # need to remove `purchase_order_number = models.CharField(unique=True)`
    ...

    # Was written as the comment '#property', which does nothing -- the
    # decorator is required for attribute-style access to work.
    @property
    def purchase_order_number(self):
        # Derived on the fly: "PO" + primary key, e.g. PO123.  Will render
        # as "PONone" on an unsaved instance, since the PK does not exist yet.
        return "PO{}".format(self.purchase_order_id)
So, you can also see the purchase_order_number like this:
p = PurchaseOrder.objects.first()
p.purchase_order_number
Downside of this solution is that, you can't make any query on the property field. But I don't think it would be necessary anyway, because you can do the same query for the purchase_order_id, ie PurchaseOrder.objects.filter(purchase_order_id=1).
The contains operation in SQLAlchemy only accepts one Model object instead of a list of objects. If I want to create a filter that accepts containing any of a group of objects, is there a more SQL-style way than creating multiple filters using contains and combining them with union?
For example, see the following code:
from flask import Flask
from flask.ext.sqlalchemy import SQLAlchemy
app = Flask(__name__)
db = SQLAlchemy(app)
class User(db.Model):
    id = db.Column(db.Integer, primary_key = True)
    name = db.Column(db.String(100))
    # One-to-many side of User<->Son: lazy='dynamic' makes user.son a query
    # object (hence .append() below), and backref gives each Son a .parent.
    son = db.relationship('Son', backref = 'parent', lazy = 'dynamic')

    def __repr__(self):
        # e.g. <"w1">
        return '<"%s">' % self.name
class Son(db.Model):
    id = db.Column(db.Integer, primary_key = True)
    name = db.Column(db.String(1000))
    # Foreign key back to the owning User row (the "many" side).
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'))

    def __repr__(self):
        # e.g. <"1 a 1">
        return '<"%s">' % self.name
# Create tables for the models declared above.
db.create_all()
# Fixture data: of the five sons, only son1 ('1 a 1') and son4 ('2 a 2')
# have names matching '%a%'.
son1 = Son(name = '1 a 1')
son2 = Son(name = '1 b 1')
son3 = Son(name = '1 c 1')
son4 = Son(name = '2 a 2')
son5 = Son(name = '2 b 2')
user0 = User(name = 'w1')
user1 = User(name = 'w2')
user2 = User(name = 'w3')
# Wire the relationships: user0 -> {son1, son2}, user1 -> {son3, son4},
# user2 -> {son5}.
user0.son.append(son1)
user0.son.append(son2)
user1.son.append(son3)
user1.son.append(son4)
user2.son.append(son5)
db.session.add(son1)
db.session.add(son2)
db.session.add(son3)
db.session.add(son4)
db.session.add(son5)
db.session.add(user0)
db.session.add(user1)
db.session.add(user2)
db.session.commit()
# All Son rows whose name contains 'a' (case-insensitive).
son_query = Son.query.filter(Son.name.ilike('%a%'))
son_query_all = son_query.all()
print son_query.all()  # Python 2 print statement
# One .contains() filter per matching Son, UNIONed together -- exactly the
# clumsy pattern the question asks to improve (the answer uses a JOIN).
user_query = User.query.filter(User.son.contains(son_query_all[0])).union(*[User.query.filter(User.son.contains(query)) for query in son_query_all[1:]])
print user_query.all()
The example firstly creates two models: User and Son, and then creates 3 User instances and 5 Son instances. user0 contains son1 and son2, user1 contains son3 and son4, and user2 contains son5. Note that the name of son1 and son4 are both like %a%. Now I want to select all User instances containing Son instances whose name likes %a%.
The current method is to select all Son instances in son_query_all, and then selects User instances containing individual desired Son instances, and then combines the selecting result using union. Is there a more SQL-style way for SQLAlchemy to select the same? For example, is there anything like contains_any so that the last query can be changed into something like
user_query = User.query.filter(User.son.contains_any(son_query_all))
Note that of course I can define a custom contains_any function for the same purpose using the union and contains operation. My question is whether there is a more efficient way than simply union all contains-ed?
The correct way to solve this kind of filtering is to use JOIN. Basically you join Son table with User table and filter by joined entity's field.
In SQLAlchemy you can express it like this:
User.query.join(Son).filter(Son.name.ilike('%a%'))
And it will produce following SQL:
SELECT * FROM user
JOIN son ON user.id = son.user_id
WHERE lower(son.name) LIKE lower('%a%')
My models:
class InventoryItem(models.Model):
    # Current stock level; the question is about persisting changes to it.
    quantity = models.IntegerField()
    ...
class Requisition(models.Model):
    # Many-to-one: each requisition draws from exactly one inventory item.
    # NOTE(review): modern Django requires on_delete here.
    from_inventoryitem = models.ForeignKey(InventoryItem)
    quantity = models.IntegerField()
    ...
Assuming that requisition has a OneToMany relation with inventory_item below, and the initial value of inventory_item.quantity is 0, when I execute:
>>> requisition = Requisition.objects.get(id=1)
>>> requisition.from_inventoryitem.quantity = 500
>>> requisition.save()
>>> requisition.from_inventoryitem.quantity
500
>>> inventory_item.quantity
0
>>> requisition.from_inventoryitem == inventory_item
True
The inventory_item.quantity in database and InventoryItem side is still 0.
How can I update that change to the database?
This is by design. You have to save each instance separately. requisition.from_inventoryitem actually queries the InventoryItem instance from the database, just to set the quantity.
requisition = Requisition.objects.get(id=1)
requisition.from_inventoryitem.quantity = 500 # this generates another query here!
requisition.from_inventoryitem.save()
or even better, with a single query, single save
inv_item = InventoryItem.objects.get(requisition_set__id=1)
inv_item.quantity = 500
inv_item.save()
best way. single database call:
InventoryItem.objects.filter(requisition_set__id=1).update(quantity=500)
Still learning Django, so not sure if there's a nice way to do this.
I have a few models with specific attributes (all use Item as base class), and a metadata table (id, language, type, value) used to store any extra attributes that could be potentially associated with instances of any of those models (code below). These models are used with a form / template, simple web-based CRUD.
Right now, I call .save_metadata(...) and .load_metadata(...) explicitly, and use .form_initial(...) to populate the form with metadata that isn't explicitly in the model.
I'm looking for a way to handle this automatically -- basically implementing a model with a variable number of fields, key ones are columns in the model's table, the other ones are rows in the metadata table, and are instance-specific. Is there a way of hooking a method after objects.get(...) or objects.filter(...) etc? I've messed with custom managers and looked into signals, but nothing seems to lead towards an acceptable solution.
class Item(models.Model):
    """Base model whose extra, language-specific attributes are stored as
    rows in the Metadata table rather than as columns here."""

    # Names of the metadata attributes this item class supports; subclasses
    # presumably override this list — TODO confirm against the other models.
    mdata = ['title'] # metadata associated with item
    user = models.ForeignKey(User)
    created = models.DateTimeField(auto_now_add = True)
    status = models.IntegerField(default=0, choices = ([(0,'Staged'), (1,'Published'),(2,'Archived'), ]))

    def set_status(self, s):
        # Change the workflow status and persist immediately.
        self.status = s
        self.save()

    # stores metadata attributes associated with current item
    def save_metadata(self, lang, form):
        # Upsert one Metadata row per name in self.mdata, taking the values
        # from the form's cleaned_data (raises KeyError if a name is absent).
        for mt in self.mdata:
            try:
                md = Metadata.objects.get(item=self, lang=lang, t=mt)
            except Metadata.DoesNotExist:
                md = Metadata.objects.create(item=self, lang=lang, t=mt)
            md.v=form.cleaned_data[mt]
            md.save()

    # retrieves metadata attributes associated with current item
    def load_metadata(self, lang):
        # Attach each metadata value as a plain instance attribute (written
        # straight into __dict__). Falls back to any language's value if the
        # requested lang has no row; leaves None if none exists at all.
        for mt in self.mdata:
            self.__dict__[mt] = None
            try:
                self.__dict__[mt] = Metadata.objects.get(item=self, t=mt, lang=lang).v
            except Metadata.DoesNotExist:
                md = Metadata.objects.filter(item=self, t=mt)
                if len(md) > 0:
                    self.__dict__[mt] = md[0].v

    # provides metadata attributes associated with current item needed to populate a form
    def form_initial(self, seed=None):
        # Assumes load_metadata() was called first, so the attributes exist
        # in __dict__ (KeyError otherwise).
        meta = {}
        for mt in self.mdata:
            meta[mt] = self.__dict__[mt]
            #meta[mt] = 'test'
        if seed:
            # NOTE(review): dict_items + dict_items is Python 2 only; this
            # line raises TypeError on Python 3.
            meta = dict(meta.items() + seed.items())
        return meta
# used to store various metadata associated with models derived from Item
class Metadata(models.Model):
    item = models.ForeignKey(Item)
    # Language code this value applies to (see the lang argument above).
    lang = models.CharField(max_length = 8)
    # Attribute name, e.g. 'title' (the "t" in Item.save_metadata).
    t = models.CharField(max_length = 250)
    # Attribute value (the "v" read back by Item.load_metadata).
    v = models.CharField(max_length = 2500)