how can I accelerate data migration from a large database in django - django

I am trying to remove model inheritance from my Django 1.7 app.
models.py
class Element(models.Model):
    """Base element: a number, a quantity, and a type from a fixed choice set."""

    ELEMENT_A = 'ELEMENT_A'
    ELEMENT_B = 'ELEMENT_B'
    TYPES = (
        (ELEMENT_A, 'Element A'),
        (ELEMENT_B, 'Element B'),
    )

    number = models.CharField(max_length=255)
    type = models.CharField(max_length=10, choices=TYPES, default=ELEMENT_A)
    quantity = models.IntegerField(default=1)
class ChildElement(Element):
    """Multi-table-inheritance child carrying the legacy number/quantity values."""

    old_number = models.CharField(max_length=30, unique=True)
    old_quantity = models.IntegerField()
my migration file
def forwards_remove_heritance(apps, schema_editor):
    """Copy the legacy old_* values onto the base Element fields for every
    ChildElement row.

    Bug fixes vs. the original:
    * ``Element.ELEMENT_A`` referenced the real model class, which is neither
      imported in this migration module (NameError) nor allowed — migrations
      must only use historical models from ``apps``. The constant is spelled
      literally instead.
    * The per-row ``save()`` loop issued ~750k UPDATEs; a single set-based
      ``update()`` with F() expressions does the copy in one SQL statement.
    """
    # Local import keeps the migration self-contained.
    from django.db.models import F

    ChildElement = apps.get_model("core", "ChildElement")
    ChildElement.objects.all().update(
        number=F('old_number'),
        quantity=F('old_quantity'),
        type='ELEMENT_A',
    )
def backward_remove_heritance(apps, schema_editor):
    """Reverse no-op: the copied legacy values are not restored."""
    return
class Migration(migrations.Migration):
    """Data migration copying legacy child fields onto the base model."""

    dependencies = []

    operations = [
        migrations.RunPython(
            forwards_remove_heritance,
            backward_remove_heritance,
        ),
    ]
My migration takes hours — I have more than 750k rows in the app_childelement table.
Any ideas?

Use queryset update with F expressions, example:
ChildElement.objects.all().update(number=F('old_number'))

the solution I found was to update my database directly with a raw SQL inside an operation
class RemoveChild(Operation):
# Custom migration Operation: flattens ChildElement data into Element with a
# single set-based UPDATE (orders of magnitude faster than per-row .save()).
reversible = True  # declared reversible, but database_backwards is a no-op
def __init__(self):
pass
def state_forwards(self, app_label, state):
# No model-state change: this operation only moves data.
pass
def database_forwards(self, app_label, schema_editor, from_state, to_state):
# NOTE(review): "UPDATE ... SET Table.col ... FROM ... JOIN" is T-SQL
# (SQL Server) syntax — confirm it matches the project's DB backend.
# NOTE(review): this sets type='ELEMENT_B' whereas the RunPython version
# above used ELEMENT_A — confirm which value is intended.
schema_editor.execute("""update Element
SET Element.number = ChildElement.old_number, Element.quantity=ChildElement.old_quantity, Element.type='ELEMENT_B'
FROM Element
INNER JOIN ChildElement
ON ChildElement.element_ptr_id = Element.id;
""")
def database_backwards(self, app_label, schema_editor, from_state, to_state):
# Intentional no-op: the copied legacy values are not restored.
pass
def describe(self):
return "Remove child heritance model"

Related

Admin Sorting after applying SimpleListFilter

I want a different ordering to be applied after a SimpleListFilter is being applied.
class Store(models.Model):
    """A store with an on/off state and an accumulated revenue figure."""

    name = models.CharField("name", max_length=128)
    state = models.IntegerField("state of store", default=0)
    revenue = models.IntegerField("accumulated ...", default=0)
class StoreStateFilter(admin.SimpleListFilter):
# Sidebar filter exposing a fixed on/off choice under ?state=<value>.
title = "filter by state"
parameter_name = "state"
def lookups(self, request, model_admin):
# Fixed (value, label) pairs shown in the admin sidebar.
return [("on", "on"), ("off", "off")]
def queryset(self, request, queryset):
# Filtering logic elided in the question; should narrow `queryset`
# based on self.value() ("on"/"off"/None).
...
return queryset
The admin looks simple:
# Bug fix: the original read '#admin.register(Store)'. With '#' that line is a
# comment, so the ModelAdmin was never registered at all — it must be the
# '@' decorator form.
@admin.register(Store)
class StoreAdmin(admin.ModelAdmin):
    """Admin for Store; default list ordering is by revenue."""
    ordering = ["revenue", ...]
    list_filter = [StoreStateFilter, ...]
    ...
So the normal ordering is after revenue, but after applying StoreStateFilter I want the ordering to be after name. Is there a way to achieve that?

Bulk create on related models using csv

I'm trying to use bulk_create in order to add objects to related models. Here I'm fetching the CSV file through a POST request, and it contains the required fields. As of now I can add items to unrelated models using the CSV file and bulk_create, and it's working.
class BulkAPI(APIView):
    """Bulk-import ManpowerRequirement rows from an uploaded CSV file."""

    def post(self, request):
        # Wrap the uploaded binary file so csv can read it as text.
        csv_text = io.TextIOWrapper(request.FILES['requirementfile'].file)
        rows = list(csv.DictReader(csv_text))
        requirements = [
            ManpowerRequirement(
                project=entry['project'],
                position=entry['position'],
                quantity=entry['quantity'],
                project_location=entry['project_location'],
                requested_date=entry['requested_date'],
                required_date=entry['required_date'],
                employment_type=entry['employment_type'],
                duration=entry['duration'],
                visa_type=entry['visa_type'],
                remarks=entry['remarks'],
            )
            for entry in rows
        ]
        try:
            # One INSERT for the whole batch.
            msg = ManpowerRequirement.objects.bulk_create(requirements)
            returnmsg = {"status_code": 200}
            print('imported successfully')
        except Exception as e:
            print('Error While Importing Data: ', e)
            returnmsg = {"status_code": 500}
        return JsonResponse(returnmsg)
My models are:
class ManpowerRequirement(models.Model):
    """A request for staff on a project."""

    project = models.CharField(max_length=60)
    position = models.CharField(max_length=60)
    quantity = models.IntegerField()
    project_location = models.CharField(max_length=60)
    requested_date = models.DateField()
    required_date = models.DateField()
    employment_type = models.CharField(
        max_length=60, choices=EMPLOYMENT_TYPE_CHOICES, default='Permanent'
    )
    duration = models.CharField(max_length=60)
    visa_type = models.CharField(max_length=60)
    remarks = models.TextField(blank=True, null=True)

    def __str__(self):
        return self.project

    class Meta:
        verbose_name_plural = "Manpower_Requirement"
class Fulfillment(models.Model):
    """A candidate put forward against a ManpowerRequirement."""

    candidate_name = models.CharField(max_length=60)
    manpower_requirement = models.ForeignKey(ManpowerRequirement, on_delete=models.CASCADE)
    passport_number = models.CharField(blank=True, max_length=60)
    subcontract_vendors = models.CharField(max_length=200, blank=True, null=True, default='')
    # Bug fix: this DateField had default='' — an empty string is not a valid
    # date and fails validation / DB insertion. null=True already covers the
    # "no date yet" case, so no default is needed.
    joined_date = models.DateField(blank=True, null=True)
    remarks = models.TextField(blank=True, null=True)

    def __str__(self):
        return self.candidate_name

    class Meta:
        verbose_name_plural = "Fulfillment"
class FulfillmentStatus(models.Model):
    """A dated status entry attached to a Fulfillment."""

    fulfillment = models.ForeignKey(Fulfillment, on_delete=models.CASCADE)
    status = models.CharField(max_length=60)
    status_date = models.DateField()
    remarks = models.TextField(blank=True, null=True)

    def __str__(self):
        # Display the related candidate's name rather than the status itself.
        return self.fulfillment.candidate_name

    class Meta:
        verbose_name_plural = "FulfillmentStatus"
I don't know how to do the same using bulk_create for the Fulfillment and FulfillmentStatus models, which are related to ManpowerRequirement. The CSV file I receive in order to bulk-create Fulfillment rows contains all the fields of ManpowerRequirement and all fields of Fulfillment and FulfillmentStatus, excluding the foreign keys and id fields.
In the past I had the same problem; I solved it this way.
Assuming that a single CSV row holds data for all models, I'd:
create a mapping between the main model and the linked ones (you could use the row index as key)
use bulk_create() on the main model
iterate the dict and use bulk_create() for the linked models
items = []
mrs = []
for row in list_of_dict:
    mr = ManpowerRequirement(...)
    mrs.append(mr)
    # Bug fix: the original built a second ManpowerRequirement here, but this
    # object must be the Fulfillment that will point at `mr`.
    f = Fulfillment(...)
    fs = FulfillmentStatus(...)
    items.append((mr, f, fs))
# create all Manpower Requirements
# Bug fix: the model is named ManpowerRequirement (singular); the original
# called ManpowerRequirements.objects..., which is a NameError.
# NOTE(review): bulk_create only populates primary keys on backends that
# support RETURNING (e.g. PostgreSQL) — confirm before relying on the FKs
# assigned below.
ManpowerRequirement.objects.bulk_create(mrs)
a = []
for mr, f, fs in items:
    f.manpower_requirement = mr
    a.append(f)
# create all Fulfillments
Fulfillment.objects.bulk_create(a)
a = []
for mr, f, fs in items:
    fs.fulfillment = f
    a.append(fs)
# create all FulfillmentStatus
FulfillmentStatus.objects.bulk_create(a)
I'm not sure whether the looping can be optimized further, but this solves the problem with just 3 queries.
For related models, we can do it like this:
class FulfillmentAPI(APIView):
    """Bulk-import Fulfillment rows from an uploaded CSV file."""

    def post(self, request):
        # Wrap the uploaded binary file so csv can read it as text.
        csv_text = io.TextIOWrapper(request.FILES['fulfillmentfile'].file)
        rows = list(csv.DictReader(csv_text))
        # NOTE(review): the .get() below issues one query per CSV row (N+1);
        # for large files, prefetch the requirements into a dict keyed by
        # (project, position, quantity, requested_date) instead.
        fulfillments = [
            Fulfillment(
                manpower_requirement=ManpowerRequirement.objects.get(project=entry['project'], position=entry['position'], quantity=entry['quantity'], requested_date=entry['requested_date'],),
                remarks=entry['remarks'],
                candidate_name=entry['candidate_name'],
                passport_number=entry['passport_number'],
                joined_date=entry['joined_date'],
                subcontract_vendors=entry['subcontract_vendors'],
            )
            for entry in rows
        ]
        try:
            msg = Fulfillment.objects.bulk_create(fulfillments)
            returnmsg = {"status_code": 200}
            print('imported successfully')
        except Exception as e:
            print('Error While Importing Data: ', e)
            returnmsg = {"status_code": 500}
        return JsonResponse(returnmsg)

Overwrite fields in Django Serializer

I am new to Django and I would like to overwrite the field value in the create and update methods of the serializer. Here is my model:
class Holiday(models.Model):
    """A holiday date with creation/last-modification audit fields."""

    HolidayDay = models.DateField()
    Created_DT = models.DateTimeField()
    Created_Usr = models.CharField(max_length=20)
    LastModified_Usr = models.CharField(max_length=20, blank=True)
    LastModified_DT = models.DateTimeField(blank=True, null=True)

    def __str__(self):
        return str(self.HolidayDay)
Here is my serializer=>
class HolidaySerializer(serializers.ModelSerializer):
class Meta:
model=Holiday
fields = [
'id',
'HolidayDay',
'Created_DT',
'Created_Usr',
'LastModified_Usr',
'LastModified_DT'
]
# NOTE(review): per the accepted answer, this create() was indented inside
# Meta, so DRF never calls it — it must be a method of the serializer class
# itself (the flattened paste hides the faulty indentation).
def create(self,validated_data):
validated_data['Created_Usr'] ="Testing"
return Holiday.objects.create(**validated_data)
I would like to update the Created_Usr field value in the create method and the LastModified_Usr field in the update method. But why can't I overwrite the Created_Usr field as "Testing"?
Here is my views=>
def post(self,request):
# Create a Holiday from the nested 'holiday' payload.
holiday = request.data.get('holiday')
serializer = HolidaySerializer(data=holiday)
# NOTE(review): is_valid() runs twice (once here to print errors, once in
# the if below); and when validation fails this method falls through and
# returns None, which Django turns into a 500 — an error Response is needed.
serializer.is_valid()
print(serializer.errors)
if serializer.is_valid():
holiday_saved=serializer.save()
return Response({"success":"Holiday '{}' created successfully".format(holiday_saved.HolidayDay)})
def put(self,request,pk):
# Partially update an existing Holiday; 404s if pk is unknown.
save_holiday = get_object_or_404(Holiday.objects.all(),pk=pk)
data = request.data.get('holiday')
serializer = HolidaySerializer(instance=save_holiday,data=data,partial=True)
# raise_exception=True turns validation failures into a DRF 400 response.
if serializer.is_valid(raise_exception = True):
holiday_saved=serializer.save()
# NOTE(review): response key "sucess" is misspelled — confirm no client
# depends on it before fixing.
return Response({"sucess": "Holiday '{}' updated successfully".format(holiday_saved.HolidayDay)})
Your create method is not defined in your Serializer class, instead it is part of your Meta class. You should be able to solve it by moving your create method to your HolidaySerializer:
class HolidaySerializer(serializers.ModelSerializer):
    """Serializer that stamps Created_Usr server-side on creation."""

    def create(self, validated_data):
        # Override whatever the client submitted: the creator is fixed here.
        validated_data['Created_Usr'] = "Testing"
        return Holiday.objects.create(**validated_data)

    class Meta:
        model = Holiday
        fields = [
            'id',
            'HolidayDay',
            'Created_DT',
            'Created_Usr',
            'LastModified_Usr',
            'LastModified_DT',
        ]

Django import export: update without add

I'm using 'django import export' (DIE) for importing and updating some data.
The import process starts by checking for existing objects in the DB, searching by values in the ID field; if a row with an ID from the import file is not found, a new entry is created. How can I make an "update only" scenario, where if the 'id key' is not found in the DB, the row is skipped (no new row is added)?
my model.py
class Size(models.Model):
    """Height/width dimensions; the explicit id mirrors Django's default pk."""

    id = models.AutoField(unique=True, primary_key=True, null=False, blank=False)
    height = models.SmallIntegerField()
    width = models.SmallIntegerField()
class Product(models.Model):
    """A purchasable product with a unique vendor code, a price and a size."""

    id = models.AutoField(unique=True, primary_key=True, null=False, blank=False)
    vendor_code = models.CharField(unique=True, max_length=50, null=False, blank=False)
    # Bug fix: DecimalField requires max_digits and decimal_places; without
    # them Django's system checks fail (fields.E130/E131). Values chosen as a
    # common money shape — adjust to the real price range if different.
    price = models.DecimalField(max_digits=10, decimal_places=2, null=False, blank=False)
    # on_delete made explicit: CASCADE was the implicit default in Django 1.x
    # and the argument is mandatory from Django 2.0.
    size = models.ForeignKey(Size, verbose_name=u'Size', on_delete=models.CASCADE)
in resource.py
class ProductSyncResource(resources.ModelResource):
# Import/export resource keyed on vendor_code rather than pk.
class Meta:
# NOTE(review): the class name says Product but the model is
# ProductVariant — confirm which model this resource should target.
model = ProductVariant
import_id_fields = ('vendor_code',)
fields = ('vendor_code', 'price',)
# NOTE(review): 'status' is in export_order but absent from `fields` —
# confirm intended.
export_order = ('vendor_code', 'price', 'status', )
skip_unchanged = True
report_skipped = True
dry_run = True
import table (xls)
If vendor_code 'Tк-12856' (cell A3) is not found, then DIE will try to add this row, and:
We will get an error from the DB (foreign-key check for column 'size')
I don't need to add this row to DB in my 'update scenario'
Finally I got it by overriding skip_row. Fields can now be 'null=False', and only rows with known import_id_field values will be imported.
class VariantSyncResource(resources.ModelResource):
# "Update only" import resource: rows whose vendor_code does not already
# exist in the DB are skipped instead of created.
class Meta:
model = ProductVariant
# import_id_field (singular) is a custom attribute read by skip_row
# below; import_id_fields (plural) is the django-import-export option.
import_id_field = 'vendor_code'
import_id_fields = ('vendor_code',)
fields = ('vendor_code', 'price', 'status', )
export_order = ('vendor_code', 'price', 'status', )
skip_unchanged = True
report_skipped = False
dry_run = True
def skip_row(self, instance, original):
# `instance` is built from the import row; `original` is what the
# instance loader found in the DB (an empty model when nothing matched).
original_id_value = getattr(original, self._meta.import_id_field)
instance_id_value = getattr(instance, self._meta.import_id_field)
# Differing id values mean the DB lookup found nothing -> skip the row,
# which is what prevents creation of new objects.
if original_id_value != instance_id_value:
return True
if not self._meta.skip_unchanged:
return False
# Mirror the stock skip_unchanged comparison: m2m-like fields compare
# as lists (the .all() branch), plain fields compare by value.
for field in self.get_fields():
try:
if list(field.get_value(instance).all()) != list(field.get_value(original).all()):
return False
except AttributeError:
if field.get_value(instance) != field.get_value(original):
return False
return True
If you want it to not create new objects I think you'd need to override the import_row() within ProductSyncResource.
Then you could just do if new: return;
def import_row(self, row, instance_loader, using_transactions=True, dry_run=False, **kwargs):
# Excerpt of django-import-export's import_row, overridden so that rows
# which would CREATE a new object are returned early (update-only mode).
row_result = self.get_row_result_class()()
try:
self.before_import_row(row, **kwargs)
instance, new = self.get_or_init_instance(instance_loader, row)
self.after_import_instance(instance, new, **kwargs)
if new:
# Short-circuit: never create; return the (empty) row result.
return row_result
else:
row_result.import_type = RowResult.IMPORT_TYPE_UPDATE
row_result.new_record = new
original = deepcopy(instance)
# Remainder of the upstream implementation elided in the answer.
...
Following your comment about the preview error, just remember that this isn't an official feature of the app, so you'll have to follow stack traces and work around issues that pop up. My first thought above looked like the quickest and easiest way to do this, but you could also make the save in the else branch conditional on the object not being new:
# Fragment from inside import_row's save section: only persist when the
# object already existed (`new` is False), keeping update-only semantics.
elif not new:
with transaction.atomic():
self.save_instance(instance, using_transactions, dry_run)
self.save_m2m(instance, row, using_transactions, dry_run)
diff.compare_with(self, instance, dry_run)

How to change the choices of the ChoiceField of django-rest-framework from a model class?

The models.py is:
from django.db import models
class Type(models.Model):
    """A code type identified by a one-letter code and a short display name."""

    letter = models.CharField(max_length=1)
    name = models.CharField(max_length=10)
class Code(models.Model):
# NOTE(review): default=None on a FK without null=True will fail on save;
# on_delete is also mandatory from Django 2.0 — confirm the Django version.
type = models.ForeignKey(Type, related_name = 'code', blank = True, default = None)
the serializers.py is:
import collections
from rest_framework import serializers
from code.models import Type, Code
class TypeSerializer(serializers.HyperlinkedModelSerializer):
    """Full Type representation: the letter plus its display name."""

    class Meta:
        model = Type
        fields = ('letter', 'name')
class TypeField(serializers.HyperlinkedModelSerializer):
    """Slim nested representation of Type exposing only its letter."""

    class Meta:
        model = Type
        fields = ('letter',)
class CodeSerializer(serializers.HyperlinkedModelSerializer):
# Nested writable representation; the question asks how to replace this
# with a ChoiceField populated from the Type rows.
type = TypeField() #serializers.ChoiceField(???)
def create(self, validated_data):
# Resolve the nested {'letter': ...} payload to an existing Type.
c = Code()
c.type = Type.objects.get(letter = validated_data['type']['letter'])
c.save()
return c
class Meta:
model = Code
fields = ('type',)
the views.py is:
from rest_framework import viewsets
from code.models import Code, Type
from code.serializers import CodeSerializer, TypeSerializer
class CodeViewSet(viewsets.ModelViewSet):
queryset = Code.objects.all()
serializer_class = CodeSerializer
class TypeViewSet(viewsets.ModelViewSet):
queryset = Type.objects.all()
serializer_class = TypeSerializer
Is it possible to use a ChoiceField to choose the type inside Code when I create an instance of the Code object, instead of a CharField?
Possible Solution
I found a possible solution: I derived a DynamicChoiceField class from ChoiceField
class DynamicChoiceField(serializers.ChoiceField):
    """ChoiceField whose choices start empty and can be replaced at runtime."""

    def __init__(self, **kwargs):
        # Start with no choices; callers populate them via set_choices().
        super(DynamicChoiceField, self).__init__([], **kwargs)

    def set_choices(self, choices):
        """Replace the choice set; accepts (value, label) pairs or bare values."""
        all_pairs = all(
            isinstance(entry, (list, tuple)) and len(entry) == 2
            for entry in choices
        )
        if all_pairs:
            self.choices = OrderedDict((value, label) for value, label in choices)
        else:
            self.choices = OrderedDict((entry, entry) for entry in choices)
        # Keep the string->value lookup in sync with the new choices.
        self.choice_strings_to_values = dict(
            (six.text_type(value), value) for value in self.choices.keys()
        )
and change the CodeSerializer in:
class CodeSerializer(serializers.HyperlinkedModelSerializer):
# Read side shows the nested type; write side takes a letter choice.
type = TypeField(read_only=True)
choise_of_type = DynamicChoiceField(allow_blank=False, write_only=True)
def __init__(self, *args, **kwargs):
# Rebuild the letter choices from the DB on every instantiation so new
# Type rows appear without a restart.
types = Type.objects.all()
choices = [(t.letter,t.name) for t in types]
# NOTE(review): self.fields is touched before super().__init__; calling
# super() first would be the conventional order — confirm this works on
# the DRF version in use.
self.fields['choise_of_type'].set_choices(choices)
super(CodeSerializer, self).__init__(*args, **kwargs)
def create(self, validated_data):
# Resolve the chosen letter to an existing Type and attach it.
c = Code()
c.type = Type.objects.get(letter = validated_data['choise_of_type'])
c.save()
return c
class Meta:
model = Code
fields = ('type', 'choise_of_type',)
the only flaw of this solution is that I need two fields instead of only one (type, choise_of_type)