Django: Merge databases for same app

I have the same django app running on two servers, each has its own local database. I want to migrate to a single server, again with a local database.
What's the easiest way to populate the new database with the models data of the old databases without losing references between models? (primary keys issues etc...)

There's always dumpdata from Django, which is pretty easy to use.
Or you could do it manually:
If the two databases hold the same data (they mirror each other) and share the same table structure, you can just run Django's syncdb to create the new table structure, then dump the old database and import it into the new one (I'm assuming you're using MySQL, but the general idea is the same).
If the two databases hold different data (still with the same structure), you need to import every single row from the two databases; this way you keep the relations, and the ids are updated for the new, single DB.
If the two databases differ in both data and structure, you'll have to run two syncdbs and two imports, but that doesn't seem to be your case.
This is a reference for the MySQL dump command (mysqldump).
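For the simple cases (a single source database, or true mirrors), the fixture route might look roughly like this (app names are placeholders); note that naively loading dumps from two databases with overlapping primary keys will clash, which is what the answer below addresses:
# On the old server(s)
python manage.py dumpdata app1 app2 > old_server_dump.json
# On the new server, once the schema exists (syncdb / migrate)
python manage.py loaddata old_server_dump.json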

If your databases share the same data model but hold different objects, you could use this custom command I created for a similar problem.
Instead of merging fixture data into your existing models (as loaddata does), it appends all fixture objects, resetting every primary key.
M2M relations are handled at the end of the process, mapping the old primary keys to the new primary keys:
import os
import warnings
from collections import defaultdict
from django.core.management import CommandError
from django.core.management.utils import parse_apps_and_model_labels
from django.core.management.commands.loaddata import Command as LoadDataCommand, humanize
from django.core.management.color import no_style
from django.db import (
DEFAULT_DB_ALIAS, DatabaseError, IntegrityError, connections, router
)
from django.core import serializers
from django.db import transaction
from django.db.models.fields.related import RelatedField, ManyToManyField
class Command(LoadDataCommand):
help = 'Installs the named fixture(s) in the database.'
missing_args_message = (
"No database fixture specified. Please provide the path of at least "
"one fixture in the command line."
)
def add_arguments(self, parser):
parser.add_argument('args', metavar='fixture', nargs='+', help='Fixture labels.')
parser.add_argument(
'--database', default=DEFAULT_DB_ALIAS,
help='Nominates a specific database to load fixtures into. Defaults to the "default" database.',
)
parser.add_argument(
'--app', dest='app_label',
help='Only look for fixtures in the specified app.',
)
parser.add_argument(
'-e', '--exclude', action='append', default=[],
help='An app_label or app_label.ModelName to exclude. Can be used multiple times.',
)
parser.add_argument(
'--format',
help='Format of serialized data when reading from stdin.',
)
# NOTE: this is essentially loaddata's handle(), duplicated so it can call appenddata() instead
def handle(self, *fixture_labels, **options):
self.using = options['database']
self.app_label = options['app_label']
self.verbosity = options['verbosity']
self.excluded_models, self.excluded_apps = parse_apps_and_model_labels(options['exclude'])
self.format = options['format']
with transaction.atomic(using=self.using):
self.appenddata(fixture_labels)
# Close the DB connection -- unless we're still in a transaction. This
# is required as a workaround for an edge case in MySQL: if the same
# connection is used to create tables, load data, and query, the query
# can return incorrect results. See Django #7572, MySQL #37735.
if transaction.get_autocommit(self.using):
connections[self.using].close()
def appenddata(self, fixture_labels):
# Most of loaddata()'s code only deals with transactions and fixture file formats; reuse it and override load_label() instead
self.loaddata(fixture_labels)
def load_label(self, fixture_label):
"""Load fixtures files for a given label."""
self.objs_idx = ObjectDict()
self.objects = []
self.show_progress = self.verbosity >= 3
self.deferred_m2m = []
for fixture_file, fixture_dir, fixture_name in self.find_fixtures(fixture_label):
_, ser_fmt, cmp_fmt = self.parse_name(os.path.basename(fixture_file))
open_method, mode = self.compression_formats[cmp_fmt]
fixture = open_method(fixture_file, mode)
try:
self.fixture_count += 1
objects_in_fixture = 0
loaded_objects_in_fixture = 0
if self.verbosity >= 2:
self.stdout.write(
"Installing %s fixture '%s' from %s."
% (ser_fmt, fixture_name, humanize(fixture_dir))
)
objects = serializers.deserialize(
ser_fmt, fixture, using=self.using, ignorenonexistent=True,
handle_forward_references=False,
)
for obj in objects:
objects_in_fixture += 1
if (obj.object._meta.app_config in self.excluded_apps or
type(obj.object) in self.excluded_models):
continue
if router.allow_migrate_model(self.using, obj.object.__class__):
loaded_objects_in_fixture += 1
self.models.add(obj.object.__class__)
# Load all fixture objects into memory
self.objs_idx.append_deserialized_object(obj)
self.objects.append(obj)
if obj.deferred_fields:
self.objs_with_deferred_fields.append(obj)
if objects and self.show_progress:
self.stdout.write('') # add a newline after progress indicator
self.loaded_object_count += loaded_objects_in_fixture
self.fixture_object_count += objects_in_fixture
except Exception as e:
if not isinstance(e, CommandError):
e.args = ("Problem installing fixture '%s': %s" % (fixture_file, e),)
raise e
finally:
fixture.close()
# Warn if the fixture we loaded contains 0 objects.
if objects_in_fixture == 0:
warnings.warn(
"No fixture data found for '%s'. (File format may be "
"invalid.)" % fixture_name,
RuntimeWarning
)
# Once all objects are in memory, save them
for obj in self.objects:
self.process_object(obj.object)
if self.verbosity >= 1:
self.stdout.write('... All objects saved ...')
# Once all objects have been saved (append mode) and new pks have been assigned, add the m2m relations
for obj, field_attname, related_pk in self.deferred_m2m:
attr = getattr(obj, field_attname)
attr.add(related_pk)
if self.verbosity >= 3:
self.stdout.write('Adding relation for field {0}: {1} -> {2}'.format(field_attname, obj.pk, related_pk))
# Disabled for security reason
# raise ValueError('Disabled')
def process_object(self, obj):
if obj is None:
raise ValueError('None object in process object')
old_pk = obj.pk
new_pk = self.objs_idx[obj]['new_pk']
# Object has already been saved; nothing to do
if new_pk:
return new_pk
self.manage_related_field(obj)
if self.verbosity >= 2:
self.stdout.write('Saving object: (%s, %s)' % (obj.__class__, obj))
obj.pk = None
try:
obj.save(using=self.using)
if self.show_progress:
self.stdout.write(
'\rSaving object: (%s, %s)' % (obj.__class__, obj),
ending=''
)
# psycopg2 raises ValueError if data contains NUL chars.
except (DatabaseError, IntegrityError, ValueError) as e:
e.args = ("Could not load %(app_label)s.%(object_name)s(pk=%(pk)s): %(error_msg)s" % {
'app_label': obj.object._meta.app_label,
'object_name': obj.object._meta.object_name,
'pk': obj.object.pk,
'error_msg': e,
},)
raise
self.objs_idx.data[obj._meta.model][old_pk]['new_pk'] = obj.pk
return obj.pk
def manage_related_field(self, obj):
related_fields = [field for field in obj._meta.get_fields() if isinstance(field, RelatedField)]
if len(related_fields) > 0:  # only if the model has related fields
for field in related_fields:
if field.related_model in self.excluded_models:
continue
if type(field) is ManyToManyField:
attr = getattr(obj, field.attname)
attr.clear()
m2m_pks = self.objs_idx[obj]['deserialized_object'].m2m_data[field.name]
for m2m_pk in m2m_pks:
related_obj = self.objs_idx.data[field.related_model][m2m_pk]['object']
new_related_pk = self.process_object(related_obj)
self.deferred_m2m.append((obj, field.attname, new_related_pk))
# attr.add(new_related_pk)
else:
related_obj = self.objs_idx.data[field.related_model][getattr(obj, field.attname)]['object']
if related_obj is not None:
new_related_pk = self.process_object(related_obj)
setattr(obj, field.attname, new_related_pk)
class ObjectDict(object):
"""
Dictionary to easily retrieve fixture object based on class and their original primary key
"""
def __init__(self):
self.data = defaultdict(lambda: defaultdict(lambda: {'new_pk': None, 'object': None}))
@staticmethod
def from_deserialized_objects(deserialized_objects):
instance = ObjectDict()
for deserialized_object in deserialized_objects:
instance.append_deserialized_object(deserialized_object)
return instance
def __getitem__(self, item):
return self.data[item._meta.model][item.old_pk]
def append_deserialized_object(self, deserialized_object):
obj = deserialized_object.object
setattr(obj, 'old_pk', obj.pk)
self[obj]['object'] = obj
self[obj]['deserialized_object'] = deserialized_object
I suggest testing everything against Django's default test DB, with this TestCase.
It uses a fixture file dumped beforehand from the Django app (using python manage.py dumpdata) to populate the test DB; after that it applies the custom command to append all objects from the other database's fixture dump.
from collections import defaultdict
from django.apps import apps
from django.core import serializers
from django.core.management import call_command
from django.core.management.utils import parse_apps_and_model_labels
from django.test import TestCase
class TestCustomCommands(TestCase):
# Create fixture from the primary database to test everything
fixtures = ['tmp/dump/test_append_data_fixtures_pre.json']
def test_appenddata(self):
fixture_to_import = 'tmp/dump/fixtures_to_import.json'
excludes = ['sites.Site']
# Counts objects before appenddata per model
count_pre = {}
for model in apps.get_models():
count_pre[model] = model.objects.count()
self.excluded_models, self.excluded_apps = parse_apps_and_model_labels(excludes)
# Counts objects to append per model
with open(fixture_to_import, 'r') as f:
objects = serializers.deserialize('json', f, ignorenonexistent=True)
count_new = defaultdict(lambda: 0)
for obj in objects:
if obj.object._meta.model in self.excluded_models:
continue
count_new[obj.object._meta.model] += 1
command = ['appenddata', fixture_to_import]
for exclude in excludes:
command += ['-e', exclude]
command += ['-v', '0']
call_command(*command)
# Verify with count that all objects have been imported
for model in apps.get_models():
self.assertEqual(count_pre[model] + count_new[model], model.objects.count(), msg='Count mismatch for model %s' % model)
Example of test (appending data from Website2 into Website1):
# Website 1
python manage.py dumpdata app1 app2 ... > test_append_data_fixtures_pre.json
# Website 2
python manage.py dumpdata app1 app2 ... > fixture_to_import.json
# Website 1, run the provided test
python manage.py test TestCustomCommands.test_appenddata
Example of usage (appending data from Website2 into Website1):
# Website 1
python manage.py appenddata fixture_to_import.json

Related

Flask app-builder how to make REST API with file items

I'm making a REST API to which files can be uploaded, based on a model view in flask-appbuilder, like this.
But I don't know how to call the REST API (POST /File).
I tried several different ways, but none of them worked.
Let me know the correct way, or an alternative.
[client code]
file = {'file':open('test.txt', 'rb'),'description':'test'}
requests.post(url, headers=headers, files=file)
==> Failed
model.py
# Typical imports for a flask-appbuilder model using FileColumn
from flask import Markup, url_for
from flask_appbuilder import Model
from flask_appbuilder.models.mixins import FileColumn
from flask_appbuilder.filemanager import get_file_original_name
from sqlalchemy import Column, Integer, String
class Files(Model):
__tablename__ = "project_files"
id = Column(Integer, primary_key=True)
file = Column(FileColumn, nullable=False)
description = Column(String(150))
def download(self):
return Markup(
'<a href="'
+ url_for("ProjectFilesModelView.download", filename=str(self.file))
+ '">Download</a>'
)
def file_name(self):
return get_file_original_name(str(self.file))
view.py
class FileApi(ModelRestApi):
resource_name = "File"
datamodel = SQLAInterface(Files)
allow_browser_login = True
appbuilder.add_api(FileApi)
FileColumn is only a string field that saves the file name in the database. The actual file is saved to config['UPLOAD_FOLDER'].
This is taken care of by flask_appbuilder.filemanager.FileManager.
Furthermore, ModelRestApi assumes that you are POSTing JSON data. In order to upload files, I followed Flask's documentation, which suggests sending a multipart/form-data request. Because of this, one needs to override ModelRestApi.post_headless().
This is my solution, where I also make sure that when a Files database row is deleted, the corresponding file is removed from the filesystem.
from flask_appbuilder.models.sqla.interface import SQLAInterface
from flask_appbuilder.api import ModelRestApi
from flask_appbuilder.const import API_RESULT_RES_KEY
from flask_appbuilder.filemanager import FileManager
from flask import current_app, request
from marshmallow import ValidationError
from sqlalchemy.exc import IntegrityError
from app.models import Files
class FileApi(ModelRestApi):
resource_name = "file"
datamodel = SQLAInterface(Files)
def post_headless(self):
if not request.form or not request.files:
msg = "No data"
current_app.logger.error(msg)
return self.response_400(message=msg)
file_obj = request.files.getlist('file')
if len(file_obj) != 1:
msg = ("More than one file provided.\n"
"Please upload exactly one file at a time")
current_app.logger.error(msg)
return self.response_422(message=msg)
else:
file_obj = file_obj[0]
fm = FileManager()
uuid_filename = fm.generate_name(file_obj.filename, file_obj)
form = request.form.to_dict(flat=True)
# Add the unique filename provided by FileManager, which will
# be saved to the database. The original filename can be
# retrieved using
# flask_appbuilder.filemanager.get_file_original_name()
form['file'] = uuid_filename
try:
item = self.add_model_schema.load(
form,
session=self.datamodel.session)
except ValidationError as err:
current_app.logger.error(err)
return self.response_422(message=err.messages)
# Save file to filesystem
fm.save_file(file_obj, item.file)
try:
self.datamodel.add(item, raise_exception=True)
return self.response(
201,
**{API_RESULT_RES_KEY: self.add_model_schema.dump(
item, many=False),
"id": self.datamodel.get_pk_value(item),
},
)
except IntegrityError as e:
# Delete file from filesystem if the db record cannot be
# created
fm.delete_file(item.file)
current_app.logger.error(e)
return self.response_422(message=str(e.orig))
def pre_delete(self, item):
"""
Delete file from filesystem before removing the record from the
database
"""
fm = FileManager()
current_app.logger.info(f"Deleting {item.file} from filesystem")
fm.delete_file(item.file)
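For completeness, here is a minimal client-side sketch of calling the endpoint above. The URL prefix, port, credentials, and login route are assumptions based on flask-appbuilder's defaults (resource_name = "file", JWT login under /api/v1/security/login), so adjust them to your setup:
import requests
base = "http://localhost:8080/api/v1"
# Obtain a JWT access token first (browser-session auth is not enough for scripts)
token = requests.post(
    base + "/security/login",
    json={"username": "admin", "password": "password", "provider": "db"},
).json()["access_token"]
headers = {"Authorization": "Bearer " + token}
# Send the upload as multipart/form-data: the file goes in files=, the other columns in data=
with open("test.txt", "rb") as fh:
    resp = requests.post(
        base + "/file/",
        headers=headers,
        files={"file": fh},
        data={"description": "test"},
    )
print(resp.status_code, resp.json())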
You can use this.
from app.models import Project, ProjectFiles
class DataFilesModelView(ModelView):
datamodel = SQLAInterface(ProjectFiles)
label_columns = {"file_name": "File Name", "download": "Download"}
add_columns = ["file", "description", "project"]
edit_columns = ["file", "description", "project"]
list_columns = ["file_name", "download"]
show_columns = ["file_name", "download"]
Lastly, add the view to the menu:
appbuilder.add_view(DataFilesModelView,"File View")

Remove old permissions in django

In my Django site there are some permission entries linked to applications that I've removed. For example, I have permission entries linked to the "Dashboard" and "Jet" applications. How can I remove them?
Permissions have foreign keys to content types under the hood, so removing the content types for the models that no longer exist will also remove the permissions for those models.
Fortunately, Django also provides a manage.py command to remove old content types: remove_stale_contenttypes. Running that command will list the content types that no longer exist and the related objects (including permissions) that will be deleted, allowing you to review the changes and approve them.
$ manage.py remove_stale_contenttypes
Some content types in your database are stale and can be deleted.
Any objects that depend on these content types will also be deleted.
The content types and dependent objects that would be deleted are:
- Content type for stale_app.removed_model
- 4 auth.Permission object(s)
This list doesn't include any cascade deletions to data outside of Django's
models (uncommon).
Are you sure you want to delete these content types?
If you're unsure, answer 'no'.
Type 'yes' to continue, or 'no' to cancel:
To start, make an empty migration file:
python manage.py makemigrations --empty yourappname
Change the migration (this is an example, adjust to your needs):
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations
def add_permissions(apps, schema_editor):
pass
def remove_permissions(apps, schema_editor):
"""Reverse the above additions of permissions."""
ContentType = apps.get_model('contenttypes.ContentType')
Permission = apps.get_model('auth.Permission')
content_type = ContentType.objects.get(
model='somemodel',
app_label='yourappname',
)
# This cascades to Group
Permission.objects.filter(
content_type=content_type,
codename__in=('add_somemodel', 'change_somemodel', 'delete_somemodel'),
).delete()
class Migration(migrations.Migration):
dependencies = [
('yourappname', '0001_initial'),
]
operations = [
migrations.RunPython(remove_permissions, add_permissions),
]
I did it this way:
import re
from django.contrib.auth.models import Permission

for perm in Permission.objects.all():
    if re.match(r".+modelname.+permissionname.+", str(perm)):
        print(perm)
        perm.delete()
If you have custom or model-based (default) permissions you wish to remove, you could write a command like this to accomplish the task:
from django.conf import settings
from django.contrib.auth.models import Permission
from django.core.management.base import BaseCommand
import django.apps
class Command(BaseCommand):
help = 'Remove custom permissions that are no longer in models'
def handle(self, *args, **options):
# get the db name needed for removal...
database_name = input('Database Name: ')
default_perm_names = list()
# are real perms in db, may not be accurate
db_custom_perm_names = list()
# will be used to ensure they are correct.
meta_custom_perm_names = list()
default_and_custom_perms = list()
for model in django.apps.apps.get_models():
# add to models found to fix perms from removed models
app_label = model._meta.app_label
lower_model_name = model._meta.model_name
all_model_permissions = Permission.objects.using(database_name).filter(content_type__app_label=app_label, content_type__model=lower_model_name)
default_and_custom_perms.extend([x for x in all_model_permissions])
# get the custom meta permissions, these should be in the meta of the class
# will be a list or tuple or list, [0=codename, 1=name]
meta_permissions = model._meta.permissions
if meta_permissions:
for perm in all_model_permissions:
# will be the model name from the content type, this is how django makes default perms
# we are trying to remove them so now we can figure out which ones are default by provided name
model_name_lower = perm.content_type.name
# default_perms = ['add', 'change', 'view', 'delete', 'undelete']
# append them to the list of default names
default_perm_names.append(f'Can add {model_name_lower}')
default_perm_names.append(f'Can change {model_name_lower}')
default_perm_names.append(f'Can view {model_name_lower}')
default_perm_names.append(f'Can delete {model_name_lower}')
default_perm_names.append(f'Can undelete {model_name_lower}')
# will mean this is a custom perm...so add it
if not perm.name in default_perm_names:
db_custom_perm_names.append(perm.name)
# the perms to ensure are correct...
for model_perm in meta_permissions:
# get the meta perm, will be a list or tuple or list, [0=codename, 1=name]
custom_perm = Permission.objects.using(database_name).get(codename=model_perm[0], name=model_perm[1])
meta_custom_perm_names.append(custom_perm.name)
perms_to_remove = [perm for perm in db_custom_perm_names if perm not in meta_custom_perm_names]
if not perms_to_remove:
print('There are no stale custom permissions to remove.')
# print(perms_to_remove)
# now remove the custom permissions that were removed from the model
for actual_permission_to_remove in Permission.objects.using(database_name).filter(name__in=perms_to_remove):
# print(actual_permission_to_remove)
actual_permission_to_remove.delete(using=database_name)
print(actual_permission_to_remove, '...deleted')
for perm in [x for x in Permission.objects.using(database_name)]:
# loop all perms...if it is not in the model perms it does not exist...
if perm.content_type.model not in [x.content_type.model for x in default_and_custom_perms]:
perm.delete(using=database_name)
print(perm, 'regular permission...deleted')
If you also wish to ensure that the default permissions are added from Django you can add this snippet in the command:
from django.apps import apps
from django.contrib.auth.management import create_permissions
# add all permissions the django way
# get the db name needed from settings.py
database_name = 'default' # or whatever DB you are looking for
print(f'adding all permissions if not there to {database_name}')
for app_config in apps.get_app_configs():
# print(app_config)
app_config.models_module = True
create_permissions(app_config, using=database_name)
app_config.models_module = None
Then call it via python manage.py fix_permissions if you name your command file fix_permissions.py.
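For reference, Django only discovers the command if the file sits in the usual management-commands package layout inside an installed app; assuming an app named yourapp, roughly:
yourapp/
    management/
        __init__.py
        commands/
            __init__.py
            fix_permissions.py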
I've reworked @ViaTech's code to use Django's contrib.auth.management._get_all_permissions() function, which makes it more straightforward:
from typing import List, Set, Tuple
import django.apps
# noinspection PyProtectedMember
from django.contrib.auth.management import _get_all_permissions
from django.contrib.auth.models import Permission
from django.contrib.contenttypes.models import ContentType
from django.core.management.base import BaseCommand
from django.db import DEFAULT_DB_ALIAS
class Command(BaseCommand):
help = "Remove custom permissions that are no longer defined in models"
def add_arguments(self, parser):
parser.add_argument(
"--database",
default=DEFAULT_DB_ALIAS,
help=f'Specifies the database to use. Default is "{DEFAULT_DB_ALIAS}".',
)
parser.add_argument(
"--dry",
action="store_true",
help="Do a dry run not actually deleting any permissions",
)
def handle(self, *args, **options) -> str:
using = options["database"]
# This will hold the permissions that models have defined,
# i.e. default permissions plus additional custom permissions:
# (content_type.pk, codename)
defined_perms: List[Tuple[int, str]] = []
for model in django.apps.apps.get_models():
ctype = ContentType.objects.db_manager(using).get_for_model(
model, for_concrete_model=False
)
# noinspection PyProtectedMember
for (codename, _) in _get_all_permissions(model._meta):
defined_perms.append((ctype.id, codename))
# All permissions in current database (including stale ones)
all_perms = Permission.objects.using(using).all()
stale_perm_pks: Set[int] = set()
for perm in all_perms:
if (perm.content_type.pk, perm.codename) not in defined_perms:
stale_perm_pks.add(perm.pk)
self.stdout.write(f"Delete permission: {perm}")
# Delete all stale permissions
if options["dry"]:
result = f"DRY RUN: {len(stale_perm_pks)} stale permissions NOT deleted"
else:
if stale_perm_pks:
Permission.objects.using(using).filter(pk__in=stale_perm_pks).delete()
result = f"{len(stale_perm_pks)} stale permissions deleted"
return result

How to order NDB query by the key?

I'm trying to use task queues on Google App Engine. I want to utilize the Mapper class shown in the App Engine documentation "Background work with the deferred library".
I get an exception when ordering the query result by the key:
def get_query(self):
...
q = q.order("__key__")
...
Exception:
File "C:... mapper.py", line 41, in get_query
q = q.order("__key__")
File "C:\Program Files (x86)\Google\google_appengine\google\appengine\ext\ndb\query.py", line 1124, in order
'received %r' % arg)
TypeError: order() expects a Property or query Order; received '__key__'
INFO 2017-03-09 11:56:32,448 module.py:806] default: "POST /_ah/queue/deferred HTTP/1.1" 500 114
The article is from 2009, so I guess something might have changed.
My environment: Windows 7, Python 2.7.9, Google App Engine SDK 1.9.50
There are somewhat similar questions about ordering in NDB on SO.
What bugs me is that this code is from the official docs, presumably updated in Feb 2017 (recently), and posted by someone within the top 0.1% of SO users by reputation.
So I must be doing something wrong. What is the solution?
Bingo.
Avinash Raj is correct. If it were an answer I'd accept it.
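For reference, the one-line fix (it uses the KIND attribute of the Mapper class below): ndb rejects the old db-style '__key__' string and expects the model's key property instead.
# Old db-style ordering, rejected by ndb:
q = q.order("__key__")
# ndb-style ordering by the entity key:
q = q.order(self.KIND.key)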
Here is the full class code:
#!/usr/bin/python2.7
# -*- coding: utf-8 -*-
from google.appengine.ext import deferred
from google.appengine.ext import ndb
from google.appengine.runtime import DeadlineExceededError
import logging
class Mapper(object):
"""
from https://cloud.google.com/appengine/docs/standard/python/ndb/queries
corrected with suggestions from Stack Overflow
http://stackoverflow.com/questions/42692319/how-to-order-ndb-query-by-the-key
"""
# Subclasses should replace this with a model class (eg, model.Person).
KIND = None
# Subclasses can replace this with a list of (property, value) tuples to filter by.
FILTERS = []
def __init__(self):
logging.info("Mapper.__init__: {}")
self.to_put = []
self.to_delete = []
def map(self, entity):
"""Updates a single entity.
Implementers should return a tuple containing two iterables (to_update, to_delete).
"""
return ([], [])
def finish(self):
"""Called when the mapper has finished, to allow for any final work to be done."""
pass
def get_query(self):
"""Returns a query over the specified kind, with any appropriate filters applied."""
q = self.KIND.query()
for prop, value in self.FILTERS:
q = q.filter(prop == value)
q = q.order(self.KIND.key)  # the fixed version; the original q.order('__key__') failed
# see http://stackoverflow.com/questions/42692319/how-to-order-ndb-query-by-the-key
return q
def run(self, batch_size=100):
"""Starts the mapper running."""
logging.info("Mapper.run: batch_size: {}".format(batch_size))
self._continue(None, batch_size)
def _batch_write(self):
"""Writes updates and deletes entities in a batch."""
if self.to_put:
ndb.put_multi(self.to_put)
self.to_put = []
if self.to_delete:
ndb.delete_multi(self.to_delete)
self.to_delete = []
def _continue(self, start_key, batch_size):
q = self.get_query()
# If we're resuming, pick up where we left off last time.
if start_key:
key_prop = getattr(self.KIND, '_key')
q = q.filter(key_prop > start_key)
# Keep updating records until we run out of time.
try:
# Steps over the results, returning each entity and its index.
for i, entity in enumerate(q):
map_updates, map_deletes = self.map(entity)
self.to_put.extend(map_updates)
self.to_delete.extend(map_deletes)
# Do updates and deletes in batches.
if (i + 1) % batch_size == 0:
self._batch_write()
# Record the last entity we processed.
start_key = entity.key
self._batch_write()
except DeadlineExceededError:
# Write any unfinished updates to the datastore.
self._batch_write()
# Queue a new task to pick up where we left off.
deferred.defer(self._continue, start_key, batch_size)
return
self.finish()

Is it possible to use a natural key for a GenericForeignKey in Django?

I have the following:
target_content_type = models.ForeignKey(ContentType, related_name='target_content_type')
target_object_id = models.PositiveIntegerField()
target = generic.GenericForeignKey('target_content_type', 'target_object_id')
I would like dumpdata --natural to emit a natural key for this relation. Is this possible? If not, is there an alternative strategy that would not tie me to target's primary key?
TL;DR - Currently there is no sane way of doing so, short of creating a custom Serializer / Deserializer pair.
The problem with models that have generic relations is that Django doesn't see target as a field at all, only target_content_type and target_object_id, and it tries to serialize and deserialize them individually.
The classes responsible for serializing and deserializing Django models are in the modules django.core.serializers.base and django.core.serializers.python. All the others (xml, json and yaml) extend either of them (and python extends base). The field serialization is done like this (irrelevant lines omitted):
for obj in queryset:
for field in concrete_model._meta.local_fields:
if field.rel is None:
self.handle_field(obj, field)
else:
self.handle_fk_field(obj, field)
Here's the first complication: the foreign key to ContentType is handled fine, with natural keys as we expected. But the PositiveIntegerField is handled by handle_field, which is implemented like this:
def handle_field(self, obj, field):
value = field._get_val_from_obj(obj)
# Protected types (i.e., primitives like None, numbers, dates,
# and Decimals) are passed through as is. All other values are
# converted to string first.
if is_protected_type(value):
self._current[field.name] = value
else:
self._current[field.name] = field.value_to_string(obj)
That is, the only apparent hook for customization here (subclassing PositiveIntegerField and defining a custom value_to_string) has no effect, since the serializer never calls it for protected types. Changing the data type of target_object_id to something other than an integer would probably break many other things, so it's not an option.
We could define our custom handle_field to emit natural keys in this case, but then comes the second complication: the deserialization is done like this:
for (field_name, field_value) in six.iteritems(d["fields"]):
field = Model._meta.get_field(field_name)
...
data[field.name] = field.to_python(field_value)
Even if we customized the to_python method, it acts on the field_value alone, out of the context of the object. That's not a problem when using integers, since the value is interpreted as the model's primary key no matter which model it is. But to deserialize a natural key, first we need to know which model the key belongs to, and that information isn't available unless we have a reference to the object (and the target_content_type field has already been deserialized).
As you can see, it's not an impossible task - supporting natural keys in generic relations - but to accomplish that a lot of things would need to be changed in the serialization and deserialization code. The steps necessary, then (if anyone feels up to the task) are:
Create a custom Field extending PositiveIntegerField, with methods to encode/decode an object - calling the referenced models' natural_key and get_by_natural_key;
Override the serializer's handle_field to call the encoder if present;
Implement a custom deserializer that: 1) imposes some order in the fields, ensuring the content type is deserialized before the natural key; 2) calls the decoder, passing not only the field_value but also a reference to the decoded ContentType.
I've written a custom Serializer and Deserializer pair which supports GenericFKs. I checked it briefly and it seems to do the job.
This is what I came up with:
import json
from django.contrib.contenttypes.generic import GenericForeignKey
from django.utils import six
from django.core.serializers.json import Serializer as JSONSerializer
from django.core.serializers.python import Deserializer as \
PythonDeserializer, _get_model
from django.core.serializers.base import DeserializationError
import sys
class Serializer(JSONSerializer):
def get_dump_object(self, obj):
dumped_object = super(Serializer, self).get_dump_object(obj)
if self.use_natural_keys and hasattr(obj, 'natural_key'):
dumped_object['pk'] = obj.natural_key()
# Check if there are any generic fk's in this obj
# and add a natural key to it which will be deserialized by a matching Deserializer.
for virtual_field in obj._meta.virtual_fields:
if type(virtual_field) == GenericForeignKey:
content_object = getattr(obj, virtual_field.name)
dumped_object['fields'][virtual_field.name + '_natural_key'] = content_object.natural_key()
return dumped_object
def Deserializer(stream_or_string, **options):
"""
Deserialize a stream or string of JSON data.
"""
if not isinstance(stream_or_string, (bytes, six.string_types)):
stream_or_string = stream_or_string.read()
if isinstance(stream_or_string, bytes):
stream_or_string = stream_or_string.decode('utf-8')
try:
objects = json.loads(stream_or_string)
for obj in objects:
Model = _get_model(obj['model'])
if isinstance(obj['pk'], (tuple, list)):
o = Model.objects.get_by_natural_key(*obj['pk'])
obj['pk'] = o.pk
# If has generic fk's, find the generic object by natural key, and set it's
# pk according to it.
for virtual_field in Model._meta.virtual_fields:
if type(virtual_field) == GenericForeignKey:
natural_key_field_name = virtual_field.name + '_natural_key'
if natural_key_field_name in obj['fields']:
content_type = getattr(o, virtual_field.ct_field)
content_object_by_natural_key = content_type.model_class().\
objects.get_by_natural_key(obj['fields'][natural_key_field_name][0])
obj['fields'][virtual_field.fk_field] = content_object_by_natural_key.pk
for obj in PythonDeserializer(objects, **options):
yield obj
except GeneratorExit:
raise
except Exception as e:
# Map to deserializer error
six.reraise(DeserializationError, DeserializationError(e), sys.exc_info()[2])
I updated OmriToptix's answer for Django 2.2 and above.
In Django 2.0, the Model._meta.virtual_fields attribute was removed.
So, here are the new Serializer and Deserializer:
import json
from django.contrib.contenttypes.fields import GenericForeignKey
from django.utils import six
from django.core.serializers.json import Serializer as JSONSerializer
from django.core.serializers.python import Deserializer as \
PythonDeserializer, _get_model
from django.core.serializers.base import DeserializationError
import sys
class Serializer(JSONSerializer):
def get_dump_object(self, obj):
dumped_object = super(Serializer, self).get_dump_object(obj)
if hasattr(obj, 'natural_key'):
dumped_object['pk'] = obj.natural_key()
for field in obj._meta.get_fields():
if type(field) == GenericForeignKey:
content_object = getattr(obj, field.name)
dumped_object['fields'][field.name + '_natural_key'] = content_object.natural_key()
return dumped_object
def Deserializer(stream_or_string, **options):
if not isinstance(stream_or_string, (bytes, six.string_types)):
stream_or_string = stream_or_string.read()
if isinstance(stream_or_string, bytes):
stream_or_string = stream_or_string.decode('utf-8')
try:
objects = json.loads(stream_or_string)
for obj in objects:
Model = _get_model(obj['model'])
if isinstance(obj['pk'], (tuple, list)):
o = Model.objects.get_by_natural_key(*obj['pk'])
obj['pk'] = o.pk
for field in Model._meta.get_fields():
if type(field) == GenericForeignKey:
natural_key_field_name = field.name + '_natural_key'
if natural_key_field_name in obj['fields']:
content_type = getattr(o, field.ct_field)
content_object_by_natural_key = content_type.model_class().\
objects.get_by_natural_key(*obj['fields'][natural_key_field_name])
obj['fields'][field.fk_field] = content_object_by_natural_key.pk
del obj['fields'][natural_key_field_name]
for obj in PythonDeserializer(objects, **options):
yield obj
except GeneratorExit:
raise
except Exception as e:
six.reraise(DeserializationError, DeserializationError(e), sys.exc_info()[2])
Then, in your settings.py, set this configuration:
SERIALIZATION_MODULES = {
"json": "path.to.serializer_file"
}
Now, you can use:
python3 manage.py dumpdata --natural-foreign --natural-primary > dump.json
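The dump can then be loaded on the target project with the usual loaddata command; because SERIALIZATION_MODULES overrides the json serializer, the custom Deserializer above is picked up automatically (assuming the same settings are active there):
python3 manage.py loaddata dump.json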
Alternatively, if you only need to dump some data (filtered querysets), you can do it from code:
from path.to.serializers import Serializer, Deserializer
# Serialize
registers = YourModel.objects.filter(some_attribute=some_value)
dump = Serializer().serialize(registers, use_natural_foreign_keys=True, use_natural_primary_keys=True)
# Deserialize
for deserialized_object in Deserializer(dump, use_natural_foreign_keys=True, use_natural_primary_keys=True):
print(deserialized_object.object) # See here https://docs.djangoproject.com/en/2.2/topics/serialization/

Choose test database?

I'm trying to run
./manage.py test
But it tells me
Got an error creating the test database: permission denied to create database
Obviously it doesn't have permission to create the database, but I'm on a shared server, so there's not much I can do about that. I can create a new database through the control panel but I don't think there's any way I can let Django do it automatically.
So, can't I create the test database manually and instead tell Django to flush it every time, rather than recreating the whole thing?
I had a similar issue, but I wanted Django to just bypass the creation of a test database for one of my instances (it is not a mirror, though). Following Mark's suggestion, I created a custom test runner, as follows:
from django.test.simple import DjangoTestSuiteRunner
class ByPassableDBDjangoTestSuiteRunner(DjangoTestSuiteRunner):
def setup_databases(self, **kwargs):
from django.db import connections
old_names = []
mirrors = []
for alias in connections:
connection = connections[alias]
# If the database is a test mirror, redirect its connection
# instead of creating a test database.
if connection.settings_dict['TEST_MIRROR']:
mirrors.append((alias, connection))
mirror_alias = connection.settings_dict['TEST_MIRROR']
connections._connections[alias] = connections[mirror_alias]
elif connection.settings_dict.get('BYPASS_CREATION','no') == 'no':
old_names.append((connection, connection.settings_dict['NAME']))
connection.creation.create_test_db(self.verbosity, autoclobber=not self.interactive)
return old_names, mirrors
Then I added an extra dict entry to one of my database entries inside settings.py: 'BYPASS_CREATION': 'yes'.
Finally, I configured a new TestRunner with:
TEST_RUNNER = 'auth.data.runner.ByPassableDBDjangoTestSuiteRunner'
I would suggest using sqlite3 for testing purposes while keeping on using mysql/postgres/etc for production.
This can be achieved by placing this in your settings file:
import sys
if 'test' in sys.argv:
    DATABASES['default'] = {'ENGINE': 'django.db.backends.sqlite3'}
see Running django tests with sqlite
A temporary sqlite database file will be created in your Django project home, which you will have write access to. The other advantage is that sqlite3 is much faster for testing. You may, however, run into problems if you are using any MySQL/Postgres-specific raw SQL (which you should try to avoid anyway).
I think a better solution might be to define your own test runner.
I added this to the comments above but it got kind of lost: recent changes to Webfaction make this MUCH easier. You can now create new private database instances.
Follow the instructions there, and when creating a new user make sure to grant them the CREATEDB permission: ALTER USER new_username CREATEDB;
You probably also should change the default cron settings so they don't try to check whether this database is up and running as frequently.
You could use django-nose as your TEST_RUNNER. Once installed, if you pass the following environment variable, it will not delete and re-create the database (create it manually yourself first).
REUSE_DB=1 ./manage.py test
You can also add the following to settings.py so you don't have to write REUSE_DB=1 every time you want to run tests:
os.environ['REUSE_DB'] = "1"
Note: this will also leave all your tables in the database, which means test setup will be a little quicker, but you will have to manually update the tables (or delete and re-create the database yourself) when you change your models.
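On recent Django versions (1.8+) the built-in test runner can do the same without django-nose, via the --keepdb flag, which reuses the existing test database instead of destroying and re-creating it:
./manage.py test --keepdb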
My variant for reusing the database:
from django.test.simple import DjangoTestSuiteRunner
from django.core.management import call_command
class TestRunner(DjangoTestSuiteRunner):
def setup_databases(self, **kwargs):
from django.db import connections
settings = connections['default'].settings_dict
settings['NAME'] = settings['TEST_NAME']
settings['USER'] = settings['TEST_USER']
settings['PASSWORD'] = settings['TEST_PASSWD']
call_command('syncdb', verbosity=1, interactive=False, load_initial_data=False)
def teardown_databases(self, old_config, **kwargs):
from django.db import connection
cursor = connection.cursor()
cursor.execute('show tables;')
parts = ('DROP TABLE IF EXISTS %s;' % table for (table,) in cursor.fetchall())
sql = 'SET FOREIGN_KEY_CHECKS = 0;\n' + '\n'.join(parts) + 'SET FOREIGN_KEY_CHECKS = 1;\n'
connection.cursor().execute(sql)
The following is a Django test suite runner that creates the database using the Webfaction XML-RPC API. Note: setting up the database through the API may take up to a minute, and the script may appear to be stuck momentarily; just wait a little while.
NOTE: there is a security risk of having control panel password in the webfaction server, because someone breaching into your web server SSH could take over your Webfaction account. If that is a concern, set USE_SESSKEY to True and use the fabric script below this script to pass a session id to the server. The session key expires in 1 hour from the last API call.
File test_runner.py: on the server, you need to configure ./manage.py test to use WebfactionTestRunner
"""
This test runner uses Webfaction XML-RPC API to create and destroy database
"""
# you can put your control panel username and password here.
# NOTE: there is a security risk of having control panel password in
# the webfaction server, because someone breaching into your web server
# SSH could take over your Webfaction account. If that is a concern,
# set USE_SESSKEY to True and use the fabric script below this script to
# generate a session.
USE_SESSKEY = True
# CP_USERNAME = 'webfactionusername' # required if and only if USE_SESSKEY is False
# CP_PASSWORD = 'webfactionpassword' # required if and only if USE_SESSKEY is False
import sys
import os
from django.test.simple import DjangoTestSuiteRunner
from django import db
from webfaction import Webfaction
def get_sesskey():
f = os.path.expanduser("~/sesskey")
sesskey = open(f).read().strip()
os.remove(f)
return sesskey
if USE_SESSKEY:
wf = Webfaction(get_sesskey())
else:
wf = Webfaction()
wf.login(CP_USERNAME, CP_PASSWORD)
def get_db_user_and_type(connection):
db_types = {
'django.db.backends.postgresql_psycopg2': 'postgresql',
'django.db.backends.mysql': 'mysql',
}
return (
connection.settings_dict['USER'],
db_types[connection.settings_dict['ENGINE']],
)
def _create_test_db(self, verbosity, autoclobber):
"""
Internal implementation - creates the test db tables.
"""
test_database_name = self._get_test_db_name()
db_user, db_type = get_db_user_and_type(self.connection)
try:
wf.create_db(db_user, test_database_name, db_type)
except Exception as e:
sys.stderr.write(
"Got an error creating the test database: %s\n" % e)
if not autoclobber:
confirm = raw_input(
"Type 'yes' if you would like to try deleting the test "
"database '%s', or 'no' to cancel: " % test_database_name)
if autoclobber or confirm == 'yes':
try:
if verbosity >= 1:
print("Destroying old test database '%s'..."
% self.connection.alias)
wf.delete_db(test_database_name, db_type)
wf.create_db(db_user, test_database_name, db_type)
except Exception as e:
sys.stderr.write(
"Got an error recreating the test database: %s\n" % e)
sys.exit(2)
else:
print("Tests cancelled.")
sys.exit(1)
db.close_connection()
return test_database_name
def _destroy_test_db(self, test_database_name, verbosity):
"""
Internal implementation - remove the test db tables.
"""
db_user, db_type = get_db_user_and_type(self.connection)
wf.delete_db(test_database_name, db_type)
self.connection.close()
class WebfactionTestRunner(DjangoTestSuiteRunner):
def __init__(self, *args, **kwargs):
# Monkey patch BaseDatabaseCreation with our own version
from django.db.backends.creation import BaseDatabaseCreation
BaseDatabaseCreation._create_test_db = _create_test_db
BaseDatabaseCreation._destroy_test_db = _destroy_test_db
return super(WebfactionTestRunner, self).__init__(*args, **kwargs)
File webfaction.py: this is a thin wrapper for the Webfaction API; it needs to be importable by both test_runner.py (on the remote server) and fabfile.py (on the local machine)
import xmlrpclib
class Webfaction(object):
def __init__(self, sesskey=None):
self.connection = xmlrpclib.ServerProxy("https://api.webfaction.com/")
self.sesskey = sesskey
def login(self, username, password):
self.sesskey, _ = self.connection.login(username, password)
def create_db(self, db_user, db_name, db_type):
""" Create a database owned by db_user """
self.connection.create_db(self.sesskey, db_name, db_type, 'unused')
# deletes the default user created by Webfaction API
self.connection.make_user_owner_of_db(self.sesskey, db_user, db_name, db_type)
self.connection.delete_db_user(self.sesskey, db_name, db_type)
def delete_db(self, db_name, db_type):
try:
self.connection.delete_db_user(self.sesskey, db_name, db_type)
except xmlrpclib.Fault as e:
print 'ignored error:', e
try:
self.connection.delete_db(self.sesskey, db_name, db_type)
except xmlrpclib.Fault as e:
print 'ignored error:', e
File fabfile.py: a sample Fabric script to generate a session key, needed only if USE_SESSKEY=True
from fabric.api import *
from fabric.operations import run, put
from webfaction import Webfaction
import io
env.hosts = ["webfactionusername#webfactionusername.webfactional.com"]
env.password = "webfactionpassword"
def run_test():
wf = Webfaction()
wf.login(env.hosts[0].split('@')[0], env.password)
sesskey_file = '~/sesskey'
sesskey = wf.sesskey
try:
put(io.StringIO(unicode(sesskey)), sesskey_file, mode='0600')
# put your test code here
# e.g. run('DJANGO_SETTINGS_MODULE=settings /path/to/virtualenv/python /path/to/manage.py test --testrunner=test_runner.WebfactionTestRunner')
raise Exception('write your test here')
finally:
run("rm -f %s" % sesskey_file)
The accepted answer didn't work for me. It's so outdated that it didn't run on my legacy codebase with Django 1.5.
I wrote a blog post describing how I solved this issue, by creating an alternative test runner and changing the Django settings to provide all the required config and to use the new test runner.
You need to specify a sqlite ENGINE when running unit tests. Open settings.py and add the following just after the DATABASES section:
import sys
if 'test' in sys.argv or 'test_coverage' in sys.argv: #Covers regular testing and django-coverage
DATABASES['default']['ENGINE'] = 'django.db.backends.sqlite3'
DATABASES['default']['NAME'] = ':memory:'
Modify the following methods in django/db/backends/creation.py:
def _destroy_test_db(self, test_database_name, verbosity):
"Internal implementation - remove the test db tables."
# Remove the test database to clean up after
# ourselves. Connect to the previous database (not the test database)
# to do so, because it's not allowed to delete a database while being
# connected to it.
self._set_test_dict()
cursor = self.connection.cursor()
self.set_autocommit()
time.sleep(1) # To avoid "database is being accessed by other users" errors.
cursor.execute("""SELECT table_name FROM information_schema.tables WHERE table_schema='public'""")
rows = cursor.fetchall()
for row in rows:
try:
print "Dropping table '%s'" % row[0]
cursor.execute('drop table %s cascade ' % row[0])
except:
print "Couldn't drop '%s'" % row[0]
#cursor.execute("DROP DATABASE %s" % self.connection.ops.quote_name(test_database_name))
self.connection.close()
def _create_test_db(self, verbosity, autoclobber):
"Internal implementation - creates the test db tables."
suffix = self.sql_table_creation_suffix()
if self.connection.settings_dict['TEST_NAME']:
test_database_name = self.connection.settings_dict['TEST_NAME']
else:
test_database_name = TEST_DATABASE_PREFIX + self.connection.settings_dict['NAME']
qn = self.connection.ops.quote_name
# Create the test database and connect to it. We need to autocommit
# if the database supports it because PostgreSQL doesn't allow
# CREATE/DROP DATABASE statements within transactions.
self._set_test_dict()
cursor = self.connection.cursor()
self.set_autocommit()
return test_database_name
def _set_test_dict(self):
if "TEST_NAME" in self.connection.settings_dict:
self.connection.settings_dict["NAME"] = self.connection.settings_dict["TEST_NAME"]
if "TEST_USER" in self.connection.settings_dict:
self.connection.settings_dict['USER'] = self.connection.settings_dict["TEST_USER"]
if "TEST_PASSWORD" in self.connection.settings_dict:
self.connection.settings_dict['PASSWORD'] = self.connection.settings_dict["TEST_PASSWORD"]
Seems to work... just add the extra settings to your settings.py if you need 'em.
Simple workaround: change TEST_DATABASE_PREFIX in django/db/backends/base/creation.py as you like.
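For context, that constant is just the prefix Django prepends to your database NAME when it creates the test database (the default is 'test_'). A less invasive alternative than patching Django is to set the test database name per alias in settings.py via the TEST key; a minimal sketch with assumed credentials:
# settings.py (sketch): choose the test database name without patching Django
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.postgresql',
        'NAME': 'mydb',
        'USER': 'myuser',
        'PASSWORD': 'secret',
        # Used only by `manage.py test`
        'TEST': {
            'NAME': 'my_manually_created_test_db',
        },
    }
}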