How to update table in database while file is being transferred using django celery? - django

I have a task like this in Django:
from celery import task
import subprocess, celery
#celery.task
def file(password, source12, destination):
return subprocess.Popen(['sshpass', '-p', password, 'rsync', '-avz', '--info=progress2', source12, destination],
stderr=subprocess.PIPE, stdout=subprocess.PIPE).communicate()[0]
This transfers file from one server to another using rsync.
Here's my views:
def sync(request):
"""Sync the files into the server with the progress bar"""
choices = request.POST.getlist('choice')
for i in choices:
new_source = source +"/"+ i
start_date1 = datetime.datetime.utcnow().replace(tzinfo=utc)
source12 = new_source.replace(' ', '') #Remove whitespaces
result = file.delay(password, source12, destination)
result.get()
a = result.ready()
start_date = start_date1.strftime("%B %d, %Y, %H:%M%p")
extension = os.path.splitext(i)[1][1:] #Get the file_extension
fullname = os.path.join(destination, i) #Get the file_full_size to calculate size
st = int(os.path.getsize(fullname))
f_size = size(st, system=alternative)
I want to update the table in database which I want to update and show it to the user. The table should be updated while the file is being transferred. How can I do that using django-celery?

There is nothing too special about Celery when it comes to Django. You can just update the database like you would normally do. The only thing you may need to think about are the transactions.
Just to be sure I would recommend using either manual commits or autocommits to update the database. Although I would suggest using redis/memcached instead of the database for these kind of status updates. They are more suited for this purpose.

Related

sqlalchemy - data doesnt get pushed to database while commit but present in session(in memory)

I am adding data to sqlalchemy. But sometimes data is not getting updated or inserted to database. But the commit is successful and I can see the data in memory of session's object.
ie
session.identity_map
Running on sqlalchemy 1.3.3. python 2.7. ubuntu 18.04
from sqlalchemy.orm import Session
from . import Errors as ExecuteErrors
class Errors(object):
def __init__(self, sqlalchemy_engine, d):
self.sqlalchemy_engine = sqlalchemy_engine
self.d = d
def upsert(self, error):
session = Session(self.sqlalchemy_engine)
row = session.query(ExecuteErrors).filter_by(**{'c_name':error['c_name'], 'c_type':error['c_type'],
'f_name':error['f_name']}).scalar()
session.close()
if row:
self.update(error)
else:
self.insert(error)
def insert(self, error):
e = ExecuteErrors(**{'c_name':error['c_name'], 'c_type':error['c_type'], 'f_name':error['f_name'],
'msg':error['msg'], 'details':error['details']})
session = Session(self.sqlalchemy_engine, expire_on_commit=False)
session.add(e)
session.identity_map
session.commit()
session.close()
def update(self, error):
session = Session(self.sqlalchemy_engine, expire_on_commit=False)
session.query(ExecuteErrors).filter_by(**{'c_name':error['c_name'], 'c_type':error['c_type'],
'f_name':error['f_name']}).update({'msg': error['msg'], 'details': error['details']})
session.commit()
session.close()
def get_errors(self):
session = Session(self.sqlalchemy_engine)
e = session.query(ExecuteErrors).all()
session.close()
return e
def clear(self):
session = Session(self.sqlalchemy_engine)
session.query(ExecuteErrors).delete()
session.commit()
session.close()
Calling this with:
e = Error(engine, 'emp')
e.upsert({'c_name':'filter','c_type':'task','f_name':'f1','msg':'TypeError','details':'xyz'})
This should add row in database or update row with new data.
Its working for some insert and for some not.
You could find a possible workaround by explicitely flushing your session when needed.
That said, I think you should reconsider the way you're using sessions. Sessions are intended to manage database connections but you're using them as if there where actual connections.
IMHO, a better way to do would be to create a session at Error instanciation and use it when needed in all your methods.
An even better way to proceed could be to create a session at begining of your "calling" module and pass it to the Error instanciation, and to any other object which need access to database.
Doing this, you could even experience better performance and it may solve your problem (?)
More details about how to manage sessions in the sqlalchemy doc.
EDIT: In addition, the sqlalchemy doc lists some potential problems when used with sqlite. One of them could be the cause of your problem.

java.sql.SQLExceptionPyRaisable on the second attempt connecting to Athena using Django

I am using the python module called PyAthenaJDBC in order to query Athena using the provided JDBC driver.
Here is the link : https://pypi.python.org/pypi/PyAthenaJDBC/
I have been facing some persistent issue. I keep getting this java error whenever I use the Athena connection twice in a row.
As a matter of fact, I was able to connect to Athena, show databases, create new tables and even query the content. I am building an application using Django and running its server to use Athena
However, I am obliged to re-run the server in order for the Athena connection to work once again,
Here is a glimpse of the class I have built
import os
import configparser
import pyathenajdbc
#Get aws credentials for the moment
aws_config_file = '~/.aws/config'
Config = configparser.ConfigParser()
Config.read(os.path.expanduser(aws_config_file))
access_key_id = Config['default']['aws_access_key_id']
secret_key_id = Config['default']['aws_secret_access_key']
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
athena_jdbc_driver_path = BASE_DIR + "/lib/static/AthenaJDBC.jar"
log_path = BASE_DIR + "/lib/static/queries.log"
class PyAthenaLoader():
def __init__(self):
pyathenajdbc.ATHENA_JAR = athena_jdbc_driver_path
def connecti(self):
self.conn = pyathenajdbc.connect(
s3_staging_dir="s3://aws-athena-query-results--us-west-2",
access_key=access_key_id,
secret_key=secret_key_id,
#profile_name = "default",
#credential_file = aws_config_file,
region_name="us-west-2",
log_path=log_path,
driver_path=athena_jdbc_driver_path
)
def databases(self):
dbs = self.query("show databases;")
return dbs
def tables(self, database):
tables = self.query("show tables in {0};".format(database))
return tables
def create(self):
self.connecti()
try:
with self.conn.cursor() as cursor:
cursor.execute(
"""CREATE EXTERNAL TABLE IF NOT EXISTS sales4 (
Day_ID date,
Product_Id string,
Store_Id string,
Sales_Units int,
Sales_Cost float,
Currency string
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = '|',
'field.delim' = '|',
'collection.delimm' = 'undefined',
'mapkey.delim' = 'undefined'
) LOCATION 's3://athena-internship/';
""")
res = cursor.description
finally:
self.conn.close()
return res
def query(self, req):
self.connecti()
try:
with self.conn.cursor() as cursor:
cursor.execute(req)
print(cursor.description)
res = cursor.fetchall()
finally:
self.conn.close()
return res
def info(self):
res = []
for i in dir(pyathenajdbc):
temp = i + ' = ' + str(dic[i])
#print(temp)
res.append(temp)
return res
Example of usage :
def test(request):
athena = jdbc.PyAthenaLoader()
res = athena.query('Select * from sales;')
return render(request, 'test.html', {'data': res})
Works just fine!
However refreshing the page would cause this error :
Error
Note that I am using a local .jar file: I thought that would solve the issue but I was wrong
Even if I remove the path of the JDBC driver and let the module download it from s3, the error persists:
File "/home/tewfikghariani/.virtualenvs/venv/lib/python3.4/site-packages/pyathenajdbc/connection.py", line 69, in init
ATHENA_CONNECTION_STRING.format(region=self.region_name, schema=schema_name), props)
jpype._jexception.java.sql.SQLExceptionPyRaisable:
java.sql.SQLException: No suitable driver found for
jdbc:awsathena://athena.us-west-2.amazonaws.com:443/hive/default/
Furthermore, when I run the module on its own, it works just fine.
When I set multiple connection inside my view before rendering the template, that works just fine as well.
I guess the issue is related to the django view, once one of the views is performing a connection with athena, the next connection is not possible anymore and the error is raised unless I restart the server
Any help? If other details are missing I will provide them immediately.
Update:
After posting the issue in github, the author solved this problem and released a new version that works perfectly.
It was a multi-threading problem with JPype.
Question answered!
ref : https://github.com/laughingman7743/PyAthenaJDBC/pull/8

Django Celerybeat PeriodicTask running far more than expected

I'm struggling with Django, Celery, djcelery & PeriodicTasks.
I've created a task to pull a report for Adsense to generate a live stat report. Here is my task:
import datetime
import httplib2
import logging
from apiclient.discovery import build
from celery.task import PeriodicTask
from django.contrib.auth.models import User
from oauth2client.django_orm import Storage
from .models import Credential, Revenue
logger = logging.getLogger(__name__)
class GetReportTask(PeriodicTask):
run_every = datetime.timedelta(minutes=2)
def run(self, *args, **kwargs):
scraper = Scraper()
scraper.get_report()
class Scraper(object):
TODAY = datetime.date.today()
YESTERDAY = TODAY - datetime.timedelta(days=1)
def get_report(self, start_date=YESTERDAY, end_date=TODAY):
logger.info('Scraping Adsense report from {0} to {1}.'.format(
start_date, end_date))
user = User.objects.get(pk=1)
storage = Storage(Credential, 'id', user, 'credential')
credential = storage.get()
if not credential is None and credential.invalid is False:
http = httplib2.Http()
http = credential.authorize(http)
service = build('adsense', 'v1.2', http=http)
reports = service.reports()
report = reports.generate(
startDate=start_date.strftime('%Y-%m-%d'),
endDate=end_date.strftime('%Y-%m-%d'),
dimension='DATE',
metric='EARNINGS',
)
data = report.execute()
for row in data['rows']:
date = row[0]
revenue = row[1]
try:
record = Revenue.objects.get(date=date)
except Revenue.DoesNotExist:
record = Revenue()
record.date = date
record.revenue = revenue
record.save()
else:
logger.error('Invalid Adsense Credentials')
I'm using Celery & RabbitMQ. Here are my settings:
# Celery/RabbitMQ
BROKER_HOST = "localhost"
BROKER_PORT = 5672
BROKER_USER = "myuser"
BROKER_PASSWORD = "****"
BROKER_VHOST = "myvhost"
CELERYD_CONCURRENCY = 1
CELERYD_NODES = "w1"
CELERY_RESULT_BACKEND = "amqp"
CELERY_TIMEZONE = 'America/Denver'
CELERYBEAT_SCHEDULER = 'djcelery.schedulers.DatabaseScheduler'
import djcelery
djcelery.setup_loader()
On first glance everything seems to work, but after turning on the logger and watching it run I have found that it is running the task at least four times in a row - sometimes more. It also seems to be running every minute instead of every two minutes. I've tried changing the run_every to use a crontab but I get the same results.
I'm starting celerybeat using supervisor. Here is the command I use:
python manage.py celeryd -B -E -c 1
Any ideas as to why its not working as expected?
Oh, and one more thing, after the day changes, it continues to use the date range it first ran with. So as days progress it continues to get stats for the day the task started running - unless I run the task manually at some point then it changes to the date I last ran it manually. Can someone tell me why this happens?
Consider creating a separate queue with one worker process and fixed rate for this type of tasks and just add the tasks in this new queue instead of running them in directly from celerybeat. I hope that could help you to figure out what is wrong with your code, is it problem with celerybeat or your tasks are running longer than expected.
#task(queue='create_report', rate_limit='0.5/m')
def create_report():
scraper = Scraper()
scraper.get_report()
class GetReportTask(PeriodicTask):
run_every = datetime.timedelta(minutes=2)
def run(self, *args, **kwargs):
create_report.delay()
in settings.py
CELERY_ROUTES = {
'myapp.tasks.create_report': {'queue': 'create_report'},
}
start additional celery worker with that would handle tasks in your queue
celery worker -c 1 -Q create_report -n create_report.local
Problem 2. Your YESTERDAY and TODAY variables are set at class level, so within one thread they are set only once.

Choose test database?

I'm trying to run
./manage.py test
But it tells me
Got an error creating the test database: permission denied to create database
Obviously it doesn't have permission to create the database, but I'm on a shared server, so there's not much I can do about that. I can create a new database through the control panel but I don't think there's any way I can let Django do it automatically.
So, can't I create the test database manually and instead tell Django to flush it every time, rather than recreating the whole thing?
I had a similar issue. But I wanted Django to just bypass the creation of a test database for one of my instances (it is not a mirror tough). Following Mark's suggestion, I created a custom test runner, as follows
from django.test.simple import DjangoTestSuiteRunner
class ByPassableDBDjangoTestSuiteRunner(DjangoTestSuiteRunner):
def setup_databases(self, **kwargs):
from django.db import connections
old_names = []
mirrors = []
for alias in connections:
connection = connections[alias]
# If the database is a test mirror, redirect its connection
# instead of creating a test database.
if connection.settings_dict['TEST_MIRROR']:
mirrors.append((alias, connection))
mirror_alias = connection.settings_dict['TEST_MIRROR']
connections._connections[alias] = connections[mirror_alias]
elif connection.settings_dict.get('BYPASS_CREATION','no') == 'no':
old_names.append((connection, connection.settings_dict['NAME']))
connection.creation.create_test_db(self.verbosity, autoclobber=not self.interactive)
return old_names, mirrors
Then I created an extra dict entry in one of my databases entries inside settings.py, 'BYPASS_CREATION':'yes',
Finally, I configured a new TestRunner with
TEST_RUNNER = 'auth.data.runner.ByPassableDBDjangoTestSuiteRunner'
I would suggest using sqlite3 for testing purposes while keeping on using mysql/postgres/etc for production.
This can be achieved by placing this in your settings file:
if 'test' in sys.argv:
DATABASES['default'] = {'ENGINE': 'django.db.backends.sqlite3'}
see Running django tests with sqlite
a temporary sqlite database file will be created in your django project home which you will have write access to. The other advantage is that sqlite3 is much faster for testing. You may however run in to problems if you are using any mysql/postgres specific raw sql (which you should try to avoid anyway).
I think a better solution might be to define your own test runner.
I added this to the comments above but it got kind of lost - recent changes to webfaction make this MUCH easier. You can now create new private database instances.
Follow the instructions there, and when creating a new user make sure to give them the permission to ALTER USER new_username CREATEDB;.
You probably also should change the default cron settings so they don't try to check if this database is up and runnings as frequently.
You could use django-nose as your TEST_RUNNER. Once installed, if you pass the following environment variable, it will not delete and re-create the database (create it manually yourself first).
REUSE_DB=1 ./manage.py test
You can also add the following to settings.py so you don't have to write REUSE_DB=1 every time you want to run tests:
os.environ['REUSE_DB'] = "1"
Note: this will also leave all your tables in the databases which means test setup will be a little quicker, but you will have to manually update the tables (or delete and re-create the database yourself) when you change your models.
my variant to reusing database:
from django.test.simple import DjangoTestSuiteRunner
from django.core.management import call_command
class TestRunner(DjangoTestSuiteRunner):
def setup_databases(self, **kwargs):
from django.db import connections
settings = connections['default'].settings_dict
settings['NAME'] = settings['TEST_NAME']
settings['USER'] = settings['TEST_USER']
settings['PASSWORD'] = settings['TEST_PASSWD']
call_command('syncdb', verbosity=1, interactive=False, load_initial_data=False)
def teardown_databases(self, old_config, **kwargs):
from django.db import connection
cursor = connection.cursor()
cursor.execute('show tables;')
parts = ('DROP TABLE IF EXISTS %s;' % table for (table,) in cursor.fetchall())
sql = 'SET FOREIGN_KEY_CHECKS = 0;\n' + '\n'.join(parts) + 'SET FOREIGN_KEY_CHECKS = 1;\n'
connection.cursor().execute(sql)
The following is a django test suite runner to create database using Webfaction XML-RPC API. Note, setting up the database using the API may take up to a minute, and the script may appear to be stuck momentarily, just wait for a little while.
NOTE: there is a security risk of having control panel password in the webfaction server, because someone breaching into your web server SSH could take over your Webfaction account. If that is a concern, set USE_SESSKEY to True and use the fabric script below this script to pass a session id to the server. The session key expires in 1 hour from the last API call.
File test_runner.py: in the server, you need to configure ./manage.py test to use WebfactionTestRunner
"""
This test runner uses Webfaction XML-RPC API to create and destroy database
"""
# you can put your control panel username and password here.
# NOTE: there is a security risk of having control panel password in
# the webfaction server, because someone breaching into your web server
# SSH could take over your Webfaction account. If that is a concern,
# set USE_SESSKEY to True and use the fabric script below this script to
# generate a session.
USE_SESSKEY = True
# CP_USERNAME = 'webfactionusername' # required if and only if USE_SESSKEY is False
# CP_PASSWORD = 'webfactionpassword' # required if and only if USE_SESSKEY is False
import sys
import os
from django.test.simple import DjangoTestSuiteRunner
from django import db
from webfaction import Webfaction
def get_sesskey():
f = os.path.expanduser("~/sesskey")
sesskey = open(f).read().strip()
os.remove(f)
return sesskey
if USE_SESSKEY:
wf = Webfaction(get_sesskey())
else:
wf = Webfaction()
wf.login(CP_USERNAME, CP_PASSWORD)
def get_db_user_and_type(connection):
db_types = {
'django.db.backends.postgresql_psycopg2': 'postgresql',
'django.db.backends.mysql': 'mysql',
}
return (
connection.settings_dict['USER'],
db_types[connection.settings_dict['ENGINE']],
)
def _create_test_db(self, verbosity, autoclobber):
"""
Internal implementation - creates the test db tables.
"""
test_database_name = self._get_test_db_name()
db_user, db_type = get_db_user_and_type(self.connection)
try:
wf.create_db(db_user, test_database_name, db_type)
except Exception as e:
sys.stderr.write(
"Got an error creating the test database: %s\n" % e)
if not autoclobber:
confirm = raw_input(
"Type 'yes' if you would like to try deleting the test "
"database '%s', or 'no' to cancel: " % test_database_name)
if autoclobber or confirm == 'yes':
try:
if verbosity >= 1:
print("Destroying old test database '%s'..."
% self.connection.alias)
wf.delete_db(test_database_name, db_type)
wf.create_db(db_user, test_database_name, db_type)
except Exception as e:
sys.stderr.write(
"Got an error recreating the test database: %s\n" % e)
sys.exit(2)
else:
print("Tests cancelled.")
sys.exit(1)
db.close_connection()
return test_database_name
def _destroy_test_db(self, test_database_name, verbosity):
"""
Internal implementation - remove the test db tables.
"""
db_user, db_type = get_db_user_and_type(self.connection)
wf.delete_db(test_database_name, db_type)
self.connection.close()
class WebfactionTestRunner(DjangoTestSuiteRunner):
def __init__(self, *args, **kwargs):
# Monkey patch BaseDatabaseCreation with our own version
from django.db.backends.creation import BaseDatabaseCreation
BaseDatabaseCreation._create_test_db = _create_test_db
BaseDatabaseCreation._destroy_test_db = _destroy_test_db
return super(WebfactionTestRunner, self).__init__(*args, **kwargs)
File webfaction.py: this is a thin wrapper for Webfaction API, it need to be importable by both test_runner.py (in the remote server) and the fabfile.py (in the local machine)
import xmlrpclib
class Webfaction(object):
def __init__(self, sesskey=None):
self.connection = xmlrpclib.ServerProxy("https://api.webfaction.com/")
self.sesskey = sesskey
def login(self, username, password):
self.sesskey, _ = self.connection.login(username, password)
def create_db(self, db_user, db_name, db_type):
""" Create a database owned by db_user """
self.connection.create_db(self.sesskey, db_name, db_type, 'unused')
# deletes the default user created by Webfaction API
self.connection.make_user_owner_of_db(self.sesskey, db_user, db_name, db_type)
self.connection.delete_db_user(self.sesskey, db_name, db_type)
def delete_db(self, db_name, db_type):
try:
self.connection.delete_db_user(self.sesskey, db_name, db_type)
except xmlrpclib.Fault as e:
print 'ignored error:', e
try:
self.connection.delete_db(self.sesskey, db_name, db_type)
except xmlrpclib.Fault as e:
print 'ignored error:', e
File fabfile.py: A sample fabric script to generate session key, needed only if USE_SESSKEY=True
from fabric.api import *
from fabric.operations import run, put
from webfaction import Webfaction
import io
env.hosts = ["webfactionusername#webfactionusername.webfactional.com"]
env.password = "webfactionpassword"
def run_test():
wf = Webfaction()
wf.login(env.hosts[0].split('#')[0], env.password)
sesskey_file = '~/sesskey'
sesskey = wf.sesskey
try:
put(io.StringIO(unicode(sesskey)), sesskey_file, mode='0600')
# put your test code here
# e.g. run('DJANGO_SETTINGS_MODULE=settings /path/to/virtualenv/python /path/to/manage.py test --testrunner=test_runner.WebfactionTestRunner')
raise Exception('write your test here')
finally:
run("rm -f %s" % sesskey_file)
The accepted answer didn't work for me. It's so outdated, that it didn't run on my legacy codebase with djano 1.5.
I wrote a blogpost entirely describing how I solved this issue by creating an alternative test runner and changing django settings to provide all the required config and to use new test runner.
You need to specify a sqlite ENGINE when using unit tests. Open the settings.py and add the just after DATABASES section:
import sys
if 'test' in sys.argv or 'test_coverage' in sys.argv: #Covers regular testing and django-coverage
DATABASES['default']['ENGINE'] = 'django.db.backends.sqlite3'
DATABASES['default']['NAME'] = ':memory:'
Modify the following methods in django/db/backends/creation.py:
def _destroy_test_db(self, test_database_name, verbosity):
"Internal implementation - remove the test db tables."
# Remove the test database to clean up after
# ourselves. Connect to the previous database (not the test database)
# to do so, because it's not allowed to delete a database while being
# connected to it.
self._set_test_dict()
cursor = self.connection.cursor()
self.set_autocommit()
time.sleep(1) # To avoid "database is being accessed by other users" errors.
cursor.execute("""SELECT table_name FROM information_schema.tables WHERE table_schema='public'""")
rows = cursor.fetchall()
for row in rows:
try:
print "Dropping table '%s'" % row[0]
cursor.execute('drop table %s cascade ' % row[0])
except:
print "Couldn't drop '%s'" % row[0]
#cursor.execute("DROP DATABASE %s" % self.connection.ops.quote_name(test_database_name))
self.connection.close()
def _create_test_db(self, verbosity, autoclobber):
"Internal implementation - creates the test db tables."
suffix = self.sql_table_creation_suffix()
if self.connection.settings_dict['TEST_NAME']:
test_database_name = self.connection.settings_dict['TEST_NAME']
else:
test_database_name = TEST_DATABASE_PREFIX + self.connection.settings_dict['NAME']
qn = self.connection.ops.quote_name
# Create the test database and connect to it. We need to autocommit
# if the database supports it because PostgreSQL doesn't allow
# CREATE/DROP DATABASE statements within transactions.
self._set_test_dict()
cursor = self.connection.cursor()
self.set_autocommit()
return test_database_name
def _set_test_dict(self):
if "TEST_NAME" in self.connection.settings_dict:
self.connection.settings_dict["NAME"] = self.connection.settings_dict["TEST_NAME"]
if "TEST_USER" in self.connection.settings_dict:
self.connection.settings_dict['USER'] = self.connection.settings_dict["TEST_USER"]
if "TEST_PASSWORD" in self.connection.settings_dict:
self.connection.settings_dict['PASSWORD'] = self.connection.settings_dict["TEST_PASSWORD"]
Seems to work... just add the extra settings to your settings.py if you need 'em.
Simple workaround: change TEST_DATABASE_PREFIX in django/db/backends/base/creation.py as you like.

Syncing data between devel/live databases in Django

With Django's new multi-db functionality in the development version, I've been trying to work on creating a management command that let's me synchronize the data from the live site down to a developer machine for extended testing. (Having actual data, particularly user-entered data, allows me to test a broader range of inputs.)
Right now I've got a "mostly" working command. It can sync "simple" model data but the problem I'm having is that it ignores ManyToMany fields which I don't see any reason for it do so. Anyone have any ideas of either how to fix that or a better want to handle this? Should I be exporting that first query to a fixture first and then re-importing it?
from django.core.management.base import LabelCommand
from django.db.utils import IntegrityError
from django.db import models
from django.conf import settings
LIVE_DATABASE_KEY = 'live'
class Command(LabelCommand):
help = ("Synchronizes the data between the local machine and the live server")
args = "APP_NAME"
label = 'application name'
requires_model_validation = False
can_import_settings = True
def handle_label(self, label, **options):
# Make sure we're running the command on a developer machine and that we've got the right settings
db_settings = getattr(settings, 'DATABASES', {})
if not LIVE_DATABASE_KEY in db_settings:
print 'Could not find "%s" in database settings.' % LIVE_DATABASE_KEY
return
if db_settings.get('default') == db_settings.get(LIVE_DATABASE_KEY):
print 'Data cannot synchronize with self. This command must be run on a non-production server.'
return
# Fetch all models for the given app
try:
app = models.get_app(label)
app_models = models.get_models(app)
except:
print "The app '%s' could not be found or models could not be loaded for it." % label
for model in app_models:
print 'Syncing %s.%s ...' % (model._meta.app_label, model._meta.object_name)
# Query each model from the live site
qs = model.objects.all().using(LIVE_DATABASE_KEY)
# ...and save it to the local database
for record in qs:
try:
record.save(using='default')
except IntegrityError:
# Skip as the record probably already exists
pass
Django command extension's Dumpscript should help a lot.
This doesn't answer your question exactly but why not just do a db dump and a db restore?