How to set a unique value for each row in an Alembic migration - Flask

I added a unique uid attribute to the MyModel model:

class MyModel(db.Model):
    ...
    uid = db.Column(db.String(50), nullable=False)
    ...
    __table_args__ = (UniqueConstraint('uid', name='unique_uid'),)
I have a migration:
def upgrade():
    op.add_column('mymodel', sa.Column('uid', sa.String(length=50), nullable=True))
    mymodel = table('mymodel', column('uid'))
    op.execute(mymodel.update().values(uid=generate_uid()))
    op.create_unique_constraint('unique_uid', 'mymodel', ['uid'])
    op.alter_column(
        table_name='mymodel',
        column_name='uid',
        nullable=False
    )
When I run db upgrade, I get an error:
...
psycopg2.IntegrityError: could not create unique index "unique_uid"
DETAIL: Key (uid)=(c92U6txA2) is duplicated.
How can I set a unique value for each row in op.execute(mymodel.update().values(uid=generate_uid()))?
$ pip freeze
alembic==0.8.6
Flask==0.10.1
Flask-Fixtures==0.3.3
Flask-Login==0.3.2
Flask-Migrate==1.8.0
Flask-Script==2.0.5
Flask-SQLAlchemy==2.1
itsdangerous==0.24
Jinja2==2.8
Mako==1.0.4
MarkupSafe==0.23
psycopg2==2.6.1
python-editor==1.0
requests==2.10.0
SQLAlchemy==1.0.13
Werkzeug==0.11.9

A possible solution:

from sqlalchemy.orm import Session
from alembic import op
import sqlalchemy as sa

def upgrade():
    conn = op.get_bind()
    session = Session(bind=conn)
    op.add_column('mymodel', sa.Column('uid', sa.String(length=50), nullable=True))
    for item in session.query(MyModel).filter_by(uid=None):
        item.uid = generate_uid()
    session.commit()
    op.create_unique_constraint('unique_uid', 'mymodel', ['uid'])
    op.alter_column(
        table_name='mymodel',
        column_name='uid',
        nullable=False
    )

The migration script that you wrote puts the same uid on all the rows: generate_uid() is called once, and its result is then written to every row. So when the unique index is created you get a duplicate key error.
Depending on what your uids are and on the database, you may be able to write a single SQL statement that creates unique ids for all your rows, but the safe bet is to loop and update each row separately.
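For illustration, here is a minimal sketch of the per-row approach inside the migration itself, using the connection from op.get_bind(). It assumes mymodel has an integer id primary key and that generate_uid() is importable from your application (both assumptions, not taken from the original post):

import sqlalchemy as sa
from sqlalchemy.sql import table, column
from alembic import op

def upgrade():
    op.add_column('mymodel', sa.Column('uid', sa.String(length=50), nullable=True))

    # Ad-hoc table construct; assumes an integer "id" primary key column.
    mymodel = table('mymodel',
                    column('id', sa.Integer),
                    column('uid', sa.String))

    conn = op.get_bind()
    # Issue one UPDATE per row so generate_uid() is evaluated once per row
    # instead of once for the whole statement.
    for (row_id,) in conn.execute(sa.select([mymodel.c.id])):
        conn.execute(
            mymodel.update()
                   .where(mymodel.c.id == row_id)
                   .values(uid=generate_uid())  # generate_uid() comes from your app
        )

    op.create_unique_constraint('unique_uid', 'mymodel', ['uid'])
    op.alter_column('mymodel', 'uid', nullable=False)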

Related

Auto generate migrations alembic + SQLAlchemy imperative declaration

I'm exploring a new DDD project using SQLAlchemy and Alembic for the first time. I'd like to use imperative mapping to isolate my domain objects.
All the documentation I can find about auto-generating migrations with Alembic uses declarative mapping. Does that mean I have to write all migrations by hand if I want to use imperative mapping?
I had to import the metadata of the Table I manually defined.
I came to this page because autogenerating migrations no longer worked after upgrading to SQLAlchemy 1.4: the metadata was no longer recognized and the automatically generated migration deleted every table (DROP TABLE in upgrade, CREATE TABLE in downgrade).
I first tried to import the table metadata like this:
target_metadata = [orm.table_company.metadata, orm.table_user.metadata, orm.table_3.metadata, orm.table_4.metadata]
It resulted in the following error:
alembic/autogenerate/api.py", line 462, in table_key_to_table
ValueError: Duplicate table keys across multiple MetaData objects: "tb_company", "tb_user", "tb_3", "tb_4"
I found that rather than importing one metadata object per table, you can access them all in a single pass with target_metadata = orm.mapper_registry.metadata:
SQLAlchemy 1.4
adapters/orm.py

import uuid

from myapp import domain
from sqlalchemy import Table, Column, Integer, String, ForeignKey, text
from sqlalchemy.dialects.postgresql import UUID  # assuming PostgreSQL for the UUID type
from sqlalchemy.orm import registry, relationship
from sqlalchemy.schema import MetaData

metadata = MetaData()
mapper_registry = registry(metadata=metadata)

# define your tables here
table_user = Table(
    "tb_user",
    mapper_registry.metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column(
        "pk_user",
        UUID(as_uuid=True),
        primary_key=True,
        server_default=text("uuid_generate_v4()"),
        default=uuid.uuid4,
        unique=True,
        nullable=False,
    ),
    Column(
        "fk_company",
        UUID(as_uuid=True),
        ForeignKey("tb_company.pk_company"),
    ),
    Column("first_name", String(255)),
    Column("last_name", String(255)),
)

# map your domain objects to the tables
def start_mappers():
    mapper_registry.map_imperatively(
        domain.model.User,
        table_user,
        properties={
            "company": relationship(
                domain.Company,
                backref="users"
            )
        },
    )

alembic/env.py

from myapp.adapters import orm
# (...)
target_metadata = orm.mapper_registry.metadata
SQLAlchemy 1.3
Using classical / imperative mapping, Alembic could generate migrations with SQLAlchemy 1.3 using the following syntax:
adapters/orm.py

from myapp import domain
# (...)
from sqlalchemy import Table, Column, Integer
from sqlalchemy.orm import mapper, relationship
from sqlalchemy.schema import MetaData

metadata = MetaData()

# define your tables here
table_user = Table(
    "tb_user",
    metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    # (...)
)

# map your domain objects to the tables
def start_mappers():
    user_mapper = mapper(
        domain.User,
        table_user,
        properties={
            "company": relationship(
                domain.Company,
                backref="users"
            ),
        },
    )

alembic/env.py

from myapp.adapters import orm
# (...)
target_metadata = orm.metadata

Set Django auto-increment primary key to specific value [duplicate]

I have a User model and I want its id to start at 10000, then auto-increment from there:
10001, 10002, 10003, 10004...
My User class:

class User(AbstractUser):
    username = models.CharField(max_length=64)
    ...

Is it possible to achieve this?
EDIT-1
Before asking this question, I read this link: Is there a way to set the id value of new Django objects to start at a certain value?
But I don't think the answers are good, so what I mean is: does Django have a configuration option to achieve this?
The approach is the same as doing data migrations with raw SQL. Change APPNAME to your app name and create an empty migration:
python manage.py makemigrations APPNAME --empty
Inside the created file:

operations = [
    migrations.RunSQL(
        'ALTER SEQUENCE APPNAME_USER_id_seq RESTART WITH 10000;'
    )
]
The solution is to set the autoincrement field explicitly:
user_id = models.AutoField(primary_key=True)
After this, you can run this command on the database side:
ALTER SEQUENCE user_id RESTART WITH 10000;
You can also run it from Python by using signals, as a different method:

from django.db.models.signals import post_syncdb
from django.db import connection, transaction

def auto_increment_start(sender, **kwargs):
    cursor = connection.cursor()
    cursor.execute("ALTER SEQUENCE user_id RESTART WITH 10000;")
    transaction.commit_unless_managed()

post_syncdb.connect(auto_increment_start, sender=app_models)
In Django, a model can't have more than one AutoField, and this one is used to set a primary key different from the default.
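For instance, a minimal sketch (the model and field names here are hypothetical):

from django.db import models

class Profile(models.Model):
    # A single explicit AutoField replaces the implicit "id" primary key.
    profile_id = models.AutoField(primary_key=True)
    name = models.CharField(max_length=64)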
My solution is to do it manually:
$ ./manage.py shell
Python 3.6.5 (default, Apr 1 2018, 05:46:30)
Type 'copyright', 'credits' or 'license' for more information
IPython 6.4.0 -- An enhanced Interactive Python. Type '?' for help.
In [1]: from django.contrib.auth.models import User
In [2]: u = User.objects.create_user('name', '', '')
In [3]: User.objects.filter(id=u.id).update(id=10000-1)
Out[3]: 1
In [4]: u.delete()
Out[4]:
(0,
{'admin.LogEntry': 0,
'auth.User_groups': 0,
'auth.User_user_permissions': 0,
'auth.User': 0})
In [5]: uu = User.objects.create_user('user', '', '')
In [6]: uu.id
Out[6]: 10000
For MySQL, add this to your migration file.
Replace TABLE_NAME and START_VALUE with your table's name and the value you want to start with.

operations = [
    migrations.RunSQL('ALTER TABLE TABLE_NAME AUTO_INCREMENT=START_VALUE;')
]

Alembic/Flask-Migrate not detecting after_create events

I have a simple Flask-SQLAlchemy model (with an event listener to create a trigger):

from flask_sqlalchemy import SQLAlchemy
from sqlalchemy import DDL, event

db = SQLAlchemy()

class Confirm(db.Model):
    created = db.Column(db.DateTime, default=db.func.current_timestamp(), nullable=False)
    modified = db.Column(db.DateTime, default=db.func.current_timestamp(), onupdate=db.func.current_timestamp(), nullable=False)
    id = db.Column(db.String(36), primary_key=True)

class ConfirmOld(db.Model):
    orig_created = db.Column(db.DateTime)
    orig_modified = db.Column(db.DateTime)
    orig_id = db.Column(db.String(36))

confirm_delete = DDL('''\
CREATE TRIGGER confirm_delete
BEFORE DELETE
ON confirm FOR EACH ROW
BEGIN
    INSERT INTO confirm_old ( orig_created, orig_modified, orig_id )
    VALUES ( OLD.created, OLD.modified, OLD.id );
END;
''')

event.listen(Confirm.__table__, 'after_create', confirm_delete)
When I run Alembic migrate and upgrade, the TRIGGER is not created (in MySQL). However, it is created and works properly when I use db.create_all().
Is it possible to get Alembic / Flask-Migrate to create and manage my triggers (i.e., custom DDL that is run on after_create events)?
I have faced the same issue and tried a solution with a Replaceable object, but it didn't work.
I managed to make it work by editing the migration script and executing the trigger creation query.
Here are the steps:
Run flask db migrate -m 'adding custom trigger on table x'. It will generate a migration script for you under the versions sub-folder of the migrations folder.
Check the script created under versions and edit it like this:
Define your trigger query in the file:
trigger = '''
CREATE TRIGGER confirm_delete
BEFORE DELETE
ON confirm FOR EACH ROW
BEGIN
INSERT INTO confirm_old ( orig_created, orig_modified, orig_id )
VALUES ( OLD.created, OLD.modified, OLD.id );
END;
'''
In the upgrade method, add this line:

def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    # ### end Alembic commands ###
    # add your queries here
    op.execute(trigger)

If you run flask db upgrade it will execute the query and update the database.
To downgrade the database, add this to the downgrade method:

def downgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    # ### end Alembic commands ###
    op.execute('drop trigger if exists confirm_delete on confirm cascade;')

If you check your database, the change will be applied.
PS: The more elegant solution should be what is suggested here with a Replaceable object. I tried it, but it didn't work; maybe my Alembic is not up to date.
Here is what the solution should look like:
Create a ReplaceableObject class:

class ReplaceableObject(object):
    def __init__(self, name, sqltext):
        self.name = name
        self.sqltext = sqltext

Instantiate it with your query statement:
delete_trigger = ReplaceableObject('delete_trigger', trigger)
Update your upgrade and downgrade functions like this:
def upgrade():
    op.create_sp(delete_trigger)

def downgrade():
    op.drop_sp(delete_trigger)
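Note that op.create_sp and op.drop_sp are not built into Alembic; they come from the "Replaceable Objects" recipe in the Alembic cookbook and have to be registered as custom operations first (for example in env.py or a module imported by it). A rough sketch of that registration, assuming the ReplaceableObject above and MySQL's DROP TRIGGER syntax:

from alembic.operations import Operations, MigrateOperation

@Operations.register_operation("create_sp")
class CreateSPOp(MigrateOperation):
    """Create a stored object (here: a trigger) from a ReplaceableObject."""
    def __init__(self, target):
        self.target = target

    @classmethod
    def create_sp(cls, operations, target):
        return operations.invoke(cls(target))

@Operations.register_operation("drop_sp")
class DropSPOp(MigrateOperation):
    """Drop the stored object by name."""
    def __init__(self, target):
        self.target = target

    @classmethod
    def drop_sp(cls, operations, target):
        return operations.invoke(cls(target))

@Operations.implementation_for(CreateSPOp)
def create_sp(operations, operation):
    operations.execute(operation.target.sqltext)

@Operations.implementation_for(DropSPOp)
def drop_sp(operations, operation):
    # MySQL syntax; PostgreSQL would need "DROP TRIGGER ... ON <table>"
    operations.execute("DROP TRIGGER IF EXISTS %s" % operation.target.name)

With those operations registered, op.create_sp(delete_trigger) and op.drop_sp(delete_trigger) in the migration above should resolve.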
Hope it helps others...
In Flask the listener is ignored. I fixed this by listening on Table instead:

from sqlalchemy import Table, event
from sqlalchemy.engine import Connection

def after_create_table_handler(table: Table, conn: Connection, **kwargs):
    pass

event.listen(Table, 'after_create', after_create_table_handler)

Why is Flask-Migrate making me do a 2-steps migration?

I'm working on a project with Flask, SQLAlchemy, Alembic and their wrappers for Flask (Flask-SQLAlchemy and Flask-Migrate). I have four migrations:
1c5f54d4aa34 -> 4250dfa822a4 (head), Feed: Countries
312c1d408043 -> 1c5f54d4aa34, Feed: Continents
41984a51dbb2 -> 312c1d408043, Basic Structure
<base> -> 41984a51dbb2, Init Alembic
When I start a new and clean database and try to run the migrations I get an error:
vagrant#precise32:/vagrant$ python manage.py db upgrade
...
sqlalchemy.exc.ProgrammingError: (ProgrammingError) relation "continent" does not exist
...
If I ask Flask-Migrate to run all migrations but the last, it works. If after that I run the upgrade command again, it works – that is, it fully upgrades my database without a single change in code:
vagrant#precise32:/vagrant$ python manage.py db upgrade 312c1d408043
INFO [alembic.migration] Context impl PostgresqlImpl.
INFO [alembic.migration] Will assume transactional DDL.
INFO [alembic.migration] Running upgrade -> 41984a51dbb2, Init Alembic
INFO [alembic.migration] Running upgrade 41984a51dbb2 -> 312c1d408043, Basic Structure
vagrant#precise32:/vagrant$ python manage.py db upgrade
INFO [alembic.migration] Context impl PostgresqlImpl.
INFO [alembic.migration] Will assume transactional DDL.
INFO [alembic.migration] Running upgrade 312c1d408043 -> 1c5f54d4aa34, Feed: Continents
INFO [alembic.migration] Running upgrade 1c5f54d4aa34 -> 4250dfa822a4, Feed: Countries
TL;DR
The last migration (Feed: Countries) runs queries on the table fed by the previous one (Feed: Continents). If the continent table is created and fed, the scripts should work. But they don't.
Why do I have to stop the migration process in between and re-start it with another command? I really don't get this. Is it some command Alembic executes after a series of migrations? Any ideas?
Just in case
My models are defined as follows:
class Country(db.Model):
    __tablename__ = 'country'
    id = db.Column(db.Integer, primary_key=True)
    alpha2 = db.Column(db.String(2), index=True, unique=True)
    title = db.Column(db.String(140))
    continent_id = db.Column(db.Integer, db.ForeignKey('continent.id'))
    continent = db.relationship('Continent', backref='countries')

    def __repr__(self):
        return '<Country #{}: {}>'.format(self.id, self.title)

class Continent(db.Model):
    __tablename__ = 'continent'
    id = db.Column(db.Integer, primary_key=True)
    alpha2 = db.Column(db.String(2), index=True, unique=True)
    title = db.Column(db.String(140))

    def __repr__(self):
        return '<Continent #{}: {}>'.format(self.id, self.title)
Many thanks,
UPDATE 1: The upgrade methods of the last two migrations
As @Miguel asked in a comment, here are the upgrade methods of the last two migrations:
Feed: Continents
def upgrade():
    csv_path = app.config['BASEDIR'].child('migrations', 'csv', 'en')
    csv_file = csv_path.child('continents.csv')
    with open(csv_file) as file_handler:
        csv = list(reader(file_handler))
    csv.pop(0)
    data = [{'alpha2': c[0].lower(), 'title': c[1]} for c in csv]
    op.bulk_insert(Continent.__table__, data)
Feed: Countries (which depends on the table fed by the previous migration)
def upgrade():
    # load countries iso3166.csv and build a dictionary
    csv_path = app.config['BASEDIR'].child('migrations', 'csv', 'en')
    csv_file = csv_path.child('iso3166.csv')
    countries = dict()
    with open(csv_file) as file_handler:
        csv = list(reader(file_handler))
    for c in csv:
        countries[c[0]] = c[1]

    # load countries-continents from country_continent.csv
    csv_file = csv_path.child('country_continent.csv')
    with open(csv_file) as file_handler:
        csv = list(reader(file_handler))
    country_continent = [{'country': c[0], 'continent': c[1]} for c in csv]

    # loop
    data = list()
    for item in country_continent:
        # get continent id
        continent_guess = item['continent'].lower()
        continent = Continent.query.filter_by(alpha2=continent_guess).first()
        # include country
        if continent is not None:
            country_name = countries.get(item['country'], False)
            if country_name:
                data.append({'alpha2': item['country'].lower(),
                             'title': country_name,
                             'continent_id': continent.id})
The CSV files I'm using basically follow these patterns:
continents.csv
...
AS, "Asia"
EU, "Europe"
NA, "North America"
...
iso3166.csv
...
CL,"Chile"
CM,"Cameroon"
CN,"China"
...
country_continent.csv
...
US,NA
UY,SA
UZ,AS
...
So Feed: Continents feeds the continent table, and Feed: Countries feeds the country table. But it has to query the continents table in order to make the proper link between the country and the continent.
UPDATE 2: Someone from Reddit already offered an explanation and a workaround
I asked the same question on Reddit, and themathemagician said:
I've run into this before, and the issue is that the migrations don't
execute individually, but instead alembic batches all of them (or all
of them that need to be run) and then executes the SQL. This means
that by the time the last migration is trying to run, the tables don't
actually exist yet so you can't actually make queries. Doing
from alembic import op

def upgrade():
    # migration stuff
    op.execute('COMMIT')
    # run queries
This isn't the most elegant solution (and that was for Postgres, the
command may be different for other dbs), but it worked for me. Also,
this isn't actually an issue with Flask-Migrate as much as an issue
with alembic, so if you want to Google for more info, search for
alembic. Flask-Migrate is just a wrapper around alembic that works
with Flask-Script easily.
As indicated by @themathemagician on Reddit, Alembic by default runs all the migrations in a single transaction, so depending on the database engine and on what you do in your migration scripts, some operations that depend on things added by a previous migration may fail.
I haven't tried this myself, but Alembic 0.6.5 introduced a transaction_per_migration option, which might address this. This is an option to the configure() call in env.py. If you are using the default config files as Flask-Migrate creates them, then this is where you fix this in migrations/env.py:
def run_migrations_online():
    """Run migrations in 'online' mode."""
    # ...
    context.configure(
        connection=connection,
        target_metadata=target_metadata,
        transaction_per_migration=True  # <-- add this
    )
    # ...
Also note that if you plan to run offline migrations as well, you need to fix the configure() call in run_migrations_offline() in the same way.
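For completeness, a sketch of the offline variant with the same flag, assuming the default env.py layout (the surrounding code in your file may differ):

def run_migrations_offline():
    """Run migrations in 'offline' mode."""
    # ...
    context.configure(
        url=url,
        target_metadata=target_metadata,
        transaction_per_migration=True  # <-- add this here as well
    )
    # ...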
Give this a try and let me know if it addresses the problem.

Django + PostgreSQL: How to reset primary key?

I have been working on an application in Django. To begin with, for simplicity, I had been using sqlite3 for the database.
However, once I moved to PostgreSQL, I've run into a bit of a problem: the primary key does not reset once I clear out a table.
This app is a game that is played over a long time period (weeks). As such, every time a new game starts, all of the data is cleared out of the database and then new, randomized data is added.
I'd like to be able to "start over" with primary keys starting at 1 each time I clean/rebuild the game.
The code still works as-is, but integers are a pretty natural way for describing the objects in my game. I'd like to have each new game start at 1 rather than wherever the last game left off.
How can I reset the primary key counter in PostgreSQL? Keep in mind that I don't need to preserve the data in the table since I am wiping it out anyway.
In your app directory try this:
python manage.py help sqlsequencereset
Pipe it into psql like this to actually run the reset:
python manage.py sqlsequencereset myapp1 myapp2 | psql
Edit: here's an example of the output from this command on one of my tables:
BEGIN;
SELECT setval('"project_row_id_seq"', coalesce(max("id"), 1), max("id") IS NOT null) FROM "project_row";
COMMIT;
As suggested by "Van Gale", you can get the commands to solve your problem by running sqlsequencereset.
or
You can execute the SQL queries generated by sqlsequencereset from within Python in this way (using the default database):

from django.core.management.color import no_style
from django.db import connection
from myapps.models import MyModel1, MyModel2

sequence_sql = connection.ops.sequence_reset_sql(no_style(), [MyModel1, MyModel2])
with connection.cursor() as cursor:
    for sql in sequence_sql:
        cursor.execute(sql)

I tested this code with Python 3.6, Django 2.0 and PostgreSQL 10.
If you want to use raw SQL, you can do this:
ALTER SEQUENCE youApp_id_seq RESTART WITH 1;
docs:
http://www.postgresql.org/docs/8.2/static/sql-altersequence.html
I view auto-increment primary keys as purely internal identifiers for database records, and I don't like exposing them to users. Granted, it's a common design to use them as part of URLs, but even there slugs or other identifiers feel more appropriate.
If you do not want to have to manually grab the apps you need, or if you have a series of different databases, this command will dynamically gather all connections from settings.py and reset the sequence.
To run use: python manage.py reset_sequences
import psycopg2
from django.conf import settings
from django.core.management.base import BaseCommand
from django.db import connections


def dictfetchall(cursor):
    """Return all rows from a cursor as a dict"""
    columns = [col[0] for col in cursor.description]
    return [
        dict(zip(columns, row))
        for row in cursor.fetchall()
    ]


class Command(BaseCommand):
    help = "Resets sequencing errors in Postgres which normally occur due to importing/restoring a DB"

    def handle(self, *args, **options):
        # loop over all databases in system to figure out the tables that need to be reset
        for name_to_use_for_connection, connection_settings in settings.DATABASES.items():
            db_name = connection_settings['NAME']
            host = connection_settings['HOST']
            user = connection_settings['USER']
            port = connection_settings['PORT']
            password = connection_settings['PASSWORD']

            # connect to this specific DB
            conn_str = f"host={host} port={port} user={user} password={password}"
            conn = psycopg2.connect(conn_str)
            conn.autocommit = True

            select_all_table_statement = f"""SELECT *
                FROM information_schema.tables
                WHERE table_schema = 'public'
                ORDER BY table_name;
            """

            # just a visual representation of where we are
            print('-' * 20, db_name)
            try:
                not_reset_tables = list()
                # use the specific name for the DB
                with connections[name_to_use_for_connection].cursor() as cursor:
                    # using the current db as the cursor connection
                    cursor.execute(select_all_table_statement)
                    rows = dictfetchall(cursor)
                    # will loop over table names in the connected DB
                    for row in rows:
                        find_pk_statement = f"""
                            SELECT k.COLUMN_NAME
                            FROM information_schema.table_constraints t
                            LEFT JOIN information_schema.key_column_usage k
                            USING(constraint_name,table_schema,table_name)
                            WHERE t.constraint_type='PRIMARY KEY'
                                AND t.table_name='{row['table_name']}';
                        """
                        cursor.execute(find_pk_statement)
                        pk_column_names = dictfetchall(cursor)
                        for pk_dict in pk_column_names:
                            column_name = pk_dict['column_name']
                            # time to build the reset sequence command for each table
                            # taken from django: https://docs.djangoproject.com/en/3.0/ref/django-admin/#sqlsequencereset
                            # example: SELECT setval(pg_get_serial_sequence('"[TABLE]"','id'), coalesce(max("id"), 1), max("id") IS NOT null) FROM "[TABLE]";
                            try:
                                reset_statement = f"""SELECT setval(pg_get_serial_sequence('"{row['table_name']}"','{column_name}'),
                                    coalesce(max("{column_name}"), 1), max("{column_name}") IS NOT null) FROM "{row['table_name']}" """
                                cursor.execute(reset_statement)
                                return_values = dictfetchall(cursor)
                                # will be 1 row
                                for value in return_values:
                                    print(f"Sequence reset to {value['setval']} for {row['table_name']}")
                            except Exception as ex:
                                # will only fail if PK is not an integer...
                                # currently in my system this is from django.contrib.sessions
                                not_reset_tables.append(f"{row['table_name']} not reset")
            except psycopg2.Error as ex:
                raise SystemExit(f'Error: {ex}')

            conn.close()

            print('-' * 5, ' ALL ERRORS ', '-' * 5)
            for item_statement in not_reset_tables:
                # shows which tables produced errors, so far I have only
                # seen this with PK's that are not integers because of the MAX() method
                print(item_statement)
            # just a visual representation of where we are
            print('-' * 20, db_name)
You need to truncate the table.
See http://www.postgresql.org/docs/8.1/static/sql-truncate.html
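For example, a minimal sketch using Django's cursor, assuming a (hypothetical) table named myapp_game; on PostgreSQL 8.4+ RESTART IDENTITY also resets the table's sequences in the same statement:

from django.db import connection

def wipe_game_table():
    # TRUNCATE removes all rows; RESTART IDENTITY resets the table's sequences
    # (e.g. the auto-increment primary key) back to their start value.
    # CASCADE also truncates tables that reference it via foreign keys.
    with connection.cursor() as cursor:
        cursor.execute('TRUNCATE TABLE "myapp_game" RESTART IDENTITY CASCADE;')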