Skip to content
This repository was archived by the owner on Mar 13, 2020. It is now read-only.

Commit 402c6db

Browse files
authored
[OSC-1237] add schema revision tool alembic (#30)
* add schema revision tool alembic * update integration and unit tests to use alembic * separate version table schemas * update readme * move all tests into single directory
1 parent 4f63e7d commit 402c6db

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+380
-51
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
.vscode/
44

55
# config files
6-
rdl/tests/config/connection.json
6+
**/unit_tests/config/connection.json
77

88
# Byte-compiled / optimized / DLL files
99
__pycache__/

appveyor.yml

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,18 @@ install:
3232

3333
build_script:
3434
#Setup the source MSSQL database
35-
- sqlcmd -b -E -S "(local)\SQL2016" -i .\integration_tests\mssql_source\source_database_setup\create_database.sql
36-
- sqlcmd -b -E -f 65001 -S "(local)\SQL2016" -d RelationalDataLoaderIntegrationTestSource -i .\integration_tests\mssql_source\source_database_setup\create_large_table.sql
37-
- sqlcmd -b -E -f 65001 -S "(local)\SQL2016" -d RelationalDataLoaderIntegrationTestSource -i .\integration_tests\mssql_source\source_database_setup\create_compound_pk.sql
35+
- sqlcmd -b -E -S "(local)\SQL2016" -i .\tests\integration_tests\mssql_source\source_database_setup\create_database.sql
36+
- sqlcmd -b -E -f 65001 -S "(local)\SQL2016" -d RelationalDataLoaderIntegrationTestSource -i .\tests\integration_tests\mssql_source\source_database_setup\create_large_table.sql
37+
- sqlcmd -b -E -f 65001 -S "(local)\SQL2016" -d RelationalDataLoaderIntegrationTestSource -i .\tests\integration_tests\mssql_source\source_database_setup\create_compound_pk.sql
3838

3939
#Setup the target PostgreSQL database
4040
- psql -c "SELECT VERSION()"
4141
- createdb %DBNAME%
4242
- psql -d %DBNAME% -c "CREATE EXTENSION IF NOT EXISTS citext"
4343
- C:\projects\relational-data-loader\venv\Scripts\activate.bat
4444
#Install the dependencies for rdl.
45-
- pip install -r requirements.txt
45+
- pip install .
46+
- alembic -c rdl/alembic.ini -x postgresql+psycopg2://postgres:there_is_no_password_due_to_pg_trust@localhost/relational_data_loader_integration_tests upgrade head
4647

4748
test_script:
4849
# unit tests
@@ -58,11 +59,11 @@ test_script:
5859
- test_full_refresh_from_mssql.cmd
5960
- test_incremental_refresh_from_mssql.cmd
6061

61-
- sqlcmd -b -E -f 65001 -S "(local)\SQL2016" -d RelationalDataLoaderIntegrationTestSource -i .\integration_tests\mssql_source\source_database_setup\change_compound_pk.sql
62+
- sqlcmd -b -E -f 65001 -S "(local)\SQL2016" -d RelationalDataLoaderIntegrationTestSource -i .\tests\integration_tests\mssql_source\source_database_setup\change_compound_pk.sql
6263

6364
- test_incremental_refresh_from_mssql.cmd
6465

65-
- sqlcmd -b -E -f 65001 -S "(local)\SQL2016" -d RelationalDataLoaderIntegrationTestSource -i .\integration_tests\mssql_source\source_database_setup\change_large_table.sql
66+
- sqlcmd -b -E -f 65001 -S "(local)\SQL2016" -d RelationalDataLoaderIntegrationTestSource -i .\tests\integration_tests\mssql_source\source_database_setup\change_large_table.sql
6667

6768
- test_full_refresh_from_mssql.cmd
6869
- test_audit.cmd

rdl/RelationalDataLoader.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ def execute_process_command(self):
3434
destination_db = create_engine(self.args.destination_connection_string)
3535
session_maker = sessionmaker(bind=destination_db)
3636
repository = DataLoadTrackerRepository(session_maker)
37-
repository.ensure_schema_exists(destination_db)
3837

3938
data_load_manager = DataLoadManager(self.args.configuration_folder, source_db, destination_db, repository)
4039
data_load_manager.start_imports(self.args.force_full_refresh_models)

rdl/alembic.ini

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# A generic, single database configuration.
2+
3+
[alembic]
4+
# path to migration scripts
5+
script_location = %(here)s/alembic
6+
7+
# template used to generate migration files
8+
# file_template = %%(rev)s_%%(slug)s
9+
10+
# timezone to use when rendering the date
11+
# within the migration file as well as the filename.
12+
# string value is passed to dateutil.tz.gettz()
13+
# leave blank for localtime
14+
# timezone =
15+
16+
# max length of characters to apply to the
17+
# "slug" field
18+
#truncate_slug_length = 40
19+
20+
# set to 'true' to run the environment during
21+
# the 'revision' command, regardless of autogenerate
22+
# revision_environment = false
23+
24+
# set to 'true' to allow .pyc and .pyo files without
25+
# a source .py file to be detected as revisions in the
26+
# versions/ directory
27+
# sourceless = false
28+
29+
# version location specification; this defaults
30+
# to alembic/versions. When using multiple version
31+
# directories, initial revisions must be specified with --version-path
32+
version_locations = %(here)s/alembic/versions
33+
34+
# the output encoding used when revision files
35+
# are written from script.py.mako
36+
# output_encoding = utf-8
37+
38+
# Logging configuration
39+
[loggers]
40+
keys = root,sqlalchemy,alembic
41+
42+
[handlers]
43+
keys = console
44+
45+
[formatters]
46+
keys = generic
47+
48+
[logger_root]
49+
level = WARN
50+
handlers = console
51+
qualname =
52+
53+
[logger_sqlalchemy]
54+
level = WARN
55+
handlers =
56+
qualname = sqlalchemy.engine
57+
58+
[logger_alembic]
59+
level = INFO
60+
handlers =
61+
qualname = alembic
62+
63+
[handler_console]
64+
class = StreamHandler
65+
args = (sys.stderr,)
66+
level = NOTSET
67+
formatter = generic
68+
69+
[formatter_generic]
70+
format = %(levelname)-5.5s [%(name)s] %(message)s
71+
datefmt = %H:%M:%S

rdl/alembic/README.md

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# Alembic
2+
3+
## Usage
4+
5+
### To upgrade to the latest schema
6+
7+
```bash
8+
alembic -c rdl/alembic.ini -x $DESTINATION_DB_URL upgrade head
9+
```
10+
11+
### Updating the schema
12+
13+
Ensure any new tables inherit from the same Base used in `alembic/env.py`
14+
15+
```python
16+
from rdl.data_load_tracking.DataLoadExecution import Base
17+
```
18+
19+
Whenever you make a schema change, run
20+
21+
```bash
22+
pip install .
23+
alembic -c rdl/alembic.ini -x $DESTINATION_DB_URL revision -m "$REVISION_MESSAGE" --autogenerate
24+
```
25+
26+
Then check that the newly generated revision file in `alembic/versions` is correct.
27+
28+
### Downgrading the schema
29+
30+
Whenever you want to downgrade the schema
31+
32+
```bash
33+
alembic -c rdl/alembic.ini -x $DESTINATION_DB_URL history # see the list of revision ids
34+
alembic -c rdl/alembic.ini -x $DESTINATION_DB_URL current # see the current revision id
35+
alembic -c rdl/alembic.ini -x $DESTINATION_DB_URL downgrade -1 # revert back one revision
36+
alembic -c rdl/alembic.ini -x $DESTINATION_DB_URL downgrade $revision_id # revert back to a revision id, found using the history command
37+
```
38+
39+
## Troubleshooting
40+
41+
### Inaccurate autogenerated revisions
42+
43+
Does your autogenerated revision not look right?
44+
45+
Try editing the function `use_schema` in `alembic/env.py`; it is passed to alembic as `include_object` and determines which database objects alembic considers.
46+
47+
[Relevant Documentation](https://alembic.sqlalchemy.org/en/latest/api/runtime.html?highlight=include_schemas#alembic.runtime.environment.EnvironmentContext.configure.params.include_object)
48+
49+
### New models aren't showing up in upgrade section
50+
51+
Ensure all model classes inherit from the same Base that `alembic/env.py` imports, and that the following class
52+
properties are set
53+
54+
```python
55+
__tablename__ = 'your_mapped_table_name'
56+
__table_args__ = {'schema': Constants.DATA_PIPELINE_EXECUTION_SCHEMA_NAME}
57+
```
58+
59+
Also try importing the models into `alembic/env.py`, e.g.:
60+
61+
```python
62+
from rdl.data_load_tracking import DataLoadExecution
63+
```
64+
65+
### Alembic won't pick up my change
66+
67+
[Alembic only supports some changes](https://alembic.sqlalchemy.org/en/latest/autogenerate.html#what-does-autogenerate-detect-and-what-does-it-not-detect)
68+
69+
Try adding raw SQL to the `upgrade()` and `downgrade()` functions of your revision:
70+
71+
```python
72+
op.execute(RAW_SQL)
73+
```

rdl/alembic/env.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
from __future__ import with_statement
2+
3+
from logging.config import fileConfig
4+
5+
from sqlalchemy import engine_from_config, create_engine
6+
from sqlalchemy import pool
7+
8+
from alembic import context
9+
from rdl.data_load_tracking.DataLoadExecution import Base
10+
from rdl.shared import Constants
11+
12+
# Alembic Config object; gives access to the values in the .ini file in use.
config = context.config

# Set up Python logging from that same config file.
fileConfig(config.config_file_name)

# MetaData of the project's models, used for 'autogenerate' support.
target_metadata = Base.metadata

# The destination database URL is taken from the first `-x` command-line argument;
# without it there is nothing to migrate against, so fail with a usage example.
x_arguments = context.get_x_argument()
if not x_arguments:
    raise AttributeError(
        "example usage `alembic -c rdl/alembic.ini -x postgresql+psycopg2://postgres:postgres@localhost/postgres downgrade -1`")

url = x_arguments[0]
35+
36+
37+
def use_schema(object, name, type_, reflected, compare_to):
    """Tell alembic whether a database object should be considered.

    Passed to ``context.configure`` as ``include_object``.

    Returns ``False`` for:

    * tables whose schema is not ``Constants.DATA_PIPELINE_EXECUTION_SCHEMA_NAME``;
    * tables named ``alembic_version``;
    * non-reflected columns whose ``info`` dict has a truthy ``skip_autogenerate``.

    Returns ``True`` for everything else.
    """
    if type_ == 'table':
        in_tracked_schema = object.schema == Constants.DATA_PIPELINE_EXECUTION_SCHEMA_NAME
        return in_tracked_schema and name != 'alembic_version'

    if type_ == "column" and not reflected:
        return not object.info.get("skip_autogenerate", False)

    return True
47+
48+
49+
def run_migrations_offline():
    """Run migrations in 'offline' mode.

    The context is configured with only the database URL, so no Engine is
    created here. The version table is named after the data pipeline
    execution schema.
    """
    version_table = f'alembic_version_{Constants.DATA_PIPELINE_EXECUTION_SCHEMA_NAME}'
    offline_options = dict(
        url=url,
        target_metadata=target_metadata,
        literal_binds=True,
        include_schemas=True,
        include_object=use_schema,
        version_table=version_table,
    )
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
68+
69+
70+
def run_migrations_online():
    """Run migrations in 'online' mode.

    Creates an Engine for the URL given on the command line (with
    ``NullPool``), opens a connection, binds it to the alembic context and
    runs the migrations inside a transaction.
    """
    engine = create_engine(url, poolclass=pool.NullPool)

    with engine.connect() as connection:
        version_table = f'alembic_version_{Constants.DATA_PIPELINE_EXECUTION_SCHEMA_NAME}'
        context.configure(
            connection=connection,
            target_metadata=target_metadata,
            include_schemas=True,
            include_object=use_schema,
            version_table=version_table,
        )

        with context.begin_transaction():
            context.run_migrations()
90+
91+
92+
# Pick the runner that matches how alembic was invoked, then run it.
run_migrations = run_migrations_offline if context.is_offline_mode() else run_migrations_online
run_migrations()

rdl/alembic/script.py.mako

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
"""${message}
2+
3+
Revision ID: ${up_revision}
4+
Revises: ${down_revision | comma,n}
5+
Create Date: ${create_date}
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
${imports if imports else ""}
11+
12+
# revision identifiers, used by Alembic.
13+
revision = ${repr(up_revision)}
14+
down_revision = ${repr(down_revision)}
15+
branch_labels = ${repr(branch_labels)}
16+
depends_on = ${repr(depends_on)}
17+
18+
19+
def upgrade():
20+
${upgrades if upgrades else "pass"}
21+
22+
23+
def downgrade():
24+
${downgrades if downgrades else "pass"}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
"""add failure_reason column to data_load_execution
2+
3+
Revision ID: 0d4a3ce9c0a9
4+
Revises: 710e28aa5978
5+
Create Date: 2019-04-03 12:15:16.898526
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
11+
12+
# revision identifiers, used by Alembic.
13+
revision = '0d4a3ce9c0a9'
14+
down_revision = '710e28aa5978'
15+
branch_labels = None
16+
depends_on = None
17+
18+
19+
def upgrade():
    """Add the nullable ``failure_reason`` column to ``rdl.data_load_execution``."""
    failure_reason = sa.Column('failure_reason', sa.String(length=1000), nullable=True)
    op.add_column('data_load_execution', failure_reason, schema='rdl')
23+
24+
25+
def downgrade():
    """Remove the ``failure_reason`` column from ``rdl.data_load_execution``."""
    op.drop_column(table_name='data_load_execution', column_name='failure_reason', schema='rdl')
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
"""add data_load_execution table
2+
3+
Revision ID: 710e28aa5978
4+
Revises:
5+
Create Date: 2019-04-03 11:52:21.994634
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
from sqlalchemy.dialects import postgresql
11+
12+
# revision identifiers, used by Alembic.
13+
revision = '710e28aa5978'
14+
down_revision = None
15+
branch_labels = None
16+
depends_on = None
17+
18+
19+
def upgrade():
    """Create the ``rdl`` schema if it is missing, then the ``rdl.data_load_execution`` table."""
    op.execute('CREATE SCHEMA IF NOT EXISTS rdl')

    columns = [
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('correlation_id', postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column('model_name', sa.String(length=250), nullable=False),
        sa.Column('status', sa.String(length=25), nullable=False),
        sa.Column('last_sync_version', sa.BigInteger(), nullable=False),
        sa.Column('sync_version', sa.BigInteger(), nullable=False),
        sa.Column('is_full_refresh', sa.Boolean(), nullable=False),
        sa.Column('full_refresh_reason', sa.String(length=100), nullable=False),
        # completed_on defaults to the database server's now() when not supplied.
        sa.Column('completed_on', sa.DateTime(timezone=True),
                  server_default=sa.text('now()'), nullable=True),
        sa.Column('execution_time_ms', sa.Integer(), nullable=False),
        sa.Column('rows_processed', sa.Integer(), nullable=False),
        sa.Column('model_checksum', sa.String(length=100), nullable=False),
    ]

    op.create_table(
        'data_load_execution',
        *columns,
        sa.PrimaryKeyConstraint('id'),
        schema='rdl',
    )
40+
41+
42+
def downgrade():
    """Drop the ``rdl.data_load_execution`` table; the ``rdl`` schema itself is not dropped."""
    op.drop_table(table_name='data_load_execution', schema='rdl')

0 commit comments

Comments
 (0)