Skip to content
This repository was archived by the owner on Mar 13, 2020. It is now read-only.

Commit cd2046a

Browse files
authored
Merge pull request #1 from PageUpPeopleOrg/OSC-907-CI
Relational data loader integration tests
2 parents 92b6f01 + bc5cc78 commit cd2046a

26 files changed

+995
-8
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,13 @@ __pycache__/
1111
# Distribution / packaging
1212
.Python
1313
env/
14+
lib/
1415
build/
1516
develop-eggs/
1617
dist/
1718
downloads/
1819
eggs/
1920
.eggs/
20-
lib/
2121
lib64/
2222
parts/
2323
sdist/

README.md

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,39 @@
11
# RelationalDataLoader
2-
A utility for taking data from MS-SQL and loading it into PostgeSQL
2+
## About
3+
A utility for taking data from MS-SQL and loading it into PostgreSQL
4+
5+
6+
## Usage
7+
Execute `py rdl.py SOURCE DESTINATION CONFIGURATION-FOLDER [--log-level LEVEL] [--full-refresh yes|no]`
8+
9+
Where `SOURCE` takes the following formats
10+
**CSV:** `csv://.\test_data\full_refresh`
11+
**MSSQL:** `mssql+pyodbc://dwsource`
12+
13+
In the above example, `dwsource` is a 64-bit ODBC system DSN.
14+
15+
16+
`DESTINATION` takes the following format
17+
**PostgreSQL:** `postgresql+psycopg2://postgres:xxxx@localhost/dest_dw`
18+
19+
20+
### Examples
21+
#### CSV Source
22+
23+
`py rdl.py csv://.\test_data\full_refresh postgresql+psycopg2://postgres:xxxx@localhost/dest_dw .\configuration\ --log-level INFO --full-refresh yes`
24+
`py rdl.py csv://.\test_data\incremental_refresh postgresql+psycopg2://postgres:xxxx@localhost/dest_dw .\configuration\ --log-level INFO --full-refresh no`
25+
26+
27+
#### MSSQL Source
28+
29+
30+
31+
### Troubleshooting
32+
Run with `--log-level DEBUG` on the command line.
33+
34+
35+
## Other Notes
36+
### Testing
37+
The test batch files assume there is a user by the name of `postgres` on the system.
38+
They also send through a nonsense password - it is assumed that the target system is running in 'trust' mode.
39+
See https://www.postgresql.org/docs/9.1/static/auth-pg-hba-conf.html for details on trust mode

appveyor.yml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
version: 1.0.{build}
2+
build: off
3+
4+
environment:
5+
PGUSER: "postgres"
6+
PGPASSWORD: "Password12!"
7+
PGPORT: 5432
8+
PGHOST: "localhost"
9+
DBNAME: relational_data_loader_integration_tests
10+
matrix:
11+
- PYTHON: "C:\\Python36-x64" #This needs to be a double backslash (escaped inside the double-quoted YAML string)
12+
PYTHON_VERSION: "3.6.x"
13+
PYTHON_ARCH: "64"
14+
15+
services:
16+
- postgresql101
17+
- mssql2016
18+
19+
init:
20+
- set PATH=C:\program Files\PostgreSQL\10\bin\;%PATH%
21+
- ps: Set-Content "c:\program files\postgresql\10\data\pg_hba.conf" "host all all ::1/128 trust"
22+
- ps: Add-Content "c:\program files\postgresql\10\data\pg_hba.conf" "host all all 127.0.0.1/32 trust"
23+
#Enable this line to enable RDP for the build.
24+
#- ps: iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
25+
26+
install:
27+
28+
#Setup Python
29+
- SET PATH=%PYTHON%;%PYTHON%\Scripts;%PATH% #Swap path to python to latest version (as per matrix above)
30+
- python --version
31+
- python -m venv c:\projects\relational-data-loader\venv\
32+
33+
build_script:
34+
#Setup the source MSSQL database
35+
- sqlcmd -b -E -S "(local)\SQL2016" -i .\integration_tests\mssql_source\source_database_setup\create_database.sql
36+
- sqlcmd -b -E -f 65001 -S "(local)\SQL2016" -d RelationalDataLoaderIntegrationTestSource -i .\integration_tests\mssql_source\source_database_setup\create_large_table.sql
37+
38+
#Setup the target PostgreSQL database
39+
- psql -c "SELECT VERSION()"
40+
- createdb %DBNAME%
41+
- psql -d %DBNAME% -c "CREATE EXTENSION IF NOT EXISTS citext"
42+
43+
#Install the dependencies for rdl.
44+
- pip install -r requirements.txt
45+
46+
47+
test_script:
48+
- test_full_refresh_from_csv.cmd
49+
- test_incremental_refresh_from_csv.cmd
50+
- test_full_refresh_from_mssql.cmd
51+
52+
on_finish:
53+
#Enable this line to make the build pause after completion for RDP troubleshooting.
54+
#- ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
SET client_encoding TO 'UTF8';
2+
3+
DROP TABLE IF EXISTS results;
4+
5+
CREATE TEMPORARY TABLE results AS
6+
WITH expected(id, int_column_1, date_column_1, decimal_column_1, date_time_column_1, string_column_1) AS (
7+
SELECT 1, 111.0, '1976-12-01'::DATE, 12.1212, '1976-12-01 01:00:00.000000'::TIMESTAMP, 'A Basic String'
8+
UNION ALL
9+
SELECT 2, NULL, NULL, NULL, NULL, NULL
10+
UNION ALL
11+
SELECT 3, 333.0, '2001-01-01', 33.333, NULL, 'This Text Has a Quote Before "Dave'
12+
UNION ALL
13+
SELECT 4, NULL, NULL, NULL, NULL, 'ം ഃ അ ആ ഇ ഈ ഉ ഊ ഋ ഌ എ ഏ'
14+
UNION ALL
15+
SELECT 5, NULL, NULL, NULL, NULL, 'This row will be updated in the incremental review test'
16+
),
17+
18+
actual AS (
19+
SELECT id, int_column_1, date_column_1, decimal_column_1, date_time_column_1, string_column_1
20+
FROM rdl_integration_tests.load_source_data
21+
)
22+
23+
SELECT * FROM expected
24+
EXCEPT
25+
SELECT * FROM actual;
26+
27+
DO $$
28+
BEGIN
29+
PERFORM * FROM results;
30+
IF FOUND THEN RAISE EXCEPTION '[FULL REFRESH TEST] FAIL: The actual data did not match the expected data for the CSV refresh';
31+
ELSE
32+
RAISE NOTICE '[FULL REFRESH TEST] PASS';
33+
END IF;
34+
END $$;
35+
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
SET client_encoding TO 'UTF8';
2+
DROP TABLE IF EXISTS results;
3+
4+
CREATE TEMPORARY TABLE results AS
5+
WITH expected(id, int_column_1, date_column_1, decimal_column_1, date_time_column_1, string_column_1) AS (
6+
SELECT 1, 111.0, '1976-12-01'::DATE, 12.1212, '1976-12-01 01:00:00.000000'::TIMESTAMP, 'A Basic String'
7+
UNION ALL
8+
SELECT 2, NULL, NULL, NULL, NULL, NULL
9+
UNION ALL
10+
SELECT 3, 333.0, '2001-01-01', 33.333, NULL, 'This Text Has a Quote Before "Dave'
11+
UNION ALL
12+
SELECT 4, NULL, NULL, NULL, NULL, 'ം ഃ അ ആ ഇ ഈ ഉ ഊ ഋ ഌ എ ഏ'
13+
UNION ALL
14+
SELECT 5, NULL, NULL, NULL, NULL, 'This row WAS updated in the incremental review test'
15+
UNION ALL
16+
SELECT 6, 111.0, '1976-12-01'::DATE, 12.1212, '1976-12-01 01:00:00.000000'::TIMESTAMP, 'A Basic String'
17+
UNION ALL
18+
SELECT 7, 111.0, '1976-12-01'::DATE, 12.1212, '1976-12-01 01:00:00.000000'::TIMESTAMP, 'Another Basic String'
19+
),
20+
21+
actual AS (
22+
SELECT id, int_column_1, date_column_1, decimal_column_1, date_time_column_1, string_column_1
23+
FROM rdl_integration_tests.load_source_data
24+
)
25+
26+
SELECT * FROM expected
27+
EXCEPT
28+
SELECT * FROM actual;
29+
30+
DO $$
31+
BEGIN
32+
PERFORM * FROM results;
33+
IF FOUND THEN RAISE EXCEPTION '[INCREMENTAL REFRESH TEST] FAIL: The actual data did not match the expected data for the CSV refresh';
34+
ELSE
35+
RAISE NOTICE '[INCREMENTAL REFRESH TEST] PASS';
36+
END IF;
37+
END $$;
38+

configuration/ColumnTest.json renamed to integration_tests/csv_source/config/ColumnTest.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
"source_name": "IntColumn1",
2727
"destination": {
2828
"name": "int_column_1",
29-
"type": "citext.CIText",
29+
"type": "sqlalchemy.Numeric",
3030
"nullable": true
3131
}
3232
},

test_data/full_refresh/ColumnTest.csv renamed to integration_tests/csv_source/full_refresh_data/ColumnTest.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ id,StringColumn1,IntColumn1,DecimalColumn1,DateColumn1,DateTimeColumn1
22
1,"A Basic String",111,12.1212,01-Dec-1976,01-dec-1976 1:00 am
33
2,,,,,
44
3,"This Text Has a Quote Before ""Dave", 333,33.333, 01-01-01,
5-
4,"ം ഃ അ ആ ഇ ഈ ഉ ഊ ഋ ഌ എ ഏ ",,,,
5+
4,"ം ഃ അ ആ ഇ ഈ ഉ ഊ ഋ ഌ എ ഏ",,,,
66
5,"This row will be updated in the incremental review test"

test_data/incremental_refresh/ColumnTest.csv renamed to integration_tests/csv_source/incremental_refresh_data/ColumnTest.csv

File renamed without changes.
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
SET client_encoding TO 'UTF8';
2+
3+
4+
DO $$
5+
BEGIN
6+
IF (SELECT COUNT(*) FROM rdl_integration_tests.load_large_data ) = 1000000 THEN
7+
RAISE NOTICE '[LARGE MSSQL IMPORT TEST] PASS';
8+
ELSE
9+
RAISE EXCEPTION '[LARGE MSSQL IMPORT TEST] FAIL: Did not find the required 1,000,000 rows.';
10+
END IF;
11+
END $$;
12+
13+
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
{
2+
3+
"source_table": {
4+
"name": "LargeTable",
5+
"schema": "dbo",
6+
"primary_key": "Id"
7+
},
8+
"target_schema": "rdl_integration_tests",
9+
"stage_table": "stage_large_data",
10+
"load_table": "load_large_data",
11+
12+
"batch": {
13+
"size": 100000
14+
},
15+
"columns": [
16+
{
17+
"source_name": "Id",
18+
"destination": {
19+
"name": "id",
20+
"type": "sqlalchemy.Integer",
21+
"nullable": false,
22+
"primary_key": true
23+
}
24+
},
25+
{
26+
"source_name": "DateColumn1",
27+
"destination": {
28+
"name": "date_column_1",
29+
"type": "sqlalchemy.DateTime",
30+
"nullable": true
31+
}
32+
},
33+
{
34+
"source_name": "IntColumn1",
35+
"destination": {
36+
"name": "int_column_1",
37+
"type": "sqlalchemy.Numeric",
38+
"nullable": true
39+
}
40+
},
41+
{
42+
"source_name": "DateColumn2",
43+
"destination": {
44+
"name": "date_column_2",
45+
"type": "sqlalchemy.DateTime",
46+
"nullable": true
47+
}
48+
},
49+
{
50+
"source_name": "StringColumn1",
51+
"destination": {
52+
"name": "string_column_1",
53+
"type": "citext.CIText",
54+
"nullable": true
55+
}
56+
},
57+
{
58+
"source_name": "StringColumn2",
59+
"destination": {
60+
"name": "string_column_2",
61+
"type": "citext.CIText",
62+
"nullable": true
63+
}
64+
},
65+
{
66+
"source_name": "GuidColumn",
67+
"destination": {
68+
"name": "guid_column",
69+
"type": "citext.CIText",
70+
"nullable": true
71+
}
72+
}
73+
74+
]
75+
}

0 commit comments

Comments
 (0)