Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ Check `pgdatadiff --help`

Docker images are available.

`docker run -it davidjmarkey/pgdatadiff:0.2.1 /usr/bin/pgdatadiff`
```
docker run -it davidjmarkey/pgdatadiff:0.2.1 /usr/bin/pgdatadiff --help
```


6 changes: 4 additions & 2 deletions pgdatadiff/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""
Usage:
pgdatadiff --firstdb=<firstconnectionstring> --seconddb=<secondconnectionstring> [--schema=<schema>] [--only-data|--only-sequences] [--count-only] [--count-with-max] [--chunk-size=<size>] [--exclude-tables=<table1,table2>]
pgdatadiff --firstdb=<firstconnectionstring> --seconddb=<secondconnectionstring> [--schema=<schema>] [--only-data|--only-sequences] [--count-only] [--count-with-max] [--chunk-size=<size>] [--exclude-tables=<table1,table2>] [--include-tables=<table1,table2>]
pgdatadiff --version

Options:
Expand All @@ -11,7 +11,8 @@
--schema="public" The schema of tables in comparison
--only-data Only compare data, exclude sequences
--only-sequences Only compare sequences, exclude data
--exclude-tables="" Exclude tables from data comparison Must be a comma separated string [default: empty string]
--exclude-tables="" Exclude tables from data comparison. Must be a comma-separated string
--include-tables="" Only include these tables in data comparison. Must be a comma-separated string
--count-only Do a quick test based on counts alone
--chunk-size=10000 The chunk size when comparing data [default: 10000]
--count-with-max Use MAX(id) when a table uses a sequence, otherwise use COUNT.
Expand Down Expand Up @@ -39,6 +40,7 @@ def main():
count_only=arguments['--count-only'],
count_with_max=arguments['--count-with-max'],
exclude_tables=arguments['--exclude-tables'],
include_tables=arguments['--include-tables'],
schema=arguments['--schema'])

if not arguments['--only-sequences']:
Expand Down
35 changes: 25 additions & 10 deletions pgdatadiff/pgdatadiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def make_session(connection_string):

class DBDiff(object):

def __init__(self, firstdb, seconddb, schema, chunk_size=10000, count_only=False, count_with_max=False, exclude_tables=""):
def __init__(self, firstdb, seconddb, schema, chunk_size=10000, count_only=False, count_with_max=False, progress=True, exclude_tables="", include_tables=""):
firstsession, firstengine = make_session(firstdb)
secondsession, secondengine = make_session(seconddb)
self.firstsession = firstsession
Expand All @@ -33,7 +33,15 @@ def __init__(self, firstdb, seconddb, schema, chunk_size=10000, count_only=False
self.chunk_size = int(chunk_size)
self.count_only = count_only
self.count_with_max = count_with_max
self.exclude_tables = exclude_tables.split(',')
self.progress = progress
if exclude_tables is None:
self.exclude_tables = []
else:
self.exclude_tables = exclude_tables.split(',')
if include_tables is None:
self.include_tables = []
else:
self.include_tables = (include_tables or "").split(',')
self.schema_names = self.firstinspector.get_schema_names()
self.schema = schema or 'public'
if self.schema not in self.schema_names:
Expand All @@ -43,11 +51,9 @@ def __init__(self, firstdb, seconddb, schema, chunk_size=10000, count_only=False
def diff_table_data(self, tablename):
try:
firsttable = Table(tablename, self.firstmeta, autoload=True)
firstquery = self.firstsession.query(
firsttable)
firstquery = self.firstsession.query(firsttable)
secondtable = Table(tablename, self.secondmeta, autoload=True)
secondquery = self.secondsession.query(
secondtable)
secondquery = self.secondsession.query(secondtable)
if self.count_with_max is True:
column = self.column_using_sequence(tablename)
pk_columns = self.firstinspector.get_pk_constraint(tablename)['constrained_columns']
Expand Down Expand Up @@ -90,7 +96,7 @@ def diff_table_data(self, tablename):

position = 0

while position <= firstquery.count():
while position <= first_count:
firstresult = self.firstsession.execute(
SQL_TEMPLATE_HASH,
{"row_limit": self.chunk_size,
Expand All @@ -103,8 +109,15 @@ def diff_table_data(self, tablename):
return False, f"data is different - for rows from {position} - to" \
f" {position + self.chunk_size}"
position += self.chunk_size
self.display_progress(position, first_count)
return True, "data is identical."

def display_progress(self, position, first_count):
    """Print how far the chunked comparison has advanced, as a percentage.

    Args:
        position: Row offset reached so far; values beyond ``first_count``
            are clamped so the report never exceeds 100%.
        first_count: Total number of rows being compared.

    Nothing is printed when the table fits in a single chunk
    (``first_count`` is not greater than ``self.chunk_size``) or when
    ``self.progress`` is not exactly ``True``.
    """
    # Clamp first: the final chunk usually overshoots the row count.
    done = min(position, first_count)
    if not (first_count > self.chunk_size and self.progress is True):
        return
    percent = done / first_count * 100
    print(f' Progress: {percent:2.1f}%')

def column_using_sequence(self, tablename):
GET_COLUMN_OF_TABLES_WITH_SEQUENCES = f"""SELECT
attrib.attname AS column_name
Expand Down Expand Up @@ -177,14 +190,16 @@ def diff_all_table_data(self):
print(bold(red('Starting table analysis.')))
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=sa_exc.SAWarning)
tables = sorted(
self.firstinspector.get_table_names(schema=self.schema))
tables = sorted(self.firstinspector.get_table_names(schema=self.schema))
if len(self.include_tables) > 0:
# Keep only the tables that are also listed in include_tables
tables = [value for value in tables if value in self.include_tables]
if len(tables) == 0:
print(bold(red(f'No tables found in schema: {self.schema}')))
return 0
for table in tables:
if table in self.exclude_tables:
print(bold(yellow(f"Ignoring table {table}")))
print(bold(yellow(f"Ignoring table {table} (excluded)")))
continue
with Halo(
text=f"Analysing table {table}. "
Expand Down