diff --git a/docs/changes/DM-49446.feature.rst b/docs/changes/DM-49446.feature.rst
new file mode 100644
index 00000000..43f392e1
--- /dev/null
+++ b/docs/changes/DM-49446.feature.rst
@@ -0,0 +1,6 @@
+The ``diff`` module was completely overhauled based on best practices for using the ``deepdiff`` library.
+The formatted differences are now displayed in a JSON format, which is more readable and structured than the previous printouts.
+A number of bugs and issues with the comparison logic and the display of the diffs were also fixed, improving the accuracy of the schema diffs.
+The error handling in the database diff class was improved so that exception messages are more informative and helpful.
+A command line option has been added for writing the diff output to a file.
+Finally, an additional command line option allows restricting the comparison to specified tables in the two schemas being compared.
diff --git a/docs/documenteer.toml b/docs/documenteer.toml
index ccccf045..1970f07f 100644
--- a/docs/documenteer.toml
+++ b/docs/documenteer.toml
@@ -34,3 +34,4 @@ python_api_dir = "dev/internals"
python = "https://docs.python.org/3"
sqlalchemy = "https://docs.sqlalchemy.org/en/latest"
lsst = "https://pipelines.lsst.io/v/weekly"
+click = "https://click.palletsprojects.com"
diff --git a/python/felis/__init__.py b/python/felis/__init__.py
index bc196a61..2849b5ff 100644
--- a/python/felis/__init__.py
+++ b/python/felis/__init__.py
@@ -19,9 +19,21 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
-from .datamodel import Schema
+from .datamodel import (
+ CheckConstraint,
+ Column,
+ ColumnGroup,
+ Constraint,
+ DataType,
+ ForeignKeyConstraint,
+ Index,
+ Schema,
+ SchemaVersion,
+ Table,
+ UniqueConstraint,
+)
from .db.schema import create_database
-from .diff import DatabaseDiff, FormattedSchemaDiff, SchemaDiff
+from .diff import DatabaseDiff, FormattedSchemaDiff
from .metadata import MetaDataBuilder
from importlib.metadata import PackageNotFoundError, version
diff --git a/python/felis/cli.py b/python/felis/cli.py
index 001786f9..d7d2d679 100644
--- a/python/felis/cli.py
+++ b/python/felis/cli.py
@@ -431,6 +431,14 @@ def validate(
default="deepdiff",
)
@click.option("-E", "--error-on-change", is_flag=True, help="Exit with error code if schemas are different")
+@click.option("--table", "tables", multiple=True, help="Table names to filter on.")
+@click.option(
+ "--output-file",
+ "-o",
+ type=click.File(mode="w"),
+ help="Write diff output to a file instead of printing",
+ default=None,
+)
@click.argument("files", nargs=-1, type=click.File())
@click.pass_context
def diff(
@@ -438,6 +446,8 @@ def diff(
engine_url: str | None,
comparator: str,
error_on_change: bool,
+ tables: list[str],
+ output_file: IO[str] | None,
files: Iterable[IO[str]],
) -> None:
schemas = [
@@ -447,12 +457,16 @@ def diff(
diff: SchemaDiff
if len(schemas) == 2 and engine_url is None:
if comparator == "alembic":
+ if tables:
+ raise click.ClickException("Table filtering is not supported for Alembic comparator")
db_context = create_database(schemas[0])
assert isinstance(db_context.engine, Engine)
diff = DatabaseDiff(schemas[1], db_context.engine)
else:
diff = FormattedSchemaDiff(schemas[0], schemas[1])
elif len(schemas) == 1 and engine_url is not None:
+ if tables:
+ raise click.ClickException("Table filtering is not supported for database comparison")
engine = create_engine(engine_url)
diff = DatabaseDiff(schemas[0], engine)
else:
@@ -460,7 +474,7 @@ def diff(
"Invalid arguments - provide two schemas or a schema and a database engine URL"
)
- diff.print()
+ diff.print(output_file)
if diff.has_changes and error_on_change:
raise click.ClickException("Schema was changed")
diff --git a/python/felis/diff.py b/python/felis/diff.py
index 348d751f..a0107e32 100644
--- a/python/felis/diff.py
+++ b/python/felis/diff.py
@@ -21,21 +21,22 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import copy
+import json
import logging
-import pprint
-import re
-from collections.abc import Callable
-from typing import Any
+from typing import IO, Any
+import sqlalchemy
from alembic.autogenerate import compare_metadata
from alembic.migration import MigrationContext
from deepdiff.diff import DeepDiff
+from deepdiff.model import DiffLevel
from sqlalchemy import Engine, MetaData
from .datamodel import Schema
from .metadata import MetaDataBuilder
-__all__ = ["DatabaseDiff", "SchemaDiff"]
+__all__ = ["DatabaseDiff", "FormattedSchemaDiff", "SchemaDiff"]
logger = logging.getLogger(__name__)
@@ -43,28 +44,139 @@
logging.getLogger("alembic").setLevel(logging.WARNING)
+def _normalize_lists_by_name(obj: Any) -> Any:
+ """
+ Recursively normalize structures:
+ - Lists of dicts under specified keys become dicts keyed by 'name'.
+ - Lists of strings under specified keys become sorted lists.
+ - Everything else is recursively normalized (new containers are returned).
+
+ Parameters
+ ----------
+ obj
+ The object to normalize, which can be a list, dict, or any other type.
+ """
+ dict_like_keys = {"tables", "columns", "constraints", "indexes", "column_groups"}
+ set_like_keys = {"columns", "referencedColumns"}
+
+ if isinstance(obj, list):
+ return [_normalize_lists_by_name(item) for item in obj]
+
+ elif isinstance(obj, dict):
+ normalized: dict[str, Any] = {}
+
+ for k, v in obj.items():
+ if isinstance(v, list):
+ if k in dict_like_keys and all(isinstance(i, dict) and "name" in i for i in v):
+ logger.debug(f"Normalizing list of dicts under key '{k}' to dict keyed by 'name'")
+ normalized[k] = {i["name"]: _normalize_lists_by_name(i) for i in v}
+ elif k in set_like_keys and all(isinstance(i, str) for i in v):
+ logger.debug(f"Normalizing list of strings under key '{k}' to sorted list: {v}")
+ normalized[k] = sorted(v)
+ else:
+ normalized[k] = [_normalize_lists_by_name(i) for i in v]
+ else:
+ normalized[k] = _normalize_lists_by_name(v)
+
+ return normalized
+
+ else:
+ return obj
+
+
class SchemaDiff:
"""
Compare two schemas using DeepDiff and print the differences.
Parameters
----------
- schema1
- The first schema to compare.
- schema2
- The second schema to compare.
+ schema_old
+ The old schema to compare, typically the original schema.
+ schema_new
+ The new schema to compare, typically the modified schema.
+ table_filter
+ A list of table names to filter on.
+ strip_ids
+ Whether to strip '@id' fields from the schemas before comparison.
+
+ Notes
+ -----
+ This class uses DeepDiff to compare two schemas and provides methods to
+ retrieve the differences. It is designed to be extended for more structured
+ output, such as in `FormattedSchemaDiff` and would not typically be used
+ directly.
"""
- def __init__(self, schema1: Schema, schema2: Schema):
- self.dict1 = schema1.model_dump(exclude_none=True)
- self.dict2 = schema2.model_dump(exclude_none=True)
- self.diff = DeepDiff(self.dict1, self.dict2, ignore_order=True)
+ def __init__(
+ self,
+ schema_old: Schema,
+ schema_new: Schema,
+ table_filter: list[str] | None = None,
+ strip_ids: bool = True,
+ ):
+ self.schema_old = copy.deepcopy(schema_old)
+ self.schema_new = copy.deepcopy(schema_new)
+ if table_filter:
+ logger.debug(f"Filtering on tables: {table_filter}")
+ self.table_filter = table_filter or []
+ self.strip_ids = strip_ids
+ self._create_diff()
+
+ def _create_diff(self) -> dict[str, Any]:
+ if self.table_filter:
+ self.schema_old.tables = [
+ table for table in self.schema_old.tables if table.name in self.table_filter
+ ]
+ logger.debug(f"Filtered old schema tables: {[table.name for table in self.schema_old.tables]}")
+ self.schema_new.tables = [
+ table for table in self.schema_new.tables if table.name in self.table_filter
+ ]
+ logger.debug(f"Filtered new schema tables: {[table.name for table in self.schema_new.tables]}")
+ self.dict_old = _normalize_lists_by_name(self.schema_old._model_dump(strip_ids=self.strip_ids))
+ self.dict_new = _normalize_lists_by_name(self.schema_new._model_dump(strip_ids=self.strip_ids))
+ logger.debug(f"Normalized old dict:\n{json.dumps(self.dict_old, indent=2)}")
+ logger.debug(f"Normalized new dict:\n{json.dumps(self.dict_new, indent=2)}")
+ self._diff = DeepDiff(
+ self.dict_old,
+ self.dict_new,
+ ignore_order=True,
+ view="tree",
+ )
+ return self._diff
- def print(self) -> None:
+ @property
+ def diff(self) -> dict[str, Any]:
"""
- Print the differences between the two schemas.
+ Return the differences between the two schemas.
+
+ Returns
+ -------
+ dict
+ The differences between the two schemas.
"""
- pprint.pprint(self.diff)
+ return self._diff
+
+ def to_change_list(self) -> list[dict[str, Any]]:
+ """
+ Convert differences to a structured format.
+
+ Returns
+ -------
+ list[dict[str, Any]]
+ List of change dictionaries.
+ """
+ raise NotImplementedError("Subclasses must implement to_change_list()")
+
+ def print(self, output_stream: IO[str] | None = None) -> None:
+ """
+ Print the differences between the two schemas in raw format.
+
+ Parameters
+ ----------
+ output_stream
+ The output stream for printing the differences.
+ """
+ print(self.diff, file=output_stream)
@property
def has_changes(self) -> bool:
@@ -79,134 +191,173 @@ def has_changes(self) -> bool:
return len(self.diff) > 0
+class DiffHandler:
+ def collect(self, diff_items: list[DiffLevel]) -> list[dict[str, Any]]:
+ """Collect differences from the provided diff items.
+
+ Parameters
+ ----------
+ diff_items
+ The list of differences to collect.
+ """
+ raise NotImplementedError
+
+
+class ValuesChangedHandler(DiffHandler):
+ def collect(self, diff_items: list[DiffLevel]) -> list[dict[str, Any]]:
+ results = []
+ for diff in diff_items:
+ results.append(
+ {
+ "change_type": diff.report_type,
+ "path": diff.path(),
+ "old_value": diff.t1,
+ "new_value": diff.t2,
+ }
+ )
+ return results
+
+
+class IterableItemAddedHandler(DiffHandler):
+ def collect(self, diff_items: list[DiffLevel]) -> list[dict[str, Any]]:
+ results = []
+ for diff in diff_items:
+ results.append(
+ {
+ "change_type": diff.report_type,
+ "path": diff.path(),
+ "value": diff.t2,
+ }
+ )
+ return results
+
+
+class IterableItemRemovedHandler(DiffHandler):
+ def collect(self, diff_items: list[DiffLevel]) -> list[dict[str, Any]]:
+ results = []
+ for diff in diff_items:
+ results.append(
+ {
+ "change_type": diff.report_type,
+ "path": diff.path(),
+ "value": diff.t1,
+ }
+ )
+ return results
+
+
+class DictionaryItemAddedHandler(DiffHandler):
+ def collect(self, diff_items: list[DiffLevel]) -> list[dict[str, Any]]:
+ results = []
+ for diff in diff_items:
+ keys = diff.path(output_format="list")
+ added_key = keys[-1] if keys else None
+ results.append(
+ {
+ "change_type": diff.report_type,
+ "path": diff.path(),
+ "added_key": added_key,
+ "value": diff.t2,
+ }
+ )
+ return results
+
+
+class DictionaryItemRemovedHandler(DiffHandler):
+ def collect(self, diff_items: list[DiffLevel]) -> list[dict[str, Any]]:
+ results = []
+ for diff in diff_items:
+ keys = diff.path(output_format="list")
+ removed_key = keys[-1] if keys else None
+ results.append(
+ {
+ "change_type": diff.report_type,
+ "path": diff.path(),
+ "removed_key": removed_key,
+ "value": diff.t1,
+ }
+ )
+ return results
+
+
class FormattedSchemaDiff(SchemaDiff):
"""
- Compare two schemas using DeepDiff and print the differences using a
- customized output format.
+ Compare two schemas using DeepDiff and emit structured JSON differences.
Parameters
----------
- schema1
- The first schema to compare.
- schema2
- The second schema to compare.
+ schema_old
+ The old schema to compare, typically the original schema.
+ schema_new
+ The new schema to compare, typically the modified schema.
+ table_filter
+ A list of table names to filter on.
+
+ Notes
+ -----
+ This class extends `SchemaDiff` to provide a more structured output of
+ differences. It formats the differences into a list of dictionaries, each
+ representing a change with details such as change type, path, and values
+ involved.
+
+ Output dictionaries representing the changes are formatted as follows::
+
+ {
+ "change_type": str,
+ "id": str,
+ "path": str,
+ "old_value": Any (for value changes),
+ "new_value": Any (for value changes),
+ "value": Any (for additions/removals)
+ }
+
+ The changes can be printed to JSON using the `print` method.
"""
- def __init__(self, schema1: Schema, schema2: Schema):
- super().__init__(schema1, schema2)
+ def __init__(self, schema_old: Schema, schema_new: Schema, table_filter: list[str] = []):
+ super().__init__(schema_old, schema_new, table_filter)
- def print(self) -> None:
- """
- Print the differences between the two schemas using a custom format.
- """
- handlers: dict[str, Callable[[dict[str, Any]], None]] = {
- "values_changed": self._handle_values_changed,
- "iterable_item_added": self._handle_iterable_item_added,
- "iterable_item_removed": self._handle_iterable_item_removed,
- "dictionary_item_added": self._handle_dictionary_item_added,
- "dictionary_item_removed": self._handle_dictionary_item_removed,
+ # Define a mapping between types of changes and their handlers
+ self.handlers = {
+ "values_changed": ValuesChangedHandler(),
+ "iterable_item_added": IterableItemAddedHandler(),
+ "iterable_item_removed": IterableItemRemovedHandler(),
+ "dictionary_item_added": DictionaryItemAddedHandler(),
+ "dictionary_item_removed": DictionaryItemRemovedHandler(),
}
- for change_type, handler in handlers.items():
- if change_type in self.diff:
- handler(self.diff[change_type])
-
- def _print_header(self, id_dict: dict[str, Any], keys: list[int | str]) -> None:
- # id = self._get_id(id_dict, keys)
- # Don't display ID here for now; it is always just the schema ID.
- print(f"{self._get_key_display(keys)}")
- # print(f"{id} @ {self._get_key_display(keys)}")
-
- def _handle_values_changed(self, changes: dict[str, Any]) -> None:
- for key in changes:
- keys = self._parse_deepdiff_path(key)
- value1 = changes[key]["old_value"]
- value2 = changes[key]["new_value"]
- self._print_header(self.dict1, keys)
- print(f"- {value1}")
- print(f"+ {value2}")
-
- def _handle_iterable_item_added(self, changes: dict[str, Any]) -> None:
- for key in changes:
- keys = self._parse_deepdiff_path(key)
- value = changes[key]
- self._print_header(self.dict2, keys)
- print(f"+ {value}")
-
- def _handle_iterable_item_removed(self, changes: dict[str, Any]) -> None:
- for key in changes:
- keys = self._parse_deepdiff_path(key)
- value = changes[key]
- self._print_header(self.dict1, keys)
- print(f"- {value}")
-
- def _handle_dictionary_item_added(self, changes: dict[str, Any]) -> None:
- for key in changes:
- keys = self._parse_deepdiff_path(key)
- value = keys[-1]
- keys.pop()
- self._print_header(self.dict2, keys)
- print(f"+ {value}")
-
- def _handle_dictionary_item_removed(self, changes: dict[str, Any]) -> None:
- for key in changes:
- keys = self._parse_deepdiff_path(key)
- value = keys[-1]
- keys.pop()
- self._print_header(self.dict1, keys)
- print(f"- {value}")
-
- @staticmethod
- def _get_id(values: dict, keys: list[str | int]) -> str:
- # Unused for now, pending updates to diff tool in DM-49446.
- value: list | dict = values
- last_id = None
-
- for key in keys:
- logger.debug(f"Processing key <{key}> with type {type(key)}")
- logger.debug(f"Type of value: {type(value)}")
- if isinstance(value, dict) and "id" in value:
- last_id = value["id"]
- elif isinstance(value, list) and isinstance(key, int):
- if 0 <= key < len(value):
- value = value[key]
- else:
- raise ValueError(f"Index '{key}' is out of range for list of length {len(value)}")
- value = value[key]
-
- if isinstance(value, dict) and "id" in value:
- last_id = value["id"]
-
- if last_id is not None:
- return last_id
- else:
- raise ValueError("No 'id' found in the specified path")
+ def to_change_list(self) -> list[dict[str, Any]]:
+ """
+ Convert differences to a structured format.
- @staticmethod
- def _get_key_display(keys: list[str | int]) -> str:
- return ".".join(str(k) for k in keys)
+ Returns
+ -------
+ list[dict[str, Any]]
+ List of changes in their dictionary representation.
+ """
+ changes = []
- @staticmethod
- def _parse_deepdiff_path(path: str) -> list[str | int]:
- if path.startswith("root"):
- path = path[4:]
+ for change_type, handler in self.handlers.items():
+ if change_type in self.diff:
+ changes.extend(handler.collect(self.diff[change_type]))
- pattern = re.compile(r"\['([^']+)'\]|\[(\d+)\]")
- matches = pattern.findall(path)
+ return changes
- keys = []
- for match in matches:
- if match[0]: # String key
- keys.append(match[0])
- elif match[1]: # Integer index
- keys.append(int(match[1]))
+ def print(self, output_stream: IO[str] | None = None) -> None:
+ """
+ Print the differences between the two schemas as JSON.
- return keys
+ Parameters
+ ----------
+ output_stream
+ The output stream for printing the differences.
+ """
+ print(json.dumps(self.to_change_list(), indent=2), file=output_stream)
class DatabaseDiff(SchemaDiff):
"""
- Compare a schema with a database and print the differences.
+ Compare a schema with a database and emit structured differences.
Parameters
----------
@@ -214,21 +365,101 @@ class DatabaseDiff(SchemaDiff):
The schema to compare.
engine
The database engine to compare with.
+
+ Notes
+ -----
+ The `DatabaseDiff` class uses SQLAlchemy to reflect the database schema
+ and compare it with the provided `~felis.datamodel.Schema` object. It
+ generates a list of differences between the two schemas, which can be
+ printed or converted to a structured format.
+
+ The error-handling during the reflection and comparison process is
+ robust, catching various exceptions that may arise from database
+ connectivity issues, invalid configurations, or unexpected errors.
+ This is done because otherwise some obscure errors may be raised
+ during the reflection process and configuration of alembic, which are not
+ very informative to the user.
"""
def __init__(self, schema: Schema, engine: Engine):
+ self.schema = schema
+ self.engine = engine
+ self._generate_diff()
+
+ def _generate_diff(self) -> None:
+ """Generate the differences between the provided schema and
+ database.
+ """
db_metadata = MetaData()
- with engine.connect() as connection:
- db_metadata.reflect(bind=connection)
- mc = MigrationContext.configure(
- connection, opts={"compare_type": True, "target_metadata": db_metadata}
+ with self.engine.connect() as connection:
+ # Reflect the database schema
+ try:
+ db_metadata.reflect(bind=connection)
+ except (sqlalchemy.exc.DatabaseError, sqlalchemy.exc.OperationalError) as e:
+ raise RuntimeError(f"Database reflection failed: {e}") from e
+ except AttributeError as e: # Happens when no database is provided in the URL
+ raise ValueError(
+ f"Invalid engine URL: <{self.engine.url}> (Missing database or schema?)"
+ ) from e
+ except sqlalchemy.exc.ArgumentError as e:
+ raise ValueError(f"Invalid database URL or configuration: {e}") from e
+ except Exception as e:
+ raise RuntimeError(f"Unexpected error during database reflection: {e}") from e
+
+ # Configure the alembic migration context using the reflected
+ # metadata
+ try:
+ mc = MigrationContext.configure(
+ connection, opts={"compare_type": True, "target_metadata": db_metadata}
+ )
+ except (sqlalchemy.exc.DatabaseError, TypeError, ValueError) as e:
+ raise RuntimeError(f"Migration context configuration failed: {e}") from e
+ except Exception as e:
+ raise RuntimeError(f"Unexpected error in migration context configuration: {e}") from e
+
+ # Build the schema metadata for comparison
+ try:
+ schema_metadata = MetaDataBuilder(self.schema, apply_schema_to_metadata=False).build()
+ except (ValueError, TypeError) as e:
+ raise ValueError(f"Schema metadata construction failed: {e}") from e
+ except Exception as e:
+ raise RuntimeError(f"Unexpected error in schema metadata construction: {e}") from e
+
+ # Compare the database metadata with the schema metadata
+ try:
+ self._diff = compare_metadata(mc, schema_metadata)
+ except (sqlalchemy.exc.DatabaseError, AttributeError, TypeError) as e:
+ raise RuntimeError(f"Metadata comparison failed: {e}") from e
+ except Exception as e:
+ raise RuntimeError(f"Unexpected error during metadata comparison: {e}") from e
+
+ def to_change_list(self) -> list[dict[str, Any]]:
+ """
+ Convert database differences to structured format.
+
+ Returns
+ -------
+ list[dict[str, Any]]
+ List of database change dictionaries.
+ """
+ changes = []
+ for change in self._diff:
+ changes.append(
+ {
+ "change_type": "database_diff",
+ "operation": str(change[0]) if change else "unknown",
+ "details": str(change) if change else "no details",
+ }
)
- schema_metadata = MetaDataBuilder(schema, apply_schema_to_metadata=False).build()
- self.diff = compare_metadata(mc, schema_metadata)
+ return changes
- def print(self) -> None:
+ def print(self, output_stream: IO[str] | None = None) -> None:
"""
- Print the differences between the schema and the database.
+ Print the differences between the schema and the database as JSON.
+
+ Parameters
+ ----------
+ output_stream
+ The output stream for printing the differences.
"""
- if self.has_changes:
- pprint.pprint(self.diff)
+ print(json.dumps(self.to_change_list(), indent=2), file=output_stream)
diff --git a/python/felis/tests/run_cli.py b/python/felis/tests/run_cli.py
index 04102fdf..4c022d0b 100644
--- a/python/felis/tests/run_cli.py
+++ b/python/felis/tests/run_cli.py
@@ -23,6 +23,7 @@
import logging
+import click
from click.testing import CliRunner
from felis.cli import cli
@@ -38,7 +39,7 @@ def run_cli(
print_cmd: bool = False,
print_output: bool = False,
id_generation: bool = False,
-) -> None:
+) -> click.testing.Result:
"""Run a CLI command and check the exit code.
Parameters
@@ -57,6 +58,11 @@ def run_cli(
Whether to print the output, by default False.
id_generation : bool
Whether to enable id generation, by default False.
+
+ Returns
+ -------
+ click.testing.Result
+ The result of the command execution.
"""
if not cmd:
raise ValueError("No command provided.")
@@ -77,3 +83,4 @@ def run_cli(
assert result.exit_code != 0
else:
assert result.exit_code == 0
+ return result
diff --git a/tests/data/test_diff1.yaml b/tests/data/test_diff1.yaml
index 3a8393ac..e2f5fcdc 100644
--- a/tests/data/test_diff1.yaml
+++ b/tests/data/test_diff1.yaml
@@ -1,39 +1,42 @@
---
name: test_diff
description: Test diff
-"@id": "#test_diff"
version: "1.2.3"
tables:
- - name: test_table1
- "@id": "#test_table1"
- description: Test table 1
+- name: test_table1
+ description: Test table 1
+ columns:
+ - name: column1
+ datatype: int
+ description: Column 1
+ - name: column2
+ datatype: string
+ description: Column 2
+ length: 30
+ nullable: false
+ - name: column3
+ datatype: string
+ description: Column 3
+ length: 100
+ indexes:
+ - name: idx_column1
+ description: Index on column 1
columns:
- - name: column1
- "@id": "#test_table1.column1"
- datatype: int
- description: Column 1
- - name: column2
- "@id": "#test_table1.column2"
- datatype: string
- description: Column 2
- length: 30
- nullable: false
- - name: column3
- "@id": "#test_table1.column3"
- datatype: string
- description: Column 3
- length: 100
- indexes:
- - name: idx_column1
- "@id": "#test_table1_idx_column1"
- description: Index on column 1
- columns:
- - "#test_table1.column1"
- constraints:
- - name: uniq_column2
- "@id": "#test_table1_uniq_column2"
- "@type": "Unique"
- description: Unique column 2
- columns:
- - "#test_table1.column2"
- primaryKey: "#test_table1.column1"
+ - "#test_table1.column1"
+ columnGroups:
+ - name: group1
+ description: Group 1
+ columns:
+ - "#test_table1.column1"
+ - name: group2
+ description: Group 2
+ columns:
+ - "#test_table1.column1"
+ - "#test_table1.column2"
+ constraints:
+ - name: uniq_column2
+ "@type": "Unique"
+ description: Unique column 2
+ columns:
+ - "#test_table1.column2"
+ primaryKey: "#test_table1.column1"
diff --git a/tests/data/test_diff2.yaml b/tests/data/test_diff2.yaml
index e667a1da..a2406a25 100644
--- a/tests/data/test_diff2.yaml
+++ b/tests/data/test_diff2.yaml
@@ -3,32 +3,52 @@ name: test_diff
description: Another test diff
"@id": "#test_diff"
version: "4.5.6"
+votable:utype: Schema
tables:
- - name: test_table1
- "@id": "#test_table1"
- description: Test table 1
+- name: test_table1
+ "@id": "#test_table1"
+ description: Test table 1
+ columns:
+ - name: column1
+ "@id": "#test_table1.column1"
+ datatype: int
+ description: Column 1
+ ivoa:ucd: meta.id
+ - name: column2
+ "@id": "#test_table1.column2"
+ datatype: string
+ description: Column 2
+ length: 30
+ nullable: false
+ indexes:
+ - name: idx_column2
+ "@id": "#test_table1_idx_column2"
+ description: Index on column 2
columns:
- - name: column1
- "@id": "#test_table1.column1"
- datatype: int
- description: Column 1
- - name: column2
- "@id": "#test_table1.column2"
- datatype: string
- description: Column 2
- length: 30
- nullable: false
- indexes:
- - name: idx_column2
- "@id": "#test_table1_idx_column2"
- description: Index on column 2
- columns:
- - "#test_table1.column2"
- constraints:
- - name: uniq_column2
- "@id": "#test_table1_uniq_column2"
- "@type": "Unique"
- description: Unique column 2
- columns:
- - "#test_table1.column2"
- primaryKey: "#test_table1.column1"
+ - "#test_table1.column2"
+ columnGroups:
+ - name: group1
+ description: Group 1
+ columns:
+ - "#test_table1.column1"
+ - "#test_table1.column2"
+ - name: group2
+ description: Group 2
+ columns:
+ - "#test_table1.column1"
+ constraints:
+ - name: uniq_column2
+ "@id": "#test_table1_uniq_column2"
+ "@type": "Unique"
+ description: Unique column 2
+ columns:
+ - "#test_table1.column2"
+ primaryKey: "#test_table1.column1"
+- name: test_table2
+ "@id": "#test_table2"
+ description: Test table 2
+ columns:
+ - name: column1
+ "@id": "#test_table2.column1"
+ datatype: int
+ description: Column 1
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 9ec93119..cc589906 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -145,17 +145,51 @@ def test_diff_database(self) -> None:
test_diff2 = os.path.join(TEST_DIR, "data", "test_diff2.yaml")
engine = create_engine(self.sqlite_url)
- metadata_db = MetaDataBuilder(Schema.from_uri(test_diff1), apply_schema_to_metadata=False).build()
+ metadata_db = MetaDataBuilder(
+ Schema.from_uri(test_diff1, context={"id_generation": True}), apply_schema_to_metadata=False
+ ).build()
metadata_db.create_all(engine)
run_cli(["diff", f"--engine-url={self.sqlite_url}", test_diff2])
+ def test_diff_database_with_table_filter(self) -> None:
+ """Test for ``diff`` command with database and table filter. This
+ should fail as table filters are not supported for this comparator.
+ """
+ test_diff1 = os.path.join(TEST_DIR, "data", "test_diff1.yaml")
+ test_diff2 = os.path.join(TEST_DIR, "data", "test_diff2.yaml")
+
+ db_url = f"sqlite:///{self.tmpdir}/tap_schema.sqlite3"
+ engine = create_engine(db_url)
+ metadata_db = MetaDataBuilder(
+ Schema.from_uri(test_diff1, context={"id_generation": True}), apply_schema_to_metadata=False
+ ).build()
+ metadata_db.create_all(engine)
+
+ run_cli(
+ ["diff", f"--engine-url={db_url}", "--table", "table1", test_diff2],
+ expect_error=True,
+ )
+
def test_diff_alembic(self) -> None:
"""Test for ``diff`` command with ``--alembic`` comparator option."""
test_diff1 = os.path.join(TEST_DIR, "data", "test_diff1.yaml")
test_diff2 = os.path.join(TEST_DIR, "data", "test_diff2.yaml")
run_cli(["diff", "--comparator", "alembic", test_diff1, test_diff2], print_output=True)
+ def test_diff_alembic_with_table_filter(self) -> None:
+ """Test for ``diff`` command with the ``alembic`` comparator and a table
+ filter. This should fail as table filters are not supported for this
+ comparator.
+ """
+ test_diff1 = os.path.join(TEST_DIR, "data", "test_diff1.yaml")
+ test_diff2 = os.path.join(TEST_DIR, "data", "test_diff2.yaml")
+
+ run_cli(
+ ["diff", "--comparator", "alembic", "--table", "table1", test_diff1, test_diff2],
+ expect_error=True,
+ )
+
def test_diff_error(self) -> None:
"""Test for ``diff`` command with bad arguments."""
test_diff1 = os.path.join(TEST_DIR, "data", "test_diff1.yaml")
@@ -167,6 +201,13 @@ def test_diff_error_on_change(self) -> None:
test_diff2 = os.path.join(TEST_DIR, "data", "test_diff2.yaml")
run_cli(["diff", "--error-on-change", test_diff1, test_diff2], expect_error=True, print_output=True)
+ def test_diff_with_table_filter(self) -> None:
+ """Test for ``diff`` command and a table filter."""
+ test_diff1 = os.path.join(TEST_DIR, "data", "test_diff1.yaml")
+ test_diff2 = os.path.join(TEST_DIR, "data", "test_diff2.yaml")
+
+ run_cli(["diff", "--table", "table1", test_diff1, test_diff2], print_output=True)
+
def test_dump_yaml(self) -> None:
"""Test for ``dump`` command with YAML output."""
with tempfile.NamedTemporaryFile(delete=True, suffix=".yaml") as temp_file:
diff --git a/tests/test_diff.py b/tests/test_diff.py
index fc876aef..85067e75 100644
--- a/tests/test_diff.py
+++ b/tests/test_diff.py
@@ -19,139 +19,181 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import os
import unittest
+from typing import Any
from sqlalchemy import create_engine
-from felis import datamodel as dm
+from felis import Column, Index, Schema, Table
from felis.diff import DatabaseDiff, FormattedSchemaDiff, SchemaDiff
from felis.metadata import MetaDataBuilder
-class TestSchemaDiff(unittest.TestCase):
+class SchemaDiffTestCase(unittest.TestCase):
"""Test the SchemaDiff class."""
- def _diff(self, schema1, schema2):
- return SchemaDiff(schema1, schema2).diff
+ def _diff(
+ self, print_diff: bool = False, label: str = "", table_filter: list[str] | None = None
+ ) -> dict[str, Any]:
+ """Generate a diff between the two schemas managed by the test case,
+ optionally printing the differences to the console for debugging
+ purposes.
+ """
+ diff = SchemaDiff(self.sch1, self.sch2, table_filter=table_filter).diff
+ if print_diff:
+ print(label, "diff:", diff)
+ return diff
+
+ def setUp(self) -> None:
+ """Set up two schemas for testing."""
+ self.sch1: Schema = Schema(
+ name="schema", id="#schema", version="1.0.0", description="Schema", tables=[]
+ )
+ self.sch2: Schema = Schema(
+ name="schema", id="#schema", version="1.0.0", description="Schema", tables=[]
+ )
- def test_schema_diff(self) -> None:
- """Test the comparison output generated by the SchemaDiff class."""
- # Two schemas with different values
- schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
- schema2 = dm.Schema(name="schema2", id="#schema2", version="4.5.6", description="Schema 2", tables=[])
- diff = self._diff(schema1, schema2)
+ def test_schema_changed(self) -> None:
+ """Test comparison of schemas with different attribute values."""
+ self.sch2.name = "schema2"
+ self.sch2.version = "1.0.1"
+ self.sch2.description = "Schema 2"
+ diff = self._diff()
self.assertSetEqual(
- set(diff.get("values_changed").keys()),
- set(f"root['{key}']" for key in ["name", "id", "version", "description"]),
+ set(diff_level.path() for diff_level in diff["values_changed"]),
+ set(f"root['{key}']" for key in ["name", "version", "description"]),
)
- # Call formatted handler function
- FormattedSchemaDiff(schema1, schema2)._handle_values_changed(diff["values_changed"])
-
- # Table added
- schema2.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[]))
- diff = self._diff(schema1, schema2)
- self.assertIn("iterable_item_added", diff)
- self.assertIn("root['tables'][0]", diff["iterable_item_added"])
-
- # Call formatted handler function
- FormattedSchemaDiff(schema1, schema2)._handle_iterable_item_added(diff["iterable_item_added"])
-
- # Table removed
- schema2.tables.clear()
- schema1.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[]))
- diff = self._diff(schema1, schema2)
- self.assertIn("iterable_item_removed", diff)
- self.assertIn("root['tables'][0]", diff["iterable_item_removed"])
-
- # Call formatted handler function
- FormattedSchemaDiff(schema1, schema2)._handle_iterable_item_removed(diff["iterable_item_removed"])
-
- # Different table descriptions
- schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
- schema2 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
- schema1.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[]))
- schema2.tables.append(dm.Table(name="table1", id="#table1", description="Table 2", columns=[]))
- diff = self._diff(schema1, schema2)
+ def test_table_added(self) -> None:
+ """Test the addition of a table to a schema. Because of how the data is
+ restructured for comparison, a table addition will show up as a
+ dictionary item added.
+ """
+ self.sch2.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[]))
+ diff = self._diff()
+ self.assertIn("dictionary_item_added", diff)
+ self.assertEqual(diff["dictionary_item_added"][0].path(), "root['tables']['table1']")
+
+ def test_table_removed(self) -> None:
+ """Test the removal of a table from a schema. Because of how the data
+ is restructured for comparison, a table removal will show up as a
+ dictionary item removed.
+ """
+ self.sch1.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[]))
+ diff = self._diff()
+ # Because of how the data is restructured for comparison, a table
+ # removal will show up as a dictionary item removed.
+ self.assertIn("dictionary_item_removed", diff)
+ self.assertEqual(diff["dictionary_item_removed"][0].path(), "root['tables']['table1']")
+
+ def test_table_descriptions_changed(self) -> None:
+ """Test the change of a table's description."""
+ self.sch1.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[]))
+ self.sch2.tables.append(Table(name="table1", id="#table1", description="Table 1 changed", columns=[]))
+ diff = self._diff()
+
self.assertIn("values_changed", diff)
- self.assertIn("root['tables'][0]['description']", diff["values_changed"])
- old_value = diff["values_changed"]["root['tables'][0]['description']"]["old_value"]
- new_value = diff["values_changed"]["root['tables'][0]['description']"]["new_value"]
- self.assertEqual(old_value, "Table 1")
- self.assertEqual(new_value, "Table 2")
-
- # Two different tables
- schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
- schema2 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
- schema1.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[]))
- schema2.tables.append(dm.Table(name="table2", id="#table2", description="Table 2", columns=[]))
- diff = self._diff(schema1, schema2)
- self.assertSetEqual(
- set(diff.get("values_changed").keys()),
- set(f"root['tables'][0]['{key}']" for key in ["name", "id", "description"]),
+ values_changed = diff["values_changed"][0]
+ self.assertEqual(values_changed.path(), "root['tables']['table1']['description']")
+ self.assertEqual(values_changed.t1, "Table 1")
+ self.assertEqual(values_changed.t2, "Table 1 changed")
+
+ def test_tables_changed(self) -> None:
+ """Test schemas with different tables."""
+ self.sch1.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[]))
+ self.sch2.tables.append(Table(name="table2", id="#table2", description="Table 2", columns=[]))
+ diff = self._diff()
+
+ values_changed = diff["values_changed"][0]
+ self.assertEqual(values_changed.path(), "root['tables']")
+ self.assertEqual(
+ values_changed.t1, {"table1": {"name": "table1", "description": "Table 1", "columns": {}}}
+ )
+ self.assertEqual(
+ values_changed.t2, {"table2": {"name": "table2", "description": "Table 2", "columns": {}}}
)
+ def test_columns_changed(self) -> None:
# Two tables with different columns
- schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
- schema2 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
- schema1.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[]))
- schema2.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[]))
- schema2.tables[0].columns.append(
- dm.Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1")
+ self.sch1.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[]))
+ self.sch2.tables.append(
+ Table(
+ name="table1",
+ id="#table1",
+ description="Table 1",
+ columns=[
+ Column(
+ name="column1", id="#column1", datatype="string", length=256, description="Column 1"
+ )
+ ],
+ )
)
- diff = self._diff(schema1, schema2)
- self.assertIn("iterable_item_added", diff)
- self.assertIn("root['tables'][0]['columns'][0]", diff["iterable_item_added"])
-
- # Same columns in different order (no diff)
- schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
- schema2 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
- schema1.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[]))
- schema2.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[]))
- schema1.tables[0].columns.append(
- dm.Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1")
+ diff = self._diff()
+ dictionary_item_added = diff["dictionary_item_added"][0]
+ self.assertEqual(dictionary_item_added.path(), "root['tables']['table1']['columns']['column1']")
+ self.assertEqual(str(dictionary_item_added.t1), "not present")
+ self.assertEqual(
+ dictionary_item_added.t2,
+ {
+ "name": "column1",
+ "description": "Column 1",
+ "datatype": "string",
+ "length": 256,
+ "votable:arraysize": "256*",
+ },
)
- schema1.tables[0].columns.append(
- dm.Column(name="column2", datatype="string", length=256, id="#column2", description="Column 2")
+
+ def test_column_order_changed(self) -> None:
+ """Test the same columns in a different order. This should not be
+ considered a difference.
+ """
+ self.sch1.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[]))
+ self.sch2.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[]))
+ self.sch1.tables[0].columns.append(
+ Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1")
)
- schema2.tables[0].columns.append(
- dm.Column(name="column2", datatype="string", length=256, id="#column2", description="Column 2")
+ self.sch1.tables[0].columns.append(
+ Column(name="column2", datatype="string", length=256, id="#column2", description="Column 2")
)
- schema2.tables[0].columns.append(
- dm.Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1")
+ self.sch2.tables[0].columns.append(
+ Column(name="column2", datatype="string", length=256, id="#column2", description="Column 2")
+ )
+ self.sch2.tables[0].columns.append(
+ Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1")
)
- diff = self._diff(schema1, schema2)
+ diff = self._diff()
self.assertEqual(len(diff), 0)
- # Same columns with different descriptions
- schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
- schema2 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
- schema1.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[]))
- schema2.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[]))
- schema1.tables[0].columns.append(
- dm.Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1")
+ def test_column_description_changed(self) -> None:
+ """Test the change of a column's description."""
+ self.sch1.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[]))
+ self.sch2.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[]))
+ self.sch1.tables[0].columns.append(
+ Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1")
)
- schema2.tables[0].columns.append(
- dm.Column(name="column1", datatype="string", length=256, id="#column1", description="Column 2")
+ self.sch2.tables[0].columns.append(
+ Column(name="column1", datatype="string", length=256, id="#column1", description="Column 2")
)
- diff = self._diff(schema1, schema2)
+ diff = self._diff()
+ print("test_column_descriptions_changed diff:", diff)
self.assertIn("values_changed", diff)
- self.assertIn("root['tables'][0]['columns'][0]['description']", diff["values_changed"])
- old_value = diff["values_changed"]["root['tables'][0]['columns'][0]['description']"]["old_value"]
- new_value = diff["values_changed"]["root['tables'][0]['columns'][0]['description']"]["new_value"]
- self.assertEqual(old_value, "Column 1")
- self.assertEqual(new_value, "Column 2")
-
- # Added a field to a column
- schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
- schema2 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
- schema1.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[]))
- schema2.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[]))
- schema1.tables[0].columns.append(
- dm.Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1")
+ values_changed = diff["values_changed"][0]
+ self.assertEqual(
+ values_changed.path(), "root['tables']['table1']['columns']['column1']['description']"
)
- schema2.tables[0].columns.append(
- dm.Column(
+ self.assertEqual(values_changed.t1, "Column 1")
+ self.assertEqual(values_changed.t2, "Column 2")
+
+ def test_field_added_to_column(self) -> None:
+ """Test the addition of a field to a column."""
+ self.sch1.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[]))
+ self.sch2.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[]))
+ self.sch1.tables[0].columns.append(
+ Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1")
+ )
+ self.sch2.tables[0].columns.append(
+ Column(
name="column1",
datatype="string",
length=256,
@@ -160,20 +202,22 @@ def test_schema_diff(self) -> None:
ivoa_ucd="meta.id;src;meta.main ",
)
)
- diff = self._diff(schema1, schema2)
+ diff = self._diff()
+ print("test_field_added_to_column diff:", diff)
self.assertIn("dictionary_item_added", diff)
- self.assertIn("root['tables'][0]['columns'][0]['ivoa_ucd']", diff["dictionary_item_added"])
-
- # Call formatted handler function
- FormattedSchemaDiff(schema1, schema2)._handle_dictionary_item_added(diff["dictionary_item_added"])
-
- # Removed a field from a column
- schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
- schema2 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
- schema1.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[]))
- schema2.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[]))
- schema1.tables[0].columns.append(
- dm.Column(
+ dictionary_item_added = diff["dictionary_item_added"][0]
+ self.assertEqual(
+ dictionary_item_added.path(), "root['tables']['table1']['columns']['column1']['ivoa:ucd']"
+ )
+ self.assertEqual(str(dictionary_item_added.t1), "not present")
+ self.assertEqual(dictionary_item_added.t2, "meta.id;src;meta.main")
+
+ def test_field_removed_from_column(self) -> None:
+ """Test the removal of a field from a column."""
+ self.sch1.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[]))
+ self.sch2.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[]))
+ self.sch1.tables[0].columns.append(
+ Column(
name="column1",
datatype="string",
length=256,
@@ -182,87 +226,115 @@ def test_schema_diff(self) -> None:
ivoa_ucd="meta.id;src;meta.main ",
)
)
- schema2.tables[0].columns.append(
- dm.Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1")
+ self.sch2.tables[0].columns.append(
+ Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1")
)
- diff = self._diff(schema1, schema2)
- self.assertIn("dictionary_item_removed", diff)
- self.assertIn("root['tables'][0]['columns'][0]['ivoa_ucd']", diff["dictionary_item_removed"])
+ diff = self._diff()
+ print("test_field_removed_from_column diff:", diff)
- # Call formatted handler function
- FormattedSchemaDiff(schema1, schema2)._handle_dictionary_item_removed(diff["dictionary_item_removed"])
+ self.assertIn("dictionary_item_removed", diff)
+ dictionary_item_removed = diff["dictionary_item_removed"][0]
+ self.assertEqual(
+ dictionary_item_removed.path(), "root['tables']['table1']['columns']['column1']['ivoa:ucd']"
+ )
+ self.assertEqual(dictionary_item_removed.t1, "meta.id;src;meta.main")
+ self.assertEqual(str(dictionary_item_removed.t2), "not present")
- def test_index_diff(self) -> None:
+ def test_index_columns_changed(self) -> None:
"""Test differences in indices between tables."""
- schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
- schema1.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[]))
- schema1.tables[0].columns.append(
- dm.Column(name="column1", datatype="int", id="#column1", description="Column 1")
+ self.sch1.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[]))
+ self.sch1.tables[0].columns.append(
+ Column(name="column1", datatype="int", id="#column1", description="Column 1")
)
- schema1.tables[0].indexes.append(
- dm.Index(name="index1", id="#index1", description="Index 1", columns=["column1"])
+ self.sch1.tables[0].indexes.append(
+ Index(name="index1", id="#index1", description="Index 1", columns=["column1"])
)
-
- schema2 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
- schema2.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[]))
- schema2.tables[0].columns.append(
- dm.Column(name="column2", datatype="int", id="#column2", description="Column 2")
+ self.sch2.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[]))
+ self.sch2.tables[0].columns.append(
+ Column(name="column2", datatype="int", id="#column2", description="Column 2")
)
- schema2.tables[0].indexes.append(
- dm.Index(name="index1", id="#index1", description="Index 1", columns=["column2"])
+ self.sch2.tables[0].indexes.append(
+ Index(name="index1", id="#index1", description="Index 1", columns=["column2"])
)
- diff = self._diff(schema1, schema2)
- self.assertIn("values_changed", diff)
- self.assertIn("root['tables'][0]['indexes'][0]['columns'][0]", diff["values_changed"])
- new_value = diff["values_changed"]["root['tables'][0]['indexes'][0]['columns'][0]"]["new_value"]
- old_value = diff["values_changed"]["root['tables'][0]['indexes'][0]['columns'][0]"]["old_value"]
- self.assertEqual(old_value, "column1")
- self.assertEqual(new_value, "column2")
-
- # Print formatted diff to make sure it works for these changes
- FormattedSchemaDiff(schema1, schema2).print()
-
- def test_print(self) -> None:
- schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
- schema2 = dm.Schema(name="schema2", id="#schema2", version="4.5.6", description="Schema 2", tables=[])
+ diff = self._diff(print_diff=True, label="test_index_diff")
+ values_changed = diff["values_changed"][1]
+ self.assertEqual(values_changed.path(), "root['tables']['table1']['indexes']['index1']['columns'][0]")
+ self.assertEqual(values_changed.t1, "column1")
+ self.assertEqual(values_changed.t2, "column2")
+
+ def test_schema_diff_print(self) -> None:
+ schema1 = Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
+ schema2 = Schema(name="schema2", id="#schema2", version="4.5.6", description="Schema 2", tables=[])
SchemaDiff(schema1, schema2).print()
- def test_formatted_print(self) -> None:
- schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
- schema2 = dm.Schema(name="schema2", id="#schema2", version="4.5.6", description="Schema 2", tables=[])
- FormattedSchemaDiff(schema1, schema2).print()
+ def test_table_filter(self) -> None:
+ self.sch1.tables.extend(
+ [
+ Table(name="table1", id="#table1", description="Table 1", columns=[]),
+ Table(name="table2", id="#table2", description="Table 2", columns=[]),
+ Table(name="table3", id="#table3", description="Table 3", columns=[]),
+ ]
+ )
+ self.sch2.tables.extend(
+ [
+ Table(name="table1", id="#table1", description="Table 1", columns=[]),
+ Table(name="table2", id="#table2", description="Table 2", columns=[]),
+ ]
+ )
+
+ diff = self._diff(table_filter=["table1"])
+ self.assertEqual(len(diff), 0)
+
+ diff = self._diff(table_filter=["table2"])
+ self.assertEqual(len(diff), 0)
+
+ diff = self._diff(table_filter=["table3"], print_diff=True, label="test_table_filter")
+ dictionary_item_removed = diff["dictionary_item_removed"][0]
+ self.assertEqual(dictionary_item_removed.path(), "root['tables']['table3']")
+ self.assertEqual(
+ str(dictionary_item_removed.t1), "{'name': 'table3', 'description': 'Table 3', 'columns': {}}"
+ )
+ self.assertEqual(str(dictionary_item_removed.t2), "not present")
+
+
+class FormattedSchemaDiffTestCase(unittest.TestCase):
+ """Test the FormattedSchemaDiff class."""
+
+ def test_formatted_diff_print(self) -> None:
+ """Test the formatted output of the SchemaDiff by printing the
+ differences between two YAML schema files.
+ """
+ test_dir = os.path.abspath(os.path.dirname(__file__))
+ test_diff1_path = os.path.join(test_dir, "data", "test_diff1.yaml")
+ test_diff2_path = os.path.join(test_dir, "data", "test_diff2.yaml")
- def test_parse_deepdiff_path(self) -> None:
- path = "root['tables'][0]['columns'][0]['ivoa_ucd']"
- keys = FormattedSchemaDiff._parse_deepdiff_path(path)
- self.assertListEqual(keys, ["tables", 0, "columns", 0, "ivoa_ucd"])
+ context = {"id_generation": True}
+ sch1 = Schema.from_uri(test_diff1_path, context=context)
+ sch2 = Schema.from_uri(test_diff2_path, context=context)
- def test_get_id_error(self) -> None:
- id_dict = {"tables": [{"indexes": [{"columns": [{"name": "column1"}, {"name": "column2"}]}]}]}
- keys = ["tables", 0, "indexes", 0, "columns", 0]
- with self.assertRaises(ValueError):
- FormattedSchemaDiff._get_id(id_dict, keys)
+ formatted_diff = FormattedSchemaDiff(sch1, sch2)
+ formatted_diff.print()
-class TestDatabaseDiff(unittest.TestCase):
+class DatabaseDiffTestCase(unittest.TestCase):
"""Test the DatabaseDiff class."""
def test_database_diff(self) -> None:
"""Test the comparison output generated by the DatabaseDiff class."""
# Two tables with different columns
- schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
- schema1.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[]))
+ schema1 = Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
+ schema1.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[]))
schema1.tables[0].columns.append(
- dm.Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1")
+ Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1")
)
- schema2 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
- schema2.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[]))
+ schema2 = Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[])
+ schema2.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[]))
schema2.tables[0].columns.append(
- dm.Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1")
+ Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1")
)
schema2.tables[0].columns.append(
- dm.Column(name="column2", datatype="string", length=256, id="#column2", description="Column 2")
+ Column(name="column2", datatype="string", length=256, id="#column2", description="Column 2")
)
metadata_db = MetaDataBuilder(schema1, apply_schema_to_metadata=False).build()