diff --git a/docs/changes/DM-49446.feature.rst b/docs/changes/DM-49446.feature.rst new file mode 100644 index 00000000..43f392e1 --- /dev/null +++ b/docs/changes/DM-49446.feature.rst @@ -0,0 +1,6 @@ +The ``diff`` module was completely overhauled based on best practices for using the ``deepdiff`` library. +The formatted differences are now displayed in a JSON format, which is more readable and structured than the previous print outs. +A number of bugs and issues with the comparison logic and the display of the diffs were also fixed, improving the accuracy of the schema diffs. +The error handling in the database diff class was improved so that exception messages should be more informative and helpful. +A command line option has been added for writing the diff output to a file. +Finally, an additional command line option allows filtering only on specified tables in the two schemas being compared. diff --git a/docs/documenteer.toml b/docs/documenteer.toml index ccccf045..1970f07f 100644 --- a/docs/documenteer.toml +++ b/docs/documenteer.toml @@ -34,3 +34,4 @@ python_api_dir = "dev/internals" python = "https://docs.python.org/3" sqlalchemy = "https://docs.sqlalchemy.org/en/latest" lsst = "https://pipelines.lsst.io/v/weekly" +click = "https://click.palletsprojects.com" diff --git a/python/felis/__init__.py b/python/felis/__init__.py index bc196a61..2849b5ff 100644 --- a/python/felis/__init__.py +++ b/python/felis/__init__.py @@ -19,9 +19,21 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -from .datamodel import Schema +from .datamodel import ( + CheckConstraint, + Column, + ColumnGroup, + Constraint, + DataType, + ForeignKeyConstraint, + Index, + Schema, + SchemaVersion, + Table, + UniqueConstraint, +) from .db.schema import create_database -from .diff import DatabaseDiff, FormattedSchemaDiff, SchemaDiff +from .diff import DatabaseDiff, FormattedSchemaDiff from .metadata import MetaDataBuilder from importlib.metadata import PackageNotFoundError, version diff --git a/python/felis/cli.py b/python/felis/cli.py index 001786f9..d7d2d679 100644 --- a/python/felis/cli.py +++ b/python/felis/cli.py @@ -431,6 +431,14 @@ def validate( default="deepdiff", ) @click.option("-E", "--error-on-change", is_flag=True, help="Exit with error code if schemas are different") +@click.option("--table", "tables", multiple=True, help="Table names to filter on.") +@click.option( + "--output-file", + "-o", + type=click.File(mode="w"), + help="Write diff output to a file insteading of printing", + default=None, +) @click.argument("files", nargs=-1, type=click.File()) @click.pass_context def diff( @@ -438,6 +446,8 @@ def diff( engine_url: str | None, comparator: str, error_on_change: bool, + tables: list[str], + output_file: IO[str] | None, files: Iterable[IO[str]], ) -> None: schemas = [ @@ -447,12 +457,16 @@ def diff( diff: SchemaDiff if len(schemas) == 2 and engine_url is None: if comparator == "alembic": + if tables: + raise click.ClickException("Table filtering is not supported for Alembic comparator") db_context = create_database(schemas[0]) assert isinstance(db_context.engine, Engine) diff = DatabaseDiff(schemas[1], db_context.engine) else: diff = FormattedSchemaDiff(schemas[0], schemas[1]) elif len(schemas) == 1 and engine_url is not None: + if tables: + raise click.ClickException("Table filtering is not supported for database comparison") engine = create_engine(engine_url) diff = DatabaseDiff(schemas[0], engine) else: @@ -460,7 +474,7 @@ def diff( "Invalid arguments - provide two schemas or a schema and a database engine URL" ) - diff.print() + diff.print(output_file) if diff.has_changes and error_on_change: raise click.ClickException("Schema was changed") diff --git a/python/felis/diff.py b/python/felis/diff.py index 348d751f..a0107e32 100644 --- a/python/felis/diff.py +++ b/python/felis/diff.py @@ -21,21 +21,22 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +import copy +import json import logging -import pprint -import re -from collections.abc import Callable -from typing import Any +from typing import IO, Any +import sqlalchemy from alembic.autogenerate import compare_metadata from alembic.migration import MigrationContext from deepdiff.diff import DeepDiff +from deepdiff.model import DiffLevel from sqlalchemy import Engine, MetaData from .datamodel import Schema from .metadata import MetaDataBuilder -__all__ = ["DatabaseDiff", "SchemaDiff"] +__all__ = ["DatabaseDiff", "FormattedSchemaDiff", "SchemaDiff"] logger = logging.getLogger(__name__) @@ -43,28 +44,139 @@ logging.getLogger("alembic").setLevel(logging.WARNING) +def _normalize_lists_by_name(obj: Any) -> Any: + """ + Recursively normalize structures: + - Lists of dicts under specified keys become dicts keyed by 'name'. + - Lists of strings under specified keys become sorted lists. + - Everything else is recursively normalized in place. + + Parameters + ---------- + obj + The object to normalize, which can be a list, dict, or any other type. + """ + dict_like_keys = {"tables", "columns", "constraints", "indexes", "column_groups"} + set_like_keys = {"columns", "referencedColumns"} + + if isinstance(obj, list): + return [_normalize_lists_by_name(item) for item in obj] + + elif isinstance(obj, dict): + normalized: dict[str, Any] = {} + + for k, v in obj.items(): + if isinstance(v, list): + if k in dict_like_keys and all(isinstance(i, dict) and "name" in i for i in v): + logger.debug(f"Normalizing list of dicts under key '{k}' to dict keyed by 'name'") + normalized[k] = {i["name"]: _normalize_lists_by_name(i) for i in v} + elif k in set_like_keys and all(isinstance(i, str) for i in v): + logger.debug(f"Normalizing list of strings under key '{k}' to sorted list: {v}") + normalized[k] = sorted(v) + else: + normalized[k] = [_normalize_lists_by_name(i) for i in v] + else: + normalized[k] = _normalize_lists_by_name(v) + + return normalized + + else: + return obj + + class SchemaDiff: """ Compare two schemas using DeepDiff and print the differences. Parameters ---------- - schema1 - The first schema to compare. - schema2 - The second schema to compare. + schema_old + The old schema to compare, typically the original schema. + schema_new + The new schema to compare, typically the modified schema. + table_filter + A list of table names to filter on. + strip_ids + Whether to strip '@id' fields from the schemas before comparison. + + Notes + ----- + This class uses DeepDiff to compare two schemas and provides methods to + retrieve the differences. It is designed to be extended for more structured + output, such as in `FormattedSchemaDiff` and would not typically be used + directly. """ - def __init__(self, schema1: Schema, schema2: Schema): - self.dict1 = schema1.model_dump(exclude_none=True) - self.dict2 = schema2.model_dump(exclude_none=True) - self.diff = DeepDiff(self.dict1, self.dict2, ignore_order=True) + def __init__( + self, + schema_old: Schema, + schema_new: Schema, + table_filter: list[str] | None = None, + strip_ids: bool = True, + ): + self.schema_old = copy.deepcopy(schema_old) + self.schema_new = copy.deepcopy(schema_new) + if table_filter: + logger.debug(f"Filtering on tables: {table_filter}") + self.table_filter = table_filter or [] + self.strip_ids = strip_ids + self._create_diff() + + def _create_diff(self) -> dict[str, Any]: + if self.table_filter: + self.schema_old.tables = [ + table for table in self.schema_old.tables if table.name in self.table_filter + ] + logger.debug(f"Filtered old schema tables: {[table.name for table in self.schema_old.tables]}") + self.schema_new.tables = [ + table for table in self.schema_new.tables if table.name in self.table_filter + ] + logger.debug(f"Filtered new schema tables: {[table.name for table in self.schema_new.tables]}") + self.dict_old = _normalize_lists_by_name(self.schema_old._model_dump(strip_ids=self.strip_ids)) + self.dict_new = _normalize_lists_by_name(self.schema_new._model_dump(strip_ids=self.strip_ids)) + logger.debug(f"Normalized old dict:\n{json.dumps(self.dict_old, indent=2)}") + logger.debug(f"Normalized new dict:\n{json.dumps(self.dict_new, indent=2)}") + self._diff = DeepDiff( + self.dict_old, + self.dict_new, + ignore_order=True, + view="tree", + ) + return self._diff - def print(self) -> None: + @property + def diff(self) -> dict[str, Any]: """ - Print the differences between the two schemas. + Return the differences between the two schemas. + + Returns + ------- + dict + The differences between the two schemas. """ - pprint.pprint(self.diff) + return self._diff + + def to_change_list(self) -> list[dict[str, Any]]: + """ + Convert differences to a structured format. + + Returns + ------- + list[dict[str, Any]] + List of change dictionaries. + """ + raise NotImplementedError("Subclasses must implement to_change_list()") + + def print(self, output_stream: IO[str] | None = None) -> None: + """ + Print the differences between the two schemas in raw format. + + Parameters + ---------- + output_stream + The output stream for printing the differences. + """ + print(self.diff, file=output_stream) @property def has_changes(self) -> bool: @@ -79,134 +191,173 @@ def has_changes(self) -> bool: return len(self.diff) > 0 +class DiffHandler: + def collect(self, diff_items: list[DiffLevel]) -> list[dict[str, Any]]: + """Collect differences from the provided diff items. + + Parameters + ---------- + diff_items + The list of differences to collect. + """ + raise NotImplementedError + + +class ValuesChangedHandler(DiffHandler): + def collect(self, diff_items: list[DiffLevel]) -> list[dict[str, Any]]: + results = [] + for diff in diff_items: + results.append( + { + "change_type": diff.report_type, + "path": diff.path(), + "old_value": diff.t1, + "new_value": diff.t2, + } + ) + return results + + +class IterableItemAddedHandler(DiffHandler): + def collect(self, diff_items: list[DiffLevel]) -> list[dict[str, Any]]: + results = [] + for diff in diff_items: + results.append( + { + "change_type": diff.report_type, + "path": diff.path(), + "value": diff.t2, + } + ) + return results + + +class IterableItemRemovedHandler(DiffHandler): + def collect(self, diff_items: list[DiffLevel]) -> list[dict[str, Any]]: + results = [] + for diff in diff_items: + results.append( + { + "change_type": diff.report_type, + "path": diff.path(), + "value": diff.t1, + } + ) + return results + + +class DictionaryItemAddedHandler(DiffHandler): + def collect(self, diff_items: list[DiffLevel]) -> list[dict[str, Any]]: + results = [] + for diff in diff_items: + keys = diff.path(output_format="list") + added_key = keys[-1] if keys else None + results.append( + { + "change_type": diff.report_type, + "path": diff.path(), + "added_key": added_key, + "value": diff.t2, + } + ) + return results + + +class DictionaryItemRemovedHandler(DiffHandler): + def collect(self, diff_items: list[DiffLevel]) -> list[dict[str, Any]]: + results = [] + for diff in diff_items: + keys = diff.path(output_format="list") + removed_key = keys[-1] if keys else None + results.append( + { + "change_type": diff.report_type, + "path": diff.path(), + "removed_key": removed_key, + "value": diff.t1, + } + ) + return results + + class FormattedSchemaDiff(SchemaDiff): """ - Compare two schemas using DeepDiff and print the differences using a - customized output format. + Compare two schemas using DeepDiff and emit structured JSON differences. Parameters ---------- - schema1 - The first schema to compare. - schema2 - The second schema to compare. + schema_old + The old schema to compare, typically the original schema. + schema_new + The new schema to compare, typically the modified schema. + table_filter + A list of table names to filter on. + + Notes + ----- + This class extends `SchemaDiff` to provide a more structured output of + differences. It formats the differences into a list of dictionaries, each + representing a change with details such as change type, path, and values + involved. + + Output dictionaries representing the changes are formatted as follows:: + + { + "change_type": str, + "id": str, + "path": str, + "old_value": Any (for value changes), + "new_value": Any (for value changes), + "value": Any (for additions/removals) + } + + The changes can be printed to JSON using the `print` method. """ - def __init__(self, schema1: Schema, schema2: Schema): - super().__init__(schema1, schema2) + def __init__(self, schema_old: Schema, schema_new: Schema, table_filter: list[str] = []): + super().__init__(schema_old, schema_new, table_filter) - def print(self) -> None: - """ - Print the differences between the two schemas using a custom format. - """ - handlers: dict[str, Callable[[dict[str, Any]], None]] = { - "values_changed": self._handle_values_changed, - "iterable_item_added": self._handle_iterable_item_added, - "iterable_item_removed": self._handle_iterable_item_removed, - "dictionary_item_added": self._handle_dictionary_item_added, - "dictionary_item_removed": self._handle_dictionary_item_removed, + # Define a mapping between types of changes and their handlers + self.handlers = { + "values_changed": ValuesChangedHandler(), + "iterable_item_added": IterableItemAddedHandler(), + "iterable_item_removed": IterableItemRemovedHandler(), + "dictionary_item_added": DictionaryItemAddedHandler(), + "dictionary_item_removed": DictionaryItemRemovedHandler(), } - for change_type, handler in handlers.items(): - if change_type in self.diff: - handler(self.diff[change_type]) - - def _print_header(self, id_dict: dict[str, Any], keys: list[int | str]) -> None: - # id = self._get_id(id_dict, keys) - # Don't display ID here for now; it is always just the schema ID. - print(f"{self._get_key_display(keys)}") - # print(f"{id} @ {self._get_key_display(keys)}") - - def _handle_values_changed(self, changes: dict[str, Any]) -> None: - for key in changes: - keys = self._parse_deepdiff_path(key) - value1 = changes[key]["old_value"] - value2 = changes[key]["new_value"] - self._print_header(self.dict1, keys) - print(f"- {value1}") - print(f"+ {value2}") - - def _handle_iterable_item_added(self, changes: dict[str, Any]) -> None: - for key in changes: - keys = self._parse_deepdiff_path(key) - value = changes[key] - self._print_header(self.dict2, keys) - print(f"+ {value}") - - def _handle_iterable_item_removed(self, changes: dict[str, Any]) -> None: - for key in changes: - keys = self._parse_deepdiff_path(key) - value = changes[key] - self._print_header(self.dict1, keys) - print(f"- {value}") - - def _handle_dictionary_item_added(self, changes: dict[str, Any]) -> None: - for key in changes: - keys = self._parse_deepdiff_path(key) - value = keys[-1] - keys.pop() - self._print_header(self.dict2, keys) - print(f"+ {value}") - - def _handle_dictionary_item_removed(self, changes: dict[str, Any]) -> None: - for key in changes: - keys = self._parse_deepdiff_path(key) - value = keys[-1] - keys.pop() - self._print_header(self.dict1, keys) - print(f"- {value}") - - @staticmethod - def _get_id(values: dict, keys: list[str | int]) -> str: - # Unused for now, pending updates to diff tool in DM-49446. - value: list | dict = values - last_id = None - - for key in keys: - logger.debug(f"Processing key <{key}> with type {type(key)}") - logger.debug(f"Type of value: {type(value)}") - if isinstance(value, dict) and "id" in value: - last_id = value["id"] - elif isinstance(value, list) and isinstance(key, int): - if 0 <= key < len(value): - value = value[key] - else: - raise ValueError(f"Index '{key}' is out of range for list of length {len(value)}") - value = value[key] - - if isinstance(value, dict) and "id" in value: - last_id = value["id"] - - if last_id is not None: - return last_id - else: - raise ValueError("No 'id' found in the specified path") + def to_change_list(self) -> list[dict[str, Any]]: + """ + Convert differences to a structured format. - @staticmethod - def _get_key_display(keys: list[str | int]) -> str: - return ".".join(str(k) for k in keys) + Returns + ------- + list[dict[str, Any]] + List of changes in their dictionary representation. + """ + changes = [] - @staticmethod - def _parse_deepdiff_path(path: str) -> list[str | int]: - if path.startswith("root"): - path = path[4:] + for change_type, handler in self.handlers.items(): + if change_type in self.diff: + changes.extend(handler.collect(self.diff[change_type])) - pattern = re.compile(r"\['([^']+)'\]|\[(\d+)\]") - matches = pattern.findall(path) + return changes - keys = [] - for match in matches: - if match[0]: # String key - keys.append(match[0]) - elif match[1]: # Integer index - keys.append(int(match[1])) + def print(self, output_stream: IO[str] | None = None) -> None: + """ + Print the differences between the two schemas as JSON. - return keys + Parameters + ---------- + output_stream + The output stream for printing the differences. + """ + print(json.dumps(self.to_change_list(), indent=2), file=output_stream) class DatabaseDiff(SchemaDiff): """ - Compare a schema with a database and print the differences. + Compare a schema with a database and emit structured differences. Parameters ---------- @@ -214,21 +365,101 @@ class DatabaseDiff(SchemaDiff): The schema to compare. engine The database engine to compare with. + + Notes + ----- + The `DatabaseDiff` class uses SQLAlchemy to reflect the database schema + and compare it with the provided `~felis.datamodel.Schema` object. It + generates a list of differences between the two schemas, which can be + printed or converted to a structured format. + + The error-handling during the reflection and comparison process is + robust, catching various exceptions that may arise from database + connectivity issues, invalid configurations, or unexpected errors. + This is done because otherwise some obscure errors may be raised + during the reflection process and configuration of alembic, which are not + very informative to the user. """ def __init__(self, schema: Schema, engine: Engine): + self.schema = schema + self.engine = engine + self._generate_diff() + + def _generate_diff(self) -> None: + """Generate the differences between the provided schema and + database. + """ db_metadata = MetaData() - with engine.connect() as connection: - db_metadata.reflect(bind=connection) - mc = MigrationContext.configure( - connection, opts={"compare_type": True, "target_metadata": db_metadata} + with self.engine.connect() as connection: + # Reflect the database schema + try: + db_metadata.reflect(bind=connection) + except (sqlalchemy.exc.DatabaseError, sqlalchemy.exc.OperationalError) as e: + raise RuntimeError(f"Database reflection failed: {e}") from e + except AttributeError as e: # Happens when no database is provided in the URL + raise ValueError( + f"Invalid engine URL: <{self.engine.url}> (Missing database or schema?)" + ) from e + except sqlalchemy.exc.ArgumentError as e: + raise ValueError(f"Invalid database URL or configuration: {e}") from e + except Exception as e: + raise RuntimeError(f"Unexpected error during database reflection: {e}") from e + + # Configure the alembic migration context using the reflected + # metadata + try: + mc = MigrationContext.configure( + connection, opts={"compare_type": True, "target_metadata": db_metadata} + ) + except (sqlalchemy.exc.DatabaseError, TypeError, ValueError) as e: + raise RuntimeError(f"Migration context configuration failed: {e}") from e + except Exception as e: + raise RuntimeError(f"Unexpected error in migration context configuration: {e}") from e + + # Build the schema metadata for comparison + try: + schema_metadata = MetaDataBuilder(self.schema, apply_schema_to_metadata=False).build() + except (ValueError, TypeError) as e: + raise ValueError(f"Schema metadata construction failed: {e}") from e + except Exception as e: + raise RuntimeError(f"Unexpected error in schema metadata construction: {e}") from e + + # Compare the database metadata with the schema metadata + try: + self._diff = compare_metadata(mc, schema_metadata) + except (sqlalchemy.exc.DatabaseError, AttributeError, TypeError) as e: + raise RuntimeError(f"Metadata comparison failed: {e}") from e + except Exception as e: + raise RuntimeError(f"Unexpected error during metadata comparison: {e}") from e + + def to_change_list(self) -> list[dict[str, Any]]: + """ + Convert database differences to structured format. + + Returns + ------- + list[dict[str, Any]] + List of database change dictionaries. + """ + changes = [] + for change in self._diff: + changes.append( + { + "change_type": "database_diff", + "operation": str(change[0]) if change else "unknown", + "details": str(change) if change else "no details", + } ) - schema_metadata = MetaDataBuilder(schema, apply_schema_to_metadata=False).build() - self.diff = compare_metadata(mc, schema_metadata) + return changes - def print(self) -> None: + def print(self, output_stream: IO[str] | None = None) -> None: """ - Print the differences between the schema and the database. + Print the differences between the schema and the database as JSON. + + Parameters + ---------- + output_stream + The output stream for printing the differences. """ - if self.has_changes: - pprint.pprint(self.diff) + print(json.dumps(self.to_change_list(), indent=2), file=output_stream) diff --git a/python/felis/tests/run_cli.py b/python/felis/tests/run_cli.py index 04102fdf..4c022d0b 100644 --- a/python/felis/tests/run_cli.py +++ b/python/felis/tests/run_cli.py @@ -23,6 +23,7 @@ import logging +import click from click.testing import CliRunner from felis.cli import cli @@ -38,7 +39,7 @@ def run_cli( print_cmd: bool = False, print_output: bool = False, id_generation: bool = False, -) -> None: +) -> click.testing.Result: """Run a CLI command and check the exit code. Parameters @@ -57,6 +58,11 @@ def run_cli( Whether to print the output, by default False. id_generation : bool Whether to enable id generation, by default False. + + Returns + ------- + click.testing.Result + The result of the command execution. """ if not cmd: raise ValueError("No command provided.") @@ -77,3 +83,4 @@ def run_cli( assert result.exit_code != 0 else: assert result.exit_code == 0 + return result diff --git a/tests/data/test_diff1.yaml b/tests/data/test_diff1.yaml index 3a8393ac..e2f5fcdc 100644 --- a/tests/data/test_diff1.yaml +++ b/tests/data/test_diff1.yaml @@ -1,39 +1,42 @@ --- name: test_diff description: Test diff -"@id": "#test_diff" version: "1.2.3" tables: - - name: test_table1 - "@id": "#test_table1" - description: Test table 1 +- name: test_table1 + description: Test table 1 + columns: + - name: column1 + datatype: int + description: Column 1 + - name: column2 + datatype: string + description: Column 2 + length: 30 + nullable: false + - name: column3 + datatype: string + description: Column 3 + length: 100 + indexes: + - name: idx_column1 + description: Index on column 1 columns: - - name: column1 - "@id": "#test_table1.column1" - datatype: int - description: Column 1 - - name: column2 - "@id": "#test_table1.column2" - datatype: string - description: Column 2 - length: 30 - nullable: false - - name: column3 - "@id": "#test_table1.column3" - datatype: string - description: Column 3 - length: 100 - indexes: - - name: idx_column1 - "@id": "#test_table1_idx_column1" - description: Index on column 1 - columns: - - "#test_table1.column1" - constraints: - - name: uniq_column2 - "@id": "#test_table1_uniq_column2" - "@type": "Unique" - description: Unique column 2 - columns: - - "#test_table1.column2" - primaryKey: "#test_table1.column1" + - "#test_table1.column1" + columnGroups: + - name: group1 + description: Group 1 + columns: + - "#test_table1.column1" + - name: group2 + description: Group 2 + columns: + - "#test_table1.column1" + - "#test_table1.column2" + constraints: + - name: uniq_column2 + "@type": "Unique" + description: Unique column 2 + columns: + - "#test_table1.column2" + primaryKey: "#test_table1.column1" diff --git a/tests/data/test_diff2.yaml b/tests/data/test_diff2.yaml index e667a1da..a2406a25 100644 --- a/tests/data/test_diff2.yaml +++ b/tests/data/test_diff2.yaml @@ -3,32 +3,52 @@ name: test_diff description: Another test diff "@id": "#test_diff" version: "4.5.6" +votable:utype: Schema tables: - - name: test_table1 - "@id": "#test_table1" - description: Test table 1 +- name: test_table1 + "@id": "#test_table1" + description: Test table 1 + columns: + - name: column1 + "@id": "#test_table1.column1" + datatype: int + description: Column 1 + ivoa:ucd: meta.id + - name: column2 + "@id": "#test_table1.column2" + datatype: string + description: Column 2 + length: 30 + nullable: false + indexes: + - name: idx_column2 + "@id": "#test_table1_idx_column2" + description: Index on column 2 columns: - - name: column1 - "@id": "#test_table1.column1" - datatype: int - description: Column 1 - - name: column2 - "@id": "#test_table1.column2" - datatype: string - description: Column 2 - length: 30 - nullable: false - indexes: - - name: idx_column2 - "@id": "#test_table1_idx_column2" - description: Index on column 2 - columns: - - "#test_table1.column2" - constraints: - - name: uniq_column2 - "@id": "#test_table1_uniq_column2" - "@type": "Unique" - description: Unique column 2 - columns: - - "#test_table1.column2" - primaryKey: "#test_table1.column1" + - "#test_table1.column2" + columnGroups: + - name: group1 + description: Group 1 + columns: + - "#test_table1.column1" + - "#test_table1.column2" + - name: group2 + description: Group 2 + columns: + - "#test_table1.column1" + constraints: + - name: uniq_column2 + "@id": "#test_table1_uniq_column2" + "@type": "Unique" + description: Unique column 2 + columns: + - "#test_table1.column2" + primaryKey: "#test_table1.column1" +- name: test_table2 + "@id": "#test_table2" + description: Test table 2 + columns: + - name: column1 + "@id": "#test_table2.column1" + datatype: int + description: Column 1 diff --git a/tests/test_cli.py b/tests/test_cli.py index 9ec93119..cc589906 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -145,17 +145,51 @@ def test_diff_database(self) -> None: test_diff2 = os.path.join(TEST_DIR, "data", "test_diff2.yaml") engine = create_engine(self.sqlite_url) - metadata_db = MetaDataBuilder(Schema.from_uri(test_diff1), apply_schema_to_metadata=False).build() + metadata_db = MetaDataBuilder( + Schema.from_uri(test_diff1, context={"id_generation": True}), apply_schema_to_metadata=False + ).build() metadata_db.create_all(engine) run_cli(["diff", f"--engine-url={self.sqlite_url}", test_diff2]) + def test_diff_database_with_table_filter(self) -> None: + """Test for ``diff`` command with database and table filter. This + should fail as table filters are not supported for this comparator. + """ + test_diff1 = os.path.join(TEST_DIR, "data", "test_diff1.yaml") + test_diff2 = os.path.join(TEST_DIR, "data", "test_diff2.yaml") + + db_url = f"sqlite:///{self.tmpdir}/tap_schema.sqlite3" + engine = create_engine(db_url) + metadata_db = MetaDataBuilder( + Schema.from_uri(test_diff1, context={"id_generation": True}), apply_schema_to_metadata=False + ).build() + metadata_db.create_all(engine) + + run_cli( + ["diff", f"--engine-url={db_url}", "--table", "table1", test_diff2], + expect_error=True, + ) + def test_diff_alembic(self) -> None: """Test for ``diff`` command with ``--alembic`` comparator option.""" test_diff1 = os.path.join(TEST_DIR, "data", "test_diff1.yaml") test_diff2 = os.path.join(TEST_DIR, "data", "test_diff2.yaml") run_cli(["diff", "--comparator", "alembic", test_diff1, test_diff2], print_output=True) + def test_diff_alembic_with_table_filter(self) -> None: + """Test for ``diff`` command with ``--alembic`` option and a table + filter. This should fail as table filters are not supported for this + comparator. + """ + test_diff1 = os.path.join(TEST_DIR, "data", "test_diff1.yaml") + test_diff2 = os.path.join(TEST_DIR, "data", "test_diff2.yaml") + + run_cli( + ["diff", "--comparator", "alembic", "--table", "table1", test_diff1, test_diff2], + expect_error=True, + ) + def test_diff_error(self) -> None: """Test for ``diff`` command with bad arguments.""" test_diff1 = os.path.join(TEST_DIR, "data", "test_diff1.yaml") @@ -167,6 +201,13 @@ def test_diff_error_on_change(self) -> None: test_diff2 = os.path.join(TEST_DIR, "data", "test_diff2.yaml") run_cli(["diff", "--error-on-change", test_diff1, test_diff2], expect_error=True, print_output=True) + def test_diff_with_table_filter(self) -> None: + """Test for ``diff`` command and a table filter.""" + test_diff1 = os.path.join(TEST_DIR, "data", "test_diff1.yaml") + test_diff2 = os.path.join(TEST_DIR, "data", "test_diff2.yaml") + + run_cli(["diff", "--table", "table1", test_diff1, test_diff2], print_output=True) + def test_dump_yaml(self) -> None: """Test for ``dump`` command with YAML output.""" with tempfile.NamedTemporaryFile(delete=True, suffix=".yaml") as temp_file: diff --git a/tests/test_diff.py b/tests/test_diff.py index fc876aef..85067e75 100644 --- a/tests/test_diff.py +++ b/tests/test_diff.py @@ -19,139 +19,181 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +import os import unittest +from typing import Any from sqlalchemy import create_engine -from felis import datamodel as dm +from felis import Column, Index, Schema, Table from felis.diff import DatabaseDiff, FormattedSchemaDiff, SchemaDiff from felis.metadata import MetaDataBuilder -class TestSchemaDiff(unittest.TestCase): +class SchemaDiffTestCase(unittest.TestCase): """Test the SchemaDiff class.""" - def _diff(self, schema1, schema2): - return SchemaDiff(schema1, schema2).diff + def _diff( + self, print_diff: bool = False, label: str = "", table_filter: list[str] | None = None + ) -> dict[str, Any]: + """Generate a diff between the two schemas managed by the test case, + optionally printing the differences to the console for debugging + purposes. + """ + diff = SchemaDiff(self.sch1, self.sch2, table_filter=table_filter).diff + if print_diff: + print(label, "diff:", diff) + return diff + + def setUp(self) -> None: + """Set up two schemas for testing.""" + self.sch1: Schema = Schema( + name="schema", id="#schema", version="1.0.0", description="Schema", tables=[] + ) + self.sch2: Schema = Schema( + name="schema", id="#schema", version="1.0.0", description="Schema", tables=[] + ) - def test_schema_diff(self) -> None: - """Test the comparison output generated by the SchemaDiff class.""" - # Two schemas with different values - schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) - schema2 = dm.Schema(name="schema2", id="#schema2", version="4.5.6", description="Schema 2", tables=[]) - diff = self._diff(schema1, schema2) + def test_schema_changed(self) -> None: + """Test comparison of schemas with different attribute values.""" + self.sch2.name = "schema2" + self.sch2.version = "1.0.1" + self.sch2.description = "Schema 2" + diff = self._diff() self.assertSetEqual( - set(diff.get("values_changed").keys()), - set(f"root['{key}']" for key in ["name", "id", "version", "description"]), + set(diff_level.path() for diff_level in diff["values_changed"]), + set(f"root['{key}']" for key in ["name", "version", "description"]), ) - # Call formatted handler function - FormattedSchemaDiff(schema1, schema2)._handle_values_changed(diff["values_changed"]) - - # Table added - schema2.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[])) - diff = self._diff(schema1, schema2) - self.assertIn("iterable_item_added", diff) - self.assertIn("root['tables'][0]", diff["iterable_item_added"]) - - # Call formatted handler function - FormattedSchemaDiff(schema1, schema2)._handle_iterable_item_added(diff["iterable_item_added"]) - - # Table removed - schema2.tables.clear() - schema1.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[])) - diff = self._diff(schema1, schema2) - self.assertIn("iterable_item_removed", diff) - self.assertIn("root['tables'][0]", diff["iterable_item_removed"]) - - # Call formatted handler function - FormattedSchemaDiff(schema1, schema2)._handle_iterable_item_removed(diff["iterable_item_removed"]) - - # Different table descriptions - schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) - schema2 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) - schema1.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[])) - schema2.tables.append(dm.Table(name="table1", id="#table1", description="Table 2", columns=[])) - diff = self._diff(schema1, schema2) + def test_table_added(self) -> None: + """Test the addition of a table to a schema. Because of how the data is + restructured for comparison, a table addition will show up as a + dictionary item added. + """ + self.sch2.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[])) + diff = self._diff() + self.assertIn("dictionary_item_added", diff) + self.assertEqual(diff["dictionary_item_added"][0].path(), "root['tables']['table1']") + + def test_table_removed(self) -> None: + """Test the removal of a table from a schema. Because of how the data + is restructured for comparison, a table removal will show up as a + dictionary item removed. + """ + self.sch1.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[])) + diff = self._diff() + # Because of how the data is restructured for comparison, a table + # removal will show up as a dictionary item removed. + self.assertIn("dictionary_item_removed", diff) + self.assertEqual(diff["dictionary_item_removed"][0].path(), "root['tables']['table1']") + + def test_table_descriptions_changed(self) -> None: + """Test the change of a table's description.""" + self.sch1.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[])) + self.sch2.tables.append(Table(name="table1", id="#table1", description="Table 1 changed", columns=[])) + diff = self._diff() + self.assertIn("values_changed", diff) - self.assertIn("root['tables'][0]['description']", diff["values_changed"]) - old_value = diff["values_changed"]["root['tables'][0]['description']"]["old_value"] - new_value = diff["values_changed"]["root['tables'][0]['description']"]["new_value"] - self.assertEqual(old_value, "Table 1") - self.assertEqual(new_value, "Table 2") - - # Two different tables - schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) - schema2 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) - schema1.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[])) - schema2.tables.append(dm.Table(name="table2", id="#table2", description="Table 2", columns=[])) - diff = self._diff(schema1, schema2) - self.assertSetEqual( - set(diff.get("values_changed").keys()), - set(f"root['tables'][0]['{key}']" for key in ["name", "id", "description"]), + values_changed = diff["values_changed"][0] + self.assertEqual(values_changed.path(), "root['tables']['table1']['description']") + self.assertEqual(values_changed.t1, "Table 1") + self.assertEqual(values_changed.t2, "Table 1 changed") + + def test_tables_changed(self) -> None: + """Test schemas with different tables.""" + self.sch1.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[])) + self.sch2.tables.append(Table(name="table2", id="#table2", description="Table 2", columns=[])) + diff = self._diff() + + values_changed = diff["values_changed"][0] + self.assertEqual(values_changed.path(), "root['tables']") + self.assertEqual( + values_changed.t1, {"table1": {"name": "table1", "description": "Table 1", "columns": {}}} + ) + self.assertEqual( + values_changed.t2, {"table2": {"name": "table2", "description": "Table 2", "columns": {}}} ) + def test_columns_changed(self) -> None: # Two tables with different columns - schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) - schema2 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) - schema1.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[])) - schema2.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[])) - schema2.tables[0].columns.append( - dm.Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1") + self.sch1.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[])) + self.sch2.tables.append( + Table( + name="table1", + id="#table1", + description="Table 1", + columns=[ + Column( + name="column1", id="#column1", datatype="string", length=256, description="Column 1" + ) + ], + ) ) - diff = self._diff(schema1, schema2) - self.assertIn("iterable_item_added", diff) - self.assertIn("root['tables'][0]['columns'][0]", diff["iterable_item_added"]) - - # Same columns in different order (no diff) - schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) - schema2 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) - schema1.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[])) - schema2.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[])) - schema1.tables[0].columns.append( - dm.Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1") + diff = self._diff() + dictionary_item_added = diff["dictionary_item_added"][0] + self.assertEqual(dictionary_item_added.path(), "root['tables']['table1']['columns']['column1']") + self.assertEqual(str(dictionary_item_added.t1), "not present") + self.assertEqual( + dictionary_item_added.t2, + { + "name": "column1", + "description": "Column 1", + "datatype": "string", + "length": 256, + "votable:arraysize": "256*", + }, ) - schema1.tables[0].columns.append( - dm.Column(name="column2", datatype="string", length=256, id="#column2", description="Column 2") + + def test_column_order_changed(self) -> None: + """Test the same columns in a different order. This should not be + considered a difference. + """ + self.sch1.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[])) + self.sch2.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[])) + self.sch1.tables[0].columns.append( + Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1") ) - schema2.tables[0].columns.append( - dm.Column(name="column2", datatype="string", length=256, id="#column2", description="Column 2") + self.sch1.tables[0].columns.append( + Column(name="column2", datatype="string", length=256, id="#column2", description="Column 2") ) - schema2.tables[0].columns.append( - dm.Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1") + self.sch2.tables[0].columns.append( + Column(name="column2", datatype="string", length=256, id="#column2", description="Column 2") + ) + self.sch2.tables[0].columns.append( + Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1") ) - diff = self._diff(schema1, schema2) + diff = self._diff() self.assertEqual(len(diff), 0) - # Same columns with different descriptions - schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) - schema2 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) - schema1.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[])) - schema2.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[])) - schema1.tables[0].columns.append( - dm.Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1") + def test_column_description_changed(self) -> None: + """Test the change of a column's description.""" + self.sch1.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[])) + self.sch2.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[])) + self.sch1.tables[0].columns.append( + Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1") ) - schema2.tables[0].columns.append( - dm.Column(name="column1", datatype="string", length=256, id="#column1", description="Column 2") + self.sch2.tables[0].columns.append( + Column(name="column1", datatype="string", length=256, id="#column1", description="Column 2") ) - diff = self._diff(schema1, schema2) + diff = self._diff() + print("test_column_descriptions_changed diff:", diff) self.assertIn("values_changed", diff) - self.assertIn("root['tables'][0]['columns'][0]['description']", diff["values_changed"]) - old_value = diff["values_changed"]["root['tables'][0]['columns'][0]['description']"]["old_value"] - new_value = diff["values_changed"]["root['tables'][0]['columns'][0]['description']"]["new_value"] - self.assertEqual(old_value, "Column 1") - self.assertEqual(new_value, "Column 2") - - # Added a field to a column - schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) - schema2 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) - schema1.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[])) - schema2.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[])) - schema1.tables[0].columns.append( - dm.Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1") + values_changed = diff["values_changed"][0] + self.assertEqual( + values_changed.path(), "root['tables']['table1']['columns']['column1']['description']" ) - schema2.tables[0].columns.append( - dm.Column( + self.assertEqual(values_changed.t1, "Column 1") + self.assertEqual(values_changed.t2, "Column 2") + + def test_field_added_to_column(self) -> None: + """Test the addition of a field to a column.""" + self.sch1.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[])) + self.sch2.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[])) + self.sch1.tables[0].columns.append( + Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1") + ) + self.sch2.tables[0].columns.append( + Column( name="column1", datatype="string", length=256, @@ -160,20 +202,22 @@ def test_schema_diff(self) -> None: ivoa_ucd="meta.id;src;meta.main ", ) ) - diff = self._diff(schema1, schema2) + diff = self._diff() + print("test_field_added_to_column diff:", diff) self.assertIn("dictionary_item_added", diff) - self.assertIn("root['tables'][0]['columns'][0]['ivoa_ucd']", diff["dictionary_item_added"]) - - # Call formatted handler function - FormattedSchemaDiff(schema1, schema2)._handle_dictionary_item_added(diff["dictionary_item_added"]) - - # Removed a field from a column - schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) - schema2 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) - schema1.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[])) - schema2.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[])) - schema1.tables[0].columns.append( - dm.Column( + dictionary_item_added = diff["dictionary_item_added"][0] + self.assertEqual( + dictionary_item_added.path(), "root['tables']['table1']['columns']['column1']['ivoa:ucd']" + ) + self.assertEqual(str(dictionary_item_added.t1), "not present") + self.assertEqual(dictionary_item_added.t2, "meta.id;src;meta.main") + + def test_field_removed_from_column(self) -> None: + """Test the removal of a field from a column.""" + self.sch1.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[])) + self.sch2.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[])) + self.sch1.tables[0].columns.append( + Column( name="column1", datatype="string", length=256, @@ -182,87 +226,115 @@ def test_schema_diff(self) -> None: ivoa_ucd="meta.id;src;meta.main ", ) ) - schema2.tables[0].columns.append( - dm.Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1") + self.sch2.tables[0].columns.append( + Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1") ) - diff = self._diff(schema1, schema2) - self.assertIn("dictionary_item_removed", diff) - self.assertIn("root['tables'][0]['columns'][0]['ivoa_ucd']", diff["dictionary_item_removed"]) + diff = self._diff() + print("test_field_removed_from_column diff:", diff) - # Call formatted handler function - FormattedSchemaDiff(schema1, schema2)._handle_dictionary_item_removed(diff["dictionary_item_removed"]) + self.assertIn("dictionary_item_removed", diff) + dictionary_item_removed = diff["dictionary_item_removed"][0] + self.assertEqual( + dictionary_item_removed.path(), "root['tables']['table1']['columns']['column1']['ivoa:ucd']" + ) + self.assertEqual(dictionary_item_removed.t1, "meta.id;src;meta.main") + self.assertEqual(str(dictionary_item_removed.t2), "not present") - def test_index_diff(self) -> None: + def test_index_columns_changed(self) -> None: """Test differences in indices between tables.""" - schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) - schema1.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[])) - schema1.tables[0].columns.append( - dm.Column(name="column1", datatype="int", id="#column1", description="Column 1") + self.sch1.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[])) + self.sch1.tables[0].columns.append( + Column(name="column1", datatype="int", id="#column1", description="Column 1") ) - schema1.tables[0].indexes.append( - dm.Index(name="index1", id="#index1", description="Index 1", columns=["column1"]) + self.sch1.tables[0].indexes.append( + Index(name="index1", id="#index1", description="Index 1", columns=["column1"]) ) - - schema2 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) - schema2.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[])) - schema2.tables[0].columns.append( - dm.Column(name="column2", datatype="int", id="#column2", description="Column 2") + self.sch2.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[])) + self.sch2.tables[0].columns.append( + Column(name="column2", datatype="int", id="#column2", description="Column 2") ) - schema2.tables[0].indexes.append( - dm.Index(name="index1", id="#index1", description="Index 1", columns=["column2"]) + self.sch2.tables[0].indexes.append( + Index(name="index1", id="#index1", description="Index 1", columns=["column2"]) ) - diff = self._diff(schema1, schema2) - self.assertIn("values_changed", diff) - self.assertIn("root['tables'][0]['indexes'][0]['columns'][0]", diff["values_changed"]) - new_value = diff["values_changed"]["root['tables'][0]['indexes'][0]['columns'][0]"]["new_value"] - old_value = diff["values_changed"]["root['tables'][0]['indexes'][0]['columns'][0]"]["old_value"] - self.assertEqual(old_value, "column1") - self.assertEqual(new_value, "column2") - - # Print formatted diff to make sure it works for these changes - FormattedSchemaDiff(schema1, schema2).print() - - def test_print(self) -> None: - schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) - schema2 = dm.Schema(name="schema2", id="#schema2", version="4.5.6", description="Schema 2", tables=[]) + diff = self._diff(print_diff=True, label="test_index_diff") + values_changed = diff["values_changed"][1] + self.assertEqual(values_changed.path(), "root['tables']['table1']['indexes']['index1']['columns'][0]") + self.assertEqual(values_changed.t1, "column1") + self.assertEqual(values_changed.t2, "column2") + + def test_schema_diff_print(self) -> None: + schema1 = Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) + schema2 = Schema(name="schema2", id="#schema2", version="4.5.6", description="Schema 2", tables=[]) SchemaDiff(schema1, schema2).print() - def test_formatted_print(self) -> None: - schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) - schema2 = dm.Schema(name="schema2", id="#schema2", version="4.5.6", description="Schema 2", tables=[]) - FormattedSchemaDiff(schema1, schema2).print() + def test_table_filter(self) -> None: + self.sch1.tables.extend( + [ + Table(name="table1", id="#table1", description="Table 1", columns=[]), + Table(name="table2", id="#table2", description="Table 2", columns=[]), + Table(name="table3", id="#table3", description="Table 3", columns=[]), + ] + ) + self.sch2.tables.extend( + [ + Table(name="table1", id="#table1", description="Table 1", columns=[]), + Table(name="table2", id="#table2", description="Table 2", columns=[]), + ] + ) + + diff = self._diff(table_filter=["table1"]) + self.assertEqual(len(diff), 0) + + diff = self._diff(table_filter=["table2"]) + self.assertEqual(len(diff), 0) + + diff = self._diff(table_filter=["table3"], print_diff=True, label="test_table_filter") + dictionary_item_removed = diff["dictionary_item_removed"][0] + self.assertEqual(dictionary_item_removed.path(), "root['tables']['table3']") + self.assertEqual( + str(dictionary_item_removed.t1), "{'name': 'table3', 'description': 'Table 3', 'columns': {}}" + ) + self.assertEqual(str(dictionary_item_removed.t2), "not present") + + +class FormattedSchemaDiffTestCase(unittest.TestCase): + """Test the FormattedSchemaDiff class.""" + + def test_formatted_diff_print(self) -> None: + """Test the formatted output of the SchemaDiff by printing the + differences between two YAML schema files files. + """ + test_dir = os.path.abspath(os.path.dirname(__file__)) + test_diff1_path = os.path.join(test_dir, "data", "test_diff1.yaml") + test_diff2_path = os.path.join(test_dir, "data", "test_diff2.yaml") - def test_parse_deepdiff_path(self) -> None: - path = "root['tables'][0]['columns'][0]['ivoa_ucd']" - keys = FormattedSchemaDiff._parse_deepdiff_path(path) - self.assertListEqual(keys, ["tables", 0, "columns", 0, "ivoa_ucd"]) + context = {"id_generation": True} + sch1 = Schema.from_uri(test_diff1_path, context=context) + sch2 = Schema.from_uri(test_diff2_path, context=context) - def test_get_id_error(self) -> None: - id_dict = {"tables": [{"indexes": [{"columns": [{"name": "column1"}, {"name": "column2"}]}]}]} - keys = ["tables", 0, "indexes", 0, "columns", 0] - with self.assertRaises(ValueError): - FormattedSchemaDiff._get_id(id_dict, keys) + formatted_diff = FormattedSchemaDiff(sch1, sch2) + formatted_diff.print() -class TestDatabaseDiff(unittest.TestCase): +class DatabaseDiffTestCase(unittest.TestCase): """Test the DatabaseDiff class.""" def test_database_diff(self) -> None: """Test the comparison output generated by the DatabaseDiff class.""" # Two tables with different columns - schema1 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) - schema1.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[])) + schema1 = Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) + schema1.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[])) schema1.tables[0].columns.append( - dm.Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1") + Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1") ) - schema2 = dm.Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) - schema2.tables.append(dm.Table(name="table1", id="#table1", description="Table 1", columns=[])) + schema2 = Schema(name="schema1", id="#schema1", version="1.2.3", description="Schema 1", tables=[]) + schema2.tables.append(Table(name="table1", id="#table1", description="Table 1", columns=[])) schema2.tables[0].columns.append( - dm.Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1") + Column(name="column1", datatype="string", length=256, id="#column1", description="Column 1") ) schema2.tables[0].columns.append( - dm.Column(name="column2", datatype="string", length=256, id="#column2", description="Column 2") + Column(name="column2", datatype="string", length=256, id="#column2", description="Column 2") ) metadata_db = MetaDataBuilder(schema1, apply_schema_to_metadata=False).build()