diff --git a/tests/test_cli_inspect_paths.py b/tests/test_cli_inspect_paths.py index 20da842..821d39d 100644 --- a/tests/test_cli_inspect_paths.py +++ b/tests/test_cli_inspect_paths.py @@ -9,7 +9,9 @@ from toolkit.cli.app import app -def test_inspect_paths_reports_dataset_repo_layout_from_other_cwd(tmp_path: Path, monkeypatch) -> None: +def test_inspect_paths_reports_dataset_repo_layout_from_other_cwd( + tmp_path: Path, monkeypatch +) -> None: src = Path("project-example") dst = tmp_path / "project-example" shutil.copytree(src, dst) @@ -30,11 +32,26 @@ def test_inspect_paths_reports_dataset_repo_layout_from_other_cwd(tmp_path: Path assert result.exit_code == 0, result.output assert f"config_path: {config_path}" in result.output assert f"root: {dst / '_smoke_out'}" in result.output - assert f"raw_dir: {dst / '_smoke_out' / 'data' / 'raw' / 'project_example' / '2022'}" in result.output - assert f"raw_manifest: {dst / '_smoke_out' / 'data' / 'raw' / 'project_example' / '2022' / 'manifest.json'}" in result.output - assert f"clean_output: {dst / '_smoke_out' / 'data' / 'clean' / 'project_example' / '2022' / 'project_example_2022_clean.parquet'}" in result.output - assert f"clean_validation: {dst / '_smoke_out' / 'data' / 'clean' / 'project_example' / '2022' / '_validate' / 'clean_validation.json'}" in result.output - assert f"mart_manifest: {dst / '_smoke_out' / 'data' / 'mart' / 'project_example' / '2022' / 'manifest.json'}" in result.output + assert ( + f"raw_dir: {dst / '_smoke_out' / 'data' / 'raw' / 'project_example' / '2022'}" + in result.output + ) + assert ( + f"raw_manifest: {dst / '_smoke_out' / 'data' / 'raw' / 'project_example' / '2022' / 'manifest.json'}" + in result.output + ) + assert ( + f"clean_output: {dst / '_smoke_out' / 'data' / 'clean' / 'project_example' / '2022' / 'project_example_2022_clean.parquet'}" + in result.output + ) + assert ( + f"clean_validation: {dst / '_smoke_out' / 'data' / 'clean' / 'project_example' / '2022' / '_validate' / 'clean_validation.json'}" + in result.output + ) + assert ( + f"mart_manifest: {dst / '_smoke_out' / 'data' / 'mart' / 'project_example' / '2022' / 'manifest.json'}" + in result.output + ) assert "raw_hints:" in result.output assert "primary_output_file:" in result.output assert "suggested_read_exists: True" in result.output @@ -53,7 +70,16 @@ def test_inspect_paths_json_is_notebook_friendly(tmp_path: Path, monkeypatch) -> result = runner.invoke( app, - ["inspect", "paths", "--config", str(config_path), "--year", "2022", "--json", "--strict-config"], + [ + "inspect", + "paths", + "--config", + str(config_path), + "--year", + "2022", + "--json", + "--strict-config", + ], ) assert result.exit_code == 0, result.output @@ -118,7 +144,16 @@ def test_inspect_paths_json_reports_resolved_support_outputs(tmp_path: Path) -> result = runner.invoke( app, - ["inspect", "paths", "--config", str(config_path), "--year", "2022", "--json", "--strict-config"], + [ + "inspect", + "paths", + "--config", + str(config_path), + "--year", + "2022", + "--json", + "--strict-config", + ], ) assert result.exit_code == 0, result.output @@ -132,3 +167,105 @@ def test_inspect_paths_json_reports_resolved_support_outputs(tmp_path: Path) -> str(support_root / "data" / "mart" / "support_ds" / "2024" / "support_table.parquet") ] assert support_payload["mart"].endswith("support_table.parquet") + + +def test_inspect_paths_json_exposes_layer_profiles(tmp_path: Path) -> None: + runner = CliRunner() + + config_path = tmp_path / "dataset.yml" + root_dir = tmp_path / "out" + config_path.write_text( + "\n".join( + [ + f'root: "{root_dir.as_posix()}"', + "dataset:", + ' name: "demo_ds"', + " years: [2022]", + "raw: {}", + "clean:", + ' sql: "sql/clean.sql"', + "mart:", + " tables:", + ' - name: "mart_example"', + ' sql: "sql/mart/mart_example.sql"', + ] + ), + encoding="utf-8", + ) + + clean_dir = root_dir / "data" / "clean" / "demo_ds" / "2022" + mart_dir = root_dir / "data" / "mart" / "demo_ds" / "2022" + clean_dir.mkdir(parents=True, exist_ok=True) + mart_dir.mkdir(parents=True, exist_ok=True) + + (clean_dir / "metadata.json").write_text( + json.dumps( + { + "output_profile": { + "row_count": 39506, + "columns": [ + {"name": "comune", "type": "VARCHAR"}, + {"name": "reddito", "type": "DOUBLE"}, + ], + } + }, + indent=2, + ), + encoding="utf-8", + ) + (mart_dir / "metadata.json").write_text( + json.dumps( + { + "clean_input_profile": { + "row_count": 39506, + "columns": [ + {"name": "comune", "type": "VARCHAR"}, + {"name": "reddito", "type": "DOUBLE"}, + ], + }, + "table_profiles": { + "mart_example": { + "row_count": 7904, + "columns": [ + {"name": "comune", "type": "VARCHAR"}, + {"name": "totale", "type": "DOUBLE"}, + ], + } + }, + "transition_profiles": [ + { + "target_name": "mart_example", + "source_row_count": 39506, + "target_row_count": 7904, + "added_columns": ["totale"], + "removed_columns": ["reddito"], + "type_changes": [{"column": "comune", "from": "VARCHAR", "to": "TEXT"}], + } + ], + }, + indent=2, + ), + encoding="utf-8", + ) + + result = runner.invoke( + app, + [ + "inspect", + "paths", + "--config", + str(config_path), + "--year", + "2022", + "--json", + "--strict-config", + ], + ) + + assert result.exit_code == 0, result.output + payload = json.loads(result.output) + assert payload["layer_profiles"]["clean_output"]["row_count"] == 39506 + assert payload["layer_profiles"]["mart_clean_input"]["columns_preview"][0]["name"] == "comune" + assert payload["layer_profiles"]["mart_tables"][0]["name"] == "mart_example" + assert payload["layer_profiles"]["clean_to_mart"][0]["target_name"] == "mart_example" + assert payload["layer_profiles"]["clean_to_mart"][0]["type_change_count"] == 1 diff --git a/tests/test_cli_status.py b/tests/test_cli_status.py index fd1b585..5855947 100644 --- a/tests/test_cli_status.py +++ b/tests/test_cli_status.py @@ -21,13 +21,26 @@ def _write_run_record(path: Path, run_id: str, started_at: str, status: str) -> "finished_at": None, "status": status, "layers": { - "raw": {"status": "SUCCESS", "started_at": started_at, "finished_at": started_at}, - "clean": {"status": "FAILED", "started_at": started_at, "finished_at": started_at}, + "raw": { + "status": "SUCCESS", + "started_at": started_at, + "finished_at": started_at, + }, + "clean": { + "status": "FAILED", + "started_at": started_at, + "finished_at": started_at, + }, "mart": {"status": "PENDING", "started_at": None, "finished_at": None}, }, "validations": { "raw": {"passed": True, "errors_count": 0, "warnings_count": 1, "checks": []}, - "clean": {"passed": False, "errors_count": 2, "warnings_count": 0, "checks": []}, + "clean": { + "passed": False, + "errors_count": 2, + "warnings_count": 0, + "checks": [], + }, "mart": {}, }, "error": "clean validation failed" if status == "FAILED" else None, @@ -78,7 +91,16 @@ def test_status_uses_same_run_dir_as_writer(tmp_path: Path, monkeypatch) -> None result = runner.invoke( app, - ["status", "--dataset", "demo_ds", "--year", "2022", "--latest", "--config", str(config_path)], + [ + "status", + "--dataset", + "demo_ds", + "--year", + "2022", + "--latest", + "--config", + str(config_path), + ], ) assert result.exit_code == 0 @@ -134,7 +156,9 @@ def test_status_reports_raw_hints_when_raw_artifacts_exist(tmp_path: Path, monke ), encoding="utf-8", ) - (raw_dir / "_profile" / "suggested_read.yml").write_text("clean:\n read:\n delim: \";\"\n", encoding="utf-8") + (raw_dir / "_profile" / "suggested_read.yml").write_text( + 'clean:\n read:\n delim: ";"\n', encoding="utf-8" + ) run_dir = get_run_dir(project_dir / "out", "demo_ds", 2022) _write_run_record(run_dir / "run-123.json", "run-123", "2026-03-04T10:00:00+00:00", "SUCCESS") @@ -144,7 +168,16 @@ def test_status_reports_raw_hints_when_raw_artifacts_exist(tmp_path: Path, monke result = runner.invoke( app, - ["status", "--dataset", "demo_ds", "--year", "2022", "--latest", "--config", str(config_path)], + [ + "status", + "--dataset", + "demo_ds", + "--year", + "2022", + "--latest", + "--config", + str(config_path), + ], ) assert result.exit_code == 0 @@ -157,7 +190,9 @@ def test_status_reports_raw_hints_when_raw_artifacts_exist(tmp_path: Path, monke assert "header_preamble_detected" in result.output -def test_status_reports_validation_summary_from_layer_artifacts(tmp_path: Path, monkeypatch) -> None: +def test_status_reports_validation_summary_from_layer_artifacts( + tmp_path: Path, monkeypatch +) -> None: project_dir = tmp_path / "project" config_path = project_dir / "dataset.yml" project_dir.mkdir() @@ -246,7 +281,9 @@ def test_status_reports_validation_summary_from_layer_artifacts(tmp_path: Path, { "ok": False, "errors": ["Missing required MART tables: ['mart_missing']"], - "warnings": ["MART table_rules reference tables not declared in mart.tables: ['mart_extra']"], + "warnings": [ + "MART table_rules reference tables not declared in mart.tables: ['mart_extra']" + ], "summary": { "required_tables": ["mart_ok", "mart_missing"], "tables": ["mart_ok"], @@ -293,7 +330,16 @@ def test_status_reports_validation_summary_from_layer_artifacts(tmp_path: Path, result = runner.invoke( app, - ["status", "--dataset", "demo_ds", "--year", "2022", "--latest", "--config", str(config_path)], + [ + "status", + "--dataset", + "demo_ds", + "--year", + "2022", + "--latest", + "--config", + str(config_path), + ], ) assert result.exit_code == 0 @@ -305,3 +351,114 @@ def test_status_reports_validation_summary_from_layer_artifacts(tmp_path: Path, assert "missing_tables=mart_missing" in result.output assert "missing_outputs=mart_ok.parquet" in result.output assert "cross_year: state=passed warnings=0 errors=0" in result.output + + +def test_status_reports_layer_profiles_from_metadata(tmp_path: Path, monkeypatch) -> None: + project_dir = tmp_path / "project" + config_path = project_dir / "dataset.yml" + project_dir.mkdir() + + config_path.write_text( + """ +root: "./out" +dataset: + name: demo_ds + years: [2022] +raw: {} +clean: + sql: "sql/clean.sql" +mart: + tables: + - name: mart_example + sql: "sql/mart/mart_example.sql" +""".strip(), + encoding="utf-8", + ) + + sql_dir = project_dir / "sql" / "mart" + sql_dir.mkdir(parents=True, exist_ok=True) + (project_dir / "sql" / "clean.sql").write_text("select 1 as value", encoding="utf-8") + (sql_dir / "mart_example.sql").write_text("select * from clean_input", encoding="utf-8") + + clean_dir = project_dir / "out" / "data" / "clean" / "demo_ds" / "2022" + mart_dir = project_dir / "out" / "data" / "mart" / "demo_ds" / "2022" + clean_dir.mkdir(parents=True, exist_ok=True) + mart_dir.mkdir(parents=True, exist_ok=True) + + (clean_dir / "metadata.json").write_text( + json.dumps( + { + "output_profile": { + "row_count": 120, + "columns": [ + {"name": "id", "type": "BIGINT"}, + {"name": "regione", "type": "VARCHAR"}, + ], + } + }, + indent=2, + ), + encoding="utf-8", + ) + (mart_dir / "metadata.json").write_text( + json.dumps( + { + "clean_input_profile": { + "row_count": 120, + "columns": [ + {"name": "id", "type": "BIGINT"}, + {"name": "regione", "type": "VARCHAR"}, + ], + }, + "table_profiles": { + "mart_example": { + "row_count": 20, + "columns": [ + {"name": "regione", "type": "VARCHAR"}, + {"name": "totale", "type": "DOUBLE"}, + ], + } + }, + "transition_profiles": [ + { + "target_name": "mart_example", + "source_row_count": 120, + "target_row_count": 20, + "added_columns": ["totale"], + "removed_columns": ["id"], + "type_changes": [], + } + ], + }, + indent=2, + ), + encoding="utf-8", + ) + + run_dir = get_run_dir(project_dir / "out", "demo_ds", 2022) + _write_run_record(run_dir / "run-123.json", "run-123", "2026-03-04T10:00:00+00:00", "SUCCESS") + + monkeypatch.chdir(tmp_path) + runner = CliRunner() + result = runner.invoke( + app, + [ + "status", + "--dataset", + "demo_ds", + "--year", + "2022", + "--latest", + "--config", + str(config_path), + ], + ) + + assert result.exit_code == 0 + assert "layer_profiles:" in result.output + assert "clean_output: rows=120 columns=2 preview=id:BIGINT, regione:VARCHAR" in result.output + assert ( + "mart_clean_input: rows=120 columns=2 preview=id:BIGINT, regione:VARCHAR" in result.output + ) + assert "mart_example: rows=20 columns=2 preview=regione:VARCHAR, totale:DOUBLE" in result.output + assert "mart_example: rows 120 -> 20 added=1 removed=1 type_changes=0" in result.output diff --git a/toolkit/cli/cmd_inspect.py b/toolkit/cli/cmd_inspect.py index e173b67..17d8e0f 100644 --- a/toolkit/cli/cmd_inspect.py +++ b/toolkit/cli/cmd_inspect.py @@ -6,7 +6,7 @@ import typer -from toolkit.cli.common import iter_years +from toolkit.cli.common import format_profile_preview, iter_years, load_layer_profile_summaries from toolkit.core.config import load_config from toolkit.core.paths import layer_year_dir from toolkit.core.support import resolve_support_payloads @@ -116,10 +116,16 @@ def _clean_paths(root: Path, dataset: str, year: int) -> dict[str, str]: def _mart_output_paths(root: Path, year_dir: Path, tables: list[dict[str, Any]]) -> list[Path]: - return [year_dir / f"{table['name']}.parquet" for table in tables if isinstance(table, dict) and table.get("name")] + return [ + year_dir / f"{table['name']}.parquet" + for table in tables + if isinstance(table, dict) and table.get("name") + ] -def _mart_paths(root: Path, dataset: str, year: int, tables: list[dict[str, Any]]) -> dict[str, Any]: +def _mart_paths( + root: Path, dataset: str, year: int, tables: list[dict[str, Any]] +) -> dict[str, Any]: mart_dir = layer_year_dir(root, "mart", dataset, year) return { "dir": str(mart_dir), @@ -174,6 +180,7 @@ def _payload_for_year(cfg, year: int) -> dict[str, Any]: "skip": profile_hints.get("skip_suggested"), "warnings": profile_hints.get("warnings") or [], }, + "layer_profiles": load_layer_profile_summaries(root, cfg.dataset, year), "latest_run": latest_payload, } @@ -182,7 +189,9 @@ def paths( config: str = typer.Option(..., "--config", "-c", help="Path to dataset.yml"), year: int | None = typer.Option(None, "--year", help="Dataset year"), as_json: bool = typer.Option(False, "--json", help="Emit JSON output for notebooks/scripts"), - strict_config: bool = typer.Option(False, "--strict-config", help="Treat deprecated config forms as errors"), + strict_config: bool = typer.Option( + False, "--strict-config", help="Treat deprecated config forms as errors" + ), ): """ Mostra i path stabili di output e l'ultimo run record per dataset/year. @@ -193,7 +202,9 @@ def paths( payload = [_payload_for_year(cfg, selected_year) for selected_year in years] if as_json: - typer.echo(json.dumps(payload if len(payload) > 1 else payload[0], indent=2, ensure_ascii=False)) + typer.echo( + json.dumps(payload if len(payload) > 1 else payload[0], indent=2, ensure_ascii=False) + ) return for item in payload: @@ -217,6 +228,30 @@ def paths( typer.echo(" - warnings:") for warning in item["raw_hints"]["warnings"]: typer.echo(f" - {warning}") + if item["layer_profiles"]: + typer.echo("layer_profiles:") + clean_output = item["layer_profiles"].get("clean_output") + if clean_output is not None: + typer.echo(f" clean_output: {format_profile_preview(clean_output)}") + mart_clean_input = item["layer_profiles"].get("mart_clean_input") + if mart_clean_input is not None: + typer.echo(f" mart_clean_input: {format_profile_preview(mart_clean_input)}") + mart_tables = item["layer_profiles"].get("mart_tables") or [] + if mart_tables: + typer.echo(" mart_tables:") + for table in mart_tables: + typer.echo(f" {table['name']}: {format_profile_preview(table)}") + transitions = item["layer_profiles"].get("clean_to_mart") or [] + if transitions: + typer.echo(" clean_to_mart:") + for transition in transitions: + typer.echo( + f" {transition['target_name']}: " + f"rows {transition['source_row_count']} -> {transition['target_row_count']} " + f"added={len(transition['added_columns'])} " + f"removed={len(transition['removed_columns'])} " + f"type_changes={transition['type_change_count']}" + ) typer.echo(f"clean_dir: {item['paths']['clean']['dir']}") typer.echo(f"clean_output: {item['paths']['clean']['output']}") typer.echo(f"clean_manifest: {item['paths']['clean']['manifest']}") @@ -235,7 +270,9 @@ def paths( typer.echo(f" - name: {support['name']}") typer.echo(f" dataset: {support['dataset']}") typer.echo(f" config_path: {support['config_path']}") - typer.echo(f" years: {', '.join(str(year_value) for year_value in support['years'])}") + typer.echo( + f" years: {', '.join(str(year_value) for year_value in support['years'])}" + ) typer.echo(f" mart: {support['mart']}") typer.echo(" outputs:") for output in support["outputs"]: @@ -254,7 +291,9 @@ def paths( def schema_diff( config: str = typer.Option(..., "--config", "-c", help="Path to dataset.yml"), as_json: bool = typer.Option(False, "--json", help="Emit JSON output"), - strict_config: bool = typer.Option(False, "--strict-config", help="Treat deprecated config forms as errors"), + strict_config: bool = typer.Option( + False, "--strict-config", help="Treat deprecated config forms as errors" + ), ): """ Confronta i principali segnali di schema RAW tra gli anni configurati. diff --git a/toolkit/cli/cmd_status.py b/toolkit/cli/cmd_status.py index c9aa3e8..976a0c8 100644 --- a/toolkit/cli/cmd_status.py +++ b/toolkit/cli/cmd_status.py @@ -6,6 +6,7 @@ import typer +from toolkit.cli.common import format_profile_preview, load_layer_profile_summaries from toolkit.core.config import load_config from toolkit.core.paths import layer_dataset_dir, layer_year_dir from toolkit.core.run_context import get_run_dir, latest_run, read_run_record @@ -149,14 +150,18 @@ def _layer_validation_summary( if isinstance(required, list) and isinstance(columns, list): missing_columns = [column for column in required if column not in set(columns)] if missing_columns: - details.append(f"missing_columns={', '.join(str(column) for column in missing_columns)}") + details.append( + f"missing_columns={', '.join(str(column) for column in missing_columns)}" + ) if layer in {"mart", "cross_year"}: required_tables = summary.get("required_tables") or [] tables = summary.get("tables") or [] if isinstance(required_tables, list) and isinstance(tables, list): missing_tables = [table for table in required_tables if table not in set(tables)] if missing_tables: - details.append(f"missing_tables={', '.join(str(table) for table in missing_tables)}") + details.append( + f"missing_tables={', '.join(str(table) for table in missing_tables)}" + ) if ok is True: state = "passed" @@ -217,13 +222,50 @@ def _print_validation_summary( typer.echo(f" {detail}") +def _print_layer_profiles(root: Path, dataset: str, year: int) -> None: + profiles = load_layer_profile_summaries(root, dataset, year) + if profiles is None: + return + + typer.echo("") + typer.echo("layer_profiles:") + + clean_output = profiles.get("clean_output") + if clean_output is not None: + typer.echo(f" clean_output: {format_profile_preview(clean_output)}") + + mart_clean_input = profiles.get("mart_clean_input") + if mart_clean_input is not None: + typer.echo(f" mart_clean_input: {format_profile_preview(mart_clean_input)}") + + mart_tables = profiles.get("mart_tables") or [] + if mart_tables: + typer.echo(" mart_tables:") + for table in mart_tables: + typer.echo(f" {table['name']}: {format_profile_preview(table)}") + + transitions = profiles.get("clean_to_mart") or [] + if transitions: + typer.echo(" clean_to_mart:") + for item in transitions: + typer.echo( + f" {item['target_name']}: " + f"rows {item['source_row_count']} -> {item['target_row_count']} " + f"added={len(item['added_columns'])} " + f"removed={len(item['removed_columns'])} " + f"type_changes={item['type_change_count']}" + ) + + def status( dataset: str = typer.Option(..., "--dataset", help="Dataset name"), year: int = typer.Option(..., "--year", help="Dataset year"), run_id: str | None = typer.Option(None, "--run-id", help="Specific run id"), latest: bool = typer.Option(False, "--latest", help="Show latest run"), config: str = typer.Option(..., "--config", "-c", help="Path to dataset.yml"), - strict_config: bool = typer.Option(False, "--strict-config", help="Treat deprecated config forms as errors"), + strict_config: bool = typer.Option( + False, "--strict-config", help="Treat deprecated config forms as errors" + ), ): """ Mostra lo stato dell'ultimo run o di uno specifico run_id. @@ -264,6 +306,7 @@ def status( for layer in ("raw", "clean", "mart"): typer.echo(_layer_row(record, layer)) _print_validation_summary(Path(cfg.root), dataset, year, record, has_cross_year) + _print_layer_profiles(Path(cfg.root), dataset, year) if record.get("status") == "FAILED" and record.get("error"): typer.echo("") diff --git a/toolkit/cli/common.py b/toolkit/cli/common.py index 5c76feb..50712f8 100644 --- a/toolkit/cli/common.py +++ b/toolkit/cli/common.py @@ -1,7 +1,11 @@ from __future__ import annotations +import json +from pathlib import Path + from toolkit.core.config import load_config from toolkit.core.logging import get_logger +from toolkit.core.paths import layer_year_dir def load_cfg_and_logger( @@ -66,3 +70,117 @@ def iter_selected_years( raise ValueError(f"Year(s) not configured in dataset.yml: {listed}") return requested + + +def _read_json(path: Path) -> dict | None: + try: + return json.loads(path.read_text(encoding="utf-8")) + except Exception: + return None + + +def _profile_summary(profile: dict | None, *, max_columns: int = 6) -> dict | None: + if not isinstance(profile, dict): + return None + + columns = profile.get("columns") or [] + preview: list[dict[str, str | None]] = [] + for item in columns[:max_columns]: + if not isinstance(item, dict): + continue + preview.append( + { + "name": item.get("name"), + "type": item.get("type"), + } + ) + + return { + "row_count": profile.get("row_count"), + "columns_count": len(columns) if isinstance(columns, list) else 0, + "columns_preview": preview, + "columns_truncated": max( + 0, (len(columns) if isinstance(columns, list) else 0) - len(preview) + ), + } + + +def _transition_summary(item: dict | None) -> dict | None: + if not isinstance(item, dict): + return None + + type_changes = item.get("type_changes") or [] + return { + "target_name": item.get("target_name"), + "source_row_count": item.get("source_row_count"), + "target_row_count": item.get("target_row_count"), + "added_columns": list(item.get("added_columns") or []), + "removed_columns": list(item.get("removed_columns") or []), + "type_change_count": len(type_changes) if isinstance(type_changes, list) else 0, + } + + +def load_layer_profile_summaries(root: Path, dataset: str, year: int) -> dict[str, object] | None: + clean_metadata = ( + _read_json(layer_year_dir(root, "clean", dataset, year) / "metadata.json") or {} + ) + mart_metadata = _read_json(layer_year_dir(root, "mart", dataset, year) / "metadata.json") or {} + + clean_output = _profile_summary(clean_metadata.get("output_profile")) + mart_clean_input = _profile_summary(mart_metadata.get("clean_input_profile")) + + mart_tables: list[dict[str, object]] = [] + for name, profile in ( + (mart_metadata.get("table_profiles") or {}).items() + if isinstance(mart_metadata.get("table_profiles"), dict) + else [] + ): + summary = _profile_summary(profile) + if summary is None: + continue + mart_tables.append({"name": name, **summary}) + + transitions: list[dict[str, object]] = [] + raw_transitions = mart_metadata.get("transition_profiles") or [] + if isinstance(raw_transitions, list): + for item in raw_transitions: + summary = _transition_summary(item) + if summary is not None: + transitions.append(summary) + + has_any = any( + [clean_output is not None, mart_clean_input is not None, mart_tables, transitions] + ) + if not has_any: + return None + + return { + "clean_output": clean_output, + "mart_clean_input": mart_clean_input, + "mart_tables": mart_tables, + "clean_to_mart": transitions, + } + + +def format_profile_preview(summary: dict[str, object] | None) -> str: + if not isinstance(summary, dict): + return "rows=? columns=?" + + columns_preview = summary.get("columns_preview") or [] + rendered_columns: list[str] = [] + if isinstance(columns_preview, list): + for item in columns_preview: + if not isinstance(item, dict): + continue + rendered_columns.append(f"{item.get('name')}:{item.get('type')}") + + suffix = "" + truncated = summary.get("columns_truncated") + if isinstance(truncated, int) and truncated > 0: + suffix = f" (+{truncated} more)" + + return ( + f"rows={summary.get('row_count')} " + f"columns={summary.get('columns_count')} " + f"preview={', '.join(rendered_columns) if rendered_columns else '-'}{suffix}" + )