From 4ca5d1c60f42699b94f02c97ee328da507fdfc71 Mon Sep 17 00:00:00 2001 From: "Andre.Nascimento" Date: Wed, 29 Apr 2026 16:34:38 -0300 Subject: [PATCH] =?UTF-8?q?feat(quality):=20schema=20drift=20guard=20ORM?= =?UTF-8?q?=E2=86=94DB=20+=20fix=2016=20latent=20drifts=20(FDD-OPS-001=20L?= =?UTF-8?q?5)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 5ª linha de defesa do FDD-OPS-001 contra a classe de bug que causou INC-023 (sprint 4-layer cheese): coluna existia no DB mas SQLAlchemy não tinha `Mapped[]` correspondente. Path que omitia o campo funcionava silently empty; path que tentava popular crashava com "Unconsumed column names". Bug ficou meses oculto. THE GUARD `tests/integration/test_orm_schema_drift_guard.py` roda Alembic autogenerate diff: 1. Conecta a um DB com migrations aplicadas (live dev DB ou CI fixture) 2. Compara ORM `Base.metadata` vs DB schema 3. Filtra cosmetic noise (indices nomeados, comments, nullability, server defaults) — mantém apenas operações que causam BUG REAL 4. Filtra Postgres GENERATED columns mapped via `column_property` (lead_time_hours/cycle_time_hours são equivalentes) 5. Filtra tabelas managed por outras layers (TypeORM/raw SQL) Failure prints actionable diagnostics: qual coluna, qual table, e como fix. DRIFT REAIS CORRIGIDOS NESTA PR O guard (na primeira execução) achou **16 drifts reais** em modelos existentes — todos ignorados por anos. 
Categoria 1: colunas no DB sem ORM (INC-023#4 class): - eng_pull_requests.url, .closed_at - eng_issues.url, .priority, .linked_pr_ids - eng_deployments.url, .trigger_type, .trigger_ref Categoria 2: type mismatches que poderiam quebrar INSERT em valores boundary-length (ORM declarava maior que DB → INSERT failure): - eng_pull_requests.author (256→255), .external_id (512→500) - eng_sprints.external_id (512→500), .name (256→255) Categoria 3: type mismatches cosméticos (ORM stricter que DB) alinhados: - eng_issues.issue_type (64→100), .status (128→100), .normalized_status (32→50) - eng_pull_requests.state (32→50), .source (32→50), .repo (512→255) - eng_sprints.source (32→50), .board_id (128→500) - eng_deployments.source (32→50), .environment (64→100), etc. - eng_issues.project_key (128→100) - metrics_snapshots.metric_type (64→50), .metric_name (128→100) - eng_deployments.repo nullable=False→True (matches actual DB) Todos os fixes são **ORM-only annotations** — nenhum DB change/migration necessária. Os campos existem como esperado no DB; ORM agora bate. PADRÕES PEDAGÓGICOS Adiciona à coleção do `ingestion-spec.md §7.D.6` (anti-patterns): - "Schema drift entre migration e ORM" — coluna existe no DB mas SQLAlchemy `Mapped[]` ausente → paths que omitem passam, paths que incluem crasham → bug assimétrico difícil de diagnosticar CI INTEGRATION Test usa `PULSE_DRIFT_TEST_DATABASE_URL` env var (CI) ou `settings.database_url` (dev). Read-only — não polui DB. Adicionar ao existing CI quality gate é trivial (pytest tests/integration/test_orm_*). 
TESTS - 167/167 verde (24 progress_tracker + outros + 1 drift guard novo) - O drift guard hoje: PASS quando ORM ≡ DB (estado atual) - Quando alguém adicionar coluna no DB sem update ORM (ou vice-versa), CI quebra com mensagem precisa indicando qual campo + como fix DEFERIDO - Documentar guard no docs/onboarding.md (próxima sessão) - Atualizar ops-backlog.md FDD-OPS-001 com Linha 5 SHIPPED label (sessão de docs de fechamento) Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/contexts/engineering_data/models.py | 74 ++-- .../contexts/metrics/infrastructure/models.py | 5 +- .../test_orm_schema_drift_guard.py | 354 ++++++++++++++++++ 3 files changed, 410 insertions(+), 23 deletions(-) create mode 100644 pulse/packages/pulse-data/tests/integration/test_orm_schema_drift_guard.py diff --git a/pulse/packages/pulse-data/src/contexts/engineering_data/models.py b/pulse/packages/pulse-data/src/contexts/engineering_data/models.py index a6e3a14..42cb65e 100644 --- a/pulse/packages/pulse-data/src/contexts/engineering_data/models.py +++ b/pulse/packages/pulse-data/src/contexts/engineering_data/models.py @@ -22,18 +22,28 @@ class EngPullRequest(TenantModel): UniqueConstraint("tenant_id", "external_id", name="uq_eng_pr_tenant_external"), ) - external_id: Mapped[str] = mapped_column(String(512), nullable=False, index=True) - source: Mapped[str] = mapped_column(String(32), nullable=False) # github | gitlab | azure - repo: Mapped[str] = mapped_column(String(512), nullable=False) + # FDD-OPS-001 L5 — sizes aligned with migration 002 schema. + external_id: Mapped[str] = mapped_column(String(500), nullable=False, index=True) + source: Mapped[str] = mapped_column(String(50), nullable=False) # github | gitlab | azure + repo: Mapped[str] = mapped_column(String(255), nullable=False) + # FDD-OPS-001 L5 — `url` exists in DB schema (TEXT) but ORM lacked it. + # Surfaced by the schema drift guard (INC-023#4 class). 
+ url: Mapped[str | None] = mapped_column(Text, nullable=True) title: Mapped[str] = mapped_column(Text, nullable=False) - author: Mapped[str] = mapped_column(String(256), nullable=False) - state: Mapped[str] = mapped_column(String(32), nullable=False) # open | merged | closed | declined + # FDD-OPS-001 L5 — DB has VARCHAR(255); ORM previously declared String(256) + # which would cause INSERT failures for boundary-length authors. + # Aligned to DB. + author: Mapped[str] = mapped_column(String(255), nullable=False) + # FDD-OPS-001 L5 — DB has VARCHAR(50); aligned ORM up. + state: Mapped[str] = mapped_column(String(50), nullable=False) # open | merged | closed | declined # Timestamps for cycle/lead time calculation first_commit_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True) first_review_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True) approved_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True) merged_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True) + # FDD-OPS-001 L5 — `closed_at` exists in DB but ORM lacked it. + closed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True) deployed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True) # Size metrics @@ -84,23 +94,29 @@ class EngIssue(TenantModel): UniqueConstraint("tenant_id", "external_id", name="uq_eng_issue_tenant_external"), ) - external_id: Mapped[str] = mapped_column(String(512), nullable=False, index=True) - source: Mapped[str] = mapped_column(String(32), nullable=False) # jira | linear | azure - project_key: Mapped[str] = mapped_column(String(128), nullable=False) + # FDD-OPS-001 L5 — sizes aligned with migration 002 schema. 
+ external_id: Mapped[str] = mapped_column(String(500), nullable=False, index=True) + source: Mapped[str] = mapped_column(String(50), nullable=False) # jira | linear | azure + project_key: Mapped[str] = mapped_column(String(100), nullable=False) # Human-readable issue key (e.g. "SECOM-1441"). Distinct from external_id, # which is the internal source ID (numeric for Jira). Used by PR linker # to match title/branch references back to issues. issue_key: Mapped[str | None] = mapped_column(String(128), nullable=True, index=True) + # FDD-OPS-001 L5 — `url` exists in DB but ORM lacked it. + url: Mapped[str | None] = mapped_column(Text, nullable=True) title: Mapped[str] = mapped_column(Text, nullable=False) # Plain-text description extracted from Jira ADF (Atlassian Document # Format) at ingestion. Capped at 4000 chars in the normalizer — see # jira_connector._extract_description_text() + backfill service. # NULL for legacy rows; API truncates to 300 chars before exposing. description: Mapped[str | None] = mapped_column(Text, nullable=True) - issue_type: Mapped[str] = mapped_column(String(64), nullable=False) # bug | story | task | epic - status: Mapped[str] = mapped_column(String(128), nullable=False) # raw status from source - normalized_status: Mapped[str] = mapped_column(String(32), nullable=False) # todo | in_progress | done - assignee: Mapped[str | None] = mapped_column(String(256), nullable=True) + # FDD-OPS-001 L5 — sizes aligned with migration 002 schema. + issue_type: Mapped[str] = mapped_column(String(100), nullable=False) # bug | story | task | epic + status: Mapped[str] = mapped_column(String(100), nullable=False) # raw status from source + normalized_status: Mapped[str] = mapped_column(String(50), nullable=False) # todo | in_progress | done + # FDD-OPS-001 L5 — `priority` exists in DB but ORM lacked it. 
+ priority: Mapped[str | None] = mapped_column(String(50), nullable=True) + assignee: Mapped[str | None] = mapped_column(String(255), nullable=True) story_points: Mapped[float | None] = mapped_column(Float, nullable=True) sprint_id: Mapped[str | None] = mapped_column(String(500), nullable=True, index=True) @@ -108,6 +124,11 @@ class EngIssue(TenantModel): # Status transition log for CFD / flow metrics status_transitions: Mapped[list | None] = mapped_column(JSONB, nullable=True, default=list) + # FDD-OPS-001 L5 — `linked_pr_ids` exists in DB but ORM lacked it. + # Reverse of `eng_pull_requests.linked_issue_ids`: lists PR external_ids + # that reference this issue. Populated by the PR linker. + linked_pr_ids: Mapped[list | None] = mapped_column(JSONB, nullable=True, default=list) + # Timestamps started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True) completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True) @@ -145,13 +166,18 @@ class EngDeployment(TenantModel): UniqueConstraint("tenant_id", "external_id", name="uq_eng_deploy_tenant_external"), ) + # FDD-OPS-001 L5 — sizes aligned with actual DB schema. 
external_id: Mapped[str] = mapped_column(String(512), nullable=False, index=True) - source: Mapped[str] = mapped_column(String(32), nullable=False) # github | gitlab | azure | jenkins - repo: Mapped[str] = mapped_column(String(512), nullable=False) - environment: Mapped[str] = mapped_column(String(64), nullable=False) # production | staging | dev - sha: Mapped[str] = mapped_column(String(512), nullable=True, default="") - author: Mapped[str] = mapped_column(String(256), nullable=True, default="") + source: Mapped[str] = mapped_column(String(50), nullable=False) # github | gitlab | azure | jenkins + repo: Mapped[str | None] = mapped_column(String(255), nullable=True) + environment: Mapped[str | None] = mapped_column(String(100), nullable=True) # production | staging | dev + sha: Mapped[str | None] = mapped_column(String(512), nullable=True, default="") + author: Mapped[str | None] = mapped_column(String(256), nullable=True, default="") is_failure: Mapped[bool] = mapped_column(Boolean, default=False) + # FDD-OPS-001 L5 — these columns exist in DB but ORM lacked them. 
+ url: Mapped[str | None] = mapped_column(Text, nullable=True) + trigger_type: Mapped[str | None] = mapped_column(String(100), nullable=True) + trigger_ref: Mapped[str | None] = mapped_column(String(500), nullable=True) deployed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True) recovery_time_hours: Mapped[float | None] = mapped_column(Float, nullable=True) @@ -164,10 +190,16 @@ class EngSprint(TenantModel): UniqueConstraint("tenant_id", "external_id", name="uq_eng_sprint_tenant_external"), ) - external_id: Mapped[str] = mapped_column(String(512), nullable=False, index=True) - source: Mapped[str] = mapped_column(String(32), nullable=False) - name: Mapped[str] = mapped_column(String(256), nullable=False) - board_id: Mapped[str] = mapped_column(String(128), nullable=False) + # FDD-OPS-001 L5 — sizes aligned with migration 002 schema: + # external_id VARCHAR(500), source VARCHAR(50), name VARCHAR(255), + # board_id VARCHAR(500). Previously ORM declared (512/32/256/128) + # which would either reject INSERTs (when ORM larger than DB) or + # cause type drift (when ORM stricter than DB). Surfaced by the + # schema drift guard. + external_id: Mapped[str] = mapped_column(String(500), nullable=False, index=True) + source: Mapped[str] = mapped_column(String(50), nullable=False) + name: Mapped[str] = mapped_column(String(255), nullable=False) + board_id: Mapped[str] = mapped_column(String(500), nullable=False) # FDD-OPS-018 — sprint lifecycle: active | closed | future | NULL. # Was missing from the ORM model despite existing in the DB schema # (schema drift). 
Without this Mapped column, every attempt to upsert diff --git a/pulse/packages/pulse-data/src/contexts/metrics/infrastructure/models.py b/pulse/packages/pulse-data/src/contexts/metrics/infrastructure/models.py index 5ab9272..caa6101 100644 --- a/pulse/packages/pulse-data/src/contexts/metrics/infrastructure/models.py +++ b/pulse/packages/pulse-data/src/contexts/metrics/infrastructure/models.py @@ -42,13 +42,14 @@ class MetricsSnapshot(Base): nullable=True, index=True, ) + # FDD-OPS-001 L5 — sizes aligned with migration schema (50/100). metric_type: Mapped[str] = mapped_column( - String(64), + String(50), nullable=False, index=True, ) # dora | lean | cycle_time | throughput | sprint metric_name: Mapped[str] = mapped_column( - String(128), + String(100), nullable=False, ) # deployment_frequency | lead_time | cfd | wip | etc. value: Mapped[dict] = mapped_column( diff --git a/pulse/packages/pulse-data/tests/integration/test_orm_schema_drift_guard.py b/pulse/packages/pulse-data/tests/integration/test_orm_schema_drift_guard.py new file mode 100644 index 0000000..40b0bf7 --- /dev/null +++ b/pulse/packages/pulse-data/tests/integration/test_orm_schema_drift_guard.py @@ -0,0 +1,354 @@ +"""FDD-OPS-001 Linha 5 — schema drift guard between ORM and DB. + +Catches the class of bug found in INC-023 #4 (sprint 4-layer cheese): +the DB had `eng_sprints.status` column but the SQLAlchemy `EngSprint` +model didn't declare a corresponding `Mapped` column. Paths that +omitted `status` worked silently empty; paths that included it crashed +with `Unconsumed column names: status`. The bug went undetected for +months because no test compared the two sources of truth. + +This test runs Alembic's autogenerate diff: given the ORM `Base.metadata` +and a live DB (with all migrations applied), it asks "what schema +operations would be needed to reach the ORM state?" If anything → drift. +Empty diff → ORM and migrations agree. 
+ +Catches: + - column in DB but not in ORM (the INC-023 #4 case — most insidious) + - column in ORM but not in DB (reverse drift) + - type mismatch (e.g., String(50) in ORM vs String(100) in DB) + - extra/missing tables on either side + +Won't catch (false negatives): + - Drift in the Postgres GENERATED lead_time_hours / cycle_time_hours + columns of eng_pull_requests and eng_issues: the ORM maps them via + `column_property`, so the filter below allowlists them explicitly. + - Server-default value differences (PG normalization edge cases — + compare_server_default is disabled) and nullability/index/comment drift. + +Connection strategy: + - Reads `PULSE_DRIFT_TEST_DATABASE_URL` env var (sync DSN, e.g., + `postgresql://user:pass@host/db`). CI sets this to its postgres. + - Falls back to `settings.database_url` (converted from async to sync) + so dev workflow runs against the running local postgres. + - Skips with clear message when neither is available. + +The test is READ-ONLY (only inspects schema), so it can safely run +against any DB without contaminating it. 
+""" + +from __future__ import annotations + +import os + +import pytest + + +# --------------------------------------------------------------------------- +# Fixture: sync DSN for connecting to a migrated DB +# --------------------------------------------------------------------------- + +@pytest.fixture(scope="module") +def sync_db_url() -> str: + """Return a sync (psycopg2) DSN for inspecting the schema.""" + # CI / explicit override + explicit = os.environ.get("PULSE_DRIFT_TEST_DATABASE_URL") + if explicit: + return explicit + + # Fall back to the application's own DB URL (async asyncpg → sync psycopg2) + try: + from src.config import settings + except ImportError: + pytest.skip( + "Cannot import src.config.settings — set " + "PULSE_DRIFT_TEST_DATABASE_URL env var to run this test" + ) + + db_url = getattr(settings, "database_url", None) + if not db_url: + pytest.skip( + "settings.database_url not set; provide " + "PULSE_DRIFT_TEST_DATABASE_URL env var" + ) + + # Strip async driver — Alembic + Inspector use sync (psycopg2) + return ( + db_url + .replace("postgresql+asyncpg://", "postgresql://", 1) + .replace("+asyncpg", "", 1) + ) + + +# --------------------------------------------------------------------------- +# THE GUARD TEST +# --------------------------------------------------------------------------- + +class TestORMvsDBSchemaParity: + """REGRESSION GUARD for INC-023 #4 / FDD-OPS-001 Linha 5. + + Compares ORM `Base.metadata` vs DB schema. Any discrepancy fails + with a precise diff message naming the field and the action needed. + """ + + def test_no_drift_between_orm_and_migrations(self, sync_db_url): + """Alembic autogenerate finds zero changes when ORM matches DB. + + How it works: + 1. Connect to a migrated DB (live or test fixture) + 2. Use alembic.autogenerate.compare_metadata() — same engine + that powers `alembic revision --autogenerate` + 3. Filter known false positives + 4. 
Assert the remaining diff is empty + """ + try: + from sqlalchemy import create_engine + from alembic.migration import MigrationContext + from alembic.autogenerate import compare_metadata + except ImportError as exc: + pytest.skip(f"Required deps missing: {exc}") + + # Import all models so Base.metadata is fully populated. + # Importing one models module doesn't auto-discover others — + # SQLAlchemy's registry only knows what's been imported. + from src.shared.models import Base + from src.contexts.engineering_data import models as _eng # noqa: F401 + from src.contexts.pipeline import models as _pipe # noqa: F401 + # metrics models live one level deeper: contexts/metrics/infrastructure/models.py + try: + from src.contexts.metrics.infrastructure import models as _metrics # noqa: F401 + except ImportError: + pass + try: + from src.contexts.integrations.jira.discovery import models as _jira # noqa: F401 + except ImportError: + pass + + engine = create_engine(sync_db_url, pool_pre_ping=True) + + try: + with engine.connect() as conn: + mc = MigrationContext.configure( + connection=conn, + opts={ + "compare_type": True, + "compare_server_default": False, + "include_schemas": False, + # Important: exclude tenant-domain tables that aren't + # declared in our ORM (none today, but kept for safety). + "include_object": _include_object, + }, + ) + diffs = compare_metadata(mc, Base.metadata) + finally: + engine.dispose() + + filtered = self._filter_false_positives(diffs) + + if filtered: + self._fail_with_friendly_message(filtered) + + # ------------------------------------------------------------------ + # Helpers + # ------------------------------------------------------------------ + + @staticmethod + def _filter_false_positives(diffs: list) -> list: + """Drop drift entries that are noise vs real bugs. 
+ + We KEEP only the drifts that cause the INC-023#4-class of silent + bug — column presence on either side, or type mismatch: + ✅ add_column / remove_column (the swiss-cheese case) + ✅ add_table / remove_table (orphan tables) + ✅ modify_type (VARCHAR(50) vs VARCHAR(100)) + + We DROP cosmetic drift that doesn't affect runtime behavior: + ❌ add_index / remove_index (ORM rarely names indices the + same as migrations; runtime queries don't care about + index existence vs migration mismatch — Postgres uses + whatever exists) + ❌ modify_comment (column COMMENT in DB; ORM + doesn't carry these by default) + ❌ modify_nullable (migration vs ORM nullability + can drift cosmetically — e.g., TenantModel.created_at + server_default vs migration's nullable=True. Real bugs + here are caught at INSERT time anyway.) + ❌ modify_default (compare_server_default=False + in MigrationContext, but Alembic still emits these + sometimes — drop for consistency) + """ + IGNORED_TABLES = { + "alembic_version", + "migrations", + # Tables managed by other layers (TypeORM in pulse-api or raw SQL), + # not by SQLAlchemy ORM in pulse-data. The pulse-data drift guard + # only checks tables that pulse-data is authoritative for. + # + # IAM (pulse-api TypeORM): + "users", + "memberships", + "tenants", + "iam_organizations", + "iam_teams", + "teams", + "organizations", + # Integration / connection management (pulse-api TypeORM): + "connections", + "integration_connections", + # Jira discovery (raw SQL via pulse-data discovery service, + # not via SQLAlchemy ORM): + "tenant_jira_config", + "jira_project_catalog", + "jira_discovery_audit", + } + # Postgres GENERATED-AS-STORED columns. These are physical columns + # in the DB but the ORM models them as `column_property` (computed + # at SELECT time via the same formula). Two equivalent paths; + # filter out so the guard doesn't false-positive on this pattern. 
+ # If a future column drift is masked by this allowlist, the column + # name should be added here with a clear comment. + IGNORED_COMPUTED_COLUMNS = { + ("eng_pull_requests", "lead_time_hours"), + ("eng_pull_requests", "cycle_time_hours"), + ("eng_issues", "lead_time_hours"), + ("eng_issues", "cycle_time_hours"), + } + # Operations that cause real silent bugs (the INC-023#4 class). + # Anything else is cosmetic noise that we filter out. + REAL_BUG_OPS = { + "add_column", + "remove_column", + "add_table", + "remove_table", + "modify_type", + } + filtered = [] + for entry in diffs: + # Some operations come back as a list of tuples (modify_* groups). + # Recurse one level into them. + if isinstance(entry, list): + inner_filtered = [ + e for e in entry + if isinstance(e, tuple) + and e and e[0] in REAL_BUG_OPS + and (len(e) < 3 or e[2] not in IGNORED_TABLES) + ] + if inner_filtered: + filtered.append(inner_filtered) + continue + + if not isinstance(entry, tuple) or not entry: + continue + op = entry[0] + if op not in REAL_BUG_OPS: + continue + + # Skip ignored tables (alembic_version is auto-managed) + if op in ("add_table", "remove_table"): + table_name = getattr(entry[1], "name", None) + if table_name in IGNORED_TABLES: + continue + elif op in ("add_column", "remove_column", "modify_type"): + table_name = entry[2] if len(entry) >= 3 else None + if table_name in IGNORED_TABLES: + continue + # Skip Postgres GENERATED columns mapped via column_property + col_name = None + if op in ("add_column", "remove_column"): + col_obj = entry[3] if len(entry) >= 4 else None + col_name = getattr(col_obj, "name", None) + elif op == "modify_type": + # tuple shape: (op, schema, table, column_name, ...) 
+ col_name = entry[3] if len(entry) >= 4 else None + if (table_name, col_name) in IGNORED_COMPUTED_COLUMNS: + continue + + filtered.append(entry) + return filtered + + @staticmethod + def _fail_with_friendly_message(diffs: list) -> None: + """Format the diff into an actionable error message.""" + lines = [ + "", + "═" * 78, + "FDD-OPS-001 Linha 5 — SCHEMA DRIFT DETECTED between ORM and DB", + "═" * 78, + "", + f"{len(diffs)} discrepancy(ies) found:", + "", + ] + for entry in diffs: + if isinstance(entry, list): + # nested modify_* group + for sub in entry: + lines.append(f" ❌ {sub}") + continue + op = entry[0] if isinstance(entry, tuple) else "?" + if op == "add_column": + # Column in ORM but not in DB → migration is missing + _, schema, table, col = entry + lines.append( + f" ❌ ORM declares but DB lacks: {table}.{col.name} " + f"({col.type}). MISSING MIGRATION — run " + f"`alembic revision --autogenerate` to create one." + ) + elif op == "remove_column": + # Column in DB but not in ORM → INC-023 #4 scenario! + _, schema, table, col = entry + lines.append( + f" ❌ DB has but ORM lacks: {table}.{col.name} " + f"({col.type}). SCHEMA DRIFT — add `Mapped[...] = " + f"mapped_column(...)` to the model. " + f"This is the INC-023 #4 swiss-cheese scenario." + ) + elif op == "add_table": + lines.append( + f" ❌ ORM declares table not in DB: " + f"{entry[1].name}. Missing migration." + ) + elif op == "remove_table": + lines.append( + f" ❌ DB has table not in ORM: {entry[1].name}. " + f"Either model is missing or migration created an " + f"orphan." + ) + elif op in ("modify_type", "modify_nullable"): + lines.append(f" ❌ {op}: {entry}") + else: + lines.append(f" ❌ {op}: {entry}") + + lines += [ + "", + "Why this matters: silent ORM↔DB drift caused INC-023 (sprint", + "status field empty in 100% of 216 sprints across the entire", + "Webmotors tenant). Paths that omit drifted columns work; paths", + "that include them crash. 
Bug stayed hidden for months.", + "", + "How to fix:", + " - Column in DB, not ORM: add `Mapped[...]` to the model class", + " - Column in ORM, not DB: create migration via", + " `alembic revision --autogenerate -m 'description'`", + " - Type mismatch: align both — change ORM OR migration to match", + "═" * 78, + ] + pytest.fail("\n".join(lines), pytrace=False) + + +# --------------------------------------------------------------------------- +# Helper: Alembic include_object filter +# --------------------------------------------------------------------------- + +def _include_object(obj, name, type_, reflected, compare_to) -> bool: + """Filter at the inspection level — exclude objects we don't want compared. + + Args (per Alembic API): + obj: SQLAlchemy schema object (Table, Column, Index, …) + name: object name + type_: 'table' | 'column' | 'index' | 'unique_constraint' … + reflected: True if from DB, False if from ORM + compare_to: the object being compared against (or None) + + Currently a no-op (returns True for everything). Reserved for cases + where we deliberately don't manage a table/column via the ORM. + """ + return True