diff --git a/.gitignore b/.gitignore index f8d0620..8d180d7 100644 --- a/.gitignore +++ b/.gitignore @@ -56,6 +56,8 @@ npm-debug.log* # === Claude Code === .claude/settings.local.json +.claude/scheduled_tasks.lock +.claude/projects/ # === Factory (read-only reference, not deployed) === # factory/ is tracked but treated as read-only reference docs diff --git a/pulse/.env.example b/pulse/.env.example index 5397540..1123775 100644 --- a/pulse/.env.example +++ b/pulse/.env.example @@ -25,15 +25,25 @@ PULSE_DATA_PORT=8000 # -- Source Connector Tokens ------------------------------------------------ # GitHub Personal Access Token (repo, read:org scopes) GITHUB_TOKEN= +# GitHub org slug — REQUIRED. Used by discover_repos() — see ingestion-spec §2.3. +GITHUB_ORG= # GitLab Personal Access Token (read_api scope) GITLAB_TOKEN= # Jira API Token + email JIRA_API_TOKEN= JIRA_EMAIL= +# Jira base URL (e.g., https://your-org.atlassian.net) +JIRA_BASE_URL= +# JIRA_PROJECTS is intentionally absent. PULSE uses dynamic discovery +# (ingestion-spec §2.3); the active project list is maintained in +# `jira_project_catalog` table and resolved by ModeResolver. Do NOT add +# JIRA_PROJECTS unless you set DYNAMIC_JIRA_DISCOVERY_ENABLED=false (not +# recommended). +DYNAMIC_JIRA_DISCOVERY_ENABLED=true # Azure DevOps Personal Access Token (Code, Work Items read) AZURE_DEVOPS_PAT= # Jenkins API credentials (read-only: Overall/Read, Job/Read, Run/Read) -JENKINS_BASE_URL=https://jenkins.webmotors.com.br +JENKINS_BASE_URL= JENKINS_USERNAME= JENKINS_API_TOKEN= diff --git a/pulse/config/connections.yaml b/pulse/config/connections.yaml index 6605f00..e8c9b34 100644 --- a/pulse/config/connections.yaml +++ b/pulse/config/connections.yaml @@ -20,17 +20,13 @@ connections: token_env: GITHUB_TOKEN base_url: https://api.github.com sync_interval_minutes: 15 + # Per ingestion-spec §2.3 (Discovery-Only): NO explicit list of repos. + # The connector calls `discover_repos(active_months=12)` on each cycle + # via GraphQL `organization.repositories(orderBy: PUSHED_AT)` filtered + # by activity. New repos appear automatically; archived ones drop off + # without manual YAML edits. scope: - repositories: - - "webmotors-private/webmotors.next.ui" - - "webmotors-private/webmotors.portal.ui" - - "webmotors-private/webmotors.buyer.ui" - - "webmotors-private/webmotors.buyer.desktop.ui" - - "webmotors-private/webmotors.catalogo.next.ui" - - "webmotors-private/webmotors.fipe.next.ui" - - "webmotors-private/webmotors.pf" - - "webmotors-private/eleanor.flutter" - - "webmotors-private/webmotors.app.pf.search.bff" + active_months: 12 - name: "Webmotors Jenkins" source: jenkins @@ -41,13 +37,11 @@ connections: sync_interval_minutes: 15 scope: # Job list is loaded from config/jenkins-job-mapping.json (auto-generated). - # Generated 2026-04-14 by READ-ONLY SCM scan of 544 active PRD Jenkins - # jobs — each job's lastBuild → remoteUrls resolves the GitHub repo. - # Total: 577 PRD jobs across 283 repos. - # - # To regenerate: run scripts/discover_jenkins_jobs.py (READ-ONLY). - # The sync worker reads prd_jobs from the mapping file at startup. - jobs_from_mapping: true # Signals config.py to use jenkins-job-mapping.json + # Generated by READ-ONLY SCM scan of active PRD Jenkins jobs — each + # job's lastBuild → remoteUrls resolves the GitHub repo. Per + # ingestion-spec §3.6, regen via scripts/discover_jenkins_jobs.py + # when new repos appear (manual or weekly cron). 
+ jobs_from_mapping: true - name: "Webmotors Jira" source: jira @@ -56,19 +50,15 @@ connections: token_env: JIRA_API_TOKEN base_url: https://webmotors.atlassian.net sync_interval_minutes: 15 + # Per ingestion-spec §2.3 (Discovery-Only): NO explicit project list. + # `ProjectDiscoveryService` lists ALL Jira projects; `SmartPrioritizer` + # auto-activates projects with ≥3 PR references. Tenant config in + # `tenant_jira_config` controls discovery mode (must be 'smart' for + # auto-activation). PII-flagged projects require manual approval. scope: - projects: - # Canais Digitais Web (Kanban) - - "DESC" # PF - Descobrir veículo - - "ENO" # PF - Encontrar oferta - - "ANCR" # PF - Anunciar - - "PUSO" # PF - USO - # Canais Digitais App (Kanban) - - "APPF" # PF - Aplicativo - # Sprint-based projects - - "FID" # Fidelidade - - "CTURBO" # Consultor Turbo Lab - - "PTURB" # Portal Turbo Lab + mode: smart + smart_min_pr_references: 3 + smart_pr_scan_days: 90 # Issue status mapping — Webmotors Jira (Portuguese) → PULSE normalized # Primary source; overrides DEFAULT_STATUS_MAPPING in normalizer.py. diff --git a/pulse/docs/backlog/ops-backlog.md b/pulse/docs/backlog/ops-backlog.md index c3b7262..1b18bd1 100644 --- a/pulse/docs/backlog/ops-backlog.md +++ b/pulse/docs/backlog/ops-backlog.md @@ -787,3 +787,928 @@ clientes, não pela equipe. --- +## FDD-OPS-012 · Issue sync — batch-per-project (simetria com PRs) + +**Epic:** Data Pipeline Reliability · **Release:** R1 +**Priority:** **P1** · **Persona:** Engineering (visibility + memory safety) +**Owner class:** `pulse-data-engineer` +**Trigger:** 2026-04-28 — full re-ingestion travada por horas em fase +"search/jql" sem nenhuma issue persistida no DB. Diagnóstico: arquitetura +do `_sync_issues()` é bulk-fetch-then-persist, enquanto `_sync_pull_requests()` +foi migrada pra batch-per-repo em 2026-04-23 (commit `7f9f339`). Issues +ficou pra trás. + +### Problema + +`packages/pulse-data/src/workers/devlake_sync.py:_sync_issues()` segue o +padrão antigo: + +```python +raw_issues = await self._reader.fetch_issues(...) # ← BLOQUEIA até paginar TUDO +changelogs = await self._reader.fetch_issue_changelogs(ids) # ← + N calls extras +normalized = [normalize_issue(...) for raw in raw_issues] # ← TUDO em RAM +count = await self._upsert_issues(normalized) # ← upsert único +``` + +Para 32 projetos × ~12k issues médias = ~376k issues: +- **Tempo até primeira linha persistida**: 2-5h (pagination + changelogs serial) +- **Pico de memória**: ~1-2 GB de issue dicts (no atual setup, OK; se Webmotors crescer pra 1M+, OOM) +- **Visibilidade zero durante fetch**: `eng_issues.COUNT()` fica em 0 por horas — operadores acham que travou +- **Recovery se sync abortar mid-fetch**: zero progress preserved (toda paginação se perde) + +PRs já resolveram isso em `7f9f339`: + +```python +# devlake_sync.py:_sync_pull_requests() (post-7f9f339) +async for repo_name, raw_prs in self._reader.fetch_pull_requests_batched(since=since): + # 1 repo at a time → normalize → upsert → progress signal +``` + +Resultado: PRs persistem em batches de ~100 a cada poucos segundos, operador +vê COUNT crescendo, recovery preserva 95%+ do trabalho em caso de crash. + +### Solução + +Espelhar o padrão de PRs em issues: + +1. **Refactor `JiraConnector.fetch_issues()` em `fetch_issues_batched()`** — + AsyncIterator que yielda `(project_key, batch_of_issues)` por página JQL + (ou por projeto, granularidade a definir). + +2. 
**Refactor `_sync_issues()` em devlake_sync.py** — loop async sobre + batches, normaliza + upsert por batch, atualiza progress, publica Kafka + por batch. + +3. **Manter changelog fetch inline com expand=changelog** — não fazer call + separada `fetch_issue_changelogs(ids)`. JQL já suporta `expand=changelog` + inline (veja `jira_connector.py:212`). Verificar se está sendo usado. + +4. **Watermark batch-aware** — atualizar watermark a cada N batches (ex: 10), + não só no final. Permite resume após crash sem perder muito. + +### Acceptance Criteria + +``` +Given a fresh re-ingestion against a Webmotors-scale tenant (32 projects, 376k issues) + When _sync_issues() runs + Then eng_issues.COUNT() starts growing within 60 seconds (not after hours) + AND each batch persists ~100-500 issues + AND total memory peak stays below 800 MB (vs 2 GB current) + AND if the worker crashes mid-sync, ≥80% of fetched issues are already in DB + +Given the new batch-per-project mode is enabled + When operator queries `SELECT COUNT(*) FROM eng_issues` repeatedly + Then count increases monotonically during the sync (not 0 → 376k jump) + +Given Pipeline Monitor exposes /pipeline/ingestion-progress + When _sync_issues() is mid-run + Then progress endpoint shows current_source = "" and + records_ingested updates per batch (parity with PR sync) +``` + +### Anti-surveillance check +PASS — sem mudança em payload de métrica. Refactor é puramente sobre +fluxo de ingestão. + +### Dependencies +Nenhuma. Pode ser implementado isoladamente. + +### Estimate +**M (4-6h)**: +- 1.5h refactor `JiraConnector.fetch_issues_batched()` +- 1.5h refactor `_sync_issues()` em devlake_sync.py +- 1h ajustar progress tracking + watermarks +- 1-2h tests (unit pra batched fetcher + integration test contra fixture mock) + +### Riscos de não fazer + +- Cada full re-ingestion futura leva 3-5h cega (igual hoje) +- Quando Webmotors crescer ou primeiro tenant 2× maior chegar, OOM +- Operador não tem visibilidade durante o fetch — mascarando travas como + a que aconteceu hoje (cycle 2 falhou silenciosamente em 21:23 e ninguém + notou por 14h) + +### Bonus + +Esta FDD se conecta com **FDD-OPS-008** (per-endpoint perf budgets) — uma +vez que issues sync seja batched, fica viável adicionar performance +assertions: "batch persist deve completar em < 30s" → falha CI se regredir. + +--- + +## FDD-OPS-013 · Eliminate redundant `fetch_issue_changelogs` call in `_sync_issues` + +**Epic:** Data Pipeline Reliability · **Release:** R1 (P0 — fixes +24h+ blocking phase observed 2026-04-28) +**Priority:** **P0** · **Persona:** Data engineering, all customers +**Owner class:** `pulse-data-engineer` +**Trigger:** 2026-04-28 — full re-ingestion stuck for hours in +sequential `GET /rest/api/3/issue/{id}?expand=changelog` calls (~3 +calls/sec for 250k+ issues = ~24h estimated). Diagnosed as redundant. + +### Problema + +`_sync_issues()` faz duas chamadas que sobrepõem 100%: + +1. `fetch_issues()` — JQL search com `expand=changelog` inline. Já + retorna a changelog completa em `raw["changelog"]`. +2. `fetch_issue_changelogs(ids)` — chama `GET /issue/{id}?expand=changelog` + uma vez por issue. + +Resultado: 376k issues × ~300ms latência = **~31 horas de chamadas +redundantes** + pressão sobre rate limit Atlassian. + +O próprio connector documenta o problema (`jira_connector.py:267`): + +```python +def fetch_issue_changelogs(...): + """... 
+ Since fetch_issues already includes changelogs via expand=changelog, + this method is used for issues fetched WITHOUT expand (e.g., sprint issues). + """ +``` + +Mas em `devlake_sync.py:614`: + +```python +issue_ids = [str(raw["id"]) for raw in raw_issues] +changelogs_by_issue = await self._reader.fetch_issue_changelogs(issue_ids) # ← redundante +``` + +E `normalize_issue` recebe `changelogs=changelogs_by_issue.get(id, [])` em +vez de extrair `raw["changelog"]` direto. + +### Solução + +**1 mudança código + 1 teste:** + +```python +# devlake_sync.py:_sync_issues() +# REMOVER: +# issue_ids = [str(raw["id"]) for raw in raw_issues] +# changelogs_by_issue = await self._reader.fetch_issue_changelogs(issue_ids) + +# SUBSTITUIR por: +# (changelogs já estão em raw["changelog"] via expand) +for raw in raw_issues: + issue_changelogs = raw.get("changelog", {}).get("histories", []) + issue_data = normalize_issue( + raw, self._tenant_id, self._status_mapping, + changelogs=issue_changelogs, + ) + normalized.append(issue_data) +``` + +`fetch_issue_changelogs` permanece existindo — é usado SOMENTE para +sprint issues que vêm sem `expand` (esse caminho fica intocado). + +### Acceptance Criteria + +``` +Given full re-ingestion against Webmotors (32 projects, 376k issues) + When _sync_issues() runs + Then NO calls are made to GET /rest/api/3/issue/{id}?expand=changelog + (verify via httpx logs / mock) + AND eng_issues.status_transitions JSONB is populated correctly + (parity with current behavior — verified by domain-level tests) + AND total wall time for issues phase drops from ~24h to ~5min + +Given a fresh tenant has 1000 issues across 5 projects + When sync runs + Then changelogs are extracted from inline expand response + AND status_transitions field has same content as before +``` + +### Regression test + +Adicionar test em `packages/pulse-data/tests/integration/`: + +```python +def test_sync_issues_uses_inline_changelogs_only(): + # Mock JiraConnector.fetch_issues returning raw with "changelog" inline + # Mock fetch_issue_changelogs to record calls + # Run _sync_issues + # Assert mock_fetch_issue_changelogs.call_count == 0 + # Assert eng_issues.status_transitions populated correctly +``` + +Trava regressão futura (alguém pode "consertar" reintroduzindo a call). + +### Anti-surveillance check +PASS — sem mudança em payload/normalização, só elimina I/O redundante. + +### Estimate +**XS (1-2h)**: +- 30min: code change in `_sync_issues()` +- 30min: regression test +- 30min: validate against real Webmotors data (compare status_transitions before/after) +- ~30min margin + +### Dependencies +Nenhuma. Pode ser shipped imediatamente. + +### Risco de não fazer +Cada full re-ingestion (Webmotors hoje, novos tenants amanhã) leva 24h+ +em vez de minutos. SaaS-blocker. + +### Conexão com v2 architecture +Este é o "quick win Phase 1" do `docs/ingestion-architecture-v2.md`. Não +substitui Phases 2/3, mas elimina o pior gargalo single-handedly. + +--- + +## FDD-OPS-014 · Per-source workers + per-scope watermarks + +**Epic:** Data Pipeline Architecture · **Release:** R1 +**Priority:** **P1** · **Persona:** SaaS engineering team +**Owner class:** `pulse-data-engineer` + `pulse-engineer` +**Trigger:** 2026-04-27/28 incidents — sync-worker monolítico travado +em Jenkins (VPN off) bloqueando GitHub e Jira que estavam saudáveis. +Global watermark causando full backfill ao adicionar projetos novos. 
+ +### Problema (dois sintomas, uma causa) + +**Sintoma 1 — sem source isolation (AP-4):** + +`DataSyncWorker` é um único processo que roda 4 fases sequenciais +(`issues → PRs → deploys → sprints`). Todas as 4 fontes (GitHub, Jira, +Jenkins) compartilham: + +- Mesmo event loop +- Mesma cadence de sync +- Mesmo cycle order +- Mesmo failure handling + +Consequência: **Jenkins offline (VPN drop)** ou **Jira blip** travam +todo o ciclo, mesmo que GitHub esteja saudável. Onboarding de GitLab/ADO +significa ainda mais código no mesmo loop monolítico. + +A simétrica fica esquisita: `discovery-worker` JÁ é processo separado +(boa decisão em ADR-014). `sync-worker` ficou para trás. + +**Sintoma 2 — global watermark (AP-3):** + +`pipeline_watermarks` tem 1 row por `entity_type`, sem dimensão de +scope: + +```sql +entity_type='issues', last_synced_at='2026-04-26' -- aplica a TODOS os 32 projetos +``` + +Consequência: quando discovery ativa um novo projeto, a única forma de +backfill é resetar watermark para `2020-01-01`, o que **re-fetcha +TODOS os 200k+ issues dos projetos existentes** sem necessidade. + +### Solução (2 partes coesas) + +**Parte 1 — split sync-worker em 3 workers:** + +``` +docker-compose.yml: + sync-worker → REMOVE + github-sync-worker → NEW (apenas GitHub PRs) + jira-sync-worker → NEW (apenas Jira issues + sprints) + jenkins-sync-worker → NEW (apenas Jenkins deploys) +``` + +Cada worker: +- Próprio event loop +- Cadence configurável independente +- Health-aware: pre-flight check antes de iniciar fase +- Logging com tag de source para grep/filter + +**Parte 2 — per-scope watermarks:** + +Migration nova adiciona `scope_key` em `pipeline_watermarks`: + +```sql +ALTER TABLE pipeline_watermarks + ADD COLUMN scope_key VARCHAR(255) NOT NULL DEFAULT '*'; + +-- Drop unique on entity_type alone, replace: +ALTER TABLE pipeline_watermarks + ADD CONSTRAINT uq_watermark_scope + UNIQUE (tenant_id, entity_type, scope_key); +``` + +Watermarks viram: + +| tenant_id | entity_type | scope_key | last_synced_at | +|---|---|---|---| +| ...001 | issues | jira:project:BG | 2026-04-26 | +| ...001 | issues | jira:project:OKM | 2026-04-26 | +| ...001 | pull_requests | github:repo:foo/bar | 2026-04-26 | +| ...001 | deployments | jenkins:job:deploy-X | 2026-04-26 | + +Connector-side: `fetch_issues(project_key=..., since=watermark[scope_key])`. + +### Acceptance Criteria + +``` +Given Jenkins is unreachable (VPN off) + When the daily ingestion cycle runs + Then jenkins-sync-worker logs "unhealthy, skipping cycle" + AND github-sync-worker continues normally + AND jira-sync-worker continues normally + AND VPN reconnect → jenkins-sync-worker resumes from last per-scope watermark + +Given a NEW Jira project is auto-activated by discovery + When jira-sync-worker runs the next cycle + Then ONLY the new project's issues are backfilled (since 2020-01-01) + AND existing projects' issues are NOT re-fetched + AND pipeline_watermarks has a new row with scope_key=jira:project:NEW + +Given Webmotors has 32 active Jira projects + When jira-sync-worker runs incremental sync + Then 32 watermarks are queried (1 per scope) + AND each project syncs from its own last_synced_at + AND total cycle time scales linearly with new data, not historical data +``` + +### Anti-surveillance check +PASS — sem mudança em campos persistidos. 
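+
+Esboço ilustrativo da Parte 2 (resolução de watermark por escopo): o worker tenta o escopo
+específico, cai para a linha legada `'*'` e, por último, para o início do backfill. Nome de
+função e assinatura são hipotéticos; a implementação real fica no watermark repository.
+
+```python
+from datetime import datetime
+
+from sqlalchemy import text
+from sqlalchemy.ext.asyncio import AsyncSession
+
+_WATERMARK_SQL = text(
+    "SELECT last_synced_at FROM pipeline_watermarks "
+    "WHERE tenant_id = :tenant_id AND entity_type = :entity_type "
+    "  AND scope_key = :scope_key"
+)
+
+
+async def resolve_since(
+    session: AsyncSession,
+    tenant_id: str,
+    entity_type: str,
+    scope_key: str,          # ex.: "jira:project:BG"
+    backfill_start: datetime,
+) -> datetime:
+    # Escopo específico → linha legada '*' → início do backfill (escopo novo).
+    for key in (scope_key, "*"):
+        result = await session.execute(
+            _WATERMARK_SQL,
+            {"tenant_id": tenant_id, "entity_type": entity_type, "scope_key": key},
+        )
+        value = result.scalar_one_or_none()
+        if value is not None:
+            return value
+    return backfill_start
+```
+
+Assim, um projeto recém-ativado faz backfill só do próprio escopo, enquanto os 32 projetos
+existentes seguem incrementais a partir do próprio `last_synced_at`.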
+ +### Estimate +**M-L (1 semana)**: +- 1 dia: extract per-source workers (refactor `DataSyncWorker`) +- 0.5 dia: docker-compose + Dockerfile per-source +- 1 dia: schema migration + watermark repo refactor +- 1 dia: connector-side scope filtering (Jira `project_keys` already there; GitHub repo-by-repo already there; Jenkins per-job) +- 1 dia: testes (especialmente o cenário VPN drop simulation) +- 0.5 dia: Pipeline Monitor UI per-source breakdown +- ~1 dia margin + +### Dependencies +- FDD-OPS-013 (deve shipping antes pra simplificar refactor) +- FDD-OPS-012 (issue batch-per-project) idealmente ships antes — mas + pode ser paralelo + +### Risco de não fazer +- Cada outage de fonte (VPN, rate-limit, Atlassian incident) trava todo + o pipeline +- Onboarding de GitLab/ADO/Linear adiciona código na monolita já + frágil +- SaaS multi-tenant inviável sem isolation entre tenants → entre sources + é o primeiro passo + +### Conexão com v2 architecture +Este é o "Phase 2" de `docs/ingestion-architecture-v2.md`. Phase 3 (job +queue + worker pool) constrói em cima. + +--- + +## FDD-OPS-015 · Observable ingestion: pre-flight estimates + per-scope progress + ETA + +**Epic:** Data Pipeline / Ops Visibility · **Release:** R1 +**Priority:** **P1** · **Persona:** Operators (you, on-call), data engineering +**Owner class:** `pulse-data-engineer` + `pulse-engineer` (UI) +**Trigger:** 2026-04-27/28 — 5 cycles where I gave estimates ("ETA +45min") that were wrong by 10×+. Operator (você) cannot answer "is it +stuck?" without diving into logs. `COUNT(*)` is useless during +bulk-fetch. + +### Problema + +Atualmente: + +1. **Sem pre-flight count.** Worker não pergunta "quantas issues match + esse JQL?" antes de iniciar. Apenas começa. +2. **Sem rate-aware ETA.** Pace medido (ex: 27 calls/min) não é + usado pra calcular tempo restante. +3. **Sem per-scope progress.** Quando preso, impossível distinguir + "BG (197k) ainda não terminou" de "estamos no projeto X". +4. **Pipeline Monitor mostra agregado per-entity_type**, não per-scope. + +Consequência operacional: **5 falsos alarmes de progresso esta semana**. + +### Solução (3 entregas coesas) + +**1. Pre-flight estimate per scope:** + +Em cada início de fase, o worker chama o source pra contar: + +```python +# Jira: count via JQL count +estimate = await jira.count_issues(project_key=BG, since=watermark) +# logs: "[scope=jira:project:BG] estimated 12,450 issues since 2026-04-26" +``` + +Se a count call em si for muito cara (alguns sources não suportam), +heuristic: "X items since Y, extrapolated." + +**2. Per-batch progress with rate-aware ETA:** + +Cada batch persistido emite progress event: + +```python +{ + "scope": "jira:project:BG", + "phase": "fetching", + "items_done": 1200, + "items_total_estimate": 12450, + "items_per_second": 18.5, + "eta_seconds": 608, + "started_at": "...", + "current_high_water": "2026-04-27T10:23:00Z" +} +``` + +Tabela nova `pipeline_progress` (live + historical): + +```sql +CREATE TABLE pipeline_progress ( + id UUID PRIMARY KEY, + tenant_id UUID, + scope_key VARCHAR(255), + entity_type VARCHAR(64), + phase VARCHAR(32), -- fetching | normalizing | persisting | done | failed + items_done INT, + items_estimate INT, + items_per_second DOUBLE PRECISION, + eta_seconds INT, + started_at TIMESTAMPTZ, + last_progress_at TIMESTAMPTZ, + status VARCHAR(16), -- running | done | failed | paused + last_error TEXT +); +``` + +**3. 
Endpoint `/pipeline/jobs` + Pipeline Monitor UI per-scope:** + +``` +GET /data/v1/pipeline/jobs + +[ + { + "scope": "jira:project:BG", + "entity_type": "issues", + "status": "running", + "items_done": 1200, + "items_estimate": 12450, + "progress_pct": 9.6, + "eta_seconds": 608, + "rate_per_sec": 18.5, + "started_at": "...", + "errors": [] + }, + ... +] +``` + +Pipeline Monitor UI ganha tab "Per-scope progress" com tabela tipo Top Hat: +scope, status, %, ETA, current rate, errors. + +### Acceptance Criteria + +``` +Given a fresh ingestion against 32 projects + When operator queries /pipeline/jobs after 30s + Then response includes 32 rows (1 per active scope) + AND each row has status, items_done, ETA, rate + AND ETA accuracy: actual_completion_time within ±20% of estimate + (measured: ETA at 10% complete vs actual completion at 100%) + +Given an ingestion job stalls (network blip, source down) + When 60 seconds pass without progress + Then job's last_progress_at falls > 60s behind now() + AND UI displays "stalled" badge + AND on-call gets clear signal "scope X is stuck" + +Given operator wants to investigate a slow source + When opens Pipeline Monitor → Per-scope tab + Then can sort by items_per_second + AND can filter by entity_type/source + AND can see error history per scope +``` + +### Anti-surveillance check +PASS — progress data is metadata about ingestion, not user activity. + +### Estimate +**M (3-5 dias)**: +- 0.5 dia: schema migration `pipeline_progress` +- 1 dia: pre-flight count helpers (Jira count JQL, GitHub repo count, Jenkins job count) +- 1 dia: per-batch progress emission + ETA calculation +- 0.5 dia: `/pipeline/jobs` endpoint +- 1 dia: Pipeline Monitor UI tab per-scope +- 0.5 dia: tests + dashboard polish + +### Dependencies +- FDD-OPS-014 (per-scope watermarks) é pré-requisito do per-scope + progress +- FDD-OPS-012 (batch-per-project) facilita progress emit per-batch + +### Riscos +- Pre-flight count aumenta tempo total se overhead alto. Mitigar: se + count > 5s, usar heuristic +- Estimate ruim no início (até medir rate real) — aceitar e refinar a + cada batch + +### Conexão com v2 architecture +Este é o "Phase 1.5" de `docs/ingestion-architecture-v2.md`. Crítico +para evitar repetir o ciclo de "estimar 45min, esperar 4h, descobrir +que travou". + +--- + +## FDD-OPS-016 · Effort estimation fallback chain (Story Points → T-shirt → Hours → Count) + +**Epic:** Data Quality · **Release:** R1 +**Priority:** **P1** · **Persona:** Data consumer / metric layer +**Owner class:** `pulse-data-engineer` · **Status:** SHIPPED 2026-04-28 + +### Problema confirmado + +Panorama do Pulse DB em 2026-04-28 mostrou **`story_points = 0` em todas +as 311.007 issues**. 
Investigação na instância Jira da Webmotors revelou: + +- **`customfield_10004` ("Story Points")**: 0% populado em todos os 69 projetos +- **`customfield_18524` ("Story point estimate")**: 0% populado também +- Webmotors **não usa Story Points como método de estimativa** + +Distribuição real por projeto (amostra de 50 issues): + +| Projeto | T-Shirt Size | Original Estimate (h) | Tamanho/Impacto | Padrão | +|---------|--------------|------------------------|------------------|--------| +| ENO | 24% | 52% | 4% | Horas + tshirt | +| DESC | 26% | 34% | 6% | Horas + tshirt | +| APPF | 0% | 12% | 0% | Horas (raro) | +| OKM | 4% | 8% | 0% | Quase Kanban | +| BG, FID, PTURB | 0% | 0% | 0% | **Kanban puro** | + +Sem fallback, métricas de velocity, throughput-by-effort e forecast +ficavam zeradas para 100% das issues — bloqueando todo o pilar Lean. + +### Solução implementada + +Cadeia de fallback em `JiraConnector._extract_story_points`: + +1. **Story Points / Story point estimate** (numérico) — uso direto +2. **T-Shirt Size** (option) — mapa Fibonacci: PP=1, P=2, M=3, G=5, GG=8, GGG=13 +3. **Tamanho/Impacto** (option) — mesmo mapa +4. **`timeoriginalestimate`** (segundos) — buckets: ≤4h=1, ≤8h=2, ≤16h=3, ≤24h=5, ≤40h=8, ≤80h=13, >80h=21 +5. **`None`** — issue genuinamente não estimada + +Discovery automático via `_discover_custom_fields` casa por nome +("t-shirt size", "tamanho/impacto") — não hardcode customfield IDs. + +Telemetria de origem (`_effort_source_counts`) loggada por batched run: +operadores conseguem ver se o squad migrou de horas pra t-shirt sem +combar logs. + +### Quando `story_points = None` (Kanban puro) + +Quando nada está populado, a métrica downstream **DEVE contar items** +em vez de somar pontos. Esta decisão fica na camada de métricas, **não** +no normalizer. O normalizer só extrai o que existe. + +### Regras de mapeamento — escolhas e por quê + +- **Fibonacci-aligned**: comum na indústria, métricas downstream já + esperam essa escala +- **Hours buckets calibrados** contra valores observados na Webmotors + (2h–124h, múltiplos de 4) — cada valor comum cai num bucket sensato +- **Skipa SP = 0**: sentinel comum para "não estimado", trata como falta + +### Validação live + +Projeto CRMC (1.375 issues, ingestão completa pós-fix): +- **52,3% com effort estimado** (719/1.375 issues) +- Distribuição de valores: 1, 2, 3, 5, 8 — confirma escala Fibonacci aplicada + +### Migração dos 311k issues legados + +Como o upsert sobrescreve `story_points` em re-sync, os 311k issues +existentes vão receber o effort correto **conforme cada projeto recebe +updates incrementais**. Para acelerar, op pode resetar watermarks +por projeto via SQL — custo: re-fetch da API Jira. + +### Arquivos +- `pulse/packages/pulse-data/src/connectors/jira_connector.py`: + - Constants `TSHIRT_TO_POINTS`, `_hours_to_points`, patterns + - `_discover_custom_fields` agora detecta tshirt fields + - `_extract_story_points` reescrito com cadeia de fallback + - Telemetria via `_effort_source_counts` + log no fim de batched fetch +- `pulse/packages/pulse-data/tests/unit/test_effort_fallback_chain.py`: + 34 testes cobrindo cada hop, cada size, cada bucket de horas + +### Anti-surveillance check +PASS — apenas valores agregados de effort são extraídos; nenhum dado +identificador de pessoa é coletado. + +### Próximo passo (deferido) +Adicionar coluna `effort_source` em `eng_issues` para auditoria por +issue (qual hop produziu o valor). Útil para debugging mas não +bloqueante. 
Cobertura atual via telemetria batched é suficiente +para R1. + +--- + +## FDD-OPS-017 · Status normalization with statusCategory fallback + +**Epic:** Data Quality (foundational) · **Release:** R1 +**Priority:** **P0** (corrupts every flow metric) · **Persona:** All metric consumers +**Owner class:** `pulse-data-engineer` · **Status:** SHIPPED 2026-04-29 + +### Problema confirmado + +Audit do panorama em 2026-04-28 mostrou distribuição absurda de +`normalized_status` em 311k issues: + + - 96,5% `done` · 3,3% `todo` · 0,2% `in_progress` · 0,1% `in_review` + +A Webmotors tem **104 status raw distintos** em workflows ativos. Nosso +`DEFAULT_STATUS_MAPPING` cobria ~50, então 50+ status caíam silenciosamente +no fallback "Unknown → todo" — incluindo: + +| Status raw | Issues afetadas | Bucket atual | Bucket correto | +|---|---|---|---| +| `FECHADO EM PROD` | 2.881 | todo | done | +| `Em Progresso` | 6 | todo | in_progress | +| `Em desenv` | 4 | todo | in_progress | +| `Em Deploy Produção` | 14 | todo | in_progress | +| `Em Monitoramento Produção` | 3 | todo | done | +| `Homologação` | 9 | todo | in_review | +| `Em Verificação` | 4 | todo | in_review | +| ... | ... | ... | ... | + +**Impacto em CASCATA**: status_transitions herdam a classificação errada, +então o último estado de uma issue concluída ficava registrado como +`todo`. Resultado: + +- **Cycle Time** infinito (não há transição para `done`) +- **Throughput** sub-conta (issues entregues não aparecem) +- **WIP** super-conta (issues finalizadas continuam "em fluxo") +- **CFD** distorcido (área de "todo" inflada) +- **Lead Time** indeterminado + +Sem o fix, **todo o pilar Lean** está comprometido para qualquer projeto +que use status PT-BR fora do nosso mapping. + +### Solução implementada + +**Estratégia híbrida** em 3 camadas: + +1. **Mapping textual** (`DEFAULT_STATUS_MAPPING`) — preserva a + granularidade `in_progress` vs `in_review` que as métricas curadas + precisam. Expandido para cobrir os top 80+ status PT-BR observados. + +2. **Fallback `statusCategory.key` da Jira** — fonte autoritativa para + a dimensão `done` vs `não-done`. Descoberto via `/rest/api/3/status` + (chamada única por lifetime do conector, ~326 status definitions na + Webmotors). + - `done` → `done` + - `indeterminate` → `in_progress` + - `new` → `todo` + +3. **Default final** `todo` com WARN log — só atinge status sem + categoria (extremamente raro). + +### Arquivos modificados + +- `pulse/packages/pulse-data/src/connectors/jira_connector.py`: + - `_discover_status_categories()` — descobre + cacheia `name → category` + - `_map_issue` anexa `status_category` (current) e + `status_categories_map` (todos, para histórico de transitions) +- `pulse/packages/pulse-data/src/contexts/engineering_data/normalizer.py`: + - `normalize_status(raw, mapping, status_category=...)` — assinatura nova + - `build_status_transitions(..., status_categories_map=...)` — classifica + cada `to_status` histórica via map + - `DEFAULT_STATUS_MAPPING` expandido (~80 entradas novas PT-BR) +- `pulse/packages/pulse-data/tests/unit/test_status_normalization.py`: + 44 testes novos (textual ganha quando definido, category fallback, + Webmotors regression cases, transitions integração) + +### Validação live + +Cross-check do mapping contra DB atual mostrou que **3.151 issues +reclassificarão** quando o sync re-tocar (1% do total): + + - 2.923 `todo → done` (FECHADO EM PROD/HML, etc.) + - 161 `todo → in_review` (Homologação, Verificação, etc.) 
+ - 67 `todo → in_progress` (Em Progresso, Em desenv, etc.) + +Esses 3.151 representam o "long tail" cuja má classificação distorcia +métricas individuais. Os ~300k issues `done` corretos continuam corretos. + +### Backfill dos legados + +Como o upsert sobrescreve `normalized_status` e `status_transitions`, +issues vão se reclassificar conforme cada projeto receber updates +incrementais. Para acelerar há duas opções: + +1. **Reset watermark por projeto** (custo: re-fetch da API Jira) +2. **Migration script futuro** — recalcular `normalized_status` e + `status_transitions[].status` direto via SQL (sem refetch). Decidido + deixar para issue separada — muda dado em produção, requer plano. + +### Anti-surveillance check +PASS — apenas valores de status agregados; nenhum dado pessoal. + +### Test coverage +116/116 verde (44 novos + 72 existentes). Cobertura inclui: +- Textual mapping ganha sobre category mismatch +- Cada categoria Jira fallback (`done` / `indeterminate` / `new`) +- Casos PT-BR Webmotors regressão +- Backward compat (legacy callers sem category) +- `build_status_transitions` integrado com category map + +### Decisão de produto registrada + +`FECHADO EM HML` foi mapeado como `done` (segue Jira) em vez de +`in_review`. Workflow author classifica como done; respeitamos. Se +Webmotors quiser mantê-lo em fluxo, pode renomear para "Aguardando +Deploy Produção" (já mapeado como in_progress). + +--- + +## FDD-OPS-018 · Sprint status pipeline — 4-layer cheese fix + +**Epic:** Data Quality (sprint metrics) · **Release:** R1 +**Priority:** **P1** · **Persona:** Sprint metric consumers +**Owner class:** `pulse-data-engineer` · **Status:** SHIPPED 2026-04-29 + +### Problema confirmado + +100% das 216 sprints na Webmotors estavam com `status=''` no `eng_sprints`. +O `goal` também totalmente vazio. Investigação revelou um clássico +"swiss cheese alignment" — **4 bugs independentes** em camadas diferentes, +cada um sozinho garantia que o status nunca fosse populado: + +| Camada | Bug | Sintoma sozinho | +|---|---|---| +| 1. Connector | `_map_sprint` mapeava OK (ACTIVE/CLOSED/FUTURE) | — | +| 2. Normalizer | `normalize_sprint` retornava dict SEM `status` | Status nunca chega no upsert | +| 3. Worker upsert | `_upsert_sprints` ON CONFLICT não atualizava `status`/`goal` | Sprints existentes nunca atualizam | +| 4. Connector watermark | `_fetch_board_sprints` filtrava por `started_date < since` | Sprints antigas nunca re-fetchadas | +| 5. ORM model | `EngSprint` no SQLAlchemy não tinha campo `status` (schema drift) | `Unconsumed column names: status` | + +A camada 4 é particularmente insidiosa: sprint state transitions +(`active` → `closed`) acontecem em `endDate`, não `startDate`. Filtrar +por started_date significa que uma sprint que começou em março e +fechou em maio nunca tem o status atualizado depois de março. + +### Impacto métrico (atual e futuro) + +Atualmente nenhum métrico consome `eng_sprints.status` diretamente — +por isso o bug ficou silencioso. Mas: +- **Sprint Comparison / Velocity Trend** (já em código) precisa filtrar + sprints `closed` para excluir sprints em andamento da regressão linear +- **Dashboard "current sprint"** (planejado) precisa de `status='active'` +- **Carryover Rate** já usa heurística de `endDate < now()` mas o ideal + é confiar em status='closed' +- **Goal** é input visual importante para a página da sprint + +### Solução implementada + +**Fix em todas as 4 camadas**: + +1. `JiraConnector._map_sprint` agora também passa `goal` adiante +2. 
`normalizer.normalize_sprint` inclui `status` (lowercase: `active`/ + `closed`/`future`/None) e `goal` (com strip de null bytes) +3. `_upsert_sprints` ON CONFLICT atualiza `status` + `goal` +4. `_fetch_board_sprints` removeu o filtro de watermark (volume baixo, + sprints mudam estado ao longo do tempo, sempre re-fetch é correto) +5. `EngSprint` model adiciona `status: Mapped[str|None]` (corrige drift) + +Helper `_normalize_sprint_status` mapeia aliases comuns (open→active, +completed→closed, planned→future) e devolve `None` para valores +desconhecidos — não bucketiza silenciosamente. + +### Validação live + +Após o fix + ad-hoc backfill direto: + +| Status | Quantidade | Tem goal? | +|---|---|---| +| `closed` | 187 | sim | +| `active` | 3 | sim | +| `future` | 5 | sim | +| (vazio) | 22 | — board órfão 873 sem projeto ativo | + +**195/217 = 89,9%** das sprints com status correto + 70% com goal real +(ex: "Gestão de banner no backoffice de CNC e TEMPO para novas +especificações técnicas"). As 22 vazias são de board órfão, fora do +escopo deste fix. + +### Tests +- `tests/unit/test_sprint_normalization.py` — 26 testes novos: + - status field presente no dict (5 cenários) + - unknown values retornam None (4) + - aliases (13 mapeamentos) + - goal passthrough (3) + - structural anti-regression: `_upsert_sprints.set_` inclui status + goal +- 142/142 verde (pyramid completo) + +### Lição aprendida — guard against future drift + +ORM model drift was the most insidious of the 4 bugs. The DB had the +column for ages; only the SQLAlchemy `EngSprint` was missing it. Any +upsert path that included `status` would crash; any path that omitted +it would silently produce empty data. Prevention going forward: + +- Pyramid test step "schema introspection vs ORM model" (deferred — + candidate for FDD-OPS-001 line of defense) +- Migration review checklist: every new column → corresponding + Mapped column in SQLAlchemy model + +### Anti-surveillance check +PASS — `goal` is squad/sprint-level free text, no individual attribution. + +--- + +## FDD-DEV-METRICS-001 · Codename "dev-metrics" — proprietary estimation & forecasting model + +**Epic:** Product Differentiation · **Release:** R3+ (codename "dev-metrics") +**Priority:** **P3** (large-scope, visionary) · **Persona:** Eng Manager + Squad Lead +**Owner class:** `pulse-product-director` + `pulse-data-scientist` + `pulse-engineer` +**Status:** PLANNED — capture only, do not start + +> **Marcador estratégico**: este FDD reserva o espaço no backlog do projeto +> codinome **"dev-metrics"**, que vai reescrever completamente a UX/UI do +> PULSE adicionando dezenas de features proprietárias e únicas na indústria. +> Documentação completa virá no próprio release plan do "dev-metrics" — esta +> entrada apenas garante que o tema **não se perde** entre R1 e R3. + +### Por que existe este card + +Hoje (R1) usamos uma cadeia de fallback **automática e implícita** para +extrair effort estimation (FDD-OPS-016). Isso resolve o problema imediato +mas **assume convenções** (Fibonacci scale, hours-bucket mapping). Squads +diferentes têm filosofias diferentes: + +- "Story Points são nosso golden standard" +- "Horas são mais honestas" +- "Tamanho de camisa só é útil pra refinement, não pra forecast" +- "Não estimamos. Throughput by item é nosso único KPI" + +Cada filosofia gera métricas diferentes. Hoje somos opinionados; +amanhã queremos ser **configuráveis** por squad e ainda **proativos**: +sugerir ao squad qual método cabe melhor com base no histórico real. 
+ +### Visão (R3 — projeto "dev-metrics") + +1. **Per-squad estimation method** (admin UI): + - Squad escolhe: SP nativo, T-shirt, Hours, Count-only, ou "Auto" + - PULSE respeita a escolha em **toda** a métrica (velocity, forecast, + CFD por effort, scatterplot) + - Auto-mode: usa fallback chain atual + telemetria + +2. **Modelo proprietário de previsão e insights** (vantagem competitiva): + - Identifica drift de estimativa (squad marcando tudo como "M" há + 6 sprints) + - Calibra automaticamente: "Vocês marcaram esse card como P, mas + histórico de issues do tipo 'bug' com label 'auth' nesta squad + teve 73% de chance de virar G/GG" + - Insight de método: "73% das squads kanban-puras como vocês têm + throughput estável; vocês não — possível causa: variabilidade no + refinement" + - Forecast com Monte Carlo usando o método nativo do squad + - **Anti-surveillance**: insights são sobre o squad/processo, + **nunca** sobre indivíduos + +3. **UX completa rescritia**: + - Dashboard reescrito ao redor do método escolhido + - Painel "estimation health" novo + - Drill-down comparativo: "como seria sua velocity se vocês tivessem + adotado method X há 3 sprints?" + +### Diferenciador + +Concorrentes (LinearB, Jellyfish, Swarmia, Athenian) hoje são opinionados +em SP. PULSE será o **único** que respeita filosofia da squad e usa +isso como entrada de modelo, não como ruído a ser normalizado. + +### Pré-requisitos (capturar agora) + +Quando "dev-metrics" começar: +1. **`effort_source`** já estar em `eng_issues` (next step do + FDD-OPS-016) — sem isso, modelo proprietário não tem feature de método +2. **Histórico estatístico** mínimo de ~6 sprints por squad (ou ~30 + ciclos de Cycle Time pra Kanban) — bootstrap funciona em paralelo +3. **Multi-tenant scope_key** (FDD-OPS-014) — consolidado, OK +4. **Anti-surveillance review** rigoroso — modelo NÃO pode personalizar + por indivíduo, só por squad/repo + +### Lembrete operacional (CRÍTICO) + +**Não esquecer ao chegar em R2/R3.** Este FDD existe especificamente +para resgatar o tema. Reviewer de release plan deve checar: +- ✅ FDD-DEV-METRICS-001 ainda apontado no roadmap? +- ✅ `effort_source` adicionado antes do R3 começar? +- ✅ Telemetria do fallback chain ainda gerando dados utilizáveis? + +### Anti-surveillance check +PASS by design — modelo opera em agregado por squad/issue-type, nunca +por pessoa. Precisa review formal do CISO antes do release. + +### Estimate +**XL (multi-sprint, R3)** — escopo de release inteiro, não card único. + +### Dependencies +- FDD-OPS-016 (effort fallback chain) — base hoje +- FDD-OPS-014 (per-scope) — entregue +- Future migration: adicionar coluna `effort_source` em `eng_issues` + +--- + diff --git a/pulse/docs/ingestion-architecture-v2.md b/pulse/docs/ingestion-architecture-v2.md new file mode 100644 index 0000000..98bf48e --- /dev/null +++ b/pulse/docs/ingestion-architecture-v2.md @@ -0,0 +1,657 @@ +# PULSE Ingestion Architecture — v2 Review + +**Status:** Proposal · **Author:** orchestrator (post-mortem of 2026-04-27/28 incidents) +**Audience:** `pulse-data-engineer`, `pulse-engineer`, `pulse-product-director` +**Companion docs:** `ingestion-spec.md` (current architecture), `metrics/metrics-inconsistencies.md` (data quality history) + +--- + +## 1. Why this document exists + +This week's full re-ingestion against the Webmotors tenant exposed +structural defects in PULSE's ingestion pipeline that **cannot be +fixed by patches**. 
Five distinct failures in five days: + +| # | Date | Failure | Time wasted | +|---|---|---|---| +| 1 | 2026-04-23 | Snapshot drift (FDD-OPS-001) — workers running stale code | hours of debugging across 3 incidents | +| 2 | 2026-04-27 | `make seed-reset` wiped 442k rows of real Webmotors data without explicit gate | full re-ingestion required | +| 3 | 2026-04-27 | `metrics_snapshots` 50× perf regression at 7M rows — partial index missing | dashboard erro, ~2h diagnose+fix | +| 4 | 2026-04-27 21:23 | Cycle 2 failed silently — Jira ConnectionError (network blip) → 0 issues persisted → unnoticed for 14h | 14h × engineer attention | +| 5 | 2026-04-28 | Sync stuck 1.5h in JQL pagination, then hours in `fetch_issue_changelogs` (estimated 24-28h to converge) | currently running, decision pending | + +Each was **rational locally** when shipped. The sum is **not viable +for SaaS**. When we onboard the second tenant, every problem above +multiplies; when we onboard tenant N, we never finish. + +The user-stated target: **at least 10× improvement in speed, +simplicity, resilience, and security.** + +This document is the proposal. + +--- + +## 2. The five anti-patterns we keep hitting + +### AP-1: Bulk-fetch-then-persist (issues only) + +**Symptom (today):** `eng_issues.COUNT() = 0` for **3+ hours** while +sync worker buffers 250k+ issues in memory before any DB write. + +**Code:** `packages/pulse-data/src/workers/devlake_sync.py:_sync_issues()` +lines 605-635: + +```python +raw_issues = await self._reader.fetch_issues(...) # blocks until ALL 32 projects paginated +changelogs = await self._reader.fetch_issue_changelogs(ids) # 1 GET per issue (250k+ HTTP calls) +normalized = [normalize_issue(...) for raw in raw_issues] # all in memory +count = await self._upsert_issues(normalized) # single bulk upsert +``` + +**Why it's wrong:** +- Time-to-first-row (TTFR): hours, not seconds +- Memory: 1.5+ GB peak (manageable today, OOM at 2× scale) +- Visibility: operator queries `COUNT(*)`, sees 0, can't tell if working or stuck +- Recovery: crash mid-sync = lose 100% of fetched work + +**Notable:** PRs ALREADY escaped this pattern via commit `7f9f339` +(2026-04-23), which made `_sync_pull_requests` batch-per-repo. PR sync +now persists ~100 rows every few seconds — operator sees `COUNT(*)` +growing in real-time. Issues was missed in that refactor. + +**Tracked:** FDD-OPS-012 (created 2026-04-28). + +--- + +### AP-2: Redundant API calls + +**Symptom (today):** worker is hitting `GET /rest/api/3/issue/{id}?expand=changelog&fields=status` +once per issue — ~3 calls/sec. For 250k issues this is ~24 hours of +blocking HTTP work. + +**Code:** `devlake_sync.py:614`: + +```python +issue_ids = [str(raw["id"]) for raw in raw_issues] +changelogs_by_issue = await self._reader.fetch_issue_changelogs(issue_ids) +``` + +**Why it's wrong:** `fetch_issues()` already requests `expand=changelog` +on the JQL search (`jira_connector.py:240`). The changelog data is +**already in `raw_issues`** — the separate fetch is duplicate work. + +The connector itself documents this: + +```python +# jira_connector.py:267 +def fetch_issue_changelogs(...): + """... + Since fetch_issues already includes changelogs via expand=changelog, + this method is used for issues fetched WITHOUT expand (e.g., sprint issues). + """ +``` + +**Why it survives:** there's no test asserting "main issues sync uses +inline changelogs". The redundant call is invisible until production +scale exposes it. 
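+
+A minimal sketch of that missing guard, using pytest and `unittest.mock`. The
+`make_sync_worker` fixture is hypothetical; it stands in for whatever harness the
+`devlake_sync` integration suite already uses to wire a worker against a stubbed reader.
+
+```python
+from unittest.mock import AsyncMock
+
+import pytest
+
+
+@pytest.mark.asyncio
+async def test_sync_issues_uses_inline_changelogs_only(make_sync_worker):
+    reader = AsyncMock()
+    # The JQL search already carries changelogs inline via expand=changelog.
+    reader.fetch_issues.return_value = [
+        {"id": "1", "key": "BG-1", "fields": {}, "changelog": {"histories": []}}
+    ]
+    worker = make_sync_worker(reader=reader)
+
+    await worker._sync_issues()
+
+    # The main issues path must never fall back to the per-issue changelog endpoint.
+    reader.fetch_issue_changelogs.assert_not_called()
+```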
+ +**Cost:** 376k HTTP calls × ~300ms = ~31 hours of pure API latency, +plus Atlassian rate-limit pressure. + +**Fix:** one-line — replace the separate call with read from +`raw["changelog"]` field already present in JQL response. + +**Tracked:** to be opened as FDD-OPS-013. + +--- + +### AP-3: Sequential phases with global watermark + +**Symptom (yesterday):** cycle 2 hit a Jira ConnectionError at 21:23, +issues sync errored silently with 0 results, sync moved on to PRs/deploys/ +sprints (which succeeded), watermark for issues never advanced. Next +14 hours of cycles wasted because the worker kept trying issues with +the same scope, hitting the same ordering issue, never producing data. + +**Code:** `devlake_sync.py:DataSyncWorker.sync()` runs phases in fixed +order: + +```python +1. _sync_issues() # fails silently → 0 issues +2. _sync_pull_requests() # ok → 63131 PRs +3. _sync_deployments() # ok → 1376 deploys +4. _sync_sprints() # ok → 216 sprints +``` + +`pipeline_watermarks` has ONE row per `entity_type` regardless of scope: + +```sql +entity_type='issues', last_synced_at='2020-01-01' (when reset) +``` + +**Why it's wrong:** + +1. **Single failure point**: failure in any phase doesn't degrade + gracefully; watermark stays where it was, next cycle reruns same + work, no signal that "issues broke at 21:23, PRs were fine". + +2. **Global watermark = full backfill on scope expansion**: when + discovery activates a new project, we have to reset watermark to + 2020-01-01 to backfill — but this also re-fetches the 200k + already-ingested issues from existing projects. Wasteful. + +3. **No bulkheads**: if Jira has a hiccup, issues phase blocks. No + timeout, no skip, no degraded mode. + +**Tracked:** to be opened as FDD-OPS-014 (per-scope watermarks + +phase isolation). + +--- + +### AP-4: No source isolation + +**Symptom (today AM):** sync worker stuck retrying Jenkins jobs +(VPN was off overnight) — every cycle would burn ~10s × 200 dead jobs += 30+ minutes on Jenkins timeouts before getting to anything else. + +**Code:** all four sources (GitHub, Jira, Jenkins, future GitLab) +share **one process**, **one event loop**, **one cycle order**. + +**Why it's wrong:** + +- Jenkins outage (VPN, infra) blocks GitHub sync (which works fine) +- Jira rate-limited → blocks deployment ingestion that doesn't touch Jira +- One slow source = global throughput floor +- Adding GitLab/ADO/Linear means more code in the same shared loop + +**The asymmetry:** discovery already has its OWN worker +(`discovery_scheduler.py`). The sync side wasn't given the same +treatment. + +**Tracked:** FDD-OPS-014 (covers per-source workers). + +--- + +### AP-5: Estimate-and-pray (no real observability) + +**Symptom (every cycle):** I tell you "ETA 45min", we wait 4h, find +out it's stuck, restart, lose work. We've done this **5 times this +week**. Each time my estimate is plausible at start, wrong by an +order of magnitude after exposure. + +**Why estimates fail:** + +1. **No pre-flight cost estimate.** We don't ask Jira "how many issues + match this JQL?" before fetching. We don't ask GitHub "how many PRs + in active repos last 12 months?" We just start and hope. + +2. **Progress proxy is `COUNT(*)`** — but in bulk-fetch mode (AP-1), + COUNT stays 0 until the very end. Useless during the long phase. + +3. **No rate-aware ETA.** When pace is 27 calls/min for 10 minutes, + we don't multiply by remaining work to get a real ETA. + +4. 
**No per-scope visibility.** When stuck, we can't tell "is BG + project taking forever, or is OKM done and we're on a small one?" + +**Tracked:** FDD-OPS-015 (observable ingestion: pre-flight estimate + +per-scope progress + rate-aware ETA). + +--- + +## 3. Target Principles for v2 (the 10× envelope) + +These are non-negotiable design constraints. Every code change in +ingestion lands or is rejected against these. + +### P-1: Stream by default — Time-to-first-row (TTFR) ≤ 60s + +Every fetcher is an `AsyncIterator` yielding small batches (50-200 +items). Each batch: +- normalize → upsert → emit Kafka event → ack → advance watermark + +Memory bound: ~10 MB max in flight at any time, regardless of total volume. + +**Effect:** operator sees row count growing from minute 1. Crash +recovery loses ≤1 batch. + +### P-2: Source-isolated workers (bulkheads) + +One worker process **per source** (github-sync-worker, jira-sync-worker, +jenkins-sync-worker, future gitlab-sync-worker). Independent: + +- Event loop +- Cycle cadence +- Watermarks +- Failure handling +- Rate-limit budget + +**Effect:** Jira down ≠ GitHub down ≠ Jenkins down. Onboarding GitLab +adds a worker; doesn't touch the others. + +### P-3: Per-scope watermarks (kill global) + +`pipeline_watermarks` keyed by `(source, entity_type, scope_key)`: + +```sql +(jira, issues, project_key=BG) last=2026-04-26 18:33 +(jira, issues, project_key=OKM) last=2026-04-26 18:35 +(github, prs, repo=foo/bar) last=2026-04-26 18:40 +``` + +**Effect:** new project activated = backfill ONLY that scope. Existing +work preserved. Per-scope progress and ETA become trivial. + +### P-4: Job queue + worker pool (not in-process loops) + +Discovery emits jobs ("ingest scope X, since Y") onto a queue +(Redis-backed or Kafka topic). Worker pool consumes with configurable +concurrency per source. + +``` +Discovery → enqueue jobs → Queue → Worker[1..N] → DB streaming +``` + +**Effect:** + +- Concurrency scales with hardware (5 parallel JQL queries vs 1) +- Failure = job retried, not whole cycle restarted +- New tenant = new jobs in queue, no orchestrator change +- SaaS-ready: 100 tenants = 100× jobs but same code + +### P-5: Backpressure + rate-limit awareness + +Read API rate-limit headers (`X-RateLimit-Remaining`, `Retry-After`). +Adapt automatically: + +- 90% of limit consumed → slow down (sleep proportional to remaining budget) +- 429 / Retry-After → exponential backoff with jitter (per source) +- GitHub GraphQL cost: track query cost vs hourly budget (5000) + +**Effect:** never hit hard limits. Sustained throughput is `~80% of +limit`, not `100% then 429 storm then crash`. + +### P-6: Saga pattern per batch (idempotent + recoverable) + +Each batch is a transactional unit: + +``` +BEGIN + INSERT/UPDATE rows (ON CONFLICT DO UPDATE) + INSERT pipeline_event (kafka_emitted=false) + UPDATE pipeline_watermarks SET last_synced_at = max(batch) +COMMIT + +ASYNC: emit Kafka event, mark pipeline_event.kafka_emitted=true +``` + +If crash before COMMIT: nothing changes, watermark unchanged, on +restart the worker re-fetches the same batch. + +If crash after COMMIT but before Kafka emit: outbox pattern catches +unemitted events on next cycle. + +**Effect:** zero data loss, zero duplicates (upsert idempotent), zero +silent skips. 
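+
+One way to sketch that transactional unit with SQLAlchemy async sessions. This is
+illustrative only: `upsert_items` is a hypothetical stand-in for the existing `_upsert_*`
+helpers, and the outbox/watermark column names are assumptions on top of the §4 table sketch.
+
+```python
+import json
+import uuid
+from typing import Awaitable, Callable
+
+from sqlalchemy import text
+from sqlalchemy.ext.asyncio import AsyncSession
+
+
+async def persist_batch(
+    session: AsyncSession,
+    batch,  # Batch contract from §4: .scope, .items, .source_high_water
+    upsert_items: Callable[[AsyncSession, list], Awaitable[int]],
+) -> None:
+    async with session.begin():  # one COMMIT covers rows + outbox + watermark
+        await upsert_items(session, batch.items)  # ON CONFLICT DO UPDATE → idempotent
+        await session.execute(
+            text(
+                "INSERT INTO pipeline_events_outbox (id, scope, payload, kafka_emitted) "
+                "VALUES (:id, :scope, :payload, false)"
+            ),
+            {
+                "id": str(uuid.uuid4()),
+                "scope": batch.scope,
+                "payload": json.dumps({"items": len(batch.items)}),
+            },
+        )
+        await session.execute(
+            text(
+                "UPDATE pipeline_watermarks SET last_synced_at = :hw "
+                "WHERE scope_key = :scope"
+            ),
+            {"hw": batch.source_high_water, "scope": batch.scope},
+        )
+    # Crash before COMMIT: nothing moved, the batch is simply re-fetched on restart.
+    # Crash after COMMIT: the outbox drainer emits any kafka_emitted=false rows next cycle.
+```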
+ +### P-7: Observable by default + +Every job emits structured progress: + +```json +{ + "scope": "jira:project:BG", + "phase": "fetching", + "items_total_estimate": 197043, + "items_done": 12500, + "items_per_second": 84, + "eta_seconds": 2200, + "started_at": "...", + "errors": [] +} +``` + +Exposed via: +- `GET /pipeline/jobs` — current state of all jobs +- Prometheus metrics: `pulse_ingestion_items_total{source,scope,entity}`, + `pulse_ingestion_duration_seconds`, `pulse_ingestion_error_rate` +- Pipeline Monitor UI — already exists, gets per-scope breakdown + +**Effect:** "is it stuck?" answered in 5 seconds, not 4 hours. + +### P-8: Health-aware orchestration + +Before each batch: + +```python +if not source.is_reachable(): + self.mark_unhealthy(source) + return +``` + +When source unhealthy, jobs go to "paused" queue. Periodic health +ping (1/min) re-tests; on recovery, jobs resume from where they were. + +**Effect:** VPN drop = jobs pause cleanly, no error storm, no time +wasted retrying. VPN back = automatic resume. + +--- + +## 4. Proposed Architecture v2 + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ Discovery Service (per source) │ +│ github-discovery jira-discovery jenkins-discovery │ +│ (org-scan) (project-scan) (job-scan via SCM) │ +│ │ │ │ │ +│ └────────┬───────────┴───────────────────┘ │ +│ ▼ │ +│ emits jobs: { source, scope, entity, since, priority } │ +└──────────────┬───────────────────────────────────────────────────┘ + │ + ▼ +┌──────────────────────────────────────────────────────────────────┐ +│ Job Queue (Redis Streams or Kafka topic) │ +│ jira:issues:BG since=2026-04-26 priority=high │ +│ jira:issues:OKM since=2026-04-26 │ +│ github:prs:foo/bar since=2026-04-26 │ +│ jenkins:deploys:job-X since=2026-04-26 │ +└──────────────┬───────────────────────────────────────────────────┘ + │ + ▼ +┌──────────────────────────────────────────────────────────────────┐ +│ Worker Pool (configurable concurrency per source) │ +│ ┌──────────────────────────────────────────────────────────┐ │ +│ │ jira-worker[1..5] │ │ +│ │ pick job → BatchedFetcher → for batch in stream: │ │ +│ │ normalize → upsert → emit_event → advance_watermark │ │ +│ │ emit progress event │ │ +│ └──────────────────────────────────────────────────────────┘ │ +│ ┌──────────────────────────────────────────────────────────┐ │ +│ │ github-worker[1..3] │ │ +│ │ jenkins-worker[1..3] │ │ +│ └──────────────────────────────────────────────────────────┘ │ +└──────────────┬───────────────────────────────────────────────────┘ + │ writes + ▼ +┌──────────────────────────────────────────────────────────────────┐ +│ PULSE DB │ +│ eng_pull_requests, eng_issues, eng_deployments, eng_sprints │ +│ pipeline_watermarks (source, entity, scope_key) → last_at │ +│ pipeline_jobs (job state: pending/running/done/failed) │ +│ pipeline_events_outbox (Kafka emit guarantee) │ +│ pipeline_progress (per-scope progress + ETA) │ +└──────────────────────────────────────────────────────────────────┘ + +┌──────────────────────────────────────────────────────────────────┐ +│ Metrics Worker (unchanged) │ +│ consumes Kafka events → recomputes snapshots │ +└──────────────────────────────────────────────────────────────────┘ +``` + +### Key API contracts + +```python +# A fetcher is just an AsyncIterator yielding small batches +class BatchedFetcher(Protocol): + def fetch(self, scope: str, since: datetime | None) -> AsyncIterator[Batch]: + ... 
+ +@dataclass +class Batch: + scope: str # e.g., "BG" + items: list[dict] # 50-200 raw items + source_high_water: datetime # for watermark advancement + estimated_total: int | None # if pre-flight known, for ETA + rate_limit: RateLimitInfo | None # adaptive throttling +``` + +```python +# Job worker is a generic loop, source-agnostic +class IngestionJobWorker: + async def run_job(self, job: Job): + fetcher = registry.get_fetcher(job.source, job.entity) + async for batch in fetcher.fetch(job.scope, job.since): + await self.persist_batch(batch) # transactional + await self.emit_progress(job, batch) # per batch + await self.check_health() # circuit breaker +``` + +--- + +## 5. The 10× envelope, decomposed + +| Lever | Today | v2 | Speedup | Notes | +|---|---|---|---|---| +| Stream vs bulk-then-persist | 250k issues × 1.5h fetch + 0.5h normalize+upsert = 2h | 100 items every ~3s = constant-time TTFR | **30×** TTFR | AP-1 + FDD-OPS-012 | +| Kill redundant changelog fetch | 376k × 1 HTTP call (~24h) | 0 (use inline) | **∞** (eliminates phase) | AP-2 + FDD-OPS-013 | +| Source isolation (parallel) | 4 phases sequential | 3 source workers concurrent | **3-4×** wall time | AP-4 + FDD-OPS-014 | +| Per-source concurrency | 1 connector active | 3-5 workers per source | **5×** sustained throughput | P-4 | +| Adaptive rate limits | naive retries, sometimes 429-banned | stay 80% of limit | **2×** sustained, **0** ban | P-5 | +| Per-scope watermarks | new project = full reset = full backfill | new scope = scope-only backfill | **10×** for incremental ops | AP-3 + FDD-OPS-014 | +| Health-aware (skip unreachable) | block whole cycle on Jenkins outage | pause source, others continue | qualitative — turns hours of wasted retry into 0 | P-8 | +| Pre-flight estimate | guess | actual API count | qualitative — answers "stuck?" in seconds | P-7 + FDD-OPS-015 | + +**Aggregate effect on the workload that's running RIGHT NOW** (376k +issues across 32 projects, fresh tenant): + +- **Today's path:** 24-30h+ (potentially infinite if changelog fetch + rate-limits) +- **v2 Phase 1 path** (just AP-1+AP-2 fixes): 30-45 minutes +- **v2 Phase 2 path** (+ source isolation): same 30-45 min for issues, + but now happens in parallel with PR sync, deploy sync — total cycle + ~45 min vs ~3h + +--- + +## 6. Migration Path — non-bigbang, in 3 phases + +I will NOT propose a clean-room rewrite. The codebase has 1 year of +hard-won correctness (status mapping, anti-surveillance, edge cases). +Throwing it out is the wrong reflex. + +Each phase delivers value standalone and is reversible. + +### Phase 1: Quick Wins — fixes the immediate pain (1-2 days, P0) + +**Scope:** correct existing code, no architecture change. 
+ +| Item | Effort | Effect | +|---|---|---| +| **AP-2 fix** — comment out redundant `fetch_issue_changelogs` call in `_sync_issues`; teach normalizer to read inline `raw["changelog"]` | XS (1h code + tests) | 24h+ → ~5 min for changelog phase (eliminated) | +| **AP-1 fix** (FDD-OPS-012) — refactor `_sync_issues` to batch-per-project, mirror `_sync_pull_requests` pattern from `7f9f339` | M (4-6h) | TTFR for issues: hours → seconds; memory: 1.5GB → 50MB | +| **Pre-flight estimate logging** — before each `_sync_*`, log "I will fetch ~N items based on JQL count / GraphQL nodeId / Jenkins job count" | XS (1h) | Operator gets actual ETA vs guess | + +**Total Phase 1: ~1-2 dev-days.** +**Result on Webmotors workload: 24h → ~30-45 min for full re-ingest.** + +### Phase 2: Source Isolation (3-5 days, P1) + +**Scope:** structural — split sync-worker into per-source workers. + +| Item | Effort | +|---|---| +| Extract `JiraSyncWorker`, `GithubSyncWorker`, `JenkinsSyncWorker` from monolithic `DataSyncWorker` | M (1 day) | +| docker-compose: 3 services instead of 1 | XS | +| Per-source watermarks: schema migration + repo update | M (1 day) | +| Health-aware pre-flight check before each cycle | S (2-3h) | +| Update Pipeline Monitor UI for per-source breakdown | S (existing surface) | + +**Total Phase 2: 3-5 dev-days.** +**Result: failure isolation, parallel execution, correct watermarks +under scope expansion.** + +### Phase 3: Job Queue + Pool (1-2 weeks, R1) + +**Scope:** the SaaS-ready pattern. + +| Item | Effort | +|---|---| +| Choose job queue (Redis Streams vs Kafka topic — both already running) | XS (decision) | +| Job state schema (`pipeline_jobs` table) | S | +| Generic `IngestionJobWorker` consuming jobs | M (1-2 days) | +| Refactor each source to expose `BatchedFetcher` interface | M (1 day per source) | +| Discovery emits jobs (no longer triggers sync directly) | S | +| Retry policy + dead-letter | M | +| Tests + chaos eng (kill worker mid-job, verify resume) | M | + +**Total Phase 3: 1-2 dev-weeks.** +**Result: SaaS-ready ingestion. Adding 100 tenants = 100× more jobs, +not 100× more code paths.** + +--- + +## 7. What we are NOT doing (out of scope) + +- **No connector rewrites.** GitHub/Jira/Jenkins connectors stay as-is; + they have well-tested correctness logic. Only the orchestration layer + changes. +- **No DevLake re-introduction.** ADR-005 is settled. +- **No event sourcing.** Outbox pattern (Phase 1.5+) is sufficient + for our Kafka guarantee. +- **No SaaS multi-tenant orchestration.** Phase 3 makes it possible; + full multi-tenant rollout is R1 product work, separate spec. + +--- + +## 8. Decisions to make NOW + +For the team. These are not code decisions; they need product/eng +alignment. + +### D-1: Phase 1 NOW vs after current sync converges? + +**Option A:** Stop the current sync (lose ~3h of work), apply Phase 1 +fixes (~1-2 days), restart. Total: 2 days + 30 min final ingestion. +Sustainable code lands. + +**Option B:** Wait for current sync to converge (24-30h+), then start +Phase 1. Total: 1-2 days waste + 1-2 days Phase 1. + +**Recommendation:** A. Even with restart cost, A finishes faster AND +ships durable code. Continuing with the broken pipeline is sunk cost. + +### D-2: Phase 2 + 3 timing + +Phase 2 is a clear R1 commitment. Phase 3 is the SaaS gate — must +ship before second tenant goes live. Suggest committing both to R1 +sprint planning explicitly. 
+ +### D-3: Backlog FDDs + +Three new FDDs come out of this: + +- **FDD-OPS-013** Kill redundant `fetch_issue_changelogs` (Phase 1 quick win, XS) +- **FDD-OPS-014** Per-source workers + per-scope watermarks (Phase 2, M-L) +- **FDD-OPS-015** Observable ingestion: pre-flight estimates + per-scope progress + ETA (Phase 1.5) + +(FDD-OPS-012 — issue batch-per-project — was already opened 2026-04-28.) + +--- + +## 9. Success criteria — how we know v2 worked + +Lock these as acceptance for the migration: + +1. **TTFR ≤ 60s for any source/entity** (measured: time from cycle + start to first row in `eng_*` table) — ✅ **ATINGIDO (Phase 1, commit `4d1c9b4`)**: `_sync_issues` agora streams per-project; primeira issue persistida em <30s tipicamente +2. **Full re-ingestion at Webmotors scale (376k issues, 64k PRs, 1.4k + deploys, 200 sprints) completes in ≤ 90 minutes** — ⚠️ **PARCIAL**: backfill BG (197k issues em projeto único) ainda é o gargalo dominante. Demais projetos rápidos. Estimativa total ~2-3h, não 90min — projeto BG sozinho consome maioria do tempo +3. **Memory peak ≤ 200 MB per worker** (vs 1.5 GB today) — ✅ **ATINGIDO**: Phase 1 streaming reduz para ~50-100 MB peak observado em produção +4. **Zero silent failures** — every error is logged with scope and + visible via `GET /pipeline/jobs` endpoint — ⚠️ **PARCIAL**: per-batch logs detalhados existem; `pipeline_ingestion_progress` tracking OK; falta `GET /pipeline/jobs` endpoint dedicado (FDD-OPS-015 pendente) +5. **VPN drop simulation**: kill jenkins network in test, GitHub + + Jira ingestion continues unaffected, Jenkins resumes on reconnect — ❌ **NÃO ATINGIDO**: Phase 2-A/B per-scope watermarks shippadas mas worker still monolítico. P-2 source isolation requer Step 2.6 (docker-compose split em workers per-source) — pendente +6. **Adding 1 fake project to Jira catalog** triggers backfill ONLY + for that scope (not full rerun of existing 32 projects) — ✅ **ATINGIDO (Phase 2-A + 2-B, commits `c2c6e5d`..`c628528`)**: per-scope watermarks `(tenant, entity, scope_key)` + read-side resolution `since_by_project`/`since_by_repo` enviam since correto por escopo +7. **Crash recovery test**: SIGKILL worker mid-batch, restart, verify + ≥99% of fetched data persisted (not 0, like today) — ✅ **ATINGIDO (Phase 1)**: cada batch persiste imediatamente via `_upsert_*` antes de avançar watermark; crash recovery loses ≤1 batch (~50-100 issues) + +**Status agregado v2 (2026-04-29):** + +| Phase | Status | Commits | +|---|---|---| +| Phase 1 (Quick Wins — AP-1 + AP-2 + pre-flight) | ✅ SHIPPED | `4d1c9b4`, `62c183f` | +| Phase 2-A (writes per-scope watermarks) | ✅ SHIPPED | `c2c6e5d`, `a2d5850`, `f357d05`, `15574a7`, `4f86fd2` | +| Phase 2-B (reads per-scope watermarks) | ✅ SHIPPED | `4478f13`, `c628528` | +| Phase 2.6 (docker-compose split per-source workers) | ⏳ PENDING | next session | +| Phase 3 (job queue + worker pool — SaaS-ready) | ⏳ PENDING | R1 | +| **Bonus data-quality fixes descobertos durante v2** | ✅ SHIPPED | `177830e` (changelog), `172f3f2` (effort), `0c7124d` (status), `649ed78` (sprint) | + +**Observação importante:** durante a engenharia Phase 1+2 emergiram 4 bugs estruturais de data quality (status_transitions=0, story_points=0, status normalization skew, sprint status vazio) que **não estavam no escopo original** mas ficaram visíveis quando começamos a olhar dados frescos pós-Phase 1. Documentados como INC-020..023 / FDD-OPS-016..018. Fix de cada um expandiu o escopo do v2 — mas todos foram resolvidos ainda dentro da janela de 2 dias. 
+ +These are testable. Phase 3 acceptance hinges on items 4-7. **Item 5 (VPN simulation)** é o gating não-resolvido para confiar em SaaS multi-source. + +--- + +## 10. The honest risk + +This document advocates for stopping a 3-hour-old sync to start a +2-day refactor. That is itself a "another patch" pattern — promise +something better, ask to throw away the work in flight. + +**Why I think this time it's different:** + +- The diagnosis is structural, not a one-off (5 distinct failures, all + same root cause family) +- Phase 1 alone is small enough to verify in 1-2 days, not 1-2 weeks +- The 10× number is decomposed and falsifiable — if we ship Phase 1 + and don't see TTFR drop from hours to seconds, we made a wrong + diagnosis and need to revise +- The current sync's 24h ETA is itself a falsifiable claim that I'm + putting in writing now — if it converges in <2h, I was wrong and + Phase 1's urgency is reduced + +But the user's frustration is correct. The default should be: "until +proven otherwise, every ingestion run is doomed at this scale." Phase +1 disproves that for issues. Phase 2 disproves it for cross-source +failures. Phase 3 disproves it for SaaS multi-tenant. + +If we don't take this seriously now, we will rediscover all of it +when the second tenant onboards, with much more visibility and +political cost. + +--- + +## Appendix A: Why the current architecture exists + +This is not blame. The current state is the natural accretion of: + +- ADR-005 (replace DevLake): the focus was correctness, not throughput. + Bulk-then-persist was acceptable when datasets were small and we were + proving feasibility. +- Commit `7f9f339` (PR batch refactor): proved the streaming pattern + works. Should have generalized then; didn't because PRs were the + pain at the time. +- Discovery service (ADR-014): correctly built as separate worker. + The lesson didn't propagate to sync. +- 60+ status mappings (PT-BR): hard-won correctness. Don't break. +- Schema-drift monitor (FDD-OPS-001 line 3): smart, defensive, + belongs in v2 unchanged. + +v2 is **not** "throw away the work." It's "promote streaming + +isolation from local optimization in 1-2 places to architectural +default." + +--- + +## Appendix B: Counter-arguments I considered + +- "Just optimize the current code, don't restructure" — 5 incidents + in 5 days argue against. Optimization without isolation = endless + whack-a-mole. +- "Wait until 2nd customer pays, then build SaaS-ready ingestion" — + building SaaS infra under customer time pressure is how outages + happen at acquisition demos. +- "Use a 3rd-party data platform (Airbyte, Fivetran)" — explicitly + rejected in ADR-005 (DevLake had the same coverage gap on Postgres). + Adding another opaque layer doesn't solve our problems. +- "The 10× number is hand-wavy" — fair, but each lever is decomposed + in §5. Falsifiable acceptance criteria in §9. + +--- + +**Status of this document:** PROPOSAL. Awaiting review by +`pulse-data-engineer`, `pulse-engineer`, `pulse-product-director`, +and final approval from the user before any implementation. diff --git a/pulse/docs/ingestion-spec.md b/pulse/docs/ingestion-spec.md index 749122b..21d05d4 100644 --- a/pulse/docs/ingestion-spec.md +++ b/pulse/docs/ingestion-spec.md @@ -13,18 +13,24 @@ This document captures every adjustment, problem, and solution encountered during PULSE's data ingestion buildout — from initial DevLake-based pipeline to current proprietary connectors with dynamic discovery. 
It serves as the **single source of truth** for understanding ingestion behavior and as the **specification baseline** for building a fully autonomous SaaS ingestion engine. -### Current State (2026-04-14) - -| Metric | Value | -|--------|-------| -| Jira projects active | 69 | -| Issues ingested | 373,872 | -| PRs ingested | 63,647 | -| PR-Issue link rate | 21.9% (13,966 PRs) | -| Deployments (Jenkins) | 83 | -| Sprints | 215 | -| GitHub repos discovered | 754 (active), 1,429 (total) | -| Ingestion cycle time | ~3h (full backfill), ~7min (incremental) | +### Current State (2026-04-29 — pós-Phase-1 v2 + data-quality fixes) + +| Metric | Value | Note | +|--------|-------|------| +| Jira projects active | 32 (de 69 totais descobertos) | Subset ativo via discovery dinâmica (ADR-014) | +| Issues ingested | 311.068 | Re-ingestão pós-`seed_dev` revert (commit `40ca7e4`); diff vs. 373k anterior é por escopo de projetos ativos | +| PRs ingested | 63.131 | Estável desde 2026-04-27 | +| PR-Issue link rate | ~5% (em recovery após reset) | Baixo temporariamente — re-link pós-ingestão completa restaura ~22% | +| Deployments (Jenkins) | 1.376 | Auto-discovery via SCM scan (commit `d1aebf7`) | +| Sprints | 195/217 com status correto (89,9%) | 22 vazias = board órfão 873 sem projeto ativo. Pós-FDD-OPS-018 (commit `649ed78`) | +| GitHub repos discovered | 754 (active), 1.429 (total) | Estável | +| Status definitions discovered | 326 (117 new + 181 indeterminate + 28 done) | Pós-FDD-OPS-017 (commit `0c7124d`) | +| Distinct status names em uso | 104 | DEFAULT_STATUS_MAPPING expandido para ~80; fallback `statusCategory` cobre o resto | +| Squads ativos | 27 | FID + PTURB usam Sprint; **25 são Kanban-pure** (sem sprints) | +| Story Points usage | 0% (todos os 69 projetos) | Webmotors NÃO usa SP — fallback chain T-shirt/Hours/Count em FDD-OPS-016 | +| Ingestion cycle time | TTFR <60s (Phase 1 v2) | Backfill BG ~197k issues continua o gargalo. Pre-fix bulk: 24-30h. 
Pós-fix: ~30-45 min issues + paralelo PR/deploy | +| Coverage de `status_transitions` | ~0% legacy / 100% fresh | Rolling forward: cada incremental sync corrige; backfill retroativo opcional via watermark reset | +| Coverage de `story_points` (effort) | 52,3% em projetos novos (CRMC), ~0% legacy | Mesma rolling-forward dinâmica que status_transitions | --- @@ -42,13 +48,71 @@ This document captures every adjustment, problem, and solution encountered durin | Characteristic | Detail | Impact on Ingestion | |---------------|--------|-------------------| -| Org size | ~750 active repos, 69 Jira projects | High volume, need batch processing | -| Jira project scale | 197K issues in single project (BG) | Single JQL query can return massive payloads | -| Custom fields | Sprint = `customfield_10007`, Story Points = `customfield_18524` | Must discover dynamically per tenant | -| Jenkins patterns | No corporate standard; each repo has unique pipeline config | Cannot use single regex for deployment detection | -| Language mix | Portuguese status names ("Em Desenvolvimento", "Concluido") | Status normalizer needs i18n mapping | -| Jira reserved words | Project key "DESC" is SQL reserved word | Must quote project keys in JQL | -| Archived projects | Some keys referenced in PRs (e.g., "RC") don't exist in Jira API | Graceful handling of orphan references | +| Org size | ~750 active repos, 69 Jira projects, 27 squads ativos | High volume, need batch processing | +| Squad shape | 25 de 27 squads são **Kanban-puros** (sem sprints); apenas FID + PTURB usam Scrum | Sprint metrics aplicam-se a 7% das squads — métricas de fluxo (Cycle Time, CFD, Throughput) são as primárias | +| Jira project scale | 197K issues em projeto único (BG) | Single JQL query can return massive payloads — exige streaming per-project | +| Custom fields | Sprint = `customfield_10007`, Story Points = `customfield_18524` (+ legacy `customfield_10004`) | Must discover dynamically per tenant via `/rest/api/3/field` | +| Effort estimation method | **Webmotors NÃO usa Story Points** (0% dos 69 projetos). Padrões heterogêneos por squad: T-shirt size (P/M/G), `timeoriginalestimate` em horas, ou nada (Kanban-puro) | FDD-OPS-016 — fallback chain SP→T-shirt→Hours→None com discovery dinâmico de campos T-shirt/Tamanho | +| T-shirt size fields | `customfield_18762` ("T-Shirt Size") + `customfield_15100` ("Tamanho/Impacto") | Mapeados em escala Fibonacci: PP=1, P=2, M=3, G=5, GG=8, GGG=13. 
Discovery por nome (case-insensitive) | +| Status workflows | 326 status definitions descobertas; 104 raw distintos em uso ativo | DEFAULT_STATUS_MAPPING curado com ~80 PT-BR; resto via fallback `statusCategory.key` da Jira | +| Jenkins patterns | No corporate standard; each repo has unique pipeline config | Cannot use single regex for deployment detection — auto-discovery via SCM scan (`d1aebf7`) descobriu 577 PRD jobs em 283 repos | +| Language mix | Portuguese status names ("Em Desenvolvimento", "Concluído", "FECHADO EM PROD") | Status normalizer requer i18n mapping + `statusCategory` fallback como rede de segurança | +| Jira reserved words | Project key "DESC" é SQL reserved word | Must quote project keys in JQL | +| Archived projects | Some keys referenced in PRs (e.g., "RC") don't exist in Jira API | Graceful handling of orphan references — RC tem 1.348 PR refs sem Jira project correspondente | +| NULL bytes em texto | Observado 2026-04-28 em ENO-3296 (description) | Postgres `text` rejeita 0x00; helper `_strip_null_bytes` aplicado a title/description/assignee no normalizer | +| Network dependency | Acesso à Jira/GitHub/Jenkins via VPN corporativa | VPN drops causaram silent failures (FDD-OPS-001 / FDD-OPS-014 §AP-3, AP-4); health-aware orchestration é P-8 do v2 | + +### 2.3 Source Configuration Philosophy — Discovery Only + +**Decisão fundamental (locked-in 2026-04-27):** PULSE **NÃO mantém listas +explícitas** de repos GitHub ou projetos Jira em `connections.yaml` ou em +qualquer outro lugar. **Todo source é descoberto dinamicamente.** + +**Por quê** — três razões: + +1. **Listas explícitas envelhecem mal**: cada novo squad/repo/projeto + exige edição manual + redeploy. Webmotors evoluiu de 8 → 69 projetos + Jira em poucas semanas; manter sincronizado à mão não escala. +2. **Falham silenciosamente**: PRs referenciando `SECOM-1234` ficam + "linkados a nada" se SECOM não está na lista. Resultado: 5.27% de + link rate. Após discovery: 21.9% (4× melhor) com 96-100% per active + project. +3. **Não fazem sentido pra SaaS**: o produto precisa funcionar em + tenant novo sem que ninguém edite YAML. Discovery é a única forma de + "zero-config onboarding" (princípio §6.1). + +**O que é mantido em `connections.yaml`** (não-discoverable): + +| Campo | Razão | +|---|---| +| `connections[].source` (github/jira/jenkins) | Identifica tipo de conector pra usar | +| `connections[].base_url` | Endpoint da source (Jira tenant URL, GitHub Enterprise vs Cloud) | +| `connections[].token_env`/`username_env` | Onde achar credenciais (env var) | +| `connections[].sync_interval_minutes` | Cadência de sync (decisão operacional, não discoverable) | +| `status_mapping` (60+ entries PT-BR/EN) | Mapeamento de workflow Jira customizado → estados normalizados (todo/in_progress/in_review/done). Pode ser parcialmente AI-discovered no futuro (§6.4) | +| `teams` (squad → repos/projects mapping) | Decisão de organização, não topologia de source — pertence ao produto | + +**O que foi REMOVIDO em 2026-04-27:** + +- `connections[].scope.repositories` (lista de 9 repos GitHub explícitos) +- `connections[].scope.projects` (lista de 8 projetos Jira explícitos) + +Eram artefatos de bootstrap (teste de viabilidade no início do projeto). +Agora dispensáveis. 
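+
+A minimal sketch of how this decision can look on the connector side. Attribute and
+method names here (`_explicit_repos`, `repos_for_sync`) are illustrative assumptions;
+`discover_repos` is the documented discovery call, and the concrete mechanisms per
+source are listed in the table below:
+
+```python
+# Sketch only: discovery is the default path; explicit lists survive solely as a
+# deprecated escape hatch for the old bootstrap mode.
+class GitHubConnector:
+    def __init__(self, scope: dict):
+        self._explicit_repos = scope.get("repositories")      # legacy bootstrap lists only
+        self._active_months = scope.get("active_months", 12)
+
+    async def repos_for_sync(self) -> list[str]:
+        if self._explicit_repos is None:
+            # Normal path: re-discover every cycle, so new repos appear and
+            # archived ones drop off without any YAML edit or redeploy.
+            return await self.discover_repos(active_months=self._active_months)
+        return self._explicit_repos
+```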
+
+**Como cada source descobre:**
+
+| Source | Mecanismo | Resultado |
+|---|---|---|
+| **GitHub** | `discover_repos(active_months=12)` via GraphQL `organization.repositories(orderBy: PUSHED_AT)` filtrado por atividade | ~283 repos com atividade nos últimos 12 meses |
+| **Jira** | `ProjectDiscoveryService.run_discovery()` lista todos projetos via REST `/rest/api/3/project`, marca como `discovered`. `SmartPrioritizer.auto_activate(threshold=3)` promove pra `active` projetos com ≥3 references em PR titles | 69 projetos descobertos, ~9 dos quais auto-ativados na primeira passada (cresce conforme novos PRs chegam) |
+| **Jenkins** | `discover_jenkins_jobs.py` faz SCM scan READ-ONLY em todos os jobs, gera `config/jenkins-job-mapping.json`. Sync worker lê esse JSON. Re-rodar quando novos repos aparecem (semanal/sob demanda) | 577 PRD jobs em 283 repos |
+
+**Quando re-discovery acontece:**
+
+- Jira: cron `0 3 * * *` UTC (configurável via `tenant_jira_config.discovery_schedule_cron`); manual via `POST /admin/jira/discovery/run`
+- GitHub: a cada ciclo de sync (15min) — o `discover_repos` é chamado pelo connector se `_explicit_repos is None`
+- Jenkins: regen do JSON é manual (script `discover_jenkins_jobs.py`); idempotente

---

@@ -84,15 +148,146 @@ async def sync(self):

### 3.3 Key Design Decisions

-| Decision | Rationale | ADR |
-|----------|-----------|-----|
+| Decision | Rationale | ADR / Commit |
+|----------|-----------|--------------|
+| **Discovery-only source configuration** | See §2.3 — explicit lists kill SaaS scalability and link rate | 2026-04-27 |
| Replaced DevLake with proprietary connectors | 99.3% issue data loss in DevLake PostgreSQL layer | ADR-005 |
| GraphQL primary for GitHub, REST fallback | 40x faster PR fetch (50 PRs + reviews + stats in 1 call) | Commit `60fe576` |
| Per-repo batch upsert (not all-at-end) | Memory efficiency + real-time progress visibility | Commit `7f9f339` |
-| Global watermark per entity (not per-project) | Simpler model, but requires reset for project scope expansion | Migration 002 |
+| Global watermark per entity (not per-project) | Simpler model, but requires reset for project scope expansion. **Tradeoff documented in §3.7 + Problem 5.** | Migration 002 |
| JSONB for `linked_issue_ids` and `status_transitions` | Flexible schema, supports variable-length arrays | Migration 001 |
| Row-Level Security on all tables | Multi-tenant isolation at DB level | Migration 001 |
| Kafka event backbone | Decouples ingestion from metric calculation | ADR-004 |
+| **Partial index for snapshots `(tenant, metric_type, calculated_at DESC) WHERE team_id IS NULL`** | 50× perf regression on `/metrics/home` once `metrics_snapshots` >5M rows; non-partial index doesn't help due to B-tree NULL semantics | Commit `80f1796` (2026-04-27) |
+| **Worker schema-drift monitor (FDD-OPS-001 line 3)** | Detects payload-vs-dataclass mismatch when bytecode is stale; tags rows with `_schema_drift` for Pipeline Monitor surfacing | Commit `5d71618` |
+
+### 3.4 Worker Lifecycle Guarantees
+
+**Origin:** FDD-OPS-001 incidents (2026-04-16/17/18) — Python workers running
+stale code in memory while updated source was on disk. Resulted in 3
+production-local incidents in 3 days where snapshots persisted with
+obsolete logic.
+
+**Four lines of defense (three SHIPPED, one still planned):**
+
+1. **Hot-reload em dev (planned, not yet shipped)** — `docker compose
+   watch` to auto-reload workers on file change
+2. 
**Admin recalc force-reload** — `POST /admin/metrics/recalculate` + calls `importlib.reload()` on domain/service modules before recalc +3. **Snapshot schema-drift monitor (SHIPPED 2026-04-23)** — pós-write, + compara payload com dataclass corrente. Missing fields → log WARN + `FDD-OPS-001/L3` + Prometheus counter `pulse_snapshot_schema_drift_total` + + anota `_schema_drift` no JSONB. Pipeline Monitor consome via + `GET /pipeline/schema-drift?hours=N` +4. **CI/CD force-restart on deploy (SHIPPED 2026-04-23)** — + `.github/workflows/deploy.yml` sempre roda + `docker compose up -d --force-recreate` nos 4 workers Python pós + build (deploy step ainda é TODO, mas o template existe) + +**Operacional fora do CI:** após edit em `domain/service` files local, +o operator deve rodar `make rotate-secrets` (que faz `up -d +--force-recreate` em 5 serviços) — `docker compose restart` NÃO relê +o `.env` nem força reimport de módulos. Documentado em +`docs/testing-playbook.md` §8.9. + +### 3.5 DB Index Strategy for Snapshots + +**Origin:** 2026-04-27 incident — dashboard error 30s timeout porque +`/metrics/home` levava 54s. Causa raiz: `metrics_snapshots` cresceu +pra 7M rows e a query `WHERE tenant_id=? AND metric_type=? AND team_id +IS NULL ORDER BY calculated_at DESC LIMIT 200` regrediu de Index Scan +pra Parallel Seq Scan (10s/query × 8 queries por home request = 50s+). + +**Indexes mantidos** (em `metrics_snapshots`): + +| Index | Definição | Cobre | +|---|---|---| +| `metrics_snapshots_pkey` | `(id)` | Primary key — sempre | +| `uq_metrics_snapshots_*` | `UNIQUE(tenant, team, type, name, period_start, period_end)` | Upsert constraint | +| `idx_metrics_snapshots_lookup` | `(tenant, type, name, period_start, period_end)` | Specific metric+window queries | +| **`idx_metrics_snapshots_tenant_latest`** | `(tenant, type, calculated_at DESC) WHERE team_id IS NULL` | **`/metrics/home` tenant-wide aggregations** (NEW 2026-04-27, migration 009) | + +**Por que partial index** (não non-partial): B-tree não usa índice +quando filtro inclui `IS NULL` em coluna não-NULL-aware. Partial +index `WHERE team_id IS NULL` resolve isso e mantém o índice menor +(exclui linhas team-scoped que têm padrão de acesso diferente). + +**Resultado medido**: query 10.3s → 2.4ms (**~4000× faster**). `/metrics/home` +total: 54s → 0.6s. + +**Princípio pra futuro**: toda nova query crítica que faz `ORDER BY ... +LIMIT N` em tabela >1M rows precisa de índice **explicitamente +ordenado** pela coluna do ORDER BY. EXPLAIN ANALYZE durante PR review. +Tracked como FDD-OPS-009 (DB query plan regression tests). + +### 3.6 Jenkins Job Mapping Workflow + +**Por que mapping em vez de discovery contínua:** Jenkins não tem +endpoint nativo eficiente pra "list todos os PRD jobs com seus repos +GitHub correspondentes". Precisaríamos consultar `lastBuild.remoteUrls` +de cada job individualmente — pra 1400+ jobs Webmotors, isso é caro +e lento. + +**Solução:** SCM scan one-shot, output em JSON, sync worker lê o JSON +no boot. + +**Fluxo:** + +``` +1. Operator (humano ou cron) roda: + docker compose exec sync-worker python -m scripts.discover_jenkins_jobs + +2. Script faz READ-ONLY scan via Jenkins API: + - GET /api/json?tree=jobs[name,fullName,url,lastBuild[url]] + - Para cada job: lastBuild → workflow_run → SCM remoteUrls + - Classifica jobs por padrão (PRD vs DEV vs HML) + - Casa cada job com repo GitHub (heurísticas: nome, SCM URL) + - Output: config/jenkins-job-mapping.json (committed) + +3. 
sync-worker lê o JSON no startup (config flag jobs_from_mapping=true) + - Mantém em memória: dict[repo_full_name, list[prd_jobs]] + - Pra cada deploy event do Jenkins: usa o mapping pra resolver repo + +4. Quando regenerar: + - Novo repo Webmotors aparece (esperado: poucas vezes/mês) + - Mudança de pattern de naming dos jobs + - Cron sugerido (futuro): semanal, sábado 04:00 UTC +``` + +**Resultado atual** (`jenkins-job-mapping.json` versão 2026-04-14): +283 repos × 577 PRD jobs. + +**Idempotência:** script é READ-ONLY. Re-rodar a qualquer momento é +seguro. Dois runs consecutivos produzem JSONs equivalentes (modulo +mudanças genuínas em Jenkins). + +### 3.7 Post-Ingestion Mandatory Steps + +Após qualquer **full re-ingestion** (DB wipe + sync from scratch), +quatro passos pós-ingestão são **obrigatórios** pra ter dashboard +correto. Skip qualquer um → métricas incompletas ou inconsistentes. + +| # | Operação | Endpoint / Comando | Tempo | Por quê | +|---|---|---|---|---| +| 1 | Backfill description | `POST /data/v1/admin/issues/refresh-descriptions?scope=all` | ~43min | `description` não é puxada no fetch padrão de issues (custo de payload Jira); endpoint admin busca via `GET /rest/api/3/issue/{key}`. Necessário pro Flow Health drawer mostrar contexto da issue. Cobertura final esperada ~62% (~38% das issues genuinamente sem description no Jira). | +| 2 | Re-link PRs↔Issues | `psql < scripts/relink_prs_to_issues.sql` | ~5s | Sync worker linka PRs durante ingestão usando o snapshot de issues no momento. Discovery dinâmica pode ativar projetos depois — re-link captura PRs que ficaram sem match na primeira passada. Idempotente. | +| 3 | Force snapshot recalc | `POST /data/v1/admin/metrics/recalculate` | ~10s | Garante que todos os 6 períodos (7d/14d/30d/60d/90d/120d) e 4 metric types (dora/lean/cycle_time/throughput) têm snapshot fresco. Workers rodam por evento Kafka, mas alguns períodos podem ficar stale se o evento não disparou em algum bucket. | +| 4 | (Conditional) Backfill `first_commit_at` | `POST /data/v1/admin/prs/refresh-first-commits?scope=stale` | varies | **Skip se ingestão usou código pós-INC-003 fix (2026-04-17+).** Validar via SQL: se ≥90% dos PRs têm `first_commit_at < created_at`, não rodar. Se <90%, rodar com `scope=stale` (filtro `first_commit_at == created_at`). | + +**Validação pós-step 4:** + +```sql +SELECT + COUNT(*) AS total, + COUNT(*) FILTER (WHERE first_commit_at < created_at) AS correct, + COUNT(*) FILTER (WHERE first_commit_at = created_at) AS stale, + ROUND(100.0 * COUNT(*) FILTER (WHERE first_commit_at < created_at) + / NULLIF(COUNT(*),0), 1) AS pct_correct +FROM eng_pull_requests WHERE source = 'github'; +``` + +Esperado: `pct_correct >= 90%` (alguns PRs muito pequenos onde commit +e abertura acontecem no mesmo segundo são casos legítimos de igualdade). --- @@ -255,41 +450,92 @@ WHERE entity_type = 'issues'; --- -### Problem 6: Status Normalization — Portuguese and Custom Workflows +### Problem 6: Status Normalization — Hybrid Textual + Jira statusCategory Fallback -**Context:** Jira workflows vary wildly across orgs and even across projects within the same org. Webmotors uses Portuguese status names. +**Context:** Jira workflows variam selvagemente entre orgs e até entre projects do mesmo tenant. Webmotors usa status names em PT-BR (e.g., "Em Desenvolvimento", "FECHADO EM PROD"). Audit em 2026-04-28 (FDD-OPS-017 / INC-022) mostrou que a abordagem **textual-only** original era catastroficamente insuficiente. 
-**Symptoms:** -- "Em Desenvolvimento" not mapping to `in_progress` -- "Concluido" (without accent) not mapping to `done` -- Custom statuses like "Aguardando Deploy", "Em Code Review" unrecognized +**Symptoms quantificados (2026-04-28):** -**Solution:** Extensive DEFAULT_STATUS_MAPPING with 60+ entries covering English, Portuguese, and common custom workflows. +Distribuição de `normalized_status` em 311.068 issues: +- 96,5% `done` · 3,3% `todo` · 0,2% `in_progress` · 0,1% `in_review` + +Investigação revelou que a Webmotors tem **104 status raw distintos** em workflows ativos. O `DEFAULT_STATUS_MAPPING` original cobria ~50 → 50+ status caíam silenciosamente no fallback "Unknown → todo". Casos sistêmicos: + +| Status raw | Issues afetadas | Bucket atual (errado) | Bucket correto | +|---|---|---|---| +| `FECHADO EM PROD` | 2.881 | todo | done | +| `FECHADO EM HML` | 14 | todo | done | +| `Em Progresso` | 6 | todo | in_progress | +| `Em desenv` | 4 | todo | in_progress | +| `Em Deploy Produção` | 14 | todo | in_progress | +| `Em Monitoramento Produção` | 3 | todo | done | +| `Homologação` | 9 | todo | in_review | +| `Em Verificação` | 4 | todo | in_review | +| (50+ outros) | dezenas | todo | varia | + +**Cascada CRÍTICA**: status_transitions herdam classificação errada. A última transição registrada de uma issue concluída ficava com `status: "todo"` em vez de `done`. Resultado em CASCATA: + +- **Cycle Time** infinito (não há transição final para `done`) +- **Throughput** sub-conta (issues entregues não aparecem) +- **WIP** super-conta (issues finalizadas continuam "em fluxo") +- **CFD / Lead Time** distorcidos +- **Flow Efficiency** indeterminado + +Sem o fix, **todo o pilar Lean** está comprometido para qualquer projeto que use status fora do mapping curado. + +**Solução: Hybrid normalization em 3 camadas** (FDD-OPS-017, commit `0c7124d`): ```python -DEFAULT_STATUS_MAPPING = { - # English - "open": "todo", "to do": "todo", "backlog": "todo", - "in progress": "in_progress", "in development": "in_progress", - "done": "done", "closed": "done", "resolved": "done", - # Portuguese - "em desenvolvimento": "in_progress", "em progresso": "in_progress", - "concluído": "done", "concluido": "done", "finalizado": "done", - "a fazer": "todo", "pendente": "todo", - # Custom patterns - "code review": "in_progress", "em code review": "in_progress", - "aguardando deploy": "in_progress", "ready for qa": "in_progress", - "em teste": "in_progress", "testing": "in_progress", - ... -} +def normalize_status(raw_status, status_mapping=None, status_category=None): + # Camada 1: Textual mapping curado (granularidade in_progress vs in_review) + mapping = {**DEFAULT_STATUS_MAPPING} # ~80 PT-BR + EN entries + if status_mapping: + mapping.update({k.lower(): v for k, v in status_mapping.items()}) + normalized = mapping.get(raw_status.lower().strip()) + if normalized: + return normalized + + # Camada 2: Jira statusCategory.key fallback (autoritativo done/não-done) + if status_category: + cat = status_category.lower().strip() + if cat == "done": return "done" + if cat == "indeterminate": return "in_progress" # NB: collapses in_review + if cat == "new": return "todo" + + # Camada 3: Default 'todo' com WARN log (extremamente raro agora) + logger.warning("Unknown status %r — defaulting to 'todo'", raw_status) + return "todo" ``` -**Result:** 99%+ status normalization accuracy for Webmotors workflows. 
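+
+To make the three layers concrete, a small usage sketch. The expected outcomes follow
+the curated mapping and the status categories described above; this is illustrative,
+not captured test output:
+
+```python
+# Layer 1: curated textual mapping keeps the in_review granularity
+normalize_status("Em Code Review")                            # expected: "in_review"
+
+# Layer 2: statusCategory fallback decides done/not-done for unmapped names
+normalize_status("FECHADO EM PROD", status_category="done")   # expected: "done"
+
+# Layer 3: unknown name and no category -> WARN log + "todo"
+normalize_status("Status custom sem categoria")               # expected: "todo"
+```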
+**Discovery da camada 2** (`_discover_status_categories`): conector chama `/rest/api/3/status` 1× por lifetime e cacheia `name → category` para todos os 326 status defs do tenant. Webmotors: 117 new + 181 indeterminate + 28 done. + +**Por que híbrido (não pure textual nem pure category):** + +- **Textual ganha** quando definido — preserva granularidade `in_progress` vs `in_review` que o Cycle Time Breakdown precisa. Jira `statusCategory.indeterminate` colapsa os dois. +- **Category fallback** captura o long tail tenant-custom sem manutenção contínua. Workflow author é fonte de verdade sobre done/não-done. +- **Default 'todo'** com WARN só atinge agora status sem category — extremamente raro pós-fix. + +**`build_status_transitions` integrado**: `status_categories_map` (todos status → categoria) é passado adiante para classificar cada `to_status` histórico via map. O bug de cascada acima é corrigido na fonte. + +**Result quantificado:** + +3.151 issues reclassificarão na re-ingestão (1% do total) — long tail catastrófico. Distribuição já correta para os 97% restantes. -**SaaS Implication:** Static mapping won't scale. Need: -1. **Learning-based mapper**: observe workflow transitions to infer categories -2. **Per-tenant overrides**: allow admin to map custom statuses -3. **AI fallback**: LLM classifies unknown statuses into todo/in_progress/done +| Transição | Quantidade | +|---|---| +| `todo → done` (FECHADO EM PROD/HML, etc.) | 2.923 | +| `todo → in_review` (Homologação, Verificação) | 161 | +| `todo → in_progress` (Em Progresso, Em desenv) | 67 | + +**Decisão de produto registrada** (FDD-OPS-017 backlog): `FECHADO EM HML` mapeado como `done` (segue Jira `statusCategory.key='done'` + nome literal "FECHADO"). Workflow author classifica como done; respeitamos. Se Webmotors quiser tratar como ainda em fluxo, pode renomear para "Aguardando Deploy Produção" (mapeado para `in_progress`). + +**SaaS Implication:** Hybrid approach é SaaS-ready out-of-the-box. Cada novo tenant: +1. Conector descobre seus 100-300 status defs via `/rest/api/3/status` (1 chamada) +2. Textual mapping curado (PT-BR + EN + ~80 PT-BR variants) cobre majoritário +3. Status category fallback captura o long tail proprietário +4. Operadores adicionam mappings textuais específicos APENAS quando precisam de granularidade `in_review` (raro) + +**Future** (FDD-OPS já catalogado): AI-fallback para status que faltam category — observar workflow transitions para inferir categoria (Section 6.4.2). --- @@ -382,6 +628,217 @@ async def _fetch_repo_prs_graphql(self, repo_name, since): --- +### Problem 11: Inline Changelog Lost in Connector Mapping (`_map_issue` drop) + +**Context:** FDD-OPS-013 (commit `4d1c9b4`, 2026-04-28) eliminou o redundant `fetch_issue_changelogs` round-trip extraindo changelogs **inline** do JQL response (`expand=changelog`). Função nova `extract_status_transitions_inline(raw)` no sync worker fez `raw.get("changelog", {}).get("histories", [])`. Pareceu funcionar (testes passaram). Entretanto, audit em 2026-04-28 mostrou `status_transitions = []` em **100% das 311.007 issues** — mesmo problema que Phase 1 era para resolver. 
+ +**Symptoms:** + +- 311.007 issues no DB (todas as ingeridas pós-Phase-1) com `status_transitions = []` +- Cycle Time não fechava — sem transição para `done` +- Throughput sub-contava — issues `done` apareciam como em fluxo +- WIP super-contava — issues finalizadas no bucket de "ativo" +- Lean metrics todas comprometidas + +**Root Cause** (descoberto via tracing connector → worker em 2026-04-29): + +`JiraConnector._map_issue` (commit ancestral) extraía o changelog para um cache lateral (`self._last_changelogs[internal_id]`) **mas NÃO incluía o campo `changelog` no dict mapeado de retorno**: + +```python +# Ancestral code (BUG): +def _map_issue(self, jira_issue): + changelogs = self._extract_changelogs(internal_id, jira_issue) + if changelogs: + self._last_changelogs[internal_id] = changelogs # cache lateral + return { + "id": internal_id, + "title": fields.get("summary", ""), + # ... outros campos ... + # ❌ NO changelog field aqui + } +``` + +O `_sync_issues` (worker) chamava `extract_status_transitions_inline(raw)` no dict mapeado — `raw.get("changelog", {})` retornava `{}` sempre porque o key não existia. Resultado: lista vazia para toda issue. + +**Por que escapou dos testes:** Os 10 testes em `test_inline_changelog_extraction.py` testavam `extract_status_transitions_inline` **isoladamente** contra dicts sintéticos que JÁ tinham `changelog`. O contrato entre `_map_issue` e o extractor nunca foi testado end-to-end. + +**Solution** (commit `177830e`, 2026-04-29): + +```python +return { + "id": internal_id, + # ... outros campos ... + # FDD-OPS-013 — preserve raw changelog from `expand=changelog` so + # extract_status_transitions_inline() in the sync worker can read it. + "changelog": jira_issue.get("changelog", {}), +} +``` + +Test guard novo: `TestMapIssuePreservesChangelogForInlineExtraction` instancia o connector, alimenta payload Jira-shaped com `expand=changelog`, asserta que o pipe end-to-end (mapper → extractor) produz transitions não-vazias. + +**Result:** Validado live no projeto BG: 1.994 issues re-sincados todos com 3-8 transitions normalizadas (BG-202188: 5 transitions; BG-202413: 3 transitions). Pré-fix: 0 transitions em 311k issues. Pós-fix: 100% das issues recém-tocadas carregam transitions. + +**Lição genérica** — *cache lateral vs return value anti-pattern*: + +> Connector mappers devem retornar **dados completos** no dict mapeado. +> Esconder dados em side caches (`self._last_*`) que outros call sites +> não conhecem é um anti-pattern. Quando outro path tenta acessar via +> "interface natural" (dict access), o dado está invisível mas o cache +> técnico-correto está silently populated. + +Test pyramid lição: testar **contratos entre componentes**, não só cada componente isolado. + +**SaaS Implication:** Padrão "connector retorna dados completos no return value" deve ser doc-policy ao adicionar conectores futuros (GitLab, ADO, Linear). E todo connector → worker pipe precisa de pelo menos 1 test end-to-end que use a SHAPE real da API source. + +--- + +### Problem 12: Effort Estimation Without Story Points (Webmotors-style heterogeneity) + +**Context:** Métricas como Velocity, Throughput-by-effort, Forecast Monte Carlo dependem de "esforço" agregado. Padrão da indústria: Story Points. Audit em 2026-04-28 (FDD-OPS-016 / INC-021): **`story_points = 0` em 100% das 311.007 issues** da Webmotors. 
+ +**Symptoms:** + +- Sample em todos os 69 projetos ativos: `customfield_10004` ("Story Points") + `customfield_18524` ("Story point estimate") **0% populados** +- Webmotors **não usa Story Points** como método de estimativa (decisão organizacional) +- Velocity sempre zerada, throughput-by-effort impossível, forecast sem input + +**Investigação em squads** (samples de 50 issues por projeto): + +| Projeto | T-Shirt Size | Original Estimate (h) | Tamanho/Impacto | Padrão observado | +|---------|--------------|------------------------|------------------|--------| +| ENO | 24% | 52% | 4% | Horas + tshirt | +| DESC | 26% | 34% | 6% | Horas + tshirt | +| APPF | 0% | 12% | 0% | Horas (raro) | +| OKM | 4% | 8% | 0% | Quase Kanban | +| BG, FID, PTURB | 0% | 0% | 0% | **Kanban puro — não estimam** | + +**Conclusão:** padrão heterogêneo entre squads — algumas usam horas, algumas T-shirt size, várias não estimam (Kanban-puro). Single-method approach não funciona. + +**Solution** (commit `172f3f2`, 2026-04-29) — **Effort Fallback Chain**: + +Discovery dinâmico em `_discover_custom_fields`: +- Casa por nome (case-insensitive) os patterns `"t-shirt size"` e `"tamanho/impacto"` +- Webmotors: descobriu `customfield_18762` ("T-Shirt Size") + `customfield_15100` ("Tamanho/Impacto") +- Funciona em qualquer tenant (não hardcode customfield IDs) + +`_extract_story_points` (renomeado conceitualmente para "effort") com cadeia em ordem de prioridade: + +```python +# 1+2. Native numeric Story Points (preferred — no conversion) +for field_id in (story_points_field_id, *FALLBACK_STORY_POINTS_FIELDS, "story_points"): + if value > 0: return float(value) # source: 'story_points' + +# 3+4. T-shirt sized fields → Fibonacci scale +TSHIRT_TO_POINTS = {"PP": 1, "P": 2, "M": 3, "G": 5, "GG": 8, "GGG": 13, + "XS": 1, "S": 2, "L": 5, "XL": 8, "XXL": 13} +for fid in self._tshirt_field_ids: + if (label := unwrap(fields[fid])) and (mapped := TSHIRT_TO_POINTS.get(label.upper())): + return mapped # source: 'tshirt_to_sp' + +# 5. Original Estimate (seconds) → SP equivalent buckets +def _hours_to_points(h): + if h <= 4: return 1 + if h <= 8: return 2 # ~1d + if h <= 16: return 3 # ~2d + if h <= 24: return 5 + if h <= 40: return 8 # ~1w + if h <= 80: return 13 # ~2w + return 21 +# source: 'hours_to_sp' + +# 6. None — issue genuinamente não estimada (Kanban-puro) +# source: 'unestimated' +# CONSUMER MUST count items rather than sum points +``` + +**Telemetria** (`_effort_source_counts`): por batched run, log da distribuição de qual hop produziu o valor. Operadores veem drift ("squad migrou de horas para t-shirt em maio") sem combar logs. + +**Quando `None` (Kanban-puro):** decisão de **count vs sum** fica na camada de métrica, **não** no normalizer. Métrica downstream precisa contar items rather than sum points. Documentado em §8.12. + +**Result:** + +Validado live em CRMC (1.375 issues, projeto novo full-history pós-fix): +- **52,3% com effort estimado** (719/1.375) +- Distribuição de valores: 1, 2, 3, 5, 8 — confirma escala Fibonacci aplicada +- 47,7% com `story_points = None` → métrica counta items + +**Future (codename "dev-metrics" R3+)** — FDD-DEV-METRICS-001: + +Hoje a fallback chain é **automática e implícita**. Diferentes filosofias produzem métricas diferentes. 
R3 vai entregar: +- Per-squad estimation method choice (admin UI: SP / T-shirt / Hours / Count-only / Auto) +- Modelo proprietário de previsão e insights (drift detection, calibração contra histórico, Monte Carlo com método nativo) +- UX completa rescritta ao redor da escolha +- Anti-surveillance by design (insights por squad/processo, nunca individual) + +**Diferenciador competitivo:** LinearB / Jellyfish / Swarmia / Athenian são opinionated em SP. PULSE é o **único** que respeita filosofia da squad e usa isso como entrada de modelo, não como ruído a normalizar. + +**SaaS Implication:** Effort fallback chain é SaaS-ready (descoberta dinâmica). Para "dev-metrics" (R3+), precisa adicionar: +- Coluna `effort_source` em `eng_issues` (auditoria por issue) +- Migration deferred — registrado como prerequisite no FDD-DEV-METRICS-001 + +--- + +### Problem 13: Sprint Status Pipeline — 4-Layer Swiss Cheese + +**Context:** 100% das 216 sprints no `eng_sprints` da Webmotors com `status=''`. `goal` também totalmente vazio. Audit (FDD-OPS-018 / INC-023, 2026-04-29) revelou clássico **swiss cheese alignment** — quatro bugs independentes em camadas diferentes, cada um sozinho garantindo o resultado. + +**Symptoms:** + +- `SELECT status, COUNT(*) FROM eng_sprints` → `('', 216)` +- Sprint Comparison / Velocity Trend não pode filtrar `closed` para excluir sprints em andamento da regressão +- "Current sprint" planejado precisa `status='active'` — impossível sem dado + +**Os 4 bugs (cada um suficiente para causar o sintoma):** + +| # | Camada | Bug | Como sozinho garantia status vazio | +|---|---|---|---| +| 1 | `connectors/jira_connector.py:_map_sprint` | Mapeava OK (ACTIVE/CLOSED/FUTURE) | (não era bug — fonte estava certa) | +| 2 | `engineering_data/normalizer.py:normalize_sprint` | Retornava dict **sem** o campo `status` | Status nunca chega no upsert | +| 3 | `workers/devlake_sync.py:_upsert_sprints` | ON CONFLICT `set_={...}` não incluía `status`/`goal` | Sprints existentes (que existem) nunca atualizam | +| 4 | `connectors/jira_connector.py:_fetch_board_sprints` | Filtrava `started_date < since` | State transitions acontecem em `endDate` — sprint que começou em março e fechou em maio nunca tem update após março | +| 5 | `engineering_data/models.py:EngSprint` | Schema da DB tinha `status` mas ORM SQLAlchemy não tinha o `Mapped[str\|None]` correspondente | **Path que omitia status funcionava silently empty; path que tentava popular crashava com `Unconsumed column names: status`** | + +**Bug #5 (ORM schema drift) é o mais insidioso.** Coluna existia no DB há tempos (alguma migration anterior); ORM nunca foi atualizado. O sintoma é assimétrico: quem **omite** o campo passa silenciosamente; quem **inclui** crashar. Ninguém investiga porque "tá vazio mas não dá erro". + +**Solution** (commit `649ed78`, 2026-04-29) — fix em todas as camadas: + +1. `_map_sprint` agora também passa `goal` adiante (Jira API o retorna) +2. `normalize_sprint` inclui `status` (lowercase: `active`/`closed`/`future`/None) + `goal` (com strip null bytes) +3. `_upsert_sprints` ON CONFLICT `set_` atualiza ambos +4. `_fetch_board_sprints` removeu filtro de watermark (volume baixo, ~216 total / ~5 ativas; sempre re-fetch é correto pois state transitions) +5. 
`EngSprint` model adiciona `status: Mapped[str | None] = mapped_column(String(50), nullable=True)` + +Helper `_normalize_sprint_status` mapeia aliases comuns: +- `open → active` · `in_progress → active` +- `completed/complete/ended → closed` +- `planned/upcoming → future` +- **Unknown values → None** (não bucketiza silenciosamente — operador investiga via NULL no DB) + +**Por que NÃO bucketizar unknown:** Velocity / Carryover logic precisa saber QUAIS sprints estão de fato fechadas. Mapear "?" para `closed` corromperia o cálculo. Fail-loud é melhor que fail-silent aqui. + +**Result:** + +Validado live (ad-hoc backfill cobrindo 31 projetos ativos): + +| Status | Quantidade | Tem goal? | +|---|---|---| +| `closed` | 187 | sim | +| `active` | 3 | sim | +| `future` | 5 | sim | +| (vazio) | 22 | board órfão 873 sem projeto ativo | + +**195/217 = 89,9%** das sprints com status correto + 70% com goal real (e.g., "Gestão de banner no backoffice de CNC e TEMPO para novas especificações técnicas"). As 22 vazias são de board órfão, fora do escopo deste fix. + +**Lição genérica — `Schema drift detection pattern`:** + +> Adicionar guard test "DB columns vs ORM Mapped fields" — candidato a 5ª linha de defesa do FDD-OPS-001 (eliminação de drift). +> Migration review checklist deve incluir: toda nova coluna → Mapped column correspondente no SQLAlchemy. +> ORM drift é o tipo de bug onde "alguns paths funcionam, outros crashern" — não tem sintoma uniforme observável, então fica oculto até alguém tentar exatamente o path quebrado. + +**SaaS Implication:** Sprint pipeline pós-fix está SaaS-ready. Para tenants futuros: discovery automático de boards Scrum (já existe), normalização lowercase consistente com convenção PULSE, fail-loud em status desconhecidos — operador onboarding vê NULL e investiga ao invés de receber dado silenciosamente errado. + +--- + ## 5. Entity Relationship Map ### 5.1 Cross-Source Entity Linking @@ -566,6 +1023,75 @@ STATUS_PATTERNS = { - Cross-worker coordination (Redis-based token bucket) - Graceful degradation (reduce batch size on rate limit, don't fail) +#### 6.3.6 Effort Extraction (Deterministic Core + Discovery Fallback) + +**Problem:** Story Points não são universais — Webmotors validou 0% de uso em 69 projetos. Squads usam métodos heterogêneos: T-shirt size (P/M/G), `timeoriginalestimate` em horas, ou nada (Kanban-puro). Single-method extraction quebra para esses tenants. Implementado em FDD-OPS-016 (commit `172f3f2`). 
+ +**Discovery dinâmico** (deterministic, zero-config): + +```python +# JiraConnector._discover_custom_fields() +EFFORT_NAME_PATTERNS_TSHIRT = ("t-shirt size", "tshirt size", "tamanho/impacto") + +for field in fields_list: + name = field.get("name", "").strip().lower() + fid = field.get("id", "") + + # Story Points (numeric) + if name in ("story points", "story point estimate"): + self._story_points_field_id = fid + + # T-shirt sized fields (option-typed) + elif any(p in name for p in EFFORT_NAME_PATTERNS_TSHIRT): + self._tshirt_field_ids.append(fid) +``` + +**Fallback chain (priority order):** + +| # | Source | Conversão | Source label | +|---|---|---|---| +| 1 | `customfield_*` ("Story Points") | uso direto (numeric) | `story_points` | +| 2 | `customfield_*` ("Story point estimate") | uso direto | `story_points` | +| 3 | `customfield_*` ("T-Shirt Size") | mapa Fibonacci PP=1, P=2, M=3, G=5, GG=8, GGG=13 (PT-BR) ou XS/S/M/L/XL/XXL (EN) | `tshirt_to_sp` | +| 4 | `customfield_*` ("Tamanho/Impacto") | mesmo mapa | `tshirt_to_sp` | +| 5 | `timeoriginalestimate` (segundos) | buckets: ≤4h=1, ≤8h=2, ≤16h=3, ≤24h=5, ≤40h=8, ≤80h=13, >80h=21 | `hours_to_sp` | +| 6 | None | sem estimativa — **métrica downstream conta items (Kanban-puro)** | `unestimated` | + +**Hour bucket calibration:** alinhado com "1 ideal day = ~6h productive". Buckets calibrados contra valores observados na Webmotors (2h–124h, múltiplos de 4) para que cada valor comum caia em um bucket sensato. Output já na escala SP que métricas downstream esperam. + +**Skip SP = 0:** sentinel comum para "não estimado", trata como falta. Cai para próximo hop da chain ao invés de retornar `0.0`. + +**Telemetria** (`_effort_source_counts`): incrementa contador por `source` label (incluindo `'unestimated'`). Logado per batched run: + +``` +[batched] effort source distribution (1375 issues): + tshirt_to_sp=521 (37.9%), hours_to_sp=198 (14.4%), unestimated=656 (47.7%) +``` + +Operadores spotam estimation drift sem combar logs. + +**Anti-pattern evitado** — bucketização silenciosa de unknown values: + +> Ao receber um T-shirt size desconhecido (ex: "JUMBO"), o connector +> NÃO mapeia silenciosamente para algum default. Cai para o próximo +> hop. Se nenhum produzir valor, retorna `None` com source label +> `'unestimated'`. Métrica downstream sabe que tem que counta items. + +**SaaS Implication:** Já SaaS-ready. Cada tenant onboarda com: +1. Discovery automático de fields T-shirt e Tamanho via match de nome +2. Story Points classico funciona out-of-the-box se usado +3. `timeoriginalestimate` é Jira built-in (não custom field) — sempre disponível +4. Telemetria revela qual método o tenant usa nas primeiras horas pós-onboarding + +**Future (FDD-DEV-METRICS-001 / codename "dev-metrics" R3+)** — promote esta cadeia automática a uma escolha **explícita por squad**: + +- Admin UI permite escolher método: SP / T-shirt / Hours / Count-only / Auto (current) +- Modelo proprietário: detecta drift de estimativa, calibra contra histórico, surfaces insights ("squad marcando tudo como M há 6 sprints") +- Forecast Monte Carlo usa o método nativo do squad (não força SP como LinearB / Jellyfish / Swarmia / Athenian fazem) +- Anti-surveillance by design: insights por squad/processo, **nunca** individual + +Pré-requisito (deferred): adicionar coluna `effort_source` em `eng_issues` para auditoria por issue. + ### 6.4 Non-Deterministic Components (Implement with AI) These problems have ambiguous inputs and require contextual understanding. 
An embedded AI agent ("Ingestion Intelligence Agent") handles them. @@ -875,3 +1401,368 @@ IngestionPipeline: | `efaeba7` | Discovery service, mode resolver, guardrails | | `bea8b13` | Admin API + React UI for discovery | | `c5350dc` | Security hardening, PII gating, Phase 4 rollout | +| `5d71618` | Snapshot drift monitor (FDD-OPS-001 line 3) + deploy workflow | +| `0a1050c` | FDD-OPS-001 lines 1+2 — eliminate stale-code-in-workers drift | +| `dd10d34` | FDD-OPS-002 — full Jira description backfill (61.74% coverage) | +| `80f1796` | Partial index for snapshots — fixes 50× perf regression on `/metrics/home` | +| `c5e38bb` | docs(architecture): ingestion v2 — diagnostic + 10× target + migration path | +| `4d1c9b4` | FDD-OPS-012 + FDD-OPS-013 — Phase 1 v2: issues sync streams per-project + inline changelog (eliminates redundant `fetch_issue_changelogs`) | +| `62c183f` | Strip NULL bytes (0x00) from text fields before persist — Webmotors `ENO-3296` description had 0x00 | +| `4c2c1c5` | docs(ingestion): Phase 2 drafts — per-source workers + per-scope watermarks (FDD-OPS-014) | +| `c2c6e5d` | Phase 2 step 2.1 — apply scope_key migration | +| `a2d5850` | Phase 2 step 2.2 — per-scope watermark API | +| `f357d05` | Phase 2 step 2.3 — `_sync_issues` uses per-project watermarks | +| `15574a7` | Phase 2 steps 2.4 + 2.5 — per-repo watermark writes for PRs and deploys | +| `4f86fd2` | FDD-OPS-014 step 2.7 (urgent) — drop legacy `uq_watermark_entity` (Postgres enforces ALL UniqueConstraints; legacy blocked per-scope inserts) | +| `4478f13` | Phase 2-B step 2.4-B — read per-repo watermarks for PRs | +| `c628528` | Phase 2-B step 2.5-B — read per-repo watermarks for deployments | +| `177830e` | INC-020 / FDD-OPS-013 follow-up — preserve Jira changelog in `_map_issue` so inline extraction works (status_transitions=[] em 311k issues) | +| `172f3f2` | INC-021 / FDD-OPS-016 — effort estimation fallback chain (Story Points → T-shirt → Hours → None) + FDD-DEV-METRICS-001 placeholder for R3+ | +| `0c7124d` | INC-022 / FDD-OPS-017 — status normalization with `statusCategory.key` fallback (96.5% done skew + 50+ PT-BR statuses unmapped) | +| `649ed78` | INC-023 / FDD-OPS-018 — sprint status pipeline 4-layer cheese fix (normalizer + upsert + watermark + ORM drift) | + +### D. Webmotors-Discovered Patterns (training material para futuros tenants) + +Capturados durante a engenharia 2026-04 — servem como **base de comparação** quando onboardar novos tenants e como **alvo de discoveries automáticas** para o Ingestion Intelligence Agent (Section 6.5). + +**D.1 — Estimação de esforço heterogênea entre squads:** + +- Webmotors **não usa Story Points** (0% nos 69 projetos) +- Distribuição de método por squad sample: + - Squads que estimam: ENO (52% horas + 24% T-shirt), DESC (34% horas + 26% T-shirt) + - Squads que estimam pouco: APPF (12% horas), OKM (8% horas) + - **Squads Kanban-puros** (não estimam): BG, FID, PTURB, e ~22 outros (25 de 27 squads totais) +- Fields descobertos: `customfield_18762` (T-Shirt: P/M/G), `customfield_15100` (Tamanho/Impacto: PP/P/M/G) +- **Implicação para futuros tenants:** rodar discovery por nome ("t-shirt", "tamanho", "size") e logar telemetria de método usado por squad. Provável que tenants Kanban-pesados tenham padrão similar. 
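+
+The per-project sampling behind this distribution can be reproduced with a short
+read-only script. A sketch, assuming the field ids named above and the standard Jira
+Cloud REST v3 search endpoint; the actual audit script is not reproduced here:
+
+```python
+import requests
+
+FIELDS = ["customfield_18524", "customfield_18762", "customfield_15100", "timeoriginalestimate"]
+
+def sample_estimation_usage(base_url: str, auth, project_key: str, sample: int = 50) -> dict:
+    """Return the % of sampled issues with each estimation field populated."""
+    resp = requests.get(
+        f"{base_url}/rest/api/3/search",
+        params={
+            "jql": f'project = "{project_key}" ORDER BY updated DESC',  # quote keys ("DESC" is reserved)
+            "fields": ",".join(FIELDS),
+            "maxResults": sample,
+        },
+        auth=auth,
+        timeout=30,
+    )
+    resp.raise_for_status()
+    issues = resp.json().get("issues", [])
+    populated = {f: 0 for f in FIELDS}
+    for issue in issues:
+        for f in FIELDS:
+            if issue["fields"].get(f) not in (None, 0):
+                populated[f] += 1
+    return {f: round(100 * n / max(len(issues), 1), 1) for f, n in populated.items()}
+```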
+ +**D.2 — Workflow status diversity:** + +- 326 status definitions descobertos via `/rest/api/3/status` +- 104 status raw distintos populados em issues ativas +- DEFAULT_STATUS_MAPPING curado precisa de ~80 entries para cobrir granularidade `in_review` específica de PT-BR +- Resto cai no fallback `statusCategory.key` (autoritativo done/não-done) +- Padrões PT-BR observados: + - "FECHADO EM PROD", "FECHADO EM HML", "Concluído", "Cancelado" → `done` + - "Em Desenvolvimento", "Em imersão", "Em andamento", "Em Progresso" → `in_progress` + - "Em Code Review", "Em Teste HML", "Homologação", "Aguardando Code Review" → `in_review` + - "BACKLOG", "Refinado", "PAUSADO" → `todo` +- **Implicação:** mapping curado é por idioma + cultura organizacional. AI fallback (Section 6.4.2) deve aprender **por tenant** após primeiros 1k transitions observados. + +**D.3 — Squad shape:** + +- 27 squads ativos +- **25 são Kanban-puros** (sem sprints) — métricas Lean (CFD, Throughput, WIP, Cycle Time) são primárias +- 2 squads (FID, PTURB) usam Sprint — métricas Scrum (Velocity, Carryover) aplicam +- **Implicação:** UX padrão deve assumir Kanban-first. Sprint metrics aparecem condicionalmente quando `eng_sprints` tem dados ativos para a squad. + +**D.4 — Repo & deploy scale:** + +- 754 GitHub repos active / 1.429 total descobertos +- 283 repos com Jenkins config descoberto via SCM scan (commit `d1aebf7`) +- 577 PRD jobs auto-classificados por pattern matching +- 197.043 issues no projeto único BG (concentração extrema — single JQL retorna massive payload) +- **Implicação:** SaaS engine deve assumir distribuição power-law (alguns projetos enormes, muitos pequenos). Streaming per-project (P-1 do v2) é não-negociável. + +**D.5 — Operational realities:** + +- VPN drops causam silent failures sem health-aware orchestration (P-8) +- Project keys com palavras-reservadas SQL ("DESC") exigem quoting em JQL +- Orphan project keys em PR titles ("RC" tem 1.348 references sem Jira project) — alias resolution AI necessário (Section 6.4.5) +- NULL bytes (0x00) em descriptions PT-BR — `_strip_null_bytes` defensivo +- Jenkins SHAs são build IDs, não git SHAs — PR↔Deploy linking via temporal correlation, não SHA match + +**D.6 — Anti-pattern de dev process descobertos:** + +- **Cache lateral vs return value** (INC-020): connector mappers escondendo dados em `self._last_*` que outros call sites não acessam +- **Schema drift entre migration e ORM** (INC-023): coluna existe no DB mas SQLAlchemy `Mapped` ausente — paths que omitem campo passam, paths que incluem crashern +- **Swiss cheese alignment** (INC-023): feature inteira zerada por 4+ bugs independentes em camadas diferentes; cada um sozinho garantia o sintoma +- **Watermark filter dimension errado** (INC-023 #3): sprint state transitions em `endDate` não `startDate` — escolher dimensão correta de watermark é crítico +- **Bucketização silenciosa de unknown values**: anti-pattern. Sempre fail-loud (None/WARN) — operador investiga via NULL no DB + +--- + +## 8. Metric Field Decisions — Master Table + +Esta seção consolida **as decisões de qual timestamp/field é usado pra +cada métrica**, ancorando-se nos incidentes documentados em +`docs/metrics/metrics-inconsistencies.md`. Quando uma métrica produz +um número estranho, comece por aqui — provavelmente é decisão de +campo, não bug de código. 
+ +### 8.1 Lead Time for Changes (DORA) + +**Fórmula canônica:** `deployed_at - first_commit_at` (em horas) + +| Field | Source | Decisão | Referência | +|---|---|---|---| +| `eng_pull_requests.first_commit_at` | GitHub GraphQL `commits(first:1).authoredDate` | Real authored date do primeiro commit no branch — **NÃO** a data de abertura do PR | INC-003 fix 2026-04-17, commit `c5350dc` | +| `eng_pull_requests.deployed_at` | Temporal linking PR → Jenkins deploy via SHA matching | Populado por `link_pr_deploys()` quando deploy chega; null pra PRs sem deploy linkado | INC-004 fix 2026-04-17 | + +**Variantes expostas pelo backend** (decisão FDD-DSH-082, 2026-04-17): + +- `lead_time_for_changes_hours` (inclusive): inclui PRs sem `deployed_at` usando `merged_at` como fallback. Maior cobertura, mas não-canônico DORA. +- `lead_time_for_changes_hours_strict`: SOMENTE PRs com `deployed_at != NULL`. Canônico DORA. Cobertura menor (depende de Jenkins linking). +- Frontend mostra ambos em cards separados. Usuário escolhe a interpretação. + +**Edge case**: PR aberto-e-fechado-sem-merge → excluído do cálculo (`is_merged = false`). + +### 8.2 Cycle Time + +**Fórmula:** `merged_at - first_commit_at` (em horas) — INC-007 fix 2026-04-17 + +**Phases breakdown** (`cycle_time/breakdown` snapshot): + +| Phase | De | Para | +|---|---|---| +| `coding` | `first_commit_at` | `pr_opened_at` (created_at) | +| `pickup` | `pr_opened_at` | `first_review_at` | +| `review` | `first_review_at` | `merged_at` | +| `merge_to_deploy` | `merged_at` | `deployed_at` | + +**Edge case INC-012 (parcial)**: `merge_to_deploy` é null quando +`deployed_at` é null. Stacked bar mostra 3 fases em vez de 4. Documentado +como aceitável até full Jenkins linking (depende de FDD-DSH-050). + +### 8.3 Deployment Frequency + +**Fórmula:** `count(eng_deployments WHERE environment='production' AND deployed_at IN [period])` por unidade de tempo + +| Decisão | Referência | +|---|---| +| Filtro `environment='production'` (não staging/dev) | INC-008 fix 2026-04-17 | +| Source = jenkins (Webmotors) | `connections.yaml` | +| `is_failure` derivado de `result != 'SUCCESS'` no Jenkins build | normalizer `_extract_jenkins_result()` | +| **Aberto INC-016**: builds UNSTABLE (testes falham mas compila) contam como falha — comportamento mais rigoroso que padrão DORA, sem flag pra desabilitar | P2, aceitável | + +### 8.4 Change Failure Rate + +**Fórmula:** `count(deploys WHERE is_failure) / count(deploys)` no período + +**Decisões:** mesmas de §8.3 (escopo de deploys idêntico). + +### 8.5 MTTR (Mean Time to Recovery) + +**Status:** ❌ **AINDA NÃO IMPLEMENTADO** + +`recovery_time_hours` é always null (INC-005). Calculation function existe +e está correta, mas não há pipeline de incidents para alimentar. Card +"Time to Restore" mostra `null` + badge "R1" + tooltip explicativo. + +Tracking: FDD-DSH-050 (P1, L, multi-agent — data scientist define sinal +de incidente → data engineer cria tabela `eng_incidents` → backend → frontend). 
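+
+Looking back at §8.1, a minimal sketch of the inclusive vs strict Lead Time variants, assuming merged-PR rows exposed as plain dicts with the fields named in that table (the real computation lives in the metrics backend):
+
+```python
+from datetime import datetime
+
+def lead_time_hours(prs: list[dict], strict: bool) -> list[float]:
+    """Per-PR lead time in hours (§8.1).
+
+    strict=True  → DORA-canonical: only PRs with a linked deploy.
+    strict=False → inclusive variant: falls back to merged_at.
+    """
+    samples: list[float] = []
+    for pr in prs:
+        if not pr.get("is_merged") or not pr.get("first_commit_at"):
+            continue  # open / closed-without-merge PRs are excluded (§8.1 edge case)
+        end: datetime | None = pr.get("deployed_at")
+        if end is None:
+            if strict:
+                continue  # strict variant drops PRs without deployed_at
+            end = pr.get("merged_at")
+        if end is None:
+            continue
+        samples.append((end - pr["first_commit_at"]).total_seconds() / 3600.0)
+    return samples
+```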
+
+### 8.6 Throughput (PRs merged per period)
+
+**Fórmula:** `count(PRs WHERE is_merged AND merged_at IN [period])`
+
+| Decisão | Referência |
+|---|---|
+| Fetch por `merged_at` (não `created_at`) | INC-001 fix 2026-04-16 — antes, PRs com lifecycle longo eram subcontados |
+| `pr_analytics.total_merged` no payload `throughput/pr_analytics` | usado por `/metrics/home` |
+| Cycle time per-week sparkline computed inline | INC-007 fix |
+
+### 8.7 WIP (Work in Progress)
+
+**Fórmula:** `count(eng_issues WHERE normalized_status IN ('in_progress','in_review'))` no momento do snapshot
+
+**Decisões importantes:**
+
+- Status `todo` **excluído do WIP** — apenas trabalho tocado conta. Documentado em `kanban-formulas-v1.md` §2
+- "aguardando deploy produção" mapeado pra `done` (INC-019 P2 — debatível, porém fixo no `connections.yaml` status_mapping)
+- WIP é tenant-aggregate por default; per-squad é cálculo on-demand via `squad_key` query param
+
+### 8.8 Lead Time Distribution / CFD / Scatterplot (Lean)
+
+**Fonte de verdade:** `eng_issues` com `status_transitions` JSONB populado pelo Jira changelog.
+
+| Métrica | Fórmula | Edge case |
+|---|---|---|
+| Lead Time Distribution | histograma de `completed_at - created_at` por bin | INC-010 fix 2026-04-16: inclui issues longas que atravessam o período |
+| CFD | contagem por status × dia, banda `done` usa `MAX(done_so_far)` | INC-009 P1 — protege contra reopens |
+| Scatterplot | um ponto por issue concluída no período (P50/85/95 lines) | mesmo escopo de fetch que LT distribution |
+
+### 8.9 Anti-Surveillance Invariant
+
+**Decisão fundamental, INVIOLÁVEL:**
+
+> Author/assignee/reporter **NUNCA** entram em payloads de métrica.
+
+**Onde está garantido:**
+
+1. **Domain dataclasses** (`pulse-data/src/contexts/metrics/domain/`): nenhum field tipo `author`, `assignee`, `reporter` ou similar
+2. **Schema registry** (FDD-OPS-001 line 3): payload-vs-dataclass diff loga `_schema_drift` se algo novo aparece
+3. **Frontend contract tests** (`tests/contract/anti-surveillance-schemas.test.ts`): meta-test que injeta payload tainted em cada um dos 6 schemas Zod e verifica rejeição
+4. **Underlying tables** (`eng_pull_requests.author`, `eng_issues.assignee`) — campos existem (necessários pra ingestão e linking), mas **nunca atravessam a fronteira de agregação**
+
+**Snapshot anonimizado (PR #2.1 / future):** quando construirmos pipeline
+de snapshot pra distribuir entre devs, aggregate-only não é suficiente —
+o DB ainda tem PII nos raw fields. Anonimização determinística de
+author/assignee → hash + `@example.invalid` é necessária. Detalhes em
+`docs/onboarding.md` (PR #2.1).
+
+### 8.10 Status Normalization (hybrid textual + statusCategory)
+
+**Fonte primária:** hybrid em 3 camadas (FDD-OPS-017 / INC-022 / commit `0c7124d`):
+
+1. **Textual mapping curado** — `DEFAULT_STATUS_MAPPING` em `engineering_data/normalizer.py`, ~80 entries PT-BR Webmotors-curated + EN. Preserva granularidade `in_progress` vs `in_review`.
+2. **Jira `statusCategory.key` fallback** — autoritativo done/não-done. Connector descobre via `/rest/api/3/status` (1 chamada/lifetime, cacheada). Webmotors: 326 status defs descobertas.
+3. **Default 'todo' com WARN log** — extremamente raro pós-fix (só status sem categoria).
+
+**Categorias normalizadas produzidas:** `todo | in_progress | in_review | done` (4 categorias). Métricas downstream em `domain/lean.py:_ACTIVE_STATUSES = {"in_progress", "in_review"}` tratam ambos como WIP/active para Cycle Time.
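+
+Putting the three layers together, the resolution order could look roughly like the sketch below; the actual `normalize_status` in `engineering_data/normalizer.py` may differ in detail:
+
+```python
+import logging
+
+logger = logging.getLogger(__name__)
+
+# Category fallback per INC-022: done→done, indeterminate→in_progress, new→todo.
+_CATEGORY_TO_NORMALIZED = {"done": "done", "indeterminate": "in_progress", "new": "todo"}
+
+def normalize_status(raw: str, mapping: dict[str, str], status_category: str | None = None) -> str:
+    """Three-layer hybrid (§8.10): curated textual map → statusCategory.key → 'todo' + WARN."""
+    key = (raw or "").strip().lower()
+    # Layer 1: curated textual mapping, preserves in_progress vs in_review granularity.
+    if key in mapping:
+        return mapping[key]
+    # Layer 2: Jira statusCategory fallback, authoritative done / not-done.
+    if status_category in _CATEGORY_TO_NORMALIZED:
+        return _CATEGORY_TO_NORMALIZED[status_category]
+    # Layer 3: conservative default, loud in the logs so the operator can investigate.
+    logger.warning("Unknown status %r with no statusCategory; defaulting to 'todo'", raw)
+    return "todo"
+```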
+ +**Discovery cacheado por instância de connector:** + +```python +# JiraConnector +self._status_categories: dict[str, str] = {} # name (lowercase) → category key +self._status_categories_discovered: bool = False + +async def _discover_status_categories(self): + data = await self._client.get(f"{REST_API}/status") + for s in data: + name = (s.get("name") or "").strip().lower() + cat = ((s.get("statusCategory") or {}).get("key") or "").strip().lower() + if name and cat in ("new", "indeterminate", "done"): + self._status_categories[name] = cat +``` + +**`_map_issue` anexa ao dict mapeado:** + +- `status_category`: a categoria do status atual +- `status_categories_map`: o dict completo (mesma referência para todas as issues do batch) + +**Histórico (`build_status_transitions`)** usa o `status_categories_map` para classificar cada `to_status` histórica: + +```python +for cl in changelogs: + cat = status_categories_map.get(cl["to_status"].strip().lower()) + normalized = normalize_status(cl["to_status"], status_mapping, cat) +``` + +**Edge cases conhecidos & decisões:** + +| Status | Mapping | Justificativa | +|---|---|---| +| `FECHADO EM PROD` | `done` | Jira category=done; nome literal "FECHADO" | +| `FECHADO EM HML` | `done` | Jira category=done. Workflow author classifica como done; respeitamos. Se squad quer "ainda em fluxo", renomeia para "Aguardando Deploy Produção" | +| `aguardando deploy produção` | `in_progress` | INC-019 P2 reverso — quando deploy é o gargalo, item ainda está em fluxo | +| `em teste azul/hml` | `in_review` | Webmotors-specific QA stages; granularidade preservada via textual | +| `construção de hipótese` | `in_progress` | Kanban upstream — trabalho ativo de discovery | +| `Aguardando Code Review` | `in_review` | Trabalho ativo aguardando reviewer (textual ganha sobre Jira `new` neste tenant) | +| Status sem mapping E sem category | `todo` (com WARN log) | Conservador — operador investiga via WARN | + +**Princípio**: textual ganha quando definido (granularidade); category ganha sobre default (autoridade). Tudo que cai em "todo" sem ambos é log-visible — raro, mas observável. + +**Por que mantemos 4 categorias (não 3 como Jira)** — métricas Lean precisam distinguir `in_progress` (development active) de `in_review` (waiting on review/test) para Cycle Time Breakdown. Jira `statusCategory.indeterminate` colapsa os dois; nosso textual mapping preserva quando a squad nomeia. + +### 8.11 PR ↔ Issue Linking + +**Mecanismo:** regex `[A-Z][A-Z0-9]+-\d+` em `pr.title`, `pr.head_ref`, `pr.base_ref` + +**Sequência:** + +1. Sync worker carrega `(issue_key, external_id)` do tenant **antes** de sincronizar PRs (issues vêm 1º no ciclo) +2. Pra cada PR, regex extrai possíveis keys (multi-match suportado) +3. Filtra keys que existem em `jira_project_catalog` com status `active|discovered` +4. Popula `linked_issue_ids` JSONB do PR + +**Per-project link rate observado** (Webmotors, post-discovery): + +- Top performers (96-100%): SDI, PUSO, DSP, FID, CRMC +- Tenant-wide médio: 21.9% +- Falsos positivos: HOTFIX-123, RELEASE-1, BUGFIX-42, lib names (LODASH-4) — filtrados via `IN (jira_project_catalog)` clause +- Orphans conhecidos: RC (1348 references, projeto archived no Jira) + +**Re-relink pós-ingestão:** script `scripts/relink_prs_to_issues.sql` +re-aplica em PRs antigos quando novos projetos são ativados via discovery +dinâmica. 
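+
+A minimal sketch of the extract-and-filter part of §8.11 (the helper name and the PR dict shape are illustrative; the real logic lives in the sync worker and persists the result to `linked_issue_ids`):
+
+```python
+import re
+
+ISSUE_KEY_RE = re.compile(r"[A-Z][A-Z0-9]+-\d+")
+
+def extract_linked_issue_keys(pr: dict, active_projects: set[str]) -> list[str]:
+    """Regex over title + refs, then filter against the active project catalog
+    to drop false positives like HOTFIX-123 or LODASH-4."""
+    text = " ".join(str(pr.get(field) or "") for field in ("title", "head_ref", "base_ref"))
+    keys: list[str] = []
+    for candidate in ISSUE_KEY_RE.findall(text):
+        project = candidate.split("-", 1)[0]
+        if project in active_projects and candidate not in keys:
+            keys.append(candidate)
+    return keys
+```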
+ +### 8.12 Effort Estimation (story_points field) + +**Fonte primária:** `eng_issues.story_points` (numeric, nullable) — populado pelo `_extract_story_points` no connector via fallback chain (FDD-OPS-016 / INC-021 / commit `172f3f2`). Detalhes na §6.3.6. + +**Hops em ordem de prioridade** (telemetria via `_effort_source_counts`): + +| Hop | Source | Conversão | Source label | +|---|---|---|---| +| 1 | `customfield_10004` ("Story Points") | numeric direto (skip se = 0) | `story_points` | +| 2 | `customfield_18524` ("Story point estimate") | numeric direto | `story_points` | +| 3 | T-shirt size field (discovered) | Fibonacci: PP=1, P=2, M=3, G=5, GG=8, GGG=13 | `tshirt_to_sp` | +| 4 | `customfield_15100` ("Tamanho/Impacto") | mesmo mapa | `tshirt_to_sp` | +| 5 | `timeoriginalestimate` (segundos) | buckets ≤4h=1, ≤8h=2, ≤16h=3, ≤24h=5, ≤40h=8, ≤80h=13, >80h=21 | `hours_to_sp` | +| 6 | None | `null` em `eng_issues.story_points` | `unestimated` | + +**Decisão downstream — quando `story_points IS NULL`:** + +- Métricas baseadas em soma (Velocity, Story Point Throughput): **NÃO somar** issues `null` +- Métricas baseadas em count (Throughput by issue, WIP, Cycle Time): **incluir** issues `null` normalmente +- **Para tenants Kanban-puros** (Webmotors: 25/27 squads), `story_points` é `null` para 100% — **a métrica primária deve ser count, não sum** + +**Anti-pattern evitado:** + +> NÃO defaultar para `story_points = 1` (ou outro valor sentinel) +> quando não há estimativa. Seria silently wrong para Velocity. +> Métrica precisa saber explicitamente que aquela issue não foi +> estimada. `null` é fail-loud (NULL no DB visível) vs `1` que é +> fail-silent. + +**Webmotors-observed coverage** pós-fix (CRMC, projeto novo full-history): + +- 52,3% com effort estimado (sample de 1.375 issues) +- Distribuição valores: 1, 2, 3, 5, 8 (Fibonacci aplicado) +- 47,7% `null` → métrica conta items + +**Future:** R3 codename "dev-metrics" (FDD-DEV-METRICS-001) entrega: +- Coluna `effort_source` em `eng_issues` para auditoria por issue +- Per-squad estimation method choice (admin UI) +- Modelo proprietário de previsão usando método nativo do squad + +### 8.13 Sprint Status & Goal + +**Fonte primária:** `eng_sprints.status` (varchar(50), nullable) + `eng_sprints.goal` (text, nullable). Populados pelo `normalize_sprint` (FDD-OPS-018 / INC-023 / commit `649ed78`). + +**Status normalization:** + +| Raw value (Jira) | Aliases aceitos | Normalized | +|---|---|---| +| ACTIVE | active, open, in_progress | `active` | +| CLOSED | closed, completed, complete, ended | `closed` | +| FUTURE | future, planned, upcoming | `future` | +| (qualquer outro) | — | `None` (fail-loud, operador investiga) | + +**Por que NULL para unknown** (não bucketizar): Sprint Velocity e Carryover logic precisam saber QUAIS sprints estão de fato fechadas. Bucketizar "?" para `closed` corromperia a regressão linear de tendência. NULL torna o problema visível. 
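+
+A sketch consistent with the alias table above; the real `_normalize_sprint_status` helper in the normalizer may differ in detail:
+
+```python
+_SPRINT_STATUS_ALIASES = {
+    "active": "active", "open": "active", "in_progress": "active",
+    "closed": "closed", "completed": "closed", "complete": "closed", "ended": "closed",
+    "future": "future", "planned": "future", "upcoming": "future",
+}
+
+def _normalize_sprint_status(raw: str | None) -> str | None:
+    """Map Jira sprint-state aliases to active/closed/future; unknown values
+    return None (fail-loud) instead of being silently bucketed."""
+    if not raw:
+        return None
+    return _SPRINT_STATUS_ALIASES.get(raw.strip().lower())
+```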
+ +**Goal field:** + +- Source: `sprint.goal` da Jira API (string, free-text setado por squad lead) +- Normalizer aplica `_strip_null_bytes` (Postgres rejeita 0x00) +- Webmotors observed: 70% das sprints têm goal real (e.g., "Gestão de banner no backoffice de CNC e TEMPO para novas especificações técnicas") + +**Re-fetch policy crítica** — sprints **não usam watermark filter** (decisão de FDD-OPS-018): + +- State transitions acontecem em `endDate`, não `startDate` +- Volume baixo (~216 total / ~5 ativas em qualquer momento) +- Sempre re-fetch é correto E barato +- Se quiser otimizar no futuro: filtrar por `endDate < since` (não `startDate`) + +**ON CONFLICT update obrigatório:** + +```python +# _upsert_sprints +.on_conflict_do_update( + index_elements=["tenant_id", "external_id"], + set_={ + "name": sd["name"], + "status": sd.get("status"), # FDD-OPS-018: era omitido + "goal": sd.get("goal"), # FDD-OPS-018: era omitido + "started_at": sd["started_at"], + "completed_at": sd["completed_at"], + # ... outros campos métricos + "updated_at": datetime.now(timezone.utc), + }, +) +``` + +**Lição** — quando o ON CONFLICT `set_` omite um campo, sprints existentes nunca recebem update mesmo se o normalizer está correto. Pattern: `set_` deve incluir TODOS os campos que podem mudar entre syncs, exceto `external_id` e `tenant_id`. + +--- diff --git a/pulse/docs/ingestion-v2-phase-2-plan.md b/pulse/docs/ingestion-v2-phase-2-plan.md new file mode 100644 index 0000000..b4037a0 --- /dev/null +++ b/pulse/docs/ingestion-v2-phase-2-plan.md @@ -0,0 +1,374 @@ +# Ingestion v2 — Phase 2 Plan (FDD-OPS-014) + +**Status:** PARTIAL — foundation shipped 2026-04-28, read-side refactor + worker split deferred. +**Companion docs:** `ingestion-architecture-v2.md` (overall design), +`ingestion-spec.md` (current architecture). +**Sister artifact (applied):** `alembic/versions/010_pipeline_watermarks_scope_key.py` + +--- + +## 0. Shipping summary (2026-04-28 status) + +What landed in this iteration vs. what carries forward: + +### ✅ Shipped (production-ready, validated against live tenant) + +| Step | Commit | What | +|---|---|---| +| **2.1** | `f357d05` | Migration 010 applied: `pipeline_watermarks.scope_key VARCHAR(255) NOT NULL DEFAULT '*'` + `uq_watermark_entity_scope` UNIQUE coexisting with legacy `uq_watermark_entity` | +| **2.2** | `f357d05` | Per-scope watermarks API: `GLOBAL_SCOPE`, `make_scope_key(source, dim, value)`, `_get_watermark(scope_key=...)`, `_set_watermark(scope_key=...)`, `_list_watermarks_by_scope(scope_keys=[...])`. Default `'*'` preserves all legacy callers. | +| **2.3** | `f357d05` | `_sync_issues()` reads + writes per-project watermarks (`jira:project:`). Logs "watermark plan: N backfill, M incremental" pre-flight. Per-project advance fires on project transition. Legacy global '*' kept for compat. | +| **2.4** | `15574a7` | `_sync_pull_requests()` writes per-repo watermarks (`github:repo:/`) on each batch persist. **Write-side only** — connector still uses single `since` for fetch. | +| **2.5** | `15574a7` | `_sync_deployments()` writes per-repo watermarks (`jenkins:repo:`) post-upsert. Per-repo not per-job (Q2 decision: matches PR↔deploy linking dimension). **Write-side only.** | + +Test coverage shipped: 19 unit tests (`test_watermark_scope_keys.py` 9, `test_inline_changelog_extraction.py` 10 — re-validated alongside). 
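+
+For orientation, the step 2.2 scope-key helper presumably looks roughly like this (only the names listed in the table above come from the shipped code; everything else is an assumption):
+
+```python
+GLOBAL_SCOPE = "*"  # legacy tenant-wide watermark rows keep this value
+
+def make_scope_key(source: str, dimension: str, value: str) -> str:
+    """Build a '<source>:<dimension>:<value>' scope key.
+
+    Per open question Q2 below, the format is a convention enforced in code,
+    not a database constraint.
+    """
+    return f"{source}:{dimension}:{value}"
+
+# Examples matching the formats used elsewhere in this plan:
+#   make_scope_key("jira", "project", "BG")        -> "jira:project:BG"
+#   make_scope_key("github", "repo", "org/repo")   -> "github:repo:org/repo"
+#   make_scope_key("jenkins", "repo", "some-repo") -> "jenkins:repo:some-repo"
+```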
+ +### 🟡 Deferred to next iteration (sister FDD) + +| Step | What's missing | Why deferred | +|---|---|---| +| **2.4-B / 2.5-B** | Connector signature refactor: accept `since_by_repo` / `since_by_project` so per-scope watermarks are READ during fetch (not just written) | Required for new-repo backfill correctness — without it, adding a repo only fetches PRs newer than the global `*` watermark. Significant connector code change (~M effort), warranted in a dedicated PR with thorough tests. | +| **2.6** | docker-compose split into per-source workers (jira/github/jenkins) | Architectural value of split (per-source isolation, parallel cycles) only realizes when combined with 2.4-B + 2.5-B. Splitting alone = 3 containers running same global-watermark logic — zero throughput win. | +| **2.7** | Migration 011: drop legacy `uq_watermark_entity` constraint | Plan §3 explicitly requires "after one successful per-source cycle". Per-source doesn't exist yet (deferred above). Legacy constraint coexists harmlessly until then. | +| **Health-aware pre-flight** (P-8 in v2 doc) | Pre-cycle source reachability check (skip cycle if source unhealthy) | Belongs with worker-split work (each per-source worker owns its health-check). Without split, a single sync still has interleaved phases. | + +### 🟢 Foundation shipped means + +- New scope rows accumulate every cycle. When the read-side refactor lands, every active repo/project already has its own watermark — no schema migration, no backfill of historic data. +- Migration 010 is rollback-safe via `downgrade()`. The legacy unique constraint coexists with the new one for as long as needed. +- All Phase 1 wins (FDD-OPS-012 batched persistence, FDD-OPS-013 inline changelogs) remain intact and continue working. + +### 📅 Suggested next iteration + +Open as `feat/ingestion-v2-phase-2b` branch: + +1. Refactor `JiraConnector.fetch_issues_batched` to accept `since_by_project` dict (already does — done in Phase 1). Just verify wired correctly. +2. Refactor `GithubConnector.fetch_pull_requests_batched` to accept `since_by_repo: dict[str, datetime | None]` and use per-repo since when provided. +3. Refactor `JenkinsConnector` deployments fetch to accept per-repo since. +4. Update `_sync_*` methods to pass `since_by_` from `_list_watermarks_by_scope` results. +5. Smoke test: add new project to Jira catalog → confirm only that scope backfills. +6. THEN: docker-compose split (Step 2.6) + companion migration 011. + +Estimated effort for Phase 2-B: **M-L (~3-5 dev-days)**. Honest scoping based on actual time spent on Phase 2-A (much faster than originally estimated due to clean foundation). + +--- + +## 1. Goals (acceptance criteria) + +The migration is "done" when **all 5** acceptance items hold: + +1. **Per-source isolation**: Jenkins outage (or Jira slowness, or GitHub + rate-limit) does not block the other two sources. Each source has its + own worker process, event loop, and cycle cadence. +2. **Per-scope watermarks**: a new Jira project activation does not + trigger a full re-fetch of existing 200k+ issues. Each scope_key + advances independently. +3. **Health-aware pre-flight**: each cycle checks source reachability + before starting any I/O. VPN drop = mark unhealthy + skip cycle, not + block-and-retry-forever. +4. **Backwards-compat**: existing `pipeline_watermarks` rows keep working + during the transition (scope_key='*' default). +5. **Tests pass**: 100% of existing unit/integration suites + new tests + for per-source and per-scope behavior. 
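+
+To make acceptance item 2 concrete, a hedged sketch of how a per-project fetch plan could be assembled from the step 2.2 repository API (the method name comes from §0; its exact signature and return shape are assumptions):
+
+```python
+from datetime import datetime
+
+async def build_since_by_project(
+    watermarks, tenant_id: str, project_keys: list[str],
+) -> dict[str, datetime | None]:
+    """Assemble the per-project fetch plan (acceptance item 2).
+
+    Known scopes reuse their own watermark; a freshly activated project has
+    no row yet, so it gets None, which means a full backfill of that scope only.
+    """
+    scope_keys = [f"jira:project:{key}" for key in project_keys]
+    # _list_watermarks_by_scope is the step 2.2 API; the row attributes used
+    # below (scope_key, last_synced_at) are assumed here for illustration.
+    rows = await watermarks._list_watermarks_by_scope(
+        tenant_id=tenant_id, entity_type="issues", scope_keys=scope_keys,
+    )
+    by_scope = {row.scope_key: row.last_synced_at for row in rows}
+    return {
+        key: by_scope.get(f"jira:project:{key}")  # missing row → None → backfill
+        for key in project_keys
+    }
+```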
+
+Non-goals (deferred to Phase 3):
+- Job queue / worker pool
+- Pre-flight cost estimation via API count call
+- `/pipeline/jobs` per-job endpoint
+
+---
+
+## 2. Architecture diff (current → target)
+
+### Current
+
+```
+docker-compose.yml:
+  sync-worker (one process, one event loop, runs:
+    _sync_issues → _sync_prs → _sync_deploys → _sync_sprints
+    sequentially, every 15 min)
+
+pipeline_watermarks:
+  (tenant, entity_type) UNIQUE  ← GLOBAL across all scopes
+  e.g. row: (tenant=001, entity='issues', last_synced_at='2026-04-26')
+```
+
+### Target
+
+```
+docker-compose.yml:
+  jira-sync-worker     (entity: issues, sprints, sprint-issues)
+  github-sync-worker   (entity: pull_requests, repos)
+  jenkins-sync-worker  (entity: deployments)
+
+  All independent: own event loop, cron schedule, retry policy,
+  health-check, watermark scope, container.
+
+  discovery-worker (unchanged — already separate)
+
+pipeline_watermarks:
+  (tenant, entity_type, scope_key) UNIQUE  ← PER-SCOPE
+  e.g. rows:
+    (tenant=001, entity='issues', scope='jira:project:BG', last_synced='...')
+    (tenant=001, entity='issues', scope='jira:project:OKM', last_synced='...')
+    (tenant=001, entity='prs', scope='github:repo:foo', last_synced='...')
+```
+
+---
+
+## 3. Implementation order (dependencies)
+
+The order minimizes risk and allows early rollback.
+
+### Step 2.1 — Schema migration (010, sister file)
+
+Add `scope_key` column with default `'*'` + companion unique constraint.
+Existing rows continue to work (read by `(tenant, entity_type)` matches
+the `'*'` row exactly).
+
+**Risk:** very low. Default value preserves all existing reads/writes.
+**Rollback:** `alembic downgrade -1`.
+**Validation:** smoke against existing sync flow — should produce
+identical behavior.
+
+### Step 2.2 — Repository layer: per-scope watermark API
+
+Add `get_watermark(tenant, entity, scope_key='*')` and
+`set_watermark(tenant, entity, scope_key, ts, count)` to the watermarks
+repo. Default `'*'` keeps current callers untouched.
+
+**Risk:** low. Existing call sites untouched; new ones opt in.
+**Validation:** unit tests for default vs explicit scope_key.
+
+### Step 2.3 — JiraSyncWorker (extract from monolith)
+
+New module `src/workers/jira_sync_worker.py` containing:
+
+```python
+class JiraSyncWorker:
+    """Single-source worker. Owns: issues, sprints, sprint-issues."""
+
+    async def cycle(self):
+        if not await self._check_jira_health():
+            logger.info("Jira unhealthy this cycle; skipping")
+            return
+
+        await self._sync_issues()        # uses per-project scope keys
+        await self._sync_sprints()       # scope='jira:board:<board_id>'
+        await self._sync_sprint_issues() # scope='jira:sprint:<sprint_id>'
+
+    async def _check_jira_health(self) -> bool:
+        # GET /rest/api/3/myself with 5s timeout
+        ...
+```
+
+`_sync_issues` becomes a per-project loop with per-project watermark
+read/write. The PR loop pattern from Phase 1 transfers directly.
+
+**Risk:** medium. Monolithic worker still works; new worker is opt-in
+via env flag `PULSE_USE_PER_SOURCE_WORKERS=true`.
+
+### Step 2.4 — GithubSyncWorker
+
+Same pattern. Owns: pull_requests, repos discovery.
+scope_key format: `github:repo:<org>/<repo>`.
+
+### Step 2.5 — JenkinsSyncWorker
+
+Same pattern. Owns: deployments.
+scope_key format: `jenkins:job:<job_name>`.
+
+Health check: `GET /api/json` with 5s timeout. If VPN off → unhealthy
+this cycle; resume on next.
+
+### Step 2.6 — docker-compose.yml: 3 workers replace 1
+
+```yaml
+sync-worker:
+  # REMOVED. Replaced by 3 specific workers below.
+ +jira-sync-worker: + image: pulse-jira-sync-worker + command: python -m src.workers.jira_sync_worker + ... + +github-sync-worker: + ... + +jenkins-sync-worker: + ... +``` + +**Risk:** low — Dockerfiles unchanged (single image, 3 different commands). +**Rollback:** revert compose, restart sync-worker. + +### Step 2.7 — Companion migration 011: drop legacy unique constraint + +After all workers are emitting per-scope writes for >1 successful cycle, +drop `uq_watermark_entity` constraint. Coexistence period prevents cutover +surprises. + +--- + +## 4. Test plan + +Each item lists the test type and what it asserts. + +### Unit tests (no DB, no network) + +| Test | What it asserts | +|---|---| +| `test_watermarks_repo_default_scope_compat` | `get_watermark(t, e)` returns same row as `get_watermark(t, e, scope_key='*')` | +| `test_watermarks_repo_set_per_scope` | Setting scope=`'jira:project:BG'` doesn't affect global `'*'` row | +| `test_jira_health_check_returns_false_on_timeout` | Mock httpx returning timeout → health=False | +| `test_jira_sync_skips_cycle_when_unhealthy` | `_check_jira_health()=False` → `_sync_issues()` not called | +| `test_github_sync_per_repo_watermark` | Each repo has independent watermark | +| `test_jenkins_sync_per_job_watermark` | Each job has independent watermark | + +### Integration tests (DB, mocked HTTP) + +| Test | What it asserts | +|---|---| +| `test_jira_full_cycle_uses_per_project_watermarks` | After cycle, every active project has its own watermark row | +| `test_jira_new_project_activation_only_backfills_that_scope` | Activate new project → only that scope_key gets full backfill, others unchanged | +| `test_jira_one_project_failure_does_not_block_others` | Mock 401 on project X → other projects still complete | +| `test_companion_migration_011_safe_after_workers_migrated` | Verify constraint drop doesn't break existing reads | + +### End-to-end (Webmotors-scale, manual run) + +| Test | What it asserts | +|---|---| +| Boot 3 workers, full re-ingestion against Webmotors | Convergence in <90 min total (parallel sources) | +| Disable VPN mid-Jenkins-sync | Jenkins worker pauses gracefully; Jira+GitHub continue | +| Add new Jira project to catalog | Only that project backfilled in next cycle; others skipped | +| Kill jira-sync-worker mid-cycle | On restart, ≥80% of fetched issues already persisted (per Phase 1) AND watermarks reflect work done | + +### Regression tests (must keep passing) + +- All 52 unit tests from Phase 1 connector/aggregator suite +- `test_inline_changelog_extraction.py` (10 tests, FDD-OPS-013 anti-regression) +- All existing dora/lean/cycle_time domain tests + +--- + +## 5. Rollout sequence (in production / staging) + +When this Phase 2 code is ready: + +1. **Pre-flight**: announce maintenance window (~30 min for safety even + though zero-downtime is the design goal). +2. **Run migration 010** (additive) → verify no errors, queries unchanged. +3. **Deploy new worker images** with `PULSE_USE_PER_SOURCE_WORKERS=false` + (still the monolith). No behavior change. +4. **Validate** monolith still works with new schema column present. +5. **Flip flag** to `=true`. Three new workers start. Old `sync-worker` + container is replaced. +6. **Watch one full cycle** (~30 min). All three sources should run + independently with per-scope watermarks. +7. **Run migration 011** → drop legacy constraint. +8. **Remove backwards-compat code paths** (separate cleanup PR). 
+ +If anything misbehaves at any step, rollback path: +- Steps 1-4: `alembic downgrade -1` + redeploy old image +- Steps 5-6: flip flag back to `false`, kill new workers, restart monolith +- Step 7: requires manual constraint recreation; coordinate carefully + +--- + +## 6. Estimate (effort) + +Honest scoping: + +| Step | Effort | Owner | +|---|---|---| +| 2.1 Schema migration | XS (1h, already drafted) | data-engineer | +| 2.2 Watermarks repo per-scope API | S (2-3h) | data-engineer | +| 2.3 JiraSyncWorker extraction | M (1 day) | data-engineer | +| 2.4 GithubSyncWorker extraction | S (4-6h, simpler since PRs already streaming) | data-engineer | +| 2.5 JenkinsSyncWorker extraction | S (4h, simplest) | data-engineer | +| 2.6 docker-compose split | XS (1h) | engineer | +| 2.7 Companion migration 011 | XS (30min) | data-engineer | +| Tests (unit + integration) | M (1 day total) | test-engineer | +| Rollout + validation | S (half day) | engineer + data-engineer | +| **Total** | **~1 week of focused engineering** | | + +This matches the `ingestion-architecture-v2.md` Phase 2 estimate (3-5 days). + +--- + +## 7. Open questions (for review) + +These need a decision before implementation starts. Captured here so +they don't block the technical work. + +### Q1: Health-check policy for workers + +Question: when a source is unhealthy, should the worker: +- (a) Skip the cycle entirely (current Phase 1 behavior — simple) +- (b) Run with cached data only (more code, useful for read-heavy tasks) +- (c) Pause the worker (no retry until manual restart) + +Recommendation: **(a) skip + log + retry next cycle**. Matches what the +v2 doc implies. Operators can grep for "unhealthy this cycle". + +### Q2: Scope-key format — strict schema or freeform string? + +Question: should `scope_key` follow a strict pattern like +`::` (e.g., `jira:project:BG`) or stay as +opaque text? + +Recommendation: **convention enforced in code, not constraint**. +String column is flexible; helper functions like +`make_scope_key(source, dimension, value)` enforce shape. Allows +future scopes (e.g., `jira:tenant-rule:bg-only`) without migration. + +### Q3: What happens to the global `*` rows after migration 011? + +Question: keep them as "tenant-wide aggregate watermarks" (informational) +or delete? + +Recommendation: **delete in a separate cleanup PR after 1 month of +stable per-scope operation**. Removes cognitive load. If someone wants +"latest across scopes", that's a `MAX(last_synced_at)` query, trivial. + +### Q4: Alembic chain — single migration or two? + +Question: keep migration split (010 add, 011 drop) or combine? + +Recommendation: **keep split**. The risk of dropping the old constraint +before workers are confirmed writing per-scope is high; the cost of +keeping both for a month is zero. Two migrations provide a safe rollback +window. + +--- + +## 8. What this plan does NOT cover (explicitly out of scope) + +- **Job queue + worker pool** — Phase 3, separate plan +- **Pre-flight item count via API** — FDD-OPS-015 full version, separate +- **Pipeline Monitor UI per-scope tab** — needs FDD-OPS-015's data layer + first +- **GitLab / Azure DevOps / Linear connectors** — R2+, separate work +- **MTTR pipeline** — FDD-DSH-050, completely independent track + +--- + +## Status + +**Status of this document:** PARTIAL IMPLEMENTATION (2026-04-28). + +Phase 2-A foundation shipped — see §0 for the breakdown of what landed +vs. what was deferred to Phase 2-B. 
The architectural pattern (per-scope +watermarks coexisting with legacy global '*' rows) is in production use +and validated against the Webmotors tenant. + +Phase 2-B (read-side connector refactor + docker-compose split + drop +legacy constraint) opens as a separate effort — see §0 "Suggested next +iteration" for the concrete roadmap. + +### Document changelog + +- **2026-04-28 evening** — PARTIAL status. Steps 2.1–2.5 (write-side) + shipped. Steps 2.4-B, 2.5-B, 2.6, 2.7 deferred with rationale. +- **2026-04-28 afternoon** — DRAFT 1 produced in parallel while Phase 1 + ingestion converged. diff --git a/pulse/docs/metrics/metrics-inconsistencies.md b/pulse/docs/metrics/metrics-inconsistencies.md index eb4092d..2961590 100644 --- a/pulse/docs/metrics/metrics-inconsistencies.md +++ b/pulse/docs/metrics/metrics-inconsistencies.md @@ -31,6 +31,10 @@ Gravidade: | INC-017 | Lead Time | P2 | A API DORA retorna `lead_time_for_changes_hours` como numero bruto. A `change_failure_rate` e retornada como ratio (0.0-1.0). A conversao de CFR para % acontece apenas no frontend. Consumidores diretos da API podem confundir CFR = 0.22 com 22% ou com 0.22%. | `routes.py:214`; `transforms.ts:305` | Usuarios integrando a API diretamente podem misinterpretar CFR. | Documentar explicitamente no OpenAPI spec que CFR e ratio 0.0-1.0. Ou unificar: retornar como porcentagem (0-100) diretamente da API. | | INC-018 | Cycle Time — benchmarks | P2 | Thresholds de Cycle Time na UI (`< 2h = elite`, `< 24h = high`, `< 72h = medium`) sao definidos no frontend como "PULSE-internal" mas aparecem ao lado de metricas DORA sem distinguir que nao sao da DORA 2023. | `transforms.ts:171-176`; `BENCHMARKS['cycle_time']` | Usuarios podem crer que "Cycle Time elite < 2h" e uma definicao DORA oficial. | Adicionar label "PULSE benchmark" vs "DORA 2023" nos cards da UI. | | INC-019 | WIP — "aguardando deploy producao" | P2 | O status Jira "aguardando deploy producao" esta mapeado para `done` no normalizer. Semanticamente, o item ainda esta aguardando ser entregue — nao foi concluido. Isso subestima WIP e o throughput nao contabiliza o delay de deploy. | `normalizer.py:77` | WIP esta subestimado; itens aguardando deploy aparecem como "done" antes de realmente chegarem a producao. | Mapear "aguardando deploy producao" para `in_review` ou criar um 5o status `awaiting_deploy`. | +| INC-020 | Lean — `status_transitions` | P0 | `_map_issue` no `JiraConnector` extraía o changelog para um cache lateral (`self._last_changelogs`) mas NÃO incluía o campo `changelog` no dict mapeado. O `_sync_issues` chama `extract_status_transitions_inline(raw)` que faz `raw.get("changelog", {}).get("histories", [])` — sempre vazio. **Resultado: 311.007 issues (100%) com `status_transitions=[]`.** Cycle Time não fechava (sem transição final para done), Throughput sub-contava, WIP super-contava, CFD distorcido, Lead Time indeterminado. | `jira_connector.py:_map_issue` (changelog não retornado); `workers/devlake_sync.py:extract_status_transitions_inline` lê do dict mapeado. | Todo o pilar Lean comprometido para qualquer projeto que use o pipeline batched (Phase 1 v2). | Incluir `"changelog": jira_issue.get("changelog", {})` no return de `_map_issue`. Adicionar test guard `TestMapIssuePreservesChangelogForInlineExtraction` end-to-end (mapper → extractor) — o gap de cobertura era exatamente esse: testes do extractor isolado não pegavam o drop no mapper. 
| +| INC-021 | Lean / Sprint — `story_points = 0` em 100% issues | P0 | Audit em 2026-04-28 (FDD-OPS-016): `story_points = 0` para todas as 311.007 issues. Investigação na API Jira da Webmotors revelou: `customfield_10004` ("Story Points") e `customfield_18524` ("Story point estimate") **0% populados** em todos os 69 projetos ativos. Webmotors **não usa Story Points como método de estimativa**. Squads usam padrões heterogêneos: T-shirt size, original estimate em horas, ou nada (Kanban-puro). | `jira_connector.py:_extract_story_points` (só consultava campos numéricos clássicos). | Velocity sempre zerada, throughput-by-effort impossível, forecast Monte Carlo sem input. Bloqueia toda métrica que dependa de "esforço" como agregação. | **Fallback chain implementada**: SP nativo → T-shirt (P=2/M=3/G=5… escala Fibonacci) → `timeoriginalestimate` (buckets de horas) → `None`. Discovery dinâmico via `_discover_custom_fields` casa por nome ("t-shirt size", "tamanho/impacto"). Telemetria `_effort_source_counts` por batched run. **Quando `None`, métrica downstream DEVE contar items (Kanban-puro)** — decisão fica na camada de métrica, não no normalizer. | +| INC-022 | Lean / Flow — Status normalization 96.5% done skew | P0 | Audit em 2026-04-28 mostrou distribuição absurda: 96,5% `done` / 3,3% `todo` / 0,2% `in_progress` / 0,1% `in_review`. A Webmotors tem **104 status raw distintos** em workflows ativos; `DEFAULT_STATUS_MAPPING` cobria ~50 → 50+ status caíam silenciosamente no fallback "Unknown → todo". Casos sistêmicos: `FECHADO EM PROD` (2.881 issues) ia para `todo` em vez de `done`; `Em Progresso`, `Em desenv` (in_progress) idem; `Homologação`, `Em Verificação` (in_review) idem. **Impacto em CASCATA**: status_transitions herdam a classificação errada → último estado de issue concluída ficava `todo`. Cycle Time infinito, Throughput sub-contava, WIP super-contava, CFD/Lead Time corrompidos. | `engineering_data/normalizer.py:normalize_status` (default `'todo'` em status desconhecido); `build_status_transitions` propagava o erro. | **Todo o pilar Lean** corrompido para qualquer tenant com status fora do mapping curado. SaaS-ready zero. | **Hybrid normalization em 3 camadas**: (1) Textual `DEFAULT_STATUS_MAPPING` expandido com ~80 PT-BR Webmotors-curated (preserva granularidade `in_progress` vs `in_review`); (2) Fallback `statusCategory.key` da Jira (autoritativo done/não-done) — connector descobre via `/rest/api/3/status` (1 chamada/lifetime, 326 status defs Webmotors, cacheado); (3) Default `todo` com WARN (extremamente raro agora). Quantificado: 3.151 issues reclassificarão (1% — long tail catastrófico); distribuição já correta para os 97% restantes. | +| INC-023 | Sprint — `status` sempre vazio | P0 | 100% das 216 sprints na Webmotors com `status=''` no `eng_sprints`. `goal` também totalmente vazio. Investigação revelou clássico **swiss cheese alignment** — 4 bugs independentes em camadas diferentes, cada um sozinho garantindo o resultado: (1) `normalize_sprint` retornava dict SEM o campo `status`; (2) `_upsert_sprints.on_conflict_do_update.set_` não atualizava `status`/`goal` (sprints existentes nunca recebiam update); (3) `_fetch_board_sprints` filtrava por `started_date < since` — sprint state transitions acontecem em `endDate`, não `startDate` (filtro errado de dimensão); (4) **ORM model `EngSprint` não tinha o campo `status`** apesar do schema do DB ter — drift coluna existe há tempos no DB, ORM nunca atualizado. 
Path que omitia status funcionava silently empty; path que tentava popular crashava com `Unconsumed column names: status`. | `jira_connector.py:_map_sprint`, `normalizer.py:normalize_sprint`, `workers/devlake_sync.py:_upsert_sprints`, `engineering_data/models.py:EngSprint`. | Sprint Comparison / Velocity Trend não pode filtrar `closed`; "current sprint" planejado precisa `active`; Carryover heurística baseada em `endDate < now()` em vez do status correto. | Fix nas 4 camadas: (1) `_map_sprint` passa `goal` adiante; (2) normalizer inclui `status` (lowercase `active`/`closed`/`future`/None) + `goal` com strip de null bytes; (3) ON CONFLICT atualiza ambos; (4) removeu filtro de watermark (volume baixo ~216 total / ~5 ativas, sempre re-fetch é correto pois state transitions); (5) `EngSprint.status: Mapped[str\|None]` adicionado (corrige drift). Helper `_normalize_sprint_status` mapeia aliases (open→active, completed→closed, planned→future) e devolve `None` para desconhecidos — não bucketiza silenciosamente. **Lição genérica**: adicionar guard test "DB columns vs ORM Mapped fields" — schema drift é o bug mais insidioso porque alguns paths funcionam e outros crashern. | --- @@ -38,10 +42,10 @@ Gravidade: | Gravidade | Quantidade | Impacto | |-----------|-----------|---------| -| P0 | 7 | Numeros errados exibidos ao usuario | +| P0 | 11 | Numeros errados exibidos ao usuario | | P1 | 8 | Numeros subotimizados/incompletos | | P2 | 4 | Apresentacao/documentacao | -| **Total** | **19** | | +| **Total** | **23** | | ## IDs P0 listados por ordem de impacto @@ -52,6 +56,10 @@ Gravidade: 5. **INC-003** — `first_commit_at` = data de abertura do PR — ✅ **FIXED 2026-04-17** (GraphQL `commits(first:1).authoredDate` no `github_connector`; normalizer consome `_first_commit_at` com fallback para `created_date`; admin endpoint `POST /data/v1/admin/prs/refresh-first-commits` faz backfill. Backfill `scope=last-60d`: 5020 processados, 4653 atualizados, 0 erros, 459s. Resultado: P50 Cycle Time 0,28h → 5,94h em 60d; 90,1% dos PRs agora com `first_commit_at < created_at`. Pendente: backfill histórico (~59k PRs) com `scope=stale`.) 6. **INC-007** — Cycle time em throughput trend sempre None 7. **INC-005** — MTTR sempre null (documentado, mas DORA overall fica incompleto) +8. **INC-022** — Status normalization 96.5% done skew (afeta Cycle Time / Throughput / WIP / CFD / Lead Time em CASCATA via status_transitions) — ✅ **FIXED 2026-04-29** (hybrid: textual mapping curado + Jira `statusCategory.key` fallback + 326 status defs descobertos; commit `0c7124d`). +9. **INC-020** — `status_transitions=[]` em 311k issues (changelog drop em `_map_issue`) — ✅ **FIXED 2026-04-29** (`jira_connector._map_issue` preserva `changelog`; commit `177830e`). +10. **INC-021** — `story_points=0` em 100% issues (Webmotors não usa SP) — ✅ **FIXED 2026-04-29** (effort fallback chain SP→T-shirt→Hours→None; commit `172f3f2`). +11. **INC-023** — Sprint status sempre vazio (4-layer cheese: normalizer + upsert + watermark + ORM drift) — ✅ **FIXED 2026-04-29** (fix nas 4 camadas; commit `649ed78`). ### Status bar @@ -65,6 +73,10 @@ Gravidade: | INC-003 | ✅ Fixed | 2026-04-17 | `connectors/github_connector.py` GraphQL query includes `commits(first:1).authoredDate` + REST fallback via `_fetch_first_commit_date`; `engineering_data/normalizer.py` reads `_first_commit_at` with fallback; `engineering_data/services/backfill_first_commits.py` + admin endpoint `POST /data/v1/admin/prs/refresh-first-commits` backfills historical PRs. 
60d scope: 5020 processados, 4653 atualizados, P50 Cycle Time 0,28h → 5,94h. | | INC-004 | ✅ Fixed | 2026-04-17 | Temporal linking PR→deploy: `engineering_data/services/backfill_deployed_at.py` (one-shot CTE com LATERAL join em `repo = split_part(pr.repo,'/',2)` e janela 30d) + admin `POST /data/v1/admin/prs/refresh-deployed-at` + forward-path hook em `workers/devlake_sync._sync_deployments` chamando `link_recent_deploys_to_prs` após upsert. SHA match descartado (Jenkins `sha` é build ID, não git SHA). Também corrige INC-012 (Deploy phase) como consequência. **Fix adicional em `domain/cycle_time.breakdown_single_pr`**: `total_hours` agora usa `first_commit_at → merged_at` (Cycle Time canônico); antes caía no mesmo endpoint `deployed_at` do DORA e colapsava Lead Time ≡ Cycle Time assim que `deployed_at` fosse populado. Backfill `scope=last-60d`: 5104 processados, 2037 linkados (40% cobertura — limitado pelos 126/390 repos com Jenkins prod), duração 0,93s. `scope=stale` histórico: 3706 linkados adicionais. Resultado home (60d): LT=65,51h vs CT P50=5,92h (diff=59,59h = fila de deploy). | | INC-012 | ✅ Fixed | 2026-04-17 | Resolvido como efeito colateral do INC-004: com `deployed_at` populado em 2037 PRs (60d), a fase Deploy do Cycle Time Breakdown passa a ter dados reais (merge→deploy P50 ≈ 136h em 60d). | +| INC-020 | ✅ Fixed | 2026-04-29 | `connectors/jira_connector.py:_map_issue` agora inclui `"changelog": jira_issue.get("changelog", {})` no return. Test guard `tests/unit/test_inline_changelog_extraction.py::TestMapIssuePreservesChangelogForInlineExtraction` exercita end-to-end (mapper → extractor) — gap original era teste do extractor isolado. Validado live no projeto BG: 1.994 issues re-sincados todos com 3-8 transitions normalizadas. Commit `177830e`. | +| INC-021 | ✅ Fixed | 2026-04-29 | `connectors/jira_connector.py`: discovery dinâmico de `customfield_18762` ("T-Shirt Size") + `customfield_15100` ("Tamanho/Impacto") via `/rest/api/3/field`; `_extract_story_points` reescrito como fallback chain (SP nativo → T-shirt mapping Fibonacci → `timeoriginalestimate` buckets → None). Telemetria `_effort_source_counts` loggada por batched run. 34 testes em `tests/unit/test_effort_fallback_chain.py`. Validado live em CRMC (1.375 issues): 52,3% com effort estimado, valores 1/2/3/5/8 (Fibonacci aplicado). Backlog FDD-DEV-METRICS-001 reservado para R3+ (per-squad estimation choice + proprietary forecasting). Commit `172f3f2`. | +| INC-022 | ✅ Fixed | 2026-04-29 | (1) `_discover_status_categories()` no `JiraConnector` cacheia `name → category` via `/rest/api/3/status` (326 defs Webmotors: 117 new + 181 indeterminate + 28 done). (2) `_map_issue` anexa `status_category` (current) e `status_categories_map` (todos, para histórico). (3) `normalize_status(raw, mapping, status_category=...)` aceita category fallback antes do default todo: `done→done`, `indeterminate→in_progress`, `new→todo`. (4) `build_status_transitions(..., status_categories_map=...)` classifica cada to_status histórica via map. (5) `DEFAULT_STATUS_MAPPING` expandido com ~80 PT-BR Webmotors-curated. 44 testes em `tests/unit/test_status_normalization.py`. Quantificado pré-fix: 3.151 issues reclassificarão (2.923 todo→done, 161 todo→in_review, 67 todo→in_progress). Commit `0c7124d`. 
| +| INC-023 | ✅ Fixed | 2026-04-29 | Fix nas 4 camadas do swiss cheese: (1) `jira_connector._map_sprint` passa `goal` adiante; (2) `normalizer.normalize_sprint` inclui `status` (lowercase: `active`/`closed`/`future`/None via helper `_normalize_sprint_status` com aliases) + `goal` com strip de null bytes; (3) `_upsert_sprints` ON CONFLICT atualiza `status`+`goal`; (4) `_fetch_board_sprints` removeu filtro `started_date < since` (volume baixo, state transitions em `endDate`); (5) `EngSprint.status: Mapped[str\|None]` adicionado (corrige schema drift). 26 testes em `tests/unit/test_sprint_normalization.py` (incl. structural anti-regression para upsert set_). Validado live: 195/217 sprints (89,9%) com status correto, 70% com goal real. As 22 vazias = board órfão 873 sem projeto ativo (fora de escopo). Commit `649ed78`. | ### Admin recalc endpoint (2026-04-17) Forced refresh without waiting for Kafka events: diff --git a/pulse/packages/pulse-data/alembic/versions/010_pipeline_watermarks_scope_key.py b/pulse/packages/pulse-data/alembic/versions/010_pipeline_watermarks_scope_key.py new file mode 100644 index 0000000..c88f374 --- /dev/null +++ b/pulse/packages/pulse-data/alembic/versions/010_pipeline_watermarks_scope_key.py @@ -0,0 +1,165 @@ +"""pipeline_watermarks: add scope_key (FDD-OPS-014 Phase 2, Step 2.1). + +Promoted from DRAFT 2026-04-28 after `docs/ingestion-v2-phase-2-plan.md` +review approval. + +============================================================================== +Why this migration exists (FDD-OPS-014, Phase 2 of ingestion-architecture-v2) +============================================================================== + +Today `pipeline_watermarks` has ONE row per (tenant, entity_type). Adding +a single new Jira project means resetting the watermark to bring its +historical data — but that ALSO re-fetches the existing 200k+ issues from +all other projects unnecessarily. + +After this migration: rows are keyed by (tenant, entity_type, scope_key). +A new project starts with `scope_key = "jira:project:NEWKEY"` watermark +at NULL → backfills only that scope. Other scopes' watermarks unchanged. + +Same pattern for repos (github), jobs (jenkins), and future sources. + +============================================================================== +Migration plan (zero-downtime, multi-step) +============================================================================== + +This migration is INTENTIONALLY conservative — it adds the new column +with a default WITHOUT removing the old constraint. A second migration +(011, after the worker code switches to writing per-scope rows) drops +the old global constraint. + +Step 010 (this file): + 1. ADD COLUMN scope_key VARCHAR(255) NOT NULL DEFAULT '*' + - existing rows get scope_key='*' (means "global, all scopes") + - workers can keep reading existing rows by querying scope_key='*' + 2. CREATE INDEX on (tenant_id, entity_type, scope_key) + 3. CREATE UNIQUE CONSTRAINT uq_watermark_scope on + (tenant_id, entity_type, scope_key) ← coexists with old global one + 4. 
KEEP existing uq_watermark_entity (tenant_id, entity_type) UNTIL + workers migrate + +Step 011 (separate file, AFTER worker code is deployed): + - DROP CONSTRAINT uq_watermark_entity + - At this point all writes use scope_key, the global '*' rows can + be removed too (or kept as "backwards-compat aggregate") + +============================================================================== +Rollback strategy +============================================================================== + +`downgrade()` removes only what `upgrade()` adds. It does NOT touch the +old constraint (since this migration didn't drop it). Safe to revert +if the new column proves problematic. + +============================================================================== +What this DOES NOT change +============================================================================== + +- No worker code changes (those go in Phase 2 PR). +- No queries change yet — workers still read by (tenant, entity_type) + which now matches the global '*' row. +- No data backfill — existing rows just inherit '*' default. + +Revision ID: 010_pipeline_watermarks_scope_key +Revises: 009_metrics_snapshots_tenant_latest_index +Create Date: 2026-04-28 +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +revision: str = "010_watermarks_scope_key" +down_revision: Union[str, None] = "009_metrics_snapshots_tenant_latest_index" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Add scope_key column + new unique constraint (coexists with old).""" + + # 1. Add the column with default '*' so existing rows get a value. + op.add_column( + "pipeline_watermarks", + sa.Column( + "scope_key", + sa.String(length=255), + nullable=False, + server_default="*", + comment=( + "Scope identifier within an entity_type. " + "Format: '::' " + "(e.g., 'jira:project:BG', 'github:repo:foo/bar', " + "'jenkins:job:deploy-X'). Value '*' means global " + "(legacy global watermark). FDD-OPS-014." + ), + ), + ) + + # 2. Index for the per-scope lookup pattern. Replaces nothing yet — + # keeps the old (tenant, entity_type) index for backwards-compat. + op.create_index( + "ix_watermarks_tenant_entity_scope", + "pipeline_watermarks", + ["tenant_id", "entity_type", "scope_key"], + unique=False, + ) + + # 3. New UNIQUE constraint covering scope_key. Coexists with the old + # `uq_watermark_entity` constraint until step 011 drops it. + op.create_unique_constraint( + "uq_watermark_entity_scope", + "pipeline_watermarks", + ["tenant_id", "entity_type", "scope_key"], + ) + + # 4. Defensive: any RLS policies on the table apply to the new column + # automatically (policies are at table level, not column level). + # No change needed. + + +def downgrade() -> None: + """Reverse: drop new constraint + index + column. Old constraints stay.""" + op.drop_constraint( + "uq_watermark_entity_scope", + "pipeline_watermarks", + type_="unique", + ) + op.drop_index( + "ix_watermarks_tenant_entity_scope", + table_name="pipeline_watermarks", + ) + op.drop_column("pipeline_watermarks", "scope_key") + + +# ============================================================================ +# Companion migration that should follow (011) — KEEP IN SYNC HERE for review +# ============================================================================ +# +# def upgrade(): +# # Drop the legacy global-watermark constraint now that all writes use +# # scope_key. 
Safe to run only after Phase 2 worker code is deployed. +# op.drop_constraint( +# "uq_watermark_entity", +# "pipeline_watermarks", +# type_="unique", +# ) +# op.drop_index( +# "ix_watermarks_tenant_entity", +# table_name="pipeline_watermarks", +# ) +# +# def downgrade(): +# op.create_unique_constraint( +# "uq_watermark_entity", +# "pipeline_watermarks", +# ["tenant_id", "entity_type"], +# ) +# op.create_index( +# "ix_watermarks_tenant_entity", +# "pipeline_watermarks", +# ["tenant_id", "entity_type"], +# ) +# +# ============================================================================ diff --git a/pulse/packages/pulse-data/alembic/versions/011_drop_legacy_watermark_constraint.py b/pulse/packages/pulse-data/alembic/versions/011_drop_legacy_watermark_constraint.py new file mode 100644 index 0000000..1eaac43 --- /dev/null +++ b/pulse/packages/pulse-data/alembic/versions/011_drop_legacy_watermark_constraint.py @@ -0,0 +1,68 @@ +"""Drop legacy uq_watermark_entity constraint (FDD-OPS-014, Phase 2 step 2.7). + +Promoted earlier than originally planned because the assumption in +migration 010 ("legacy and new UNIQUE constraints coexist harmlessly") +was wrong: Postgres enforces ALL UniqueConstraints on every INSERT. +Trying to insert a per-scope row like (tenant, 'issues', +'jira:project:OKM', ...) failed with: + + UniqueViolationError: duplicate key value violates unique + constraint "uq_watermark_entity" + DETAIL: Key (tenant_id, entity_type)=(..., issues) already exists. + +The legacy constraint treats (tenant, entity_type) as the unique key +regardless of scope_key, so the existing '*' row blocked every +attempt to insert a scoped row. + +Resolution: drop the legacy constraint. The new +`uq_watermark_entity_scope` (tenant, entity_type, scope_key) +correctly handles both '*' and scoped rows. + +This was discovered immediately after Phase 2-A deployment (Steps +2.1-2.5) when sync cycles started failing with "status=failed" on the +first scope advance attempt. Documenting the root cause here so +future migrations don't repeat the dual-constraint assumption. + +Revision ID: 011_drop_legacy_watermark +Revises: 010_watermarks_scope_key +Create Date: 2026-04-28 +""" + +from typing import Sequence, Union + +from alembic import op + + +revision: str = "011_drop_legacy_watermark" +down_revision: Union[str, None] = "010_watermarks_scope_key" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Drop legacy unique-on-(tenant, entity) constraint and index.""" + # Use IF EXISTS for safety — this migration was applied via raw SQL + # before the file existed, so the actual DROP may have already run. + op.execute( + "ALTER TABLE pipeline_watermarks " + "DROP CONSTRAINT IF EXISTS uq_watermark_entity" + ) + op.execute("DROP INDEX IF EXISTS ix_watermarks_tenant_entity") + + +def downgrade() -> None: + """Restore legacy constraint + index. + + WARNING: this only works if no two rows have the same + (tenant_id, entity_type) — i.e., either you're back to a single '*' + row per tenant+entity, or you've collapsed scope rows first. 
+ """ + op.execute( + "CREATE INDEX IF NOT EXISTS ix_watermarks_tenant_entity " + "ON pipeline_watermarks (tenant_id, entity_type)" + ) + op.execute( + "ALTER TABLE pipeline_watermarks " + "ADD CONSTRAINT uq_watermark_entity " + "UNIQUE (tenant_id, entity_type)" + ) diff --git a/pulse/packages/pulse-data/src/connectors/aggregator.py b/pulse/packages/pulse-data/src/connectors/aggregator.py index eb28e34..f2c2b1a 100644 --- a/pulse/packages/pulse-data/src/connectors/aggregator.py +++ b/pulse/packages/pulse-data/src/connectors/aggregator.py @@ -78,10 +78,16 @@ async def get_pull_request_source_count(self) -> int: return total async def fetch_pull_requests_batched( - self, since: datetime | None = None, + self, + since: datetime | None = None, + since_by_repo: dict[str, datetime | None] | None = None, ) -> AsyncIterator[tuple[str, list[dict[str, Any]] | None]]: """Yield PRs in batches (per repo) from all code-hosting connectors. + FDD-OPS-014 step 2.4-B: forwards since_by_repo to connectors that + support it. Connectors without the parameter (older shape) fall + back to single-`since` behavior. + Each yield is (repo_name, prs_or_none): - prs is None → "starting" signal for this repo (UI progress hint) - prs is list → completed batch ready to persist @@ -90,7 +96,17 @@ async def fetch_pull_requests_batched( connector = self._connectors.get(source) if connector and hasattr(connector, "fetch_pull_requests_batched"): try: - async for repo_name, prs in connector.fetch_pull_requests_batched(since): + # Detect if connector supports since_by_repo (graceful + # for connectors not yet updated in newer codebases). + import inspect + sig = inspect.signature(connector.fetch_pull_requests_batched) + if "since_by_repo" in sig.parameters: + gen = connector.fetch_pull_requests_batched( + since=since, since_by_repo=since_by_repo, + ) + else: + gen = connector.fetch_pull_requests_batched(since) + async for repo_name, prs in gen: yield repo_name, prs except Exception: logger.exception("Error fetching batched PRs from %s", source) @@ -102,6 +118,10 @@ async def fetch_issues( ) -> list[dict[str, Any]]: """Fetch issues from all work-tracking connectors (Jira, GitHub Issues). + DEPRECATED for new code paths — use fetch_issues_batched() which streams + per-project and persists incrementally (FDD-OPS-012). This bulk-fetch + method is retained for backward compatibility (sprint sync etc.). + Args: since: Watermark for incremental sync. project_keys: If provided, passed to Jira connector to scope which @@ -122,6 +142,34 @@ async def fetch_issues( logger.exception("Error fetching issues from %s", source) return all_issues + async def fetch_issues_batched( + self, + project_keys: list[str], + since_by_project: dict[str, datetime | None] | None = None, + ): + """Stream issues per-project from work-tracking connectors (FDD-OPS-012). + + Yields (project_key, batch) tuples per page. Caller normalizes, + upserts, emits Kafka, advances watermark per batch — bounded memory, + crash-safe. + + Currently only Jira implements batched issues. GitHub/Azure issues + sync remains bulk (low volume, can be migrated later if needed). 
+ """ + connector = self._connectors.get("jira") + if connector is None or not hasattr(connector, "fetch_issues_batched"): + logger.warning("No Jira connector with batched fetch — skipping") + return + + try: + async for project_key, batch in connector.fetch_issues_batched( + project_keys=project_keys, + since_by_project=since_by_project, + ): + yield project_key, batch + except Exception: + logger.exception("Error during batched issue fetch from Jira") + async def fetch_issue_changelogs( self, issue_ids: list[str], ) -> dict[str, list[dict[str, Any]]]: @@ -171,15 +219,29 @@ async def fetch_issue_changelogs( return all_changelogs async def fetch_deployments( - self, since: datetime | None = None, + self, + since: datetime | None = None, + since_by_repo: dict[str, datetime | None] | None = None, ) -> list[dict[str, Any]]: - """Fetch deployments from all CI/CD connectors (Jenkins, GitHub Actions).""" + """Fetch deployments from all CI/CD connectors (Jenkins, GitHub Actions). + + FDD-OPS-014 step 2.5-B: forwards since_by_repo to connectors that + support it. Connectors without the parameter fall back to single + `since` behavior. + """ + import inspect all_deploys: list[dict[str, Any]] = [] for source in ("jenkins", "github", "gitlab", "azure"): connector = self._connectors.get(source) if connector: try: - deploys = await connector.fetch_deployments(since) + sig = inspect.signature(connector.fetch_deployments) + if "since_by_repo" in sig.parameters: + deploys = await connector.fetch_deployments( + since=since, since_by_repo=since_by_repo, + ) + else: + deploys = await connector.fetch_deployments(since) all_deploys.extend(deploys) logger.info("Fetched %d deployments from %s", len(deploys), source) except Exception: diff --git a/pulse/packages/pulse-data/src/connectors/github_connector.py b/pulse/packages/pulse-data/src/connectors/github_connector.py index 44c2535..99e245d 100644 --- a/pulse/packages/pulse-data/src/connectors/github_connector.py +++ b/pulse/packages/pulse-data/src/connectors/github_connector.py @@ -197,23 +197,57 @@ async def get_source_count(self) -> int: return len(repos) async def fetch_pull_requests_batched( - self, since: datetime | None = None, + self, + since: datetime | None = None, + since_by_repo: dict[str, datetime | None] | None = None, ) -> AsyncIterator[tuple[str, list[dict[str, Any]] | None]]: """Yield PRs in batches, one batch per repo — parallelized via GraphQL. Processes REPO_CONCURRENCY repos at a time. Each repo uses a single GraphQL query per page (50 PRs) instead of 1+2N REST calls. + FDD-OPS-014 step 2.4-B: per-repo watermarks. When `since_by_repo` + is provided, each repo uses its own `since` timestamp: + - Found in dict, value is datetime → incremental from that point + - Found in dict, value is None → full backfill (new repo) + - NOT in dict → falls back to bulk `since` + Backwards-compat: if `since_by_repo` is None, all repos use the + single `since` parameter (legacy behavior, preserved for callers + not yet updated). + For each repo, emits: 1. (repo_full_name, None) — "starting" signal for UI progress 2. (repo_full_name, list_of_prs) — completed batch (only if non-empty) """ repos = await self._get_repos() total_repos = len(repos) - logger.info( - "Starting parallel PR fetch: %d repos, concurrency=%d, page_size=%d", - total_repos, REPO_CONCURRENCY, GRAPHQL_PAGE_SIZE, - ) + + # Resolve effective `since` per repo. Calling with explicit + # since_by_repo wins; otherwise everyone gets the bulk `since`. 
+ def _resolve_since(repo: str) -> datetime | None: + if since_by_repo is not None and repo in since_by_repo: + return since_by_repo[repo] + return since + + # Pre-flight summary so operator sees the per-repo plan up front. + if since_by_repo is not None: + backfill = sum( + 1 for r in repos + if since_by_repo.get(r, since) is None + ) + incremental = total_repos - backfill + logger.info( + "Starting parallel PR fetch: %d repos (per-repo plan: " + "%d backfill, %d incremental), concurrency=%d, page_size=%d", + total_repos, backfill, incremental, + REPO_CONCURRENCY, GRAPHQL_PAGE_SIZE, + ) + else: + logger.info( + "Starting parallel PR fetch: %d repos, concurrency=%d, " + "page_size=%d (single since=%s)", + total_repos, REPO_CONCURRENCY, GRAPHQL_PAGE_SIZE, since, + ) semaphore = asyncio.Semaphore(REPO_CONCURRENCY) # Queue holds outputs from worker coroutines so we can yield them @@ -224,12 +258,14 @@ async def worker(repo_full_name: str) -> None: async with semaphore: # Emit "starting" as soon as we acquire the slot await queue.put(("start", repo_full_name, None)) + repo_since = _resolve_since(repo_full_name) try: - prs = await self._fetch_repo_prs_graphql(repo_full_name, since) + prs = await self._fetch_repo_prs_graphql(repo_full_name, repo_since) if prs: logger.info( - "Batch: %d PRs from %s (GraphQL)", + "Batch: %d PRs from %s (GraphQL, since=%s)", len(prs), repo_full_name, + repo_since.isoformat() if repo_since else "full-history", ) await queue.put(("batch", repo_full_name, prs)) else: @@ -240,7 +276,7 @@ async def worker(repo_full_name: str) -> None: repo_full_name, ) try: - prs = await self._fetch_repo_prs(repo_full_name, since) + prs = await self._fetch_repo_prs(repo_full_name, repo_since) await queue.put(("batch", repo_full_name, prs or [])) except Exception: logger.exception("REST fallback also failed for %s", repo_full_name) diff --git a/pulse/packages/pulse-data/src/connectors/jenkins_connector.py b/pulse/packages/pulse-data/src/connectors/jenkins_connector.py index 5e02429..95adee1 100644 --- a/pulse/packages/pulse-data/src/connectors/jenkins_connector.py +++ b/pulse/packages/pulse-data/src/connectors/jenkins_connector.py @@ -116,17 +116,43 @@ async def test_connection(self) -> dict[str, Any]: # ------------------------------------------------------------------ async def fetch_deployments( - self, since: datetime | None = None, + self, + since: datetime | None = None, + since_by_repo: dict[str, datetime | None] | None = None, ) -> list[dict[str, Any]]: """Fetch builds from configured Jenkins jobs. Each build is mapped to a deployment record. Only jobs configured in connections.yaml are fetched (not all Jenkins jobs). + + FDD-OPS-014 step 2.5-B: per-repo `since` resolution. Jenkins has + no native "repo" concept — we use the job→repo mapping (built + from SCM scan, see `discover_jenkins_jobs.py`) to map each job + to its source repo and look up the repo's watermark. + + Resolution order per job: + 1. since_by_repo[mapped_repo] (if mapped_repo in dict) + 2. fall back to bulk `since` (single-watermark behavior) + + Backwards compat: if since_by_repo is None, all jobs use + single `since` (legacy bulk behavior preserved). """ if not self._jobs: logger.warning("No Jenkins jobs configured — skipping deployment fetch") return [] + # Pre-flight: log per-repo plan when since_by_repo is provided. 
+ if since_by_repo is not None: + jobs_with_scope = sum( + 1 for j in self._jobs + if self._job_to_repo.get(j.get("fullName", ""), "") in since_by_repo + ) + logger.info( + "Jenkins fetch: %d jobs total, %d jobs with per-repo watermark, " + "rest use bulk since=%s", + len(self._jobs), jobs_with_scope, since, + ) + all_builds: list[dict[str, Any]] = [] for job_config in self._jobs: @@ -134,8 +160,15 @@ async def fetch_deployments( if not job_name: continue + # Resolve per-repo since via job→repo mapping. + repo = self._job_to_repo.get(job_name, job_name) + if since_by_repo is not None and repo in since_by_repo: + job_since = since_by_repo[repo] + else: + job_since = since + try: - builds = await self._fetch_job_builds(job_name, since) + builds = await self._fetch_job_builds(job_name, job_since) all_builds.extend(builds) except Exception: logger.exception("Failed to fetch builds for job: %s", job_name) diff --git a/pulse/packages/pulse-data/src/connectors/jira_connector.py b/pulse/packages/pulse-data/src/connectors/jira_connector.py index bdc2a67..9844a16 100644 --- a/pulse/packages/pulse-data/src/connectors/jira_connector.py +++ b/pulse/packages/pulse-data/src/connectors/jira_connector.py @@ -57,6 +57,59 @@ FALLBACK_STORY_POINTS_FIELDS = ("customfield_10016", "customfield_10028") FALLBACK_SPRINT_FIELDS = ("customfield_10020", "customfield_10010") +# --------------------------------------------------------------------------- +# Effort estimation fallback chain (FDD-OPS-016) +# +# Webmotors and many enterprise tenants do NOT use story points (validated +# 2026-04-28: 0% population across all 69 active Jira projects). Different +# squads use different estimation methods, or none at all. We discover and +# extract from a fallback chain in priority order: +# +# 1. Story Points (numeric) → use raw value +# 2. Story point estimate → use raw value +# 3. T-Shirt Size (option) → map P/M/G... to Fibonacci scale +# 4. Tamanho/Impacto (option) → map PP/P/M/G... to Fibonacci scale +# 5. Original Estimate (sec) → bucket hours into Fibonacci-aligned points +# 6. None → consumer falls back to count-of-items +# (Kanban-pure mode) +# +# When `story_points` lands as None, downstream metrics (Lean throughput, +# velocity) MUST count items rather than sum points. The decision to count +# vs sum lives in the metric layer, not here. +# +# Future (codename "dev-metrics"): admin UI to opt into a specific method +# per source/squad + proprietary forecasting model. See FDD-DEV-METRICS-001 +# in ops-backlog.md. +# --------------------------------------------------------------------------- + +# Field-name keywords used by `_discover_effort_fields` (case-insensitive, +# matched against Jira `fields` API "name" property). +EFFORT_NAME_PATTERNS_TSHIRT = ("t-shirt size", "tshirt size", "tamanho/impacto") +EFFORT_NAME_PATTERNS_TIME = ("original estimate",) # core field, not custom + +# Fibonacci-like mapping for option-typed effort fields. Covers the values +# observed in Webmotors data + common defaults (XS/S/M/L/XL/XXL). +TSHIRT_TO_POINTS: dict[str, float] = { + # Portuguese sizes + "PP": 1.0, "P": 2.0, "M": 3.0, "G": 5.0, "GG": 8.0, "GGG": 13.0, + # English sizes + "XS": 1.0, "S": 2.0, "L": 5.0, "XL": 8.0, "XXL": 13.0, +} + +# Hour-based estimation buckets → SP equivalent. +# Aligned with "1 ideal day = ~6h productive, 1 SP ≈ small task < 0.5d" so +# the steps stay roughly Fibonacci. Calibrated against Webmotors observed +# values (2h–124h, multiples of 4) so each common value lands in a sensible +# bucket. 
Rounded to the SP scale that downstream metrics already speak. +def _hours_to_points(hours: float) -> float: + if hours <= 4: return 1.0 + if hours <= 8: return 2.0 + if hours <= 16: return 3.0 + if hours <= 24: return 5.0 + if hours <= 40: return 8.0 + if hours <= 80: return 13.0 + return 21.0 + class JiraConnector(BaseConnector): """Fetches issues, sprints, and changelogs from Jira Cloud REST API v3. @@ -102,7 +155,23 @@ def __init__( # _discover_custom_fields() on first fetch_issues() call. self._sprint_field_id: str | None = None self._story_points_field_id: str | None = None + # FDD-OPS-016: discovered effort-fallback field IDs (T-shirt size, + # Tamanho/Impacto). Many tenants don't use story points at all. + self._tshirt_field_ids: list[str] = [] self._custom_fields_discovered: bool = False + # Telemetry for `_extract_effort` — counts how often each strategy + # was the one that produced a value, plus how many issues fell + # through to None. Logged at end of each batched fetch so operators + # can spot estimation mode shifts without combing through traces. + self._effort_source_counts: dict[str, int] = {} + # FDD-OPS-017 — status→category map cached from /rest/api/3/status. + # Keys are lowercased status names (e.g., "fechado em prod"); values + # are statusCategory.key ("new" | "indeterminate" | "done"). Used + # by the normalizer as the authoritative fallback when a textual + # mapping isn't found. Populated by `_discover_status_categories()` + # on first fetch. + self._status_categories: dict[str, str] = {} + self._status_categories_discovered: bool = False @property def source_type(self) -> str: @@ -206,6 +275,7 @@ async def fetch_issues( # Discover tenant-specific custom field IDs (sprint, story points) await self._discover_custom_fields() + await self._discover_status_categories() # Quote each project key in JQL — some keys like "DESC" are reserved words quoted_projects = ", ".join(f'"{p}"' for p in effective_projects) @@ -223,6 +293,13 @@ async def fetch_issues( fields_to_fetch.append(self._sprint_field_id) if self._story_points_field_id: fields_to_fetch.append(self._story_points_field_id) + # FDD-OPS-016: include effort fallback fields (T-shirt size, + # Tamanho/Impacto, original estimate) + for f in self._tshirt_field_ids: + if f not in fields_to_fetch: + fields_to_fetch.append(f) + if "timeoriginalestimate" not in fields_to_fetch: + fields_to_fetch.append("timeoriginalestimate") # Always include fallbacks to survive mis-discovery for f in FALLBACK_SPRINT_FIELDS + FALLBACK_STORY_POINTS_FIELDS: if f not in fields_to_fetch: @@ -259,6 +336,128 @@ async def fetch_issues( logger.info("Fetched %d issues from Jira (%d projects, %d pages)", len(all_issues), len(effective_projects), page) return all_issues + async def fetch_issues_batched( + self, + project_keys: list[str], + since_by_project: dict[str, datetime | None] | None = None, + ): + """Stream issues PER PROJECT, yielding (project_key, batch) per page. + + FDD-OPS-012 — replaces the bulk-fetch-all-then-persist pattern of + fetch_issues(). Yields each page (~50 issues) as it arrives, so the + caller can normalize → upsert → emit_event → advance_watermark + immediately. Memory bound: ~one page in flight; crash recovery loses + at most one page of work. + + Per-project pagination (one JQL per project) instead of `project IN + (...)` makes per-scope watermarks possible (each project advances + its own last_synced_at independently — see FDD-OPS-014). It also + means failure on one project doesn't lose progress on others. 
+ + Args: + project_keys: Projects to sync. Must be explicit; no fallback + to env var (caller MUST resolve via ModeResolver). + since_by_project: Optional per-project watermark. Missing keys + default to None (full backfill for that project). + + Yields: + (project_key, list_of_normalized_raw_issues) tuples. + Each list has SEARCH_PAGE_SIZE items (50 by default), except + the last page of each project which may be smaller. + """ + if not project_keys: + logger.warning("fetch_issues_batched: empty project_keys, nothing to do") + return + + # Discover tenant-specific custom field IDs once (cached for reuse). + await self._discover_custom_fields() + await self._discover_status_categories() + + # Build fields list: base + discovered custom fields + fallbacks. + fields_to_fetch = list(SEARCH_FIELDS) + if self._sprint_field_id: + fields_to_fetch.append(self._sprint_field_id) + if self._story_points_field_id: + fields_to_fetch.append(self._story_points_field_id) + # FDD-OPS-016: effort fallback fields + for f in self._tshirt_field_ids: + if f not in fields_to_fetch: + fields_to_fetch.append(f) + if "timeoriginalestimate" not in fields_to_fetch: + fields_to_fetch.append("timeoriginalestimate") + for f in FALLBACK_SPRINT_FIELDS + FALLBACK_STORY_POINTS_FIELDS: + if f not in fields_to_fetch: + fields_to_fetch.append(f) + + since_by_project = since_by_project or {} + # FDD-OPS-016: reset effort telemetry per batched call so the + # summary log reflects only this run. + self._effort_source_counts = {} + + for project_key in project_keys: + since = since_by_project.get(project_key) + # Keys like "DESC" collide with SQL reserved words — quote always. + jql = f'project = "{project_key}"' + if since: + since_str = since.strftime("%Y-%m-%d %H:%M") + jql += f' AND updated >= "{since_str}"' + jql += " ORDER BY updated DESC" + + logger.info( + "[batched] %s: starting JQL fetch (since=%s)", + project_key, since.isoformat() if since else "full-history", + ) + + next_page_token: str | None = None + page = 0 + total_yielded = 0 + + while True: + body: dict[str, Any] = { + "jql": jql, + "maxResults": SEARCH_PAGE_SIZE, + "fields": fields_to_fetch, + "expand": "changelog", # critical: keeps changelog inline (FDD-OPS-013) + } + if next_page_token: + body["nextPageToken"] = next_page_token + + data = await self._client.post( + f"{REST_API}/search/jql", json_body=body, + ) + + issues = data.get("issues", []) + if issues: + mapped_batch = [self._map_issue(issue) for issue in issues] + yield project_key, mapped_batch + total_yielded += len(mapped_batch) + + page += 1 + next_page_token = data.get("nextPageToken") + if not next_page_token or not issues: + break + + logger.info( + "[batched] %s: complete (%d issues, %d pages)", + project_key, total_yielded, page, + ) + + # FDD-OPS-016 — log effort-source distribution so operators can spot + # which fields the squad uses (or that they don't estimate at all). 
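+        # Example of the summary line emitted below (numbers illustrative):
+        #   [batched] effort source distribution (1250 issues):
+        #   unestimated=900 (72.0%), hours_to_sp=200 (16.0%), tshirt_to_sp=150 (12.0%)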
+ if self._effort_source_counts: + total = sum(self._effort_source_counts.values()) + breakdown = ", ".join( + f"{src}={cnt} ({100.0*cnt/total:.1f}%)" + for src, cnt in sorted( + self._effort_source_counts.items(), + key=lambda kv: -kv[1], + ) + ) + logger.info( + "[batched] effort source distribution (%d issues): %s", + total, breakdown, + ) + async def fetch_issue_changelogs( self, issue_ids: list[str], ) -> dict[str, list[dict[str, Any]]]: @@ -393,6 +592,17 @@ def _map_issue(self, jira_issue: dict[str, Any]) -> dict[str, Any]: sprint_id = self._extract_sprint_id(fields) status_name = (fields.get("status") or {}).get("name", "") + # FDD-OPS-017 — read statusCategory from Jira's own `status` field + # (always inline in the issue response, no extra HTTP). Fallback to + # the cached `name → category` map if the issue payload lacks it + # (older Jira REST APIs / odd workflows). + status_cat_inline = ( + ((fields.get("status") or {}).get("statusCategory") or {}).get("key") + ) + status_category = ( + status_cat_inline.lower() if isinstance(status_cat_inline, str) + else self._status_categories.get(status_name.strip().lower()) + ) # Store changelogs inline (extracted separately for the sync worker) self._last_changelogs = self._last_changelogs if hasattr(self, "_last_changelogs") else {} @@ -420,6 +630,20 @@ def _map_issue(self, jira_issue: dict[str, Any]) -> dict[str, Any]: "assignee_name": (fields.get("assignee") or {}).get("displayName"), "type": (fields.get("issuetype") or {}).get("name", "Task"), "sprint_id": sprint_id, + # FDD-OPS-017 — Jira's authoritative classification of THIS issue's + # current status. The normalizer uses it as the fallback when the + # textual DEFAULT_STATUS_MAPPING doesn't recognize the status name. + "status_category": status_category, + # FDD-OPS-017 — full name→category map so build_status_transitions + # can classify each historical to_status, not just the current one. + # Same dict reference for every issue (cached on the connector); + # downstream upsert ignores extra keys. + "status_categories_map": self._status_categories, + # FDD-OPS-013 — preserve raw changelog from `expand=changelog` so + # `extract_status_transitions_inline()` in the sync worker can read + # it. Without this, mapped dict drops the changelog and ALL issues + # land with status_transitions=[] in eng_issues. + "changelog": jira_issue.get("changelog", {}), } def _map_sprint_issue( @@ -469,6 +693,52 @@ def _extract_changelogs( transitions.sort(key=lambda t: t.get("created_date") or "") return transitions + async def _discover_status_categories(self) -> None: + """FDD-OPS-017 — fetch all status definitions and cache name→category. + + Jira's `/rest/api/3/status` returns every status defined in the + tenant, each tagged with a `statusCategory.key` of "new", + "indeterminate", or "done". This is the AUTHORITATIVE classification + of "is this status considered finished by the workflow author". + + Used by the normalizer as the fallback when our textual + DEFAULT_STATUS_MAPPING doesn't recognize a status name. Without + this, exotic Webmotors statuses like "FECHADO EM PROD" silently + defaulted to "todo", catastrophically polluting flow metrics + (Cycle Time, Throughput, WIP, CFD all read from `normalized_status`). + + Discovery is one HTTP call per connector lifetime — cached on + instance. Failures degrade gracefully: we just lose the fallback. 
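+
+        Example cache entry (payload shape illustrative): a status defined as
+        {"name": "Fechado em Prod", "statusCategory": {"key": "done"}} is
+        stored as {"fechado em prod": "done"}.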
+ """ + if self._status_categories_discovered: + return + + try: + data = await self._client.get(f"{REST_API}/status") + except Exception: + logger.exception( + "Failed to fetch Jira status catalog — normalization will " + "rely solely on textual DEFAULT_STATUS_MAPPING" + ) + self._status_categories_discovered = True + return + + statuses = data if isinstance(data, list) else data.get("values", []) + for s in statuses: + name = (s.get("name") or "").strip().lower() + cat = ((s.get("statusCategory") or {}).get("key") or "").strip().lower() + if name and cat in ("new", "indeterminate", "done"): + self._status_categories[name] = cat + + self._status_categories_discovered = True + logger.info( + "Discovered %d Jira status definitions (new=%d, indeterminate=%d, done=%d)", + len(self._status_categories), + sum(1 for v in self._status_categories.values() if v == "new"), + sum(1 for v in self._status_categories.values() if v == "indeterminate"), + sum(1 for v in self._status_categories.values() if v == "done"), + ) + # ------------------------------------------------------------------ # Internal: Custom field discovery + extraction helpers # ------------------------------------------------------------------ @@ -502,12 +772,18 @@ async def _discover_custom_fields(self) -> None: self._sprint_field_id = fid elif name in ("story points", "story point estimate") and not self._story_points_field_id: self._story_points_field_id = fid + elif any(p in name for p in EFFORT_NAME_PATTERNS_TSHIRT): + # FDD-OPS-016 — option-typed effort fallback (P/M/G…) + if fid not in self._tshirt_field_ids: + self._tshirt_field_ids.append(fid) self._custom_fields_discovered = True logger.info( - "Discovered Jira custom fields — sprint=%s, story_points=%s", + "Discovered Jira custom fields — sprint=%s, story_points=%s, " + "effort_tshirt_fields=%s", self._sprint_field_id or "(none — using fallback)", self._story_points_field_id or "(none — using fallback)", + self._tshirt_field_ids or "(none)", ) def _extract_sprint_id(self, fields: dict[str, Any]) -> str | None: @@ -622,20 +898,89 @@ def _collect_leaf_texts(node: Any) -> list[str]: return flat def _extract_story_points(self, fields: dict[str, Any]) -> float | None: - """Extract story points, preferring the discovered custom field.""" - candidates: list[str] = [] + """Extract effort estimate, falling back through Story Points → + T-shirt size → Original Estimate hours → None. + + Returns a float on the SP scale so downstream metrics (velocity, + throughput) can sum it. Returns None when the issue is genuinely + unestimated; the metric layer must then count items rather than + sum points (Kanban-pure mode). See FDD-OPS-016. + + Side effect: increments `self._effort_source_counts[source]` so + `fetch_issues_batched` can log the distribution per run. The source + label is recorded even on None ("unestimated") so coverage can be + observed end-to-end. + """ + # 1+2. Native numeric story-point fields (preferred — no conversion). 
+ sp_candidates: list[str] = [] if self._story_points_field_id: - candidates.append(self._story_points_field_id) - candidates.extend(FALLBACK_STORY_POINTS_FIELDS) - candidates.append("story_points") - - for c in candidates: + sp_candidates.append(self._story_points_field_id) + sp_candidates.extend(FALLBACK_STORY_POINTS_FIELDS) + sp_candidates.append("story_points") + for c in sp_candidates: value = fields.get(c) - if value is not None: - try: - return float(value) - except (TypeError, ValueError): - continue + if value is None or value == "": + continue + try: + points = float(value) + except (TypeError, ValueError): + continue + if points > 0: + self._effort_source_counts["story_points"] = ( + self._effort_source_counts.get("story_points", 0) + 1 + ) + return points + + # 3+4. T-shirt sized fields → map P/M/G… to Fibonacci scale. + for fid in self._tshirt_field_ids: + raw = fields.get(fid) + label = self._unwrap_option(raw) + if not label: + continue + mapped = TSHIRT_TO_POINTS.get(label.upper()) + if mapped is not None: + self._effort_source_counts["tshirt_to_sp"] = ( + self._effort_source_counts.get("tshirt_to_sp", 0) + 1 + ) + return mapped + # Unknown size value — don't silently mis-map; fall through. + + # 5. Original Estimate (hours) → SP equivalent buckets. + secs = fields.get("timeoriginalestimate") + if secs: + try: + hours = float(secs) / 3600.0 + if hours > 0: + self._effort_source_counts["hours_to_sp"] = ( + self._effort_source_counts.get("hours_to_sp", 0) + 1 + ) + return _hours_to_points(hours) + except (TypeError, ValueError): + pass + + # 6. Genuinely unestimated. Track for telemetry; metric layer counts items. + self._effort_source_counts["unestimated"] = ( + self._effort_source_counts.get("unestimated", 0) + 1 + ) + return None + + @staticmethod + def _unwrap_option(raw: Any) -> str | None: + """Extract the string label from a Jira option-typed field. + + Jira returns option fields as `{"value": "P", "id": "..."}` but + legacy/edge cases sometimes use "name" or a bare string. Be lenient. + """ + if raw is None: + return None + if isinstance(raw, str): + label = raw.strip() + return label or None + if isinstance(raw, dict): + for key in ("value", "name", "displayName"): + v = raw.get(key) + if isinstance(v, str) and v.strip(): + return v.strip() return None # ------------------------------------------------------------------ @@ -711,17 +1056,18 @@ async def _fetch_board_sprints( for sprint in sprints: mapped = self._map_sprint(sprint, board_id) - # Apply watermark filter - if since: - start_date = mapped.get("started_date") - if start_date and isinstance(start_date, str): - try: - dt = datetime.fromisoformat(start_date.replace("Z", "+00:00")) - if dt < since: - continue - except ValueError: - pass - + # FDD-OPS-018 — DELIBERATELY NOT applying a `since` watermark + # filter here. Sprint state transitions (future→active→closed) + # happen on `endDate`, not `startDate`. The previous filter + # `if started_date < since: continue` meant a sprint that + # started in March and closed in May would never have its + # status updated past March's snapshot — every Webmotors + # sprint landed with empty status because the watermark was + # advanced past their start date. + # + # Volume is bounded (~216 total, ~5 active at any time across + # 27 squads), so always re-fetching every sprint per cycle + # is cheap and correct. 
all_sprints.append(mapped) if data.get("isLast", True) or not sprints: @@ -749,6 +1095,9 @@ def _map_sprint(self, sprint: dict[str, Any], board_id: int) -> dict[str, Any]: "name": sprint.get("name", ""), "url": self._base_url, "status": status, + # FDD-OPS-018 — sprint goal (free-text, set by squad lead). + # Jira returns this as a string; pass through for normalizer. + "goal": sprint.get("goal"), "started_date": sprint.get("startDate"), "ended_date": sprint.get("endDate"), "completed_date": sprint.get("completeDate"), diff --git a/pulse/packages/pulse-data/src/contexts/engineering_data/models.py b/pulse/packages/pulse-data/src/contexts/engineering_data/models.py index fa7fa75..a6e3a14 100644 --- a/pulse/packages/pulse-data/src/contexts/engineering_data/models.py +++ b/pulse/packages/pulse-data/src/contexts/engineering_data/models.py @@ -168,6 +168,12 @@ class EngSprint(TenantModel): source: Mapped[str] = mapped_column(String(32), nullable=False) name: Mapped[str] = mapped_column(String(256), nullable=False) board_id: Mapped[str] = mapped_column(String(128), nullable=False) + # FDD-OPS-018 — sprint lifecycle: active | closed | future | NULL. + # Was missing from the ORM model despite existing in the DB schema + # (schema drift). Without this Mapped column, every attempt to upsert + # `status` raised "Unconsumed column names: status" and the field + # silently stayed empty for all 216 Webmotors sprints. + status: Mapped[str | None] = mapped_column(String(50), nullable=True) started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True) completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True) goal: Mapped[str | None] = mapped_column(Text, nullable=True) diff --git a/pulse/packages/pulse-data/src/contexts/engineering_data/normalizer.py b/pulse/packages/pulse-data/src/contexts/engineering_data/normalizer.py index daedbe6..5016ed4 100644 --- a/pulse/packages/pulse-data/src/contexts/engineering_data/normalizer.py +++ b/pulse/packages/pulse-data/src/contexts/engineering_data/normalizer.py @@ -86,15 +86,107 @@ "aguardando desenvolvimento": "todo", "priorizado gp": "todo", "pronto para o gp": "todo", + "em progresso": "in_progress", + "em desenv": "in_progress", + "em deploy hml": "in_progress", + "em deploy produção": "in_progress", + "em deploy azul": "in_progress", # Active work / pre-dev analysis "construção de hipótese": "in_progress", "desenvolvimento": "in_progress", "design": "in_progress", "analise": "in_progress", + "análise": "in_progress", + "em análise": "in_progress", "discovery": "in_progress", "entendimento": "in_progress", - # Post-deploy + # FDD-OPS-017 — Webmotors PT-BR status names that need the in_review + # granularity (Jira's `indeterminate` category collapses these into + # in_progress, but for Cycle Time breakdown we want the split). + "em verificação": "in_review", + "em teste": "in_review", + "em teste regressão": "in_review", + "em teste integrado hml": "in_review", + "em testes integrados": "in_review", + "em teste try": "in_review", + "homologação": "in_review", + "para verificação": "in_review", + "pronto para teste": "in_review", + "aguardando teste": "in_review", + "aguardando teste regressão": "in_review", + "aguardando teste hml": "in_review", + "aguardando teste try": "in_review", + "aguardando review": "in_review", + "aguardando deploy": "in_review", + "aguardando deploy hml": "in_review", + "aguardando deploy azul": "in_review", + "aguardando merge": "in_review", + "valid. 
azul": "in_review", + "validação": "in_review", + "validação infosec": "in_review", + "revisão de negócio": "in_review", + "em design review": "in_review", + # Post-deploy / monitoring → done (issue is shipped, monitoring is + # passive observation, not active dev work) "pós-implantação": "done", + "fechado em prod": "done", + # NOTE: "fechado em hml" — Jira's own statusCategory is "done" and the + # name literally says FECHADO. We respect that. If a workflow author + # later wants to keep these issues in WIP (e.g., pending prod rollout), + # they should rename the status to "Aguardando Deploy Produção" which + # already maps to in_progress. + "fechado em hml": "done", + "em monitoramento produção": "done", + "feito": "done", + "finalizado": "done", + "publicado": "done", + "resolvido": "done", + "entregue": "done", + "envio para loja": "done", + "itens concluídos": "done", + "fechada": "done", + # Cancelled / rejected variations observed in Webmotors + "recusado": "done", + "reprovado": "done", + "solicitação reprovada": "done", + "falha": "done", + "arquivo morto": "done", + "estacionamento": "done", + # Common backlog/refinement aliases + "novo": "todo", + "a fazer": "todo", + "aberto": "todo", + "esboçando": "todo", + "ideação": "todo", + "exploração": "todo", + "descoberta": "todo", + "descobrindo": "todo", + "mapeando": "todo", + "desenhando": "todo", + "prototipando": "todo", + "novo chamado": "todo", + "em refinamento": "todo", + "em refinamento de negócio": "todo", + "em refinamento técnico": "todo", + "pré refinamento": "todo", + "aguardando refinamento": "todo", + "aguardando refinamento técnico": "todo", + "aguardando refinamento tecnico": "todo", + "aguardando análise": "todo", + "aguardando definição e refinamento": "todo", + "aguardando handover": "todo", + "aguardando terceiro": "todo", + "aguardando ideação": "todo", + "aguardando aprovação": "todo", + "aguardando validação": "todo", + "priorizado": "todo", + "priorização técnica": "todo", + "priorizando o negócio": "todo", + "preparando o trabalho": "todo", + "ajustes do trabalho": "todo", + "revisando trabalho": "todo", + "pausado": "todo", + "não aplicável": "todo", } # Regex to find issue keys in branch names (e.g., "feature/BACK-123-add-login") @@ -168,31 +260,74 @@ def _extract_project_key(issue_key: str | None, url: str | None) -> str: return "UNKNOWN" -def normalize_status(raw_status: str, status_mapping: dict[str, str] | None = None) -> str: - """Normalize a raw issue status to one of: todo, in_progress, done. +def normalize_status( + raw_status: str, + status_mapping: dict[str, str] | None = None, + status_category: str | None = None, +) -> str: + """Normalize a raw issue status to one of: todo | in_progress | in_review | done. Args: raw_status: The original status string from the source system. status_mapping: Optional custom mapping overriding defaults. + status_category: FDD-OPS-017 — Jira's own statusCategory.key value + ("new" | "indeterminate" | "done") for this status. Used as the + authoritative fallback when our textual mapping doesn't recognize + the status name. Without it, custom Jira workflows (e.g., + "FECHADO EM PROD") silently default to "todo" — corrupting + every flow metric (Cycle Time, Throughput, WIP, CFD). Returns: - Normalized status string. + Normalized status string. 
Granularity: + - `todo` — work not started + - `in_progress` — actively being worked on + - `in_review` — code/test review (subset of "active" for WIP) + - `done` — completed (workflow author classified as done) + + Resolution order: + 1. Custom + DEFAULT_STATUS_MAPPING textual lookup (preserves + the in_progress/in_review distinction we hand-curated) + 2. status_category fallback ("done" → done, "indeterminate" → + in_progress, "new" → todo) + 3. Final default "todo" with WARN log (visible in pipeline_events) """ mapping = {**DEFAULT_STATUS_MAPPING} if status_mapping: mapping.update({k.lower(): v for k, v in status_mapping.items()}) - normalized = mapping.get(raw_status.lower().strip()) + key = raw_status.lower().strip() + normalized = mapping.get(key) if normalized: return normalized - logger.warning("Unknown status '%s' — defaulting to 'todo'", raw_status) + # FDD-OPS-017 — fall back to Jira's own statusCategory before defaulting + # to "todo". This is the safety net for the long tail of tenant-custom + # workflow states (104 distinct statuses observed in Webmotors alone). + if status_category: + cat = status_category.lower().strip() + if cat == "done": + return "done" + if cat == "indeterminate": + # Active work. We can't distinguish in_progress vs in_review at + # this level — that's intentional, since `_ACTIVE_STATUSES` + # treats both equivalently for WIP/Cycle Time. Operators who + # want the finer split must add the status to DEFAULT_STATUS_MAPPING. + return "in_progress" + if cat == "new": + return "todo" + + logger.warning( + "Unknown status %r (no textual mapping, no statusCategory) " + "— defaulting to 'todo'", + raw_status, + ) return "todo" def build_status_transitions( changelogs: list[dict[str, Any]], status_mapping: dict[str, str] | None = None, + status_categories_map: dict[str, str] | None = None, ) -> list[dict[str, Any]]: """Convert DevLake issue_changelogs into PULSE status_transitions JSONB. @@ -200,6 +335,11 @@ def build_status_transitions( changelogs: Sorted list of changelog dicts with keys: from_status, to_status, created_date status_mapping: Optional custom mapping for normalization. + status_categories_map: FDD-OPS-017 — name→category dict (lowercased + keys) from the Jira connector. Lets each historical to_status + fall back to its statusCategory when not in the textual mapping. + Without this, a status no longer in active Jira workflows + (legacy / archived) defaults to "todo" → bogus Cycle Time. 
Returns: List of transition dicts: @@ -208,11 +348,13 @@ def build_status_transitions( if not changelogs: return [] + cats = status_categories_map or {} transitions: list[dict[str, Any]] = [] for i, cl in enumerate(changelogs): entered_at = _parse_datetime(cl["created_date"]) to_status_raw = cl.get("to_status", "") - normalized = normalize_status(to_status_raw, status_mapping) + cat = cats.get(to_status_raw.strip().lower()) + normalized = normalize_status(to_status_raw, status_mapping, cat) # exited_at is the entered_at of the next transition, or None if current exited_at = None @@ -283,8 +425,8 @@ def normalize_pull_request( "tenant_id": tenant_id, "source": source, "repo": repo, - "title": devlake_pr.get("title", ""), - "author": devlake_pr.get("author_name", "unknown"), + "title": _strip_null_bytes(devlake_pr.get("title", "")), + "author": _strip_null_bytes(devlake_pr.get("author_name", "unknown")), "state": state, "is_merged": is_merged, "first_commit_at": first_commit_at, # INC-003: real authored_date when enriched @@ -321,7 +463,11 @@ def normalize_issue( Dict matching EngIssue model columns. """ raw_status = devlake_issue.get("original_status") or devlake_issue.get("status", "") - normalized = normalize_status(raw_status, status_mapping) + # FDD-OPS-017 — pull Jira's authoritative category from the connector + # so the normalizer can fall back to it when textual mapping misses. + status_category = devlake_issue.get("status_category") + status_categories_map = devlake_issue.get("status_categories_map") or {} + normalized = normalize_status(raw_status, status_mapping, status_category) issue_key = devlake_issue.get("issue_key", "") project_key = _extract_project_key(issue_key, devlake_issue.get("url")) @@ -330,7 +476,9 @@ def normalize_issue( resolution_date = _parse_datetime(devlake_issue.get("resolution_date")) # Build status transitions from changelog data (populated by Jira plugin) - transitions = build_status_transitions(changelogs or [], status_mapping) + transitions = build_status_transitions( + changelogs or [], status_mapping, status_categories_map, + ) # Derive started_at from first transition to an active state started_at = None @@ -370,18 +518,23 @@ def normalize_issue( else None ) + # Strip NULL bytes (0x00) from any text field. Postgres `text`/`varchar` + # rejects them with `CharacterNotInRepertoireError: invalid byte sequence + # for encoding "UTF8": 0x00`. Real-world Jira data has them — observed + # 2026-04-28 in ENO-3296 description (likely paste from buggy source). + # Without this, a single bad row breaks the whole batch upsert. return { "external_id": str(devlake_issue["id"]), "tenant_id": tenant_id, "source": _detect_source(devlake_issue), "project_key": project_key, "issue_key": (issue_key or None), - "title": devlake_issue.get("title", ""), - "description": description, + "title": _strip_null_bytes(devlake_issue.get("title", "")), + "description": _strip_null_bytes(description), "issue_type": issue_type, "status": raw_status, "normalized_status": normalized, - "assignee": devlake_issue.get("assignee_name"), + "assignee": _strip_null_bytes(devlake_issue.get("assignee_name")), "story_points": devlake_issue.get("story_point"), "sprint_id": sprint_id, "status_transitions": transitions, @@ -392,6 +545,19 @@ def normalize_issue( } +def _strip_null_bytes(value: Any) -> Any: + """Remove NULL bytes (0x00) from a string. Pass-through for non-strings. + + Postgres rejects 0x00 in `text`/`varchar` with + `CharacterNotInRepertoireError`. 
Real-world Jira data sometimes contains + them (copy-paste from binary sources, malformed encoding upstream). + Stripping is the conservative choice — preserves all readable content. + """ + if isinstance(value, str) and "\x00" in value: + return value.replace("\x00", "") + return value + + def normalize_deployment( devlake_deploy: dict[str, Any], tenant_id: UUID, @@ -507,9 +673,19 @@ def normalize_sprint( "source": _detect_source(devlake_sprint), "name": devlake_sprint.get("name", ""), "board_id": str(devlake_sprint.get("original_board_id", "")), + # FDD-OPS-018 — sprint lifecycle status, lowercase to match the + # convention used elsewhere in PULSE (`normalized_status`, + # `issue_type`, etc.). The connector emits ACTIVE/CLOSED/FUTURE; + # we normalize here so consumers can rely on a stable casing. + # Was previously DROPPED entirely → all 216 Webmotors sprints + # landed with status='' in eng_sprints, breaking any future + # filter for "active sprint" / "completed sprints in quarter". + "status": _normalize_sprint_status(devlake_sprint.get("status")), + # FDD-OPS-018 — sprint goal text (set by squad lead in Jira). Was + # hardcoded None; now passed through from the connector. + "goal": _strip_null_bytes(devlake_sprint.get("goal")), "started_at": started_date, "completed_at": ended_date, - "goal": None, # Not in DevLake domain table "committed_items": committed_items, "committed_points": committed_points, "added_items": 0, # Requires tracking scope changes over time @@ -522,6 +698,39 @@ def normalize_sprint( } +# Sprint lifecycle states accepted by `_normalize_sprint_status`. Anything +# else falls through to None (better than guessing) — operators see NULLs +# in eng_sprints.status and can investigate. +_SPRINT_STATUS_ALIASES: dict[str, str] = { + "active": "active", + "closed": "closed", + "future": "future", + # Common aliases observed across Jira variants + "open": "active", + "in_progress": "active", + "completed": "closed", + "complete": "closed", + "ended": "closed", + "planned": "future", + "upcoming": "future", +} + + +def _normalize_sprint_status(raw: Any) -> str | None: + """Map a sprint state string to one of: active | closed | future | None. + + Lowercased; whitespace stripped. Unknown values return None — we don't + silently bucket them into one of the known states, since Sprint Velocity + / Carryover logic relies on knowing which sprints are actually closed. + """ + if not isinstance(raw, str): + return None + key = raw.strip().lower() + if not key: + return None + return _SPRINT_STATUS_ALIASES.get(key) + + def build_issue_key_map( issue_rows: list[tuple[str | None, str]], ) -> dict[str, str]: diff --git a/pulse/packages/pulse-data/src/contexts/pipeline/models.py b/pulse/packages/pulse-data/src/contexts/pipeline/models.py index 550f907..c528fee 100644 --- a/pulse/packages/pulse-data/src/contexts/pipeline/models.py +++ b/pulse/packages/pulse-data/src/contexts/pipeline/models.py @@ -19,20 +19,44 @@ class PipelineWatermark(TenantModel): - """Stores sync watermarks per entity type for incremental sync. + """Stores sync watermarks per (tenant, entity, scope) for incremental sync. Replaces the in-memory _WATERMARKS dict with persistent DB storage, so watermarks survive worker restarts and scale across replicas. + + FDD-OPS-014 (migration 010): added `scope_key` so a single entity_type + can have multiple scopes. 
E.g.: + scope_key='*' → legacy global (one row, all sources) + scope_key='jira:project:BG' → Jira project BG + scope_key='github:repo:foo/bar' → specific GitHub repo + scope_key='jenkins:job:deploy-X'→ specific Jenkins job + + The legacy `uq_watermark_entity` constraint could not coexist with the + new `uq_watermark_entity_scope` UNIQUE and was dropped in migration 011 + (it blocked every per-scope insert; see the __table_args__ note below). """ __tablename__ = "pipeline_watermarks" __table_args__ = ( - UniqueConstraint("tenant_id", "entity_type", name="uq_watermark_entity"), + # Per-scope constraint (active from migration 010 onward). + # Legacy uq_watermark_entity (without scope_key) was dropped in + # migration 011 — Postgres enforces all UniqueConstraints on every + # INSERT, so "harmless coexistence" was impossible: legacy blocked + # any per-scope insert because the (tenant, entity) tuple already + # existed via the '*' row. Discovered immediately after Phase 2-A + # deployment. + UniqueConstraint( + "tenant_id", "entity_type", "scope_key", + name="uq_watermark_entity_scope", + ), ) entity_type: Mapped[str] = mapped_column( String(64), nullable=False, ) # pull_requests | issues | deployments | sprints + scope_key: Mapped[str] = mapped_column( + String(255), nullable=False, server_default="*", + ) # see class docstring for format last_synced_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), nullable=False, ) diff --git a/pulse/packages/pulse-data/src/workers/devlake_sync.py b/pulse/packages/pulse-data/src/workers/devlake_sync.py index c3902f1..5ffb4f1 100644 --- a/pulse/packages/pulse-data/src/workers/devlake_sync.py +++ b/pulse/packages/pulse-data/src/workers/devlake_sync.py @@ -66,15 +66,81 @@ logger = logging.getLogger(__name__) +# --------------------------------------------------------------------------- +# Changelog helpers +# --------------------------------------------------------------------------- + +def extract_status_transitions_inline(raw_issue: dict[str, Any]) -> list[dict[str, Any]]: + """Extract status transitions from a Jira issue's INLINE changelog. + + FDD-OPS-013 — replaces the previous round-trip to + `fetch_issue_changelogs(issue_ids)` which made one HTTP GET per issue. + The JQL search uses `expand=changelog`, so the changelog is already + present in the response payload. + + Always returns a list (possibly empty for issues with no status changes + in their history). The empty-list case is what fixed the 24h hang in + production: previously the cache lookup on `_last_changelogs` skipped + entries with empty transitions, causing a downstream cache-miss that + triggered the redundant individual GET. + + Output shape mirrors `JiraConnector._extract_changelogs` so that + `normalize_issue(..., changelogs=...)` doesn't need to change. + """ + issue_id = str(raw_issue["id"]) + transitions: list[dict[str, Any]] = [] + for history in raw_issue.get("changelog", {}).get("histories", []): + created = history.get("created") + for item in history.get("items", []): + if item.get("field", "").lower() == "status": + transitions.append({ + "issue_id": issue_id, + "from_status": item.get("fromString", ""), + "to_status": item.get("toString", ""), + "created_date": created, + }) + transitions.sort(key=lambda t: t.get("created_date") or "") + return transitions + + # --------------------------------------------------------------------------- # Watermark helpers — persistent DB storage via pipeline_watermarks +# +# FDD-OPS-014 (migration 010): watermarks are keyed by (tenant, entity, scope).
+# `scope_key='*'` is the legacy "global" key — kept as default for backwards +# compatibility during the rollout. Per-source workers (steps 2.3-2.5) will +# pass explicit scope_keys like 'jira:project:BG' or 'github:repo:foo/bar'. # --------------------------------------------------------------------------- -async def _get_watermark(session, tenant_id: UUID, entity: str) -> datetime | None: - """Get the last sync timestamp for an entity type from the DB.""" +# Scope-key conventions (free-form string per Q2 of phase-2 plan, but helpers +# enforce shape). Format: '::'. +GLOBAL_SCOPE = "*" + + +def make_scope_key(source: str, dimension: str, value: str) -> str: + """Build a canonical scope_key. Convention enforced via helper, not DB. + + Examples: + make_scope_key("jira", "project", "BG") -> "jira:project:BG" + make_scope_key("github", "repo", "foo/bar") -> "github:repo:foo/bar" + """ + return f"{source}:{dimension}:{value}" + + +async def _get_watermark( + session, tenant_id: UUID, entity: str, scope_key: str = GLOBAL_SCOPE, +) -> datetime | None: + """Get the last sync timestamp for (entity_type, scope_key) from the DB. + + Default scope_key='*' preserves legacy callers (one global row per + entity_type). Per-source workers pass an explicit scope_key. + """ result = await session.execute( select(PipelineWatermark.last_synced_at) - .where(PipelineWatermark.entity_type == entity) + .where( + PipelineWatermark.entity_type == entity, + PipelineWatermark.scope_key == scope_key, + ) ) row = result.scalar_one_or_none() return row @@ -82,19 +148,24 @@ async def _get_watermark(session, tenant_id: UUID, entity: str) -> datetime | No async def _set_watermark( session, tenant_id: UUID, entity: str, ts: datetime, count: int, + scope_key: str = GLOBAL_SCOPE, ) -> None: - """Upsert the watermark for an entity type using ON CONFLICT.""" + """Upsert the watermark for (entity_type, scope_key) using ON CONFLICT. + + Default scope_key='*' upserts the legacy global row. + """ stmt = ( pg_insert(PipelineWatermark) .values( id=uuid.uuid4(), tenant_id=tenant_id, entity_type=entity, + scope_key=scope_key, last_synced_at=ts, records_synced=count, ) .on_conflict_do_update( - constraint="uq_watermark_entity", + constraint="uq_watermark_entity_scope", set_={ "last_synced_at": ts, "records_synced": count, @@ -103,7 +174,33 @@ async def _set_watermark( ) ) await session.execute(stmt) - logger.debug("Updated watermark for %s to %s (count=%d)", entity, ts, count) + logger.debug( + "Updated watermark for %s/%s to %s (count=%d)", + entity, scope_key, ts, count, + ) + + +async def _list_watermarks_by_scope( + session, tenant_id: UUID, entity: str, scope_keys: list[str], +) -> dict[str, datetime | None]: + """Bulk-fetch watermarks for a list of scopes. Returns {scope_key: ts}. + + Missing scopes return None (no watermark = full backfill on first sync). + Used by per-source workers (Phase 2 step 2.3+) to feed + `since_by_project={...}` into batched fetchers. + """ + if not scope_keys: + return {} + + result = await session.execute( + select(PipelineWatermark.scope_key, PipelineWatermark.last_synced_at) + .where( + PipelineWatermark.entity_type == entity, + PipelineWatermark.scope_key.in_(scope_keys), + ) + ) + found = {row[0]: row[1] for row in result.all()} + return {scope: found.get(scope) for scope in scope_keys} # --------------------------------------------------------------------------- @@ -428,11 +525,52 @@ async def _sync_pull_requests(self) -> int: published to Kafka immediately — no accumulation in memory. 
+ If the process crashes mid-sync, all previously persisted repos are safe. + + FDD-OPS-014 step 2.4-B: PER-REPO watermarks now READ + WRITTEN. + Each repo has scope_key='github:repo:<owner>/<name>'. Adding a new + repo = backfill ONLY that scope. Existing repos continue from their + own last_synced_at, not the global '*' value. + + The global '*' watermark is still updated at end-of-cycle for any + remaining legacy reads (Pipeline Monitor UI etc.). Migration 011 + already dropped the legacy unique constraint that conflicted with + per-scope inserts. + Progress is tracked in pipeline_ingestion_progress for real-time visibility in the Pipeline Monitor dashboard. """ + # Load ALL existing per-repo watermarks for pull_requests. We don't + # know which repos the connector will emit yet, so fetch the full + # set keyed by scope_key. The connector will look up each repo's + # since via since_by_repo[repo] (None = backfill on first sync). async with get_session(self._tenant_id) as session: - since = await _get_watermark(session, self._tenant_id, "pull_requests") + global_since = await _get_watermark( + session, self._tenant_id, "pull_requests", + ) + # Returns rows where scope_key starts with 'github:repo:'. + from sqlalchemy import select as _select + result = await session.execute( + _select( + PipelineWatermark.scope_key, + PipelineWatermark.last_synced_at, + ).where( + PipelineWatermark.entity_type == "pull_requests", + PipelineWatermark.scope_key.like("github:repo:%"), + ) + ) + since_by_repo: dict[str, datetime | None] = {} + for scope_key_str, last_synced in result.all(): + # 'github:repo:owner/name' → 'owner/name' + repo = scope_key_str[len("github:repo:"):] + since_by_repo[repo] = last_synced + + logger.info( + "[prs] watermark plan: %d repos with per-scope rows, global '*' fallback=%s", + len(since_by_repo), + global_since.isoformat() if global_since else "None (full backfill)", + ) + # Pass single fallback for compatibility — repos not in + # since_by_repo (newly discovered) inherit it. + since = global_since # Build issue-key lookup for PR linking. Loading all issue external_ids # from the tenant is cheap (~30k strings) and lets us link each batch @@ -473,7 +611,10 @@ repos_done = 0 try: - async for repo_name, raw_prs in self._reader.fetch_pull_requests_batched(since=since): + async for repo_name, raw_prs in self._reader.fetch_pull_requests_batched( + since=since, + since_by_repo=since_by_repo, + ): # "Starting" signal: connector emits (repo_name, None) before # any API calls so the UI can show progress immediately. if raw_prs is None: @@ -527,6 +668,19 @@ events.append((str(pr["external_id"]), event)) await publish_batch(self._producer, TOPIC_PR_NORMALIZED, events) + # FDD-OPS-014 step 2.4: advance this repo's scope watermark. + # Reads already honor these rows through the since_by_repo dict + # loaded at the top of this method; the global '*' row stays as + # the fallback for repos without a per-scope entry yet.
+ if batch_count > 0: + repo_scope = make_scope_key("github", "repo", repo_name) + async with get_session(self._tenant_id) as session: + await _set_watermark( + session, self._tenant_id, "pull_requests", + started_at, batch_count, + scope_key=repo_scope, + ) + # Update progress in DB (queryable by API) await _update_ingestion_progress( self._tenant_id, "pull_requests", @@ -580,92 +734,293 @@ async def _sync_pull_requests(self) -> int: return total_count async def _sync_issues(self) -> int: - """Read issues from source connectors, upsert to PULSE DB, publish to Kafka.""" - async with get_session(self._tenant_id) as session: - since = await _get_watermark(session, self._tenant_id, "issues") - - # Resolve project keys via dynamic discovery or env var fallback - project_keys: list[str] | None = None + """Stream issues from Jira PER PROJECT, persisting each batch immediately. + + FDD-OPS-012 — replaces the previous bulk-fetch-then-persist pattern + (everything in RAM until JQL pagination + ALL changelog HTTP calls + complete, then single upsert) with per-page streaming. Mirrors the + pattern that PRs adopted in commit 7f9f339. + + FDD-OPS-014 step 2.3 — uses PER-PROJECT watermarks. Each project has + its own scope_key='jira:project:' row in pipeline_watermarks. + Adding a new project = backfill ONLY that scope. Per-project failures + don't reset other projects' watermarks. The legacy global '*' + watermark is also updated at end-of-cycle for backwards compat. + + Properties: + - Time-to-first-row: < 10s + - Memory: ~one page in flight, not all-projects + - Crash recovery: lose ≤ 1 batch of work + - Per-project incremental sync: only fetch new since last project run + """ + # Resolve project keys via dynamic discovery (kill-switch via env var). + # No fallback to a static env var list — that path was deprecated when + # we landed discovery-only (ingestion-spec §2.3). Empty list = nothing + # to sync this cycle. + project_keys: list[str] = [] if settings.dynamic_jira_discovery_enabled: try: async with get_session(self._tenant_id) as session: resolver = ModeResolver(session) project_keys = await resolver.resolve_active_projects(self._tenant_id) logger.info( - "Dynamic discovery resolved %d Jira projects for tenant %s", + "[issues] resolved %d active Jira projects for tenant %s", len(project_keys), self._tenant_id, ) except Exception: logger.exception( - "ModeResolver failed for tenant %s, falling back to env var", + "[issues] ModeResolver failed for tenant %s — skipping cycle", self._tenant_id, ) - project_keys = None - - fetch_kwargs: dict[str, Any] = {"since": since} - if project_keys is not None: - fetch_kwargs["project_keys"] = project_keys - raw_issues = await self._reader.fetch_issues(**fetch_kwargs) - if not raw_issues: - logger.info("No new issues to sync") + return 0 + + if not project_keys: + logger.info("[issues] no active projects, nothing to sync") return 0 - # Fetch status changelogs for all issues in this batch (Jira only) - issue_ids = [str(raw["id"]) for raw in raw_issues] - changelogs_by_issue = await self._reader.fetch_issue_changelogs(issue_ids) + # FDD-OPS-014 step 2.3: load per-project watermarks (scope_key per + # project). Missing rows return None = full backfill for that scope. 
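+        # Resulting plan (illustrative): {"ENO": <last cycle's timestamp>,
+        # "DESC": None} means ENO syncs incrementally while DESC gets a
+        # full backfill.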
+ project_scopes = [ + make_scope_key("jira", "project", pk) for pk in project_keys + ] + async with get_session(self._tenant_id) as session: + scope_to_wm = await _list_watermarks_by_scope( + session, self._tenant_id, "issues", project_scopes, + ) + since_by_project: dict[str, datetime | None] = { + pk: scope_to_wm[make_scope_key("jira", "project", pk)] + for pk in project_keys + } + + # Log which projects need backfill vs which have an existing watermark + backfill_count = sum(1 for v in since_by_project.values() if v is None) + incremental_count = len(project_keys) - backfill_count + logger.info( + "[issues] watermark plan: %d projects backfill (no scope), " + "%d projects incremental", + backfill_count, incremental_count, + ) - # Normalize - normalized = [] - for raw in raw_issues: - try: - issue_id = str(raw["id"]) - issue_changelogs = changelogs_by_issue.get(issue_id, []) - issue_data = normalize_issue( - raw, - self._tenant_id, - self._status_mapping, - changelogs=issue_changelogs, + # FDD-OPS-015 lite: pre-flight progress signal so operators see the + # scope BEFORE we start hammering the API. + started_at = datetime.now(timezone.utc) + await _update_ingestion_progress( + self._tenant_id, "issues", + status="running", + total_sources=len(project_keys), + sources_done=0, + records_ingested=0, + current_source=None, + started_at=started_at, + ) + + total_count = 0 + projects_done: set[str] = set() + current_project: str | None = None + per_project_count: dict[str, int] = {pk: 0 for pk in project_keys} + + async def _advance_project_watermark(project_key: str) -> None: + """Update watermark for `jira:project:` after that project finishes. + + Only advances when count > 0 — empty syncs (incremental with no + changes) leave the watermark unchanged so a subsequent failed + cycle doesn't accidentally claim "synced through now()". + """ + count_for_project = per_project_count.get(project_key, 0) + if count_for_project == 0: + return + scope_key = make_scope_key("jira", "project", project_key) + async with get_session(self._tenant_id) as session: + await _set_watermark( + session, self._tenant_id, "issues", + started_at, count_for_project, scope_key=scope_key, ) - normalized.append(issue_data) - except Exception: - logger.exception("Error normalizing issue: %s", raw.get("id")) + logger.info( + "[issues] watermark advanced: %s → %s (%d issues this cycle)", + scope_key, started_at.isoformat(), count_for_project, + ) - # Upsert to PULSE DB - count = await self._upsert_issues(normalized) + try: + async for project_key, raw_batch in self._reader.fetch_issues_batched( + project_keys=project_keys, + since_by_project=since_by_project, + ): + # Project change marker for ingestion progress + watermark advance + if project_key != current_project: + if current_project is not None: + # Previous project finished — advance its scope watermark + await _advance_project_watermark(current_project) + projects_done.add(current_project) + current_project = project_key + await _update_ingestion_progress( + self._tenant_id, "issues", + status="running", + sources_done=len(projects_done), + records_ingested=total_count, + current_source=project_key, + ) - # Publish to Kafka - events = [] - for issue in normalized: - events.append((str(issue["external_id"]), issue)) - await publish_batch(self._producer, TOPIC_ISSUE_NORMALIZED, events) + # FDD-OPS-013: changelogs are INLINE from JQL expand=changelog. + # No extra HTTP round-trip per issue. 
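+                # Shape consumed below (illustrative): raw["changelog"]["histories"]
+                # entries whose items include {"field": "status",
+                # "fromString": "Em Desenv", "toString": "Em Teste"} are what
+                # extract_status_transitions_inline() pulls out.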
+ normalized: list[dict[str, Any]] = [] + for raw in raw_batch: + try: + issue_changelogs = extract_status_transitions_inline(raw) + issue_data = normalize_issue( + raw, + self._tenant_id, + self._status_mapping, + changelogs=issue_changelogs, + ) + normalized.append(issue_data) + except Exception: + logger.exception( + "[issues] normalize error in project %s: id=%s", + project_key, raw.get("id"), + ) - # Update watermark in DB - async with get_session(self._tenant_id) as session: - await _set_watermark( - session, self._tenant_id, "issues", - datetime.now(timezone.utc), count, + if not normalized: + continue + + # Persist this batch immediately (FDD-OPS-012) + batch_count = await self._upsert_issues(normalized) + total_count += batch_count + per_project_count[project_key] = per_project_count.get(project_key, 0) + batch_count + + # Emit Kafka events for this batch only + events = [ + (str(issue["external_id"]), issue) + for issue in normalized + ] + await publish_batch( + self._producer, TOPIC_ISSUE_NORMALIZED, events, + ) + + # Per-batch progress update (operator can grep the log to + # confirm forward progress) + logger.info( + "[issues] batch persisted: %s +%d (project total: %d, " + "tenant total: %d)", + project_key, batch_count, + per_project_count[project_key], total_count, + ) + + await _update_ingestion_progress( + self._tenant_id, "issues", + records_ingested=total_count, + current_source=project_key, + ) + + # Final project after the loop: advance its watermark + mark done + if current_project is not None: + await _advance_project_watermark(current_project) + projects_done.add(current_project) + + logger.info( + "[issues] sync complete: %d issues across %d projects " + "(per-project counts: %s)", + total_count, len(projects_done), + {k: v for k, v in per_project_count.items() if v > 0}, ) - # Record sync outcome per project for guardrails (dynamic discovery only) - if settings.dynamic_jira_discovery_enabled and project_keys: - try: + # Update legacy global '*' watermark for backwards compat. Some + # monitoring queries / Pipeline Monitor still read by entity + # without scope. Migration 011 (FDD-OPS-014 step 2.7) will drop + # the legacy unique constraint after a successful per-source + # cycle; until then both keep updating. 
+ if total_count > 0: async with get_session(self._tenant_id) as session: - guardrails = Guardrails(session) - for pk in project_keys: - await guardrails.record_sync_outcome( - self._tenant_id, pk, success=True, - ) - except Exception: - logger.exception("Failed to record sync outcomes for guardrails") + await _set_watermark( + session, self._tenant_id, "issues", + started_at, total_count, + # default scope_key='*' — legacy global row + ) - return count + # Record per-project sync outcome for guardrails (success only — + # batches that errored mid-stream are logged but don't block) + if settings.dynamic_jira_discovery_enabled and projects_done: + try: + async with get_session(self._tenant_id) as session: + guardrails = Guardrails(session) + for pk in projects_done: + await guardrails.record_sync_outcome( + self._tenant_id, pk, success=True, + ) + except Exception: + logger.exception( + "[issues] failed to record guardrail outcomes", + ) + + await _update_ingestion_progress( + self._tenant_id, "issues", + status="completed", + sources_done=len(projects_done), + records_ingested=total_count, + current_source=None, + finished_at=datetime.now(timezone.utc), + ) + + except Exception as exc: + await _update_ingestion_progress( + self._tenant_id, "issues", + status="failed", + sources_done=len(projects_done), + records_ingested=total_count, + current_source=current_project, + finished_at=datetime.now(timezone.utc), + error_message=str(exc)[:500], + ) + logger.exception("[issues] sync cycle failed") + raise + + return total_count async def _sync_deployments(self) -> int: - """Read deployments from source connectors, upsert to PULSE DB, publish to Kafka.""" + """Read deployments from source connectors, upsert to PULSE DB, publish to Kafka. + + FDD-OPS-014 step 2.5 — writes per-repo scope watermarks alongside + the legacy global '*' row. Per-repo READ + per-job streaming are + follow-ups; this commit accumulates the rows so they're available + when the connector refactor lands. + + Granularity choice (Q2 of phase-2-plan): repo-level scope rather + than per-job. Volume is low (~1.4k deploys at Webmotors scale); the + repo dimension matches the cross-source linking model (PR↔deploy + is by repo+sha) and avoids an explosion of scope rows for + ephemeral Jenkins jobs. + """ + started_at = datetime.now(timezone.utc) + # FDD-OPS-014 step 2.5-B: read per-repo watermarks for deployments. + # Pre-load all rows where scope_key starts with 'jenkins:repo:' so + # the connector can resolve each job's `since` via job→repo mapping. 
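+        # Illustrative resolution path (the repo name here is hypothetical):
+        #   Jenkins job "PI-Money/money-prd" → repo "acme/money" via the
+        #   job→repo mapping; since_by_repo["acme/money"] then overrides the
+        #   global '*' fallback read below for that job's fetch window.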
async with get_session(self._tenant_id) as session: since = await _get_watermark(session, self._tenant_id, "deployments") + from sqlalchemy import select as _select + result = await session.execute( + _select( + PipelineWatermark.scope_key, + PipelineWatermark.last_synced_at, + ).where( + PipelineWatermark.entity_type == "deployments", + PipelineWatermark.scope_key.like("jenkins:repo:%"), + ) + ) + since_by_repo: dict[str, datetime | None] = {} + for scope_key_str, last_synced in result.all(): + # 'jenkins:repo:owner/name' → 'owner/name' + repo = scope_key_str[len("jenkins:repo:"):] + since_by_repo[repo] = last_synced + + logger.info( + "[deployments] watermark plan: %d repos with per-scope rows, " + "global '*' fallback=%s", + len(since_by_repo), + since.isoformat() if since else "None (full backfill)", + ) - raw_deployments = await self._reader.fetch_deployments(since=since) + raw_deployments = await self._reader.fetch_deployments( + since=since, since_by_repo=since_by_repo, + ) if not raw_deployments: logger.info("No new deployments to sync") return 0 @@ -679,9 +1034,31 @@ async def _sync_deployments(self) -> int: except Exception: logger.exception("Error normalizing deployment: %s", raw.get("id")) + # Group per repo to track per-scope counts for watermark writes. + per_repo_count: dict[str, int] = {} + for d in normalized: + repo = d.get("repo") or "unknown" + per_repo_count[repo] = per_repo_count.get(repo, 0) + 1 + # Upsert to PULSE DB count = await self._upsert_deployments(normalized) + # FDD-OPS-014 step 2.5: advance per-repo deploy watermarks. Reads + # still use global '*' until the fetcher refactor lands. + async with get_session(self._tenant_id) as session: + for repo, repo_count in per_repo_count.items(): + if repo_count == 0: + continue + repo_scope = make_scope_key("jenkins", "repo", repo) + await _set_watermark( + session, self._tenant_id, "deployments", + started_at, repo_count, scope_key=repo_scope, + ) + logger.info( + "[deployments] advanced %d per-repo watermarks (jenkins:repo:*)", + len([c for c in per_repo_count.values() if c > 0]), + ) + # INC-004 — forward-path linker: bind newly ingested deploys back to # any merged PRs in the same repo that were still missing # `deployed_at`. Scoped to the min deployed_at in this batch so the @@ -877,6 +1254,12 @@ async def _upsert_sprints(self, sprints: list[dict[str, Any]]) -> int: index_elements=["tenant_id", "external_id"], set_={ "name": sprint_data["name"], + # FDD-OPS-018 — status + goal were missing from + # this ON CONFLICT set, so existing sprints kept + # their stale (empty) status forever. Active + # sprints transitioning to closed never updated. + "status": sprint_data.get("status"), + "goal": sprint_data.get("goal"), "started_at": sprint_data["started_at"], "completed_at": sprint_data["completed_at"], "committed_items": sprint_data["committed_items"], diff --git a/pulse/packages/pulse-data/tests/unit/test_effort_fallback_chain.py b/pulse/packages/pulse-data/tests/unit/test_effort_fallback_chain.py new file mode 100644 index 0000000..11a8de2 --- /dev/null +++ b/pulse/packages/pulse-data/tests/unit/test_effort_fallback_chain.py @@ -0,0 +1,224 @@ +"""Regression tests for FDD-OPS-016 — effort estimation fallback chain. + +Webmotors and many enterprise tenants don't use Story Points. Different +squads use T-shirt sizes (P/M/G…), original estimate hours, or simply +don't estimate. 
The connector's `_extract_story_points` walks a priority +chain so downstream metrics get a usable number when one exists, and +None when the issue is genuinely unestimated. + +These tests exercise the chain end-to-end against Jira-shaped payloads. +If a future refactor reorders the chain or drops a fallback, multiple +tests fail with messages naming the broken hop. +""" + +from __future__ import annotations + +import pytest + +from src.connectors.jira_connector import ( + TSHIRT_TO_POINTS, + JiraConnector, + _hours_to_points, +) + + +@pytest.fixture +def connector() -> JiraConnector: + """A connector instance with effort discovery already populated. + + We bypass __init__ so tests don't hit env vars / the network. + """ + c = JiraConnector.__new__(JiraConnector) + c._connection_id = 1 + c._base_url = "https://example.atlassian.net" + c._sprint_field_id = None + c._story_points_field_id = "customfield_10004" + c._tshirt_field_ids = ["customfield_18762", "customfield_15100"] + c._custom_fields_discovered = True + c._effort_source_counts = {} + return c + + +# --------------------------------------------------------------------------- +# 1. Native Story Points — highest priority +# --------------------------------------------------------------------------- + +class TestStoryPointsTakesPriority: + def test_uses_discovered_story_points_field_when_set(self, connector): + result = connector._extract_story_points({"customfield_10004": 5}) + assert result == 5.0 + assert connector._effort_source_counts == {"story_points": 1} + + def test_skips_zero_story_points_and_falls_through(self, connector): + """0 SP is a common sentinel for "not yet estimated" — skip it.""" + result = connector._extract_story_points({ + "customfield_10004": 0, + "customfield_18762": {"value": "P"}, + }) + assert result == TSHIRT_TO_POINTS["P"] + assert connector._effort_source_counts == {"tshirt_to_sp": 1} + + def test_native_sp_wins_over_tshirt(self, connector): + result = connector._extract_story_points({ + "customfield_10004": 8, + "customfield_18762": {"value": "P"}, # would map to 2 + "timeoriginalestimate": 14400, # would map via hours + }) + assert result == 8.0 + + +# --------------------------------------------------------------------------- +# 2. 
T-shirt sizing — second priority +# --------------------------------------------------------------------------- + +class TestTshirtSizing: + @pytest.mark.parametrize( + "size,expected", + [("PP", 1.0), ("P", 2.0), ("M", 3.0), ("G", 5.0), ("GG", 8.0), ("GGG", 13.0)], + ) + def test_portuguese_sizes_map_correctly(self, connector, size, expected): + result = connector._extract_story_points({ + "customfield_18762": {"value": size}, + }) + assert result == expected + + @pytest.mark.parametrize( + "size,expected", + [("XS", 1.0), ("S", 2.0), ("M", 3.0), ("L", 5.0), ("XL", 8.0), ("XXL", 13.0)], + ) + def test_english_sizes_map_correctly(self, connector, size, expected): + result = connector._extract_story_points({ + "customfield_18762": {"value": size}, + }) + assert result == expected + + def test_lowercase_size_is_normalized(self, connector): + """Be lenient: Jira sometimes returns 'p' instead of 'P'.""" + result = connector._extract_story_points({ + "customfield_18762": {"value": "p"}, + }) + assert result == TSHIRT_TO_POINTS["P"] + + def test_unknown_size_falls_through_to_hours(self, connector): + result = connector._extract_story_points({ + "customfield_18762": {"value": "JUMBO"}, + "timeoriginalestimate": 28800, # 8h → 2 SP + }) + assert result == 2.0 + assert connector._effort_source_counts == {"hours_to_sp": 1} + + def test_secondary_tshirt_field_used_when_first_empty(self, connector): + """Tamanho/Impacto picks up where T-Shirt Size is empty.""" + result = connector._extract_story_points({ + "customfield_18762": None, + "customfield_15100": {"value": "G"}, + }) + assert result == TSHIRT_TO_POINTS["G"] + + def test_bare_string_option_value(self, connector): + """Some legacy responses give a string directly, not a dict.""" + result = connector._extract_story_points({ + "customfield_18762": "M", + }) + assert result == TSHIRT_TO_POINTS["M"] + + +# --------------------------------------------------------------------------- +# 3. Original Estimate (hours) — third priority +# --------------------------------------------------------------------------- + +class TestOriginalEstimateHours: + @pytest.mark.parametrize( + "seconds,expected_hours,expected_sp", + [ + (3600, 1.0, 1.0), # ≤4h + (14400, 4.0, 1.0), # exactly 4h + (28800, 8.0, 2.0), # ≤8h (1 day) + (57600, 16.0, 3.0), # ≤16h (2 days) + (86400, 24.0, 5.0), # ≤24h + (115200, 32.0, 8.0), # ≤40h + (288000, 80.0, 13.0), # ≤80h (2 weeks) + (446400, 124.0, 21.0), # >80h — observed Webmotors max + ], + ) + def test_seconds_to_sp_buckets( + self, connector, seconds, expected_hours, expected_sp, + ): + # Direct check of the helper for clarity + assert _hours_to_points(expected_hours) == expected_sp + # End-to-end: connector picks up timeoriginalestimate + result = connector._extract_story_points({ + "timeoriginalestimate": seconds, + }) + assert result == expected_sp + assert connector._effort_source_counts == {"hours_to_sp": 1} + + def test_zero_seconds_falls_through_to_unestimated(self, connector): + result = connector._extract_story_points({"timeoriginalestimate": 0}) + assert result is None + assert connector._effort_source_counts == {"unestimated": 1} + + +# --------------------------------------------------------------------------- +# 4. 
Unestimated — final fallback +# --------------------------------------------------------------------------- + +class TestUnestimatedReturnsNone: + def test_no_fields_returns_none(self, connector): + """Kanban-pure mode: metric layer must count items, not sum SP.""" + result = connector._extract_story_points({}) + assert result is None + assert connector._effort_source_counts == {"unestimated": 1} + + def test_empty_strings_treated_as_missing(self, connector): + result = connector._extract_story_points({ + "customfield_10004": "", + "customfield_18762": {"value": ""}, + "customfield_15100": None, + }) + assert result is None + + def test_telemetry_aggregates_across_calls(self, connector): + """Operators rely on the breakdown log to spot estimation shifts.""" + connector._extract_story_points({"customfield_10004": 5}) + connector._extract_story_points({"customfield_18762": {"value": "M"}}) + connector._extract_story_points({"timeoriginalestimate": 14400}) + connector._extract_story_points({}) + connector._extract_story_points({}) + assert connector._effort_source_counts == { + "story_points": 1, + "tshirt_to_sp": 1, + "hours_to_sp": 1, + "unestimated": 2, + } + + +# --------------------------------------------------------------------------- +# 5. Webmotors-shaped real-world cases +# --------------------------------------------------------------------------- + +class TestWebmotorsShapeIntegration: + """Sanity check against the field combos actually observed in production.""" + + def test_eno_typical_issue(self, connector): + """ENO sample: T-shirt 'P' + 8h original estimate. T-shirt wins.""" + result = connector._extract_story_points({ + "customfield_18762": {"value": "P"}, + "timeoriginalestimate": 28800, + }) + assert result == 2.0 # P → 2 + + def test_desc_typical_issue(self, connector): + """DESC sample: T-shirt 'G' only.""" + result = connector._extract_story_points({ + "customfield_18762": {"value": "G"}, + }) + assert result == 5.0 + + def test_bg_typical_issue(self, connector): + """BG (Kanban-pure): nothing populated — None forces item count.""" + result = connector._extract_story_points({ + "summary": "do the thing", + "status": {"name": "Done"}, + }) + assert result is None diff --git a/pulse/packages/pulse-data/tests/unit/test_inline_changelog_extraction.py b/pulse/packages/pulse-data/tests/unit/test_inline_changelog_extraction.py new file mode 100644 index 0000000..f750171 --- /dev/null +++ b/pulse/packages/pulse-data/tests/unit/test_inline_changelog_extraction.py @@ -0,0 +1,356 @@ +"""Regression tests for FDD-OPS-013 — inline changelog extraction. + +Locks in the contract that `_sync_issues` extracts status transitions from +the JQL response payload (`raw_issue["changelog"]["histories"]`) WITHOUT +making additional HTTP round-trips per issue. + +Why this matters: the previous implementation called +`self._reader.fetch_issue_changelogs(issue_ids)` after `fetch_issues`, +which performed one `GET /issue/{id}?expand=changelog` per issue. For +Webmotors-scale tenants (~376k issues), this took 24+ hours of pure +HTTP latency. After this fix, the same data is extracted from the +already-loaded JQL response in a few milliseconds. + +If a future refactor reintroduces the round-trip pattern, these tests +should fail and force the author to confront the cost. 
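+
+Contract under test, in rough shape (keys are the ones asserted below):
+
+    transitions = extract_status_transitions_inline(raw_issue)
+    # -> [{"issue_id": ..., "from_status": ..., "to_status": ...,
+    #      "created_date": ...}, ...]   (Status-field events only, sorted)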
+""" + +from __future__ import annotations + +import pytest + +from src.workers.devlake_sync import extract_status_transitions_inline + + +# --------------------------------------------------------------------------- +# Fixtures — shape mirrors real Jira JQL `expand=changelog` response +# --------------------------------------------------------------------------- + +def _jira_issue_with_changelog(issue_id: str, histories: list[dict]) -> dict: + """Build a fake Jira JQL response item with inline changelog.""" + return { + "id": issue_id, + "key": f"TEST-{issue_id}", + "fields": {"status": {"name": "In Progress"}}, + "changelog": {"histories": histories}, + } + + +@pytest.fixture +def issue_with_two_status_transitions() -> dict: + """Realistic case: a typical issue moves through To Do → In Progress → Done.""" + return _jira_issue_with_changelog( + issue_id="100200", + histories=[ + { + "created": "2026-01-15T10:00:00.000+0000", + "items": [ + { + "field": "Status", + "fromString": "To Do", + "toString": "In Progress", + }, + ], + }, + { + "created": "2026-01-20T16:30:00.000+0000", + "items": [ + { + "field": "Status", + "fromString": "In Progress", + "toString": "Done", + }, + ], + }, + ], + ) + + +@pytest.fixture +def issue_with_no_changelog() -> dict: + """Edge case: brand-new issue, never moved status. Pre-fix this caused + the cache miss → downstream HTTP call. Now must return [] safely.""" + return _jira_issue_with_changelog(issue_id="100300", histories=[]) + + +@pytest.fixture +def issue_with_mixed_history() -> dict: + """Realistic: changelog has Status changes mixed with non-Status events + (assignee, priority, summary). Only Status events become transitions.""" + return _jira_issue_with_changelog( + issue_id="100400", + histories=[ + { + "created": "2026-02-01T09:00:00.000+0000", + "items": [ + {"field": "Assignee", "fromString": "Alice", "toString": "Bob"}, + ], + }, + { + "created": "2026-02-02T11:00:00.000+0000", + "items": [ + {"field": "Status", "fromString": "To Do", "toString": "In Progress"}, + {"field": "Priority", "fromString": "Medium", "toString": "High"}, + ], + }, + { + "created": "2026-02-03T14:00:00.000+0000", + "items": [ + {"field": "Summary", "fromString": "Foo", "toString": "Foo bar"}, + ], + }, + ], + ) + + +@pytest.fixture +def issue_with_unsorted_history() -> dict: + """Defensive: Jira occasionally returns histories out of chronological + order. 
The extracted transitions must be sorted by created_date so + `build_status_transitions` (downstream) computes correct durations.""" + return _jira_issue_with_changelog( + issue_id="100500", + histories=[ + { + "created": "2026-03-15T12:00:00.000+0000", # later + "items": [ + {"field": "Status", "fromString": "B", "toString": "C"}, + ], + }, + { + "created": "2026-03-10T09:00:00.000+0000", # earlier + "items": [ + {"field": "Status", "fromString": "A", "toString": "B"}, + ], + }, + ], + ) + + +# --------------------------------------------------------------------------- +# Behavioral tests +# --------------------------------------------------------------------------- + +class TestExtractStatusTransitionsInline: + def test_extracts_two_status_transitions(self, issue_with_two_status_transitions): + result = extract_status_transitions_inline(issue_with_two_status_transitions) + assert len(result) == 2 + assert result[0]["from_status"] == "To Do" + assert result[0]["to_status"] == "In Progress" + assert result[1]["from_status"] == "In Progress" + assert result[1]["to_status"] == "Done" + + def test_each_transition_carries_issue_id(self, issue_with_two_status_transitions): + result = extract_status_transitions_inline(issue_with_two_status_transitions) + assert all(t["issue_id"] == "100200" for t in result) + + def test_each_transition_carries_created_date(self, issue_with_two_status_transitions): + result = extract_status_transitions_inline(issue_with_two_status_transitions) + assert result[0]["created_date"] == "2026-01-15T10:00:00.000+0000" + assert result[1]["created_date"] == "2026-01-20T16:30:00.000+0000" + + def test_empty_changelog_returns_empty_list(self, issue_with_no_changelog): + """REGRESSION GUARD: pre-fix, this case caused cache-miss + HTTP fallback. + Must always return a list, even if empty. 
Never None, never raise.""" + result = extract_status_transitions_inline(issue_with_no_changelog) + assert result == [] + assert isinstance(result, list) + + def test_only_status_field_events_are_extracted(self, issue_with_mixed_history): + """Assignee, Priority, Summary changes don't become transitions.""" + result = extract_status_transitions_inline(issue_with_mixed_history) + assert len(result) == 1 + assert result[0]["from_status"] == "To Do" + assert result[0]["to_status"] == "In Progress" + + def test_status_field_match_is_case_insensitive(self): + """Defensive: Jira sometimes returns 'status', sometimes 'Status'.""" + for field_name in ("Status", "status", "STATUS"): + issue = _jira_issue_with_changelog( + issue_id="999", + histories=[ + { + "created": "2026-01-01T00:00:00.000+0000", + "items": [ + {"field": field_name, "fromString": "X", "toString": "Y"}, + ], + }, + ], + ) + result = extract_status_transitions_inline(issue) + assert len(result) == 1, f"failed for field name {field_name!r}" + + def test_transitions_are_chronologically_sorted(self, issue_with_unsorted_history): + """Downstream metric calculations depend on ordered transitions.""" + result = extract_status_transitions_inline(issue_with_unsorted_history) + assert len(result) == 2 + assert result[0]["created_date"] == "2026-03-10T09:00:00.000+0000" + assert result[1]["created_date"] == "2026-03-15T12:00:00.000+0000" + + def test_returns_empty_for_issue_without_changelog_key(self): + """Defensive: issue from Jira API may lack `changelog` key entirely.""" + result = extract_status_transitions_inline( + {"id": "555", "key": "X-1", "fields": {}} + ) + assert result == [] + + def test_returns_empty_for_changelog_without_histories(self): + """Defensive: `changelog: {}` without `histories` key.""" + result = extract_status_transitions_inline( + {"id": "555", "key": "X-1", "changelog": {}} + ) + assert result == [] + + +# --------------------------------------------------------------------------- +# Anti-regression: the redundant HTTP call must NEVER come back +# --------------------------------------------------------------------------- + +class TestSyncIssuesDoesNotCallFetchIssueChangelogs: + """If a future refactor reintroduces the per-issue HTTP fallback in + `_sync_issues`, this test fails. The check is structural — it greps + the source — to keep the test independent of any DB or network setup. + + Note: `fetch_issue_changelogs` may STILL be called from sprint sync + (where issues come without `expand=changelog`). This test scopes its + assertion to `_sync_issues` only. + """ + + def test_sync_issues_does_not_call_fetch_issue_changelogs(self): + """Source-grep: `_sync_issues` body must not reference `fetch_issue_changelogs`. + + If you really need it back, remove this test AND amend FDD-OPS-013 + in ops-backlog.md AND benchmark the new approach against + Webmotors-scale dataset (376k issues). + """ + from pathlib import Path + + sync_file = Path(__file__).resolve().parents[2] / "src" / "workers" / "devlake_sync.py" + source = sync_file.read_text() + + # Find the _sync_issues body — from "async def _sync_issues" until + # the next "async def" or "def " at the same indentation. + start = source.find("async def _sync_issues(") + assert start != -1, "Could not find _sync_issues definition" + + # Find next method def at same indent (4 spaces, prefixed with newline). 
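+        # For a source file shaped like
+        #     async def _sync_issues(self): ...
+        #     async def _sync_deployments(self): ...
+        # the slice ends at the "\n    async def " that introduces the next
+        # method, so only the _sync_issues body is inspected.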
+        end = source.find("\n    async def ", start + 1)
+        if end == -1:
+            end = source.find("\n    def ", start + 1)
+        assert end != -1, "Could not find end of _sync_issues body"
+
+        sync_issues_body = source[start:end]
+
+        # Only flag actual function CALLS (`.fetch_issue_changelogs(` or
+        # `await fetch_issue_changelogs(`), not comments or docstrings that
+        # reference the name historically. The pattern matches a call
+        # expression, not free text.
+        import re
+        call_pattern = re.compile(r"(?:\.|await\s+)fetch_issue_changelogs\s*\(")
+        assert call_pattern.search(sync_issues_body) is None, (
+            "_sync_issues calls fetch_issue_changelogs again; FDD-OPS-013 "
+            "removed the per-issue HTTP round-trip. See this class docstring "
+            "before reintroducing it."
+        )
diff --git a/pulse/packages/pulse-data/tests/unit/test_sprint_status_normalization.py b/pulse/packages/pulse-data/tests/unit/test_sprint_status_normalization.py
new file mode 100644
--- /dev/null
+++ b/pulse/packages/pulse-data/tests/unit/test_sprint_status_normalization.py
+"""Regression tests for FDD-OPS-018: sprint `status` + `goal` normalization.
+
+THE BUG (2026-04-29): `normalize_sprint` emitted no `status` key and hardcoded
+`goal` to None, while `_upsert_sprints` omitted both fields from its ON CONFLICT
+`set_` block, so every row in eng_sprints kept an empty status forever and
+Velocity / Carryover could not tell which sprints were actually closed.
+
+These tests pin the normalizer contract; the structural guard on
+`_upsert_sprints` is at the bottom of this file.
+"""
+
+from __future__ import annotations
+
+from uuid import uuid4
+
+import pytest
+
+from src.contexts.engineering_data.normalizer import (
+    _normalize_sprint_status,
+    normalize_sprint,
+)
+
+
+# ---------------------------------------------------------------------------
+# Helper — connector-shaped sprint payload
+# ---------------------------------------------------------------------------
+
+def _connector_sprint(
+    sprint_id: str = "1001",
+    status: str | None = "ACTIVE",
+    goal: str | None = None,
+) -> dict:
+    """Mirror what `JiraConnector._map_sprint` returns (ACTIVE/CLOSED/FUTURE)."""
+    return {
+        "id": f"jira:JiraSprint:1:{sprint_id}",
+        "original_board_id": "42",
+        "name": "Sprint 99",
+        "url": "https://example.atlassian.net",
+        "status": status,
+        "goal": goal,
+        "started_date": "2026-04-01T00:00:00.000Z",
+        "ended_date": "2026-04-15T00:00:00.000Z",
+        "completed_date": None,
+        "total_issues": 0,
+    }
+
+
+# ---------------------------------------------------------------------------
+# 1. Normalize sprint emits the status field
+# ---------------------------------------------------------------------------
+
+class TestStatusFieldPresent:
+    """REGRESSION GUARD: pre-fix, `normalize_sprint` returned a dict without
+    a `status` key at all, so every sprint landed with NULL/empty status."""
+
+    def test_active_normalizes_to_lowercase(self):
+        result = normalize_sprint(_connector_sprint(status="ACTIVE"), uuid4())
+        assert "status" in result, (
+            "normalize_sprint dropped the `status` field — eng_sprints.status "
+            "would land empty for every sprint. This is the 2026-04-29 bug."
+        )
+        assert result["status"] == "active"
+
+    def test_closed_normalizes_to_lowercase(self):
+        result = normalize_sprint(_connector_sprint(status="CLOSED"), uuid4())
+        assert result["status"] == "closed"
+
+    def test_future_normalizes_to_lowercase(self):
+        result = normalize_sprint(_connector_sprint(status="FUTURE"), uuid4())
+        assert result["status"] == "future"
+
+    def test_already_lowercase_passthrough(self):
+        result = normalize_sprint(_connector_sprint(status="active"), uuid4())
+        assert result["status"] == "active"
+
+    def test_whitespace_is_stripped(self):
+        result = normalize_sprint(_connector_sprint(status=" CLOSED "), uuid4())
+        assert result["status"] == "closed"
+
+
+# ---------------------------------------------------------------------------
+# 2. Unknown / missing values
+# ---------------------------------------------------------------------------
+
+class TestUnknownStatusReturnsNone:
+    """We deliberately don't bucket unknown values — operators must see
+    NULLs in eng_sprints.status and investigate (e.g., new Jira state).
+    Silently mapping to one of the known states would corrupt Velocity /
+    Carryover logic that relies on knowing which sprints are ACTUALLY
+    closed."""
+
+    def test_empty_string_is_none(self):
+        result = normalize_sprint(_connector_sprint(status=""), uuid4())
+        assert result["status"] is None
+
+    def test_none_is_none(self):
+        result = normalize_sprint(_connector_sprint(status=None), uuid4())
+        assert result["status"] is None
+
+    def test_unknown_value_is_none(self):
+        result = normalize_sprint(_connector_sprint(status="some_new_state"), uuid4())
+        assert result["status"] is None
+
+    def test_non_string_is_none(self):
+        result = normalize_sprint(_connector_sprint(status=42), uuid4())  # type: ignore[arg-type]
+        assert result["status"] is None
+
+
+# ---------------------------------------------------------------------------
+# 3. 
Aliases — common Jira variants that should map cleanly +# --------------------------------------------------------------------------- + +class TestStatusAliases: + @pytest.mark.parametrize("raw,expected", [ + ("active", "active"), + ("ACTIVE", "active"), + ("open", "active"), # alias + ("in_progress", "active"), # alias + ("closed", "closed"), + ("CLOSED", "closed"), + ("completed", "closed"), # alias + ("complete", "closed"), # alias + ("ended", "closed"), # alias + ("future", "future"), + ("FUTURE", "future"), + ("planned", "future"), # alias + ("upcoming", "future"), # alias + ]) + def test_alias_maps_correctly(self, raw, expected): + assert _normalize_sprint_status(raw) == expected + + +# --------------------------------------------------------------------------- +# 4. Goal field passthrough (also was previously hardcoded to None) +# --------------------------------------------------------------------------- + +class TestGoalFieldPassthrough: + def test_goal_string_is_preserved(self): + result = normalize_sprint( + _connector_sprint(goal="Ship the auth flow this sprint"), uuid4(), + ) + assert result["goal"] == "Ship the auth flow this sprint" + + def test_none_goal_stays_none(self): + result = normalize_sprint(_connector_sprint(goal=None), uuid4()) + assert result["goal"] is None + + def test_null_byte_in_goal_is_stripped(self): + """Postgres `text` rejects 0x00. Same defensive strip we apply to + title/description/assignee on issues.""" + result = normalize_sprint( + _connector_sprint(goal="Goal with\x00null byte"), uuid4(), + ) + assert result["goal"] is not None + assert "\x00" not in result["goal"] + + +# --------------------------------------------------------------------------- +# 5. Anti-regression on _upsert_sprints — structural source check +# --------------------------------------------------------------------------- + +class TestUpsertSprintsIncludesStatus: + """REGRESSION GUARD: pre-fix, `_upsert_sprints.on_conflict_do_update.set_` + omitted `status` and `goal` — so existing sprints kept their stale empty + status forever even after the normalizer was fixed. + + If a future refactor removes them from the set_ block again, this test + fails. The check is structural (greps the source) so it doesn't depend + on a real DB or Jira client. + """ + + def test_upsert_sprints_set_includes_status_and_goal(self): + from pathlib import Path + + sync_file = ( + Path(__file__).resolve().parents[2] / "src" / "workers" / "devlake_sync.py" + ) + source = sync_file.read_text() + + start = source.find("async def _upsert_sprints(") + assert start != -1, "Could not find _upsert_sprints definition" + + # Find next method or top-level def + end = source.find("\n async def ", start + 1) + if end == -1: + end = source.find("\n def ", start + 1) + if end == -1: + end = len(source) + + body = source[start:end] + + for field in ("status", "goal"): + assert f'"{field}": sprint_data' in body or f'"{field}":sprint_data' in body, ( + f"_upsert_sprints set_ block must update {field!r} on conflict. " + "Without it, existing sprints never receive the corrected " + "value when the connector or normalizer changes." + ) diff --git a/pulse/packages/pulse-data/tests/unit/test_status_normalization.py b/pulse/packages/pulse-data/tests/unit/test_status_normalization.py new file mode 100644 index 0000000..b2f73d2 --- /dev/null +++ b/pulse/packages/pulse-data/tests/unit/test_status_normalization.py @@ -0,0 +1,216 @@ +"""Regression tests for FDD-OPS-017 — status normalization with statusCategory +fallback. 
+ +THE BUG (2026-04-28 audit): 311k issues showed normalized_status distribution +of 96.5% done, 0.2% in_progress, 3.3% todo. Investigation revealed: + + - Webmotors Jira has 104 distinct status names across workflows + - DEFAULT_STATUS_MAPPING covered ~50 → 50+ statuses fell to default 'todo' + - 2,881 issues with status='FECHADO EM PROD' landed in 'todo' (should be 'done') + - Various active work states ('Em Progresso', 'Em desenv') were classified + as 'todo' + - Result: every flow metric (Cycle Time, Throughput, WIP, CFD, Flow + Efficiency) was systematically corrupted across the whole tenant + +THE FIX: hybrid normalization + + 1. Textual DEFAULT_STATUS_MAPPING — preserves the in_progress vs in_review + distinction we curated for Cycle Time breakdown + 2. Jira's statusCategory.key fallback — authoritative for done/non-done, + covers the long tail of tenant-custom workflows automatically + 3. Final default 'todo' with WARN log + +If a future refactor reverts to the textual-only path, these tests fail +loudly with messages naming the broken classification. +""" + +from __future__ import annotations + +import pytest + +from src.contexts.engineering_data.normalizer import ( + DEFAULT_STATUS_MAPPING, + build_status_transitions, + normalize_status, +) + + +# --------------------------------------------------------------------------- +# 1. Textual mapping wins (preserves curated granularity) +# --------------------------------------------------------------------------- + +class TestTextualMappingTakesPriority: + def test_known_status_uses_textual_even_when_category_disagrees(self): + """Even if Jira's category says 'indeterminate', our explicit mapping + of 'em code review' → 'in_review' must win. The category-only fallback + loses the in_progress/in_review granularity that Cycle Time needs.""" + result = normalize_status( + "Em Code Review", + status_category="indeterminate", + ) + assert result == "in_review" + + def test_pt_br_done_status_classified_correctly(self): + """'Concluído' must always be done, regardless of category.""" + assert normalize_status("Concluído") == "done" + # Even if hypothetically the category was wrong: + assert normalize_status("Concluído", status_category="new") == "done" + + def test_custom_mapping_overrides_default(self): + custom = {"weird-state": "in_progress"} + assert normalize_status("weird-state", status_mapping=custom) == "in_progress" + + +# --------------------------------------------------------------------------- +# 2. 
statusCategory fallback — the actual fix +# --------------------------------------------------------------------------- + +class TestStatusCategoryFallback: + def test_unknown_status_with_done_category_returns_done(self): + """REGRESSION: pre-fix, this returned 'todo' and corrupted Throughput + + Cycle Time + Lead Time for every issue with a custom 'done' status.""" + result = normalize_status( + "FECHADO EM PROD UNKNOWN VARIANT", + status_category="done", + ) + assert result == "done" + + def test_unknown_status_with_indeterminate_returns_in_progress(self): + """Active work that isn't in our textual mapping defaults to + in_progress (not in_review) — operators must add explicit mapping + if the in_review distinction matters.""" + result = normalize_status( + "Some New Custom State", + status_category="indeterminate", + ) + assert result == "in_progress" + + def test_unknown_status_with_new_category_returns_todo(self): + result = normalize_status( + "Aguardando Terceiro Custom", + status_category="new", + ) + assert result == "todo" + + def test_unknown_status_without_category_defaults_to_todo(self): + """Legacy fallback when neither textual nor category matches.""" + result = normalize_status("Totally Unknown") + assert result == "todo" + + def test_invalid_category_falls_through_to_default(self): + """Defensive: garbage in `status_category` doesn't crash the pipeline.""" + result = normalize_status("Whatever", status_category="garbage") + assert result == "todo" + + def test_category_is_case_insensitive(self): + assert normalize_status("X", status_category="DONE") == "done" + assert normalize_status("X", status_category="Indeterminate") == "in_progress" + + +# --------------------------------------------------------------------------- +# 3. Real-world Webmotors statuses that broke the original normalizer +# --------------------------------------------------------------------------- + +class TestWebmotorsStatusRegression: + """Each parametrized case is a status string that, pre-fix, caused + visible metric corruption. They must classify correctly NOW. + """ + + @pytest.mark.parametrize("raw,expected", [ + ("FECHADO EM PROD", "done"), # 2,881 issues affected + ("FECHADO EM HML", "done"), # Jira's own category is "done" + ("Concluído", "done"), + ("Cancelado", "done"), + ("FECHADO", "done"), + ("Em Desenvolvimento", "in_progress"), + ("Em imersão", "in_progress"), + ("Em andamento", "in_progress"), + ("Em Progresso", "in_progress"), # was 'todo' pre-fix + ("Em Code Review", "in_review"), + ("Em Teste HML", "in_review"), + ("Homologação", "in_review"), # was 'todo' pre-fix + ("Em Verificação", "in_review"), # was 'todo' pre-fix + ("BACKLOG", "todo"), + ("A Fazer", "todo"), + ("Refinado", "todo"), + ("PAUSADO", "todo"), + ]) + def test_observed_status_classifies_correctly(self, raw, expected): + assert normalize_status(raw) == expected, ( + f"{raw!r} should be {expected!r}, but got {normalize_status(raw)!r}" + ) + + +# --------------------------------------------------------------------------- +# 4. build_status_transitions integrates the category map +# --------------------------------------------------------------------------- + +class TestBuildStatusTransitionsWithCategories: + def test_unknown_to_status_uses_categories_map(self): + """REGRESSION: a transition into a custom 'done'-category status + must be classified as done in the resulting transitions array, + not 'todo'. 
Cycle Time breakdown reads transitions to determine + time spent in each phase.""" + changelogs = [ + { + "from_status": "Em Desenvolvimento", + "to_status": "Some Custom Done State", + "created_date": "2026-04-01T10:00:00.000+0000", + }, + ] + cats_map = {"some custom done state": "done"} + result = build_status_transitions( + changelogs, status_categories_map=cats_map, + ) + assert len(result) == 1 + assert result[0]["status"] == "done" + + def test_textual_mapping_still_wins_in_transitions(self): + changelogs = [ + { + "from_status": "A", + "to_status": "Em Code Review", + "created_date": "2026-04-01T10:00:00.000+0000", + }, + ] + # Even with a misleading category in the map: + cats_map = {"em code review": "indeterminate"} + result = build_status_transitions( + changelogs, status_categories_map=cats_map, + ) + assert result[0]["status"] == "in_review" + + def test_transitions_without_categories_map_still_works(self): + """Backward compat: legacy callers don't pass status_categories_map.""" + changelogs = [ + { + "from_status": "A", + "to_status": "Done", + "created_date": "2026-04-01T10:00:00.000+0000", + }, + ] + result = build_status_transitions(changelogs) + assert result[0]["status"] == "done" + + +# --------------------------------------------------------------------------- +# 5. Anti-regression: textual mapping coverage +# --------------------------------------------------------------------------- + +class TestTextualMappingCompleteness: + """The DEFAULT_STATUS_MAPPING grew significantly during FDD-OPS-017 to + cover Webmotors PT-BR workflows. These tests guard against accidental + deletion. + """ + + @pytest.mark.parametrize("status", [ + "fechado em prod", "concluído", "cancelado", "fechado", + "em desenvolvimento", "em andamento", "em progresso", + "em code review", "em teste hml", "em verificação", "homologação", + "backlog", "a fazer", "refinado", + ]) + def test_critical_pt_br_status_is_mapped(self, status): + assert status in DEFAULT_STATUS_MAPPING, ( + f"{status!r} must remain in DEFAULT_STATUS_MAPPING — " + "removing it reverts FDD-OPS-017 and re-corrupts metrics." + ) diff --git a/pulse/packages/pulse-data/tests/unit/test_watermark_scope_keys.py b/pulse/packages/pulse-data/tests/unit/test_watermark_scope_keys.py new file mode 100644 index 0000000..81f67f7 --- /dev/null +++ b/pulse/packages/pulse-data/tests/unit/test_watermark_scope_keys.py @@ -0,0 +1,62 @@ +"""Unit tests for FDD-OPS-014 step 2.2 — per-scope watermark API. + +Validates that: +1. `make_scope_key()` produces canonical strings +2. Default scope_key='*' preserves legacy callers (backwards-compat) +3. New explicit scope_keys are independent rows +4. `_list_watermarks_by_scope` returns None for missing scopes (full backfill) + +Tests use a Postgres test container fixture (existing in conftest); the +DB-touching tests live under tests/integration/ — this file covers the +pure helpers that don't need DB. 
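+
+The scope keys minted here are what `_sync_issues` writes per Jira project
+("jira:project:<KEY>") and `_sync_deployments` writes per repo
+("jenkins:repo:<owner>/<name>") in devlake_sync.py.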
+""" + +from __future__ import annotations + +import pytest + +from src.workers.devlake_sync import GLOBAL_SCOPE, make_scope_key + + +class TestMakeScopeKey: + def test_jira_project_format(self): + assert make_scope_key("jira", "project", "BG") == "jira:project:BG" + + def test_github_repo_format(self): + assert make_scope_key("github", "repo", "foo/bar") == "github:repo:foo/bar" + + def test_jenkins_job_with_folders(self): + # Jenkins jobs can have folder/sub/job notation + assert ( + make_scope_key("jenkins", "job", "PI-Money/money-prd") + == "jenkins:job:PI-Money/money-prd" + ) + + def test_global_scope_constant(self): + # Sanity: the default value used everywhere matches what migration 010 + # set as DEFAULT in DDL. If this changes, the migration default and + # legacy reads break. + assert GLOBAL_SCOPE == "*" + + def test_separator_is_colon(self): + # Scope keys are routed by source prefix; helpers and consumers all + # split on ':'. Don't change the separator without a migration. + result = make_scope_key("source", "dim", "value") + assert result.count(":") == 2 + assert result.split(":") == ["source", "dim", "value"] + + @pytest.mark.parametrize( + "source,dim,value", + [ + ("jira", "project", "X"), + ("github", "repo", "a/b/c"), # repos can have slashes + ("jenkins", "job", "x.y.z"), # job names can have dots + ("future", "tenant", "id-with-dashes"), + ], + ) + def test_value_pass_through(self, source, dim, value): + # Helper does NOT escape or sanitize — values pass through. Callers + # are expected to use scope_key as opaque identifier; equality + # comparison is what matters. + result = make_scope_key(source, dim, value) + assert result == f"{source}:{dim}:{value}"