From ff8af998f557dad07d59028c48a2491fb89aa0f7 Mon Sep 17 00:00:00 2001 From: Zbigniew Sobiecki Date: Fri, 1 May 2026 17:46:41 +0000 Subject: [PATCH] fix(triggers): read REDIS_URL via routerConfig so dedup survives envScrub MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #1248's Redis-backed dedup throws inside cascade-worker processes: ERROR Review-dispatch dedup Redis call failed — failing closed error: 'Error: REDIS_URL is required for review-dispatch dedup' Confirmed live at 2026-05-01T17:25:44 in worker `cascade-worker-coalesce_ucho_MNG-461_*`. Sentry tag: `review_dedup_redis_down`. Root cause: `src/utils/envScrub.ts:13-18` lists REDIS_URL in `SENSITIVE_ENV_KEYS`. Worker-entry calls `scrubSensitiveEnv()` early in startup, which does `delete process.env.REDIS_URL` to keep infra secrets out of agent-spawned subprocesses. The dedup module reads `process.env.REDIS_URL` lazily at first dispatch — well after the scrub — so it sees `undefined` and throws. The fail-closed handler returns false, and the post-completion-hook silently logs `Skipping post-completion review: already dispatched` (which looks identical to the working dedup's "correctly rejected duplicate" log, hence the bug evaded the prod canary). Why it didn't bite immediately: the post-completion-hook short-circuits when CI isn't all passing at impl exit (`agent-execution.ts:269-275`), so the bug only triggers on the narrow timing where CI is fully green BEFORE impl finishes. Small fraction of runs. Why router is unaffected: cascade-router never calls scrubSensitiveEnv. PR #196's review (verified 1-run earlier) was dispatched from the router via check-suite-success — that path was never broken. Fix: read the URL via `routerConfig.redisUrl` instead of `process.env.REDIS_URL`. routerConfig is captured at module-load time in `src/router/config.ts:137-138`, well before scrubSensitiveEnv runs. The cached value survives the scrub the same way the DB pool's cached connection string does — exactly the pattern envScrub.ts's docstring describes. Two-line source change + new regression-pin test that claims, deletes process.env.REDIS_URL, and claims again — both must succeed (second deduped) without throwing `REDIS_URL is required`. Verification: vitest 2497/2497, typecheck clean, lint clean. Out of scope (follow-up): `src/queue/cancel.ts` has the same `process.env.REDIS_URL` lazy pattern but is only called from cascade-router and cascade-dashboard (neither scrubs env), so it doesn't bite today. Apply the same routerConfig swap for symmetry in a separate PR. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/triggers/github/review-dispatch-dedup.ts | 13 +++-- .../github/review-dispatch-dedup.test.ts | 52 +++++++++++++++++++ 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/src/triggers/github/review-dispatch-dedup.ts b/src/triggers/github/review-dispatch-dedup.ts index 9323f48c..d3d247b8 100644 --- a/src/triggers/github/review-dispatch-dedup.ts +++ b/src/triggers/github/review-dispatch-dedup.ts @@ -32,6 +32,7 @@ */ import { Redis } from 'ioredis'; +import { routerConfig } from '../../router/config.js'; import { captureException } from '../../sentry.js'; import { logger } from '../../utils/logging.js'; @@ -48,14 +49,20 @@ let redisInstance: Redis | null = null; * Lazy singleton — first call connects, subsequent calls reuse the same * client. The worker process pays the connection cost only if it actually * dispatches a review (post-completion-hook). + * + * Reads the URL via `routerConfig.redisUrl` (captured at config.ts module + * load) rather than `process.env.REDIS_URL`. Worker processes call + * `scrubSensitiveEnv()` early in startup, which deletes `REDIS_URL` from + * `process.env`; reading lazily would see `undefined` and throw. The + * routerConfig snapshot survives the scrub the same way the DB pool's + * cached connection string does. */ function getRedis(): Redis { if (!redisInstance) { - const redisUrl = process.env.REDIS_URL; - if (!redisUrl) { + if (!routerConfig.redisUrl) { throw new Error('REDIS_URL is required for review-dispatch dedup'); } - redisInstance = new Redis(redisUrl); + redisInstance = new Redis(routerConfig.redisUrl); } return redisInstance; } diff --git a/tests/unit/triggers/github/review-dispatch-dedup.test.ts b/tests/unit/triggers/github/review-dispatch-dedup.test.ts index 3079ace2..1051c9a5 100644 --- a/tests/unit/triggers/github/review-dispatch-dedup.test.ts +++ b/tests/unit/triggers/github/review-dispatch-dedup.test.ts @@ -316,3 +316,55 @@ describe('cross-process dedup invariant (PR #194 regression pin)', () => { expect(secondResult).toBeNull(); }); }); + +// ─── Worker-scrub regression pin (PR #1250) ───────────────────────────────── +// +// `src/utils/envScrub.ts:13-18` deletes REDIS_URL from process.env early in +// every cascade-worker process (after the DB pool is initialized). The +// dedup module must survive this — `getRedis()` reads from the +// `routerConfig.redisUrl` snapshot captured at config.ts load, not from +// `process.env.REDIS_URL` lazily. +// +// Production incident: 2026-05-01T17:25:44 in worker +// `cascade-worker-coalesce_ucho_MNG-461_*` threw `Error: REDIS_URL is +// required for review-dispatch dedup` because the lazy read happened well +// after the scrub had run. Sentry tag: `review_dedup_redis_down`. + +describe('survives mid-process REDIS_URL deletion (envScrub regression pin)', () => { + it('claims successfully even after process.env.REDIS_URL is deleted', async () => { + const key = buildReviewDispatchKey('acme', 'repo', 42, 'sha42'); + + // First claim with REDIS_URL set — should succeed and create the + // singleton client backed by the routerConfig snapshot. + const first = await claimReviewDispatch(key, 'check-suite-success', { + prNumber: 42, + headSha: 'sha42', + }); + expect(first).toBe(true); + + // Simulate worker-process envScrub by deleting the env var. + // Subsequent claims must STILL work because the singleton uses + // routerConfig.redisUrl (captured at module load), not process.env. + const previous = process.env.REDIS_URL; + delete process.env.REDIS_URL; + try { + // Same key → dedup hit (false). Crucially this does NOT throw + // `REDIS_URL is required` — the regression behaviour pre-PR-#1250. + const second = await claimReviewDispatch(key, 'post-completion-hook', { + prNumber: 42, + headSha: 'sha42', + }); + expect(second).toBe(false); + + // Different key → claim succeeds. + const otherKey = buildReviewDispatchKey('acme', 'repo', 43, 'sha43'); + const third = await claimReviewDispatch(otherKey, 'post-completion-hook', { + prNumber: 43, + headSha: 'sha43', + }); + expect(third).toBe(true); + } finally { + if (previous !== undefined) process.env.REDIS_URL = previous; + } + }); +});