From d4e82ca16674b37bf926215e2f5317f9c714fe0b Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 1 Apr 2026 07:38:32 +0000 Subject: [PATCH 1/3] feat: add self-healing monitor auto-retry after transient failures Schedule a single automatic re-check 35 minutes after a transient scrape failure so momentary site outages recover silently. Adds pendingRetryAt column to monitors table, scheduler pickup logic, and clears the retry on success, manual check, or after execution. https://claude.ai/code/session_013FQG661RB6D6wMie2REkL6 --- server/routes.ts | 10 +- server/services/scheduler.test.ts | 83 ++++++++++++ server/services/scheduler.ts | 25 +++- server/services/scraper.test.ts | 202 ++++++++++++++++++++++++++++++ server/services/scraper.ts | 61 +++++++++ shared/schema.ts | 2 + 6 files changed, 381 insertions(+), 2 deletions(-) diff --git a/server/routes.ts b/server/routes.ts index 7cb5d2b..28398f0 100644 --- a/server/routes.ts +++ b/server/routes.ts @@ -19,7 +19,7 @@ import { ErrorLogger } from "./services/logger"; import { notificationTablesExist, channelTablesExist } from "./services/notificationReady"; import { BrowserlessUsageTracker, getMonthResetDate } from "./services/browserlessTracker"; import { ResendUsageTracker, getResendResetDate } from "./services/resendTracker"; -import { errorLogs, monitorMetrics } from "@shared/schema"; +import { errorLogs, monitorMetrics, monitors } from "@shared/schema"; import { generalRateLimiter, createMonitorRateLimiter, @@ -248,6 +248,7 @@ export async function registerRoutes( pauseReason: null, healthAlertSentAt: null, lastHealthyAt: null, + pendingRetryAt: null, createdAt: new Date() }; @@ -586,6 +587,13 @@ export async function registerRoutes( if (String(existing.userId) !== String(req.user.claims.sub)) return res.status(403).json({ message: "Forbidden" }); const result = await checkMonitor(existing); + + // Clear any pending auto-retry since the user triggered a manual check + await db.update(monitors) + .set({ pendingRetryAt: null }) + .where(eq(monitors.id, id)) + .catch(() => {}); // best-effort + res.json(result); } catch (error: any) { console.error("[Check Monitor] Error:", error.message); diff --git a/server/services/scheduler.test.ts b/server/services/scheduler.test.ts index 720ba38..56ae7ed 100644 --- a/server/services/scheduler.test.ts +++ b/server/services/scheduler.test.ts @@ -8,6 +8,7 @@ const { mockGetAllActiveMonitors, mockCleanupPollutedValues, mockDbExecute, + mockDbUpdateSet, cronCallbacks, mockMonitorsNeedingRetry, mockDeliverWebhook, @@ -16,6 +17,7 @@ const { mockGetAllActiveMonitors: vi.fn().mockResolvedValue([]), mockCleanupPollutedValues: vi.fn().mockResolvedValue(undefined), mockDbExecute: vi.fn().mockResolvedValue({ rowCount: 0 }), + mockDbUpdateSet: vi.fn(), cronCallbacks: {} as Record Promise>>, mockMonitorsNeedingRetry: new Set(), mockDeliverWebhook: vi.fn().mockResolvedValue({ success: true, statusCode: 200 }), @@ -60,6 +62,16 @@ vi.mock("./logger", () => ({ vi.mock("../db", () => ({ db: { execute: (...args: any[]) => mockDbExecute(...args), + update: vi.fn().mockReturnValue({ + set: (...args: any[]) => { + mockDbUpdateSet(...args); + const whereFn = vi.fn().mockReturnValue({ + catch: vi.fn().mockReturnValue(Promise.resolve()), + then: (resolve: any) => Promise.resolve().then(resolve), + }); + return { where: whereFn }; + }, + }), }, })); @@ -163,6 +175,7 @@ function makeMonitor(overrides: Partial = {}): Monitor { pauseReason: null, healthAlertSentAt: null, lastHealthyAt: null, + pendingRetryAt: null, createdAt: new Date(), 
...overrides, }; @@ -399,6 +412,75 @@ describe("startScheduler", () => { resolver!(); await Promise.resolve(); }); + + // ----------------------------------------------------------------------- + // auto-retry scheduler pickup (pendingRetryAt) + // ----------------------------------------------------------------------- + + it("triggers check for monitor with pendingRetryAt <= now", async () => { + const monitor = makeMonitor({ + frequency: "hourly", + lastChecked: new Date(Date.now() - 30 * 60 * 1000), // 30 min ago — not normally due + pendingRetryAt: new Date(Date.now() - 1000), // 1 second in the past + }); + mockGetAllActiveMonitors.mockResolvedValueOnce([monitor]); + + await startScheduler(); + await runCron("* * * * *"); + await vi.advanceTimersByTimeAsync(31000); + + expect(mockCheckMonitor).toHaveBeenCalledWith(monitor); + }); + + it("does NOT trigger check for monitor with pendingRetryAt in the future", async () => { + const monitor = makeMonitor({ + frequency: "hourly", + lastChecked: new Date(Date.now() - 30 * 60 * 1000), // 30 min ago — not normally due + pendingRetryAt: new Date(Date.now() + 30 * 60 * 1000), // 30 min in the future + }); + mockGetAllActiveMonitors.mockResolvedValueOnce([monitor]); + + await startScheduler(); + await runCron("* * * * *"); + await vi.advanceTimersByTimeAsync(31000); + + expect(mockCheckMonitor).not.toHaveBeenCalled(); + }); + + it("clears pendingRetryAt after retry fires (success path)", async () => { + const monitor = makeMonitor({ + frequency: "hourly", + lastChecked: new Date(Date.now() - 30 * 60 * 1000), + pendingRetryAt: new Date(Date.now() - 1000), + }); + mockGetAllActiveMonitors.mockResolvedValueOnce([monitor]); + + await startScheduler(); + await runCron("* * * * *"); + await vi.advanceTimersByTimeAsync(31000); + + expect(mockCheckMonitor).toHaveBeenCalledWith(monitor); + // The finally block should clear pendingRetryAt + expect(mockDbUpdateSet).toHaveBeenCalledWith({ pendingRetryAt: null }); + }); + + it("clears pendingRetryAt after retry fires (failure path)", async () => { + mockCheckMonitor.mockRejectedValueOnce(new Error("Scrape failed")); + const monitor = makeMonitor({ + frequency: "hourly", + lastChecked: new Date(Date.now() - 30 * 60 * 1000), + pendingRetryAt: new Date(Date.now() - 1000), + }); + mockGetAllActiveMonitors.mockResolvedValueOnce([monitor]); + + await startScheduler(); + await runCron("* * * * *"); + await vi.advanceTimersByTimeAsync(31000); + + expect(mockCheckMonitor).toHaveBeenCalledWith(monitor); + // Even on failure, pendingRetryAt should be cleared + expect(mockDbUpdateSet).toHaveBeenCalledWith({ pendingRetryAt: null }); + }); }); describe("concurrency limiting (runCheckWithLimit)", () => { @@ -1290,3 +1372,4 @@ describe("webhook retry cumulative backoff", () => { })); }); }); + diff --git a/server/services/scheduler.ts b/server/services/scheduler.ts index 2195f1e..673dd63 100644 --- a/server/services/scheduler.ts +++ b/server/services/scheduler.ts @@ -10,7 +10,8 @@ import { ensureMonitorConditionsTable } from "./ensureTables"; import { processAutomatedCampaigns } from "./automatedCampaigns"; import { isTransientDbError } from "../utils/dbErrors"; import { db } from "../db"; -import { sql } from "drizzle-orm"; +import { eq, sql } from "drizzle-orm"; +import { monitors } from "@shared/schema"; // Keep below DB pool max (3, see db.ts) to leave headroom for cron jobs and // API requests. 
Browser POOL_MAX is 1 (browserPool.ts), so the second @@ -110,9 +111,17 @@ async function runCheckWithLimit(monitor: Parameters[0]): P console.debug(`[Scheduler] Concurrency limit reached, deferring monitor ${monitor.id}`); return false; } + + const hadPendingRetry = !!( + monitor.pendingRetryAt && new Date(monitor.pendingRetryAt) <= new Date() + ); + activeChecks++; try { await checkMonitor(monitor); + if (hadPendingRetry) { + console.log(`[AutoRetry] Monitor ${monitor.id} — retry completed`); + } return true; } catch (error) { await ErrorLogger.error("scheduler", `"${monitor.name}" — scheduled check failed. This is usually a temporary issue. If it persists, verify the URL is still valid and the selector matches the page.`, error instanceof Error ? error : null, { @@ -124,6 +133,15 @@ async function runCheckWithLimit(monitor: Parameters[0]): P return true; } finally { activeChecks--; + if (hadPendingRetry) { + db.update(monitors) + .set({ pendingRetryAt: null }) + .where(eq(monitors.id, monitor.id)) + .catch((err: unknown) => { + console.error(`[AutoRetry] Failed to clear pendingRetryAt for monitor ${monitor.id}:`, + err instanceof Error ? err.message : err); + }); + } } } @@ -218,6 +236,11 @@ export async function startScheduler() { } } + // Auto-retry: fire if pendingRetryAt window has elapsed + if (!shouldCheck && monitor.pendingRetryAt && new Date(monitor.pendingRetryAt) <= now) { + shouldCheck = true; + } + if (shouldCheck) { const jitterMs = Math.floor(Math.random() * 30000); trackTimeout(() => { diff --git a/server/services/scraper.test.ts b/server/services/scraper.test.ts index 1fd55bc..b7d2711 100644 --- a/server/services/scraper.test.ts +++ b/server/services/scraper.test.ts @@ -161,6 +161,7 @@ function makeMonitor(overrides: Partial = {}): Monitor { pauseReason: null, healthAlertSentAt: null, lastHealthyAt: null, + pendingRetryAt: null, createdAt: new Date(), ...overrides, }; @@ -6280,3 +6281,204 @@ describe("updateLastHealthyAt on success", () => { } }); }); + +// --------------------------------------------------------------------------- +// auto-retry scheduling (pendingRetryAt) +// --------------------------------------------------------------------------- +describe("auto-retry scheduling (pendingRetryAt)", () => { + const mockStorage = storage as unknown as { + updateMonitor: ReturnType; + addMonitorChange: ReturnType; + getUser: ReturnType; + }; + const mockDb = db as unknown as { + insert: ReturnType; + update: ReturnType; + }; + + // Track calls to db.update().set() so we can inspect pendingRetryAt writes + let dbUpdateSetCalls: any[]; + + function mockDbUpdateChain(returnedFailureCount: number, returnedActive = true) { + dbUpdateSetCalls = []; + const returningFn = vi.fn().mockResolvedValue([{ + consecutiveFailures: returnedFailureCount, + active: returnedActive, + }]); + const whereFn = vi.fn().mockReturnValue({ returning: returningFn }); + const setFn = vi.fn().mockImplementation((args: any) => { + dbUpdateSetCalls.push(args); + return { where: whereFn, returning: returningFn }; + }); + mockDb.update.mockReturnValue({ set: setFn }); + return { setFn, whereFn, returningFn }; + } + + beforeEach(() => { + vi.useFakeTimers(); + vi.clearAllMocks(); + delete process.env.BROWSERLESS_TOKEN; + mockDbUpdateChain(1, true); + mockStorage.getUser.mockResolvedValue({ id: "user1", tier: "free" }); + }); + + afterEach(() => { + vi.useRealTimers(); + vi.restoreAllMocks(); + }); + + async function runWithTimers(monitor: Monitor) { + const promise = checkMonitor(monitor); + await 
vi.advanceTimersByTimeAsync(3000);
+    return promise;
+  }
+
+  it("sets pendingRetryAt ~35 min after a transient 'error' result", async () => {
+    vi.spyOn(globalThis, "fetch").mockRejectedValueOnce(new Error("Network failure"));
+
+    // daily monitor checked 2 hours ago — next normal check is ~22h away (> 45 min)
+    const monitor = makeMonitor({
+      frequency: "daily",
+      lastChecked: new Date(Date.now() - 2 * 60 * 60 * 1000),
+    });
+    const result = await runWithTimers(monitor);
+
+    expect(result.status).toBe("error");
+    // Should have set pendingRetryAt via db.update
+    const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt !== undefined && c.pendingRetryAt !== null);
+    expect(retrySetCall).toBeDefined();
+    const retryAt = retrySetCall.pendingRetryAt as Date;
+    // Should be ~35 minutes from now
+    const diffMinutes = (retryAt.getTime() - Date.now()) / (60 * 1000);
+    expect(diffMinutes).toBeGreaterThan(34);
+    expect(diffMinutes).toBeLessThan(36);
+  });
+
+  it("does NOT set pendingRetryAt for 'blocked' status", async () => {
+    const html = `<html><head><title>Access Denied</title></head><body><h1>Forbidden</h1></body></html>`;
+    vi.spyOn(globalThis, "fetch").mockResolvedValueOnce(
+      new Response(html, { status: 200 })
+    );
+
+    const monitor = makeMonitor({
+      frequency: "daily",
+      lastChecked: new Date(Date.now() - 2 * 60 * 60 * 1000),
+    });
+    const result = await runWithTimers(monitor);
+
+    expect(result.status).toBe("blocked");
+    const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt instanceof Date);
+    expect(retrySetCall).toBeUndefined();
+  });
+
+  it("does NOT set pendingRetryAt for 'selector_missing' status", async () => {
+    const html = `<html><body>
+      <div class="content">No match</div>
+    </body></html>
`; + vi.spyOn(globalThis, "fetch") + .mockResolvedValueOnce(new Response(html, { status: 200 })) + .mockResolvedValueOnce(new Response(html, { status: 200 })); + + const monitor = makeMonitor({ + selector: ".missing", + frequency: "daily", + lastChecked: new Date(Date.now() - 2 * 60 * 60 * 1000), + }); + const result = await runWithTimers(monitor); + + expect(result.status).toBe("selector_missing"); + const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt instanceof Date); + expect(retrySetCall).toBeUndefined(); + }); + + it("does NOT set pendingRetryAt when error contains ENOTFOUND", async () => { + vi.spyOn(globalThis, "fetch").mockRejectedValueOnce(new Error("getaddrinfo ENOTFOUND example.invalid")); + + const monitor = makeMonitor({ + frequency: "daily", + lastChecked: new Date(Date.now() - 2 * 60 * 60 * 1000), + }); + const result = await runWithTimers(monitor); + + expect(result.status).toBe("error"); + const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt instanceof Date); + expect(retrySetCall).toBeUndefined(); + }); + + it("does NOT set pendingRetryAt when error contains 'certificate'", async () => { + vi.spyOn(globalThis, "fetch").mockRejectedValueOnce(new Error("certificate has expired")); + + const monitor = makeMonitor({ + frequency: "daily", + lastChecked: new Date(Date.now() - 2 * 60 * 60 * 1000), + }); + const result = await runWithTimers(monitor); + + expect(result.status).toBe("error"); + const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt instanceof Date); + expect(retrySetCall).toBeUndefined(); + }); + + it("does NOT set pendingRetryAt when monitor.active is false", async () => { + vi.spyOn(globalThis, "fetch").mockRejectedValueOnce(new Error("Network failure")); + + const monitor = makeMonitor({ + active: false, + frequency: "daily", + lastChecked: new Date(Date.now() - 2 * 60 * 60 * 1000), + }); + const result = await runWithTimers(monitor); + + expect(result.status).toBe("error"); + const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt instanceof Date); + expect(retrySetCall).toBeUndefined(); + }); + + it("does NOT set pendingRetryAt when pendingRetryAt is already set on monitor", async () => { + vi.spyOn(globalThis, "fetch").mockRejectedValueOnce(new Error("Network failure")); + + const monitor = makeMonitor({ + frequency: "daily", + lastChecked: new Date(Date.now() - 2 * 60 * 60 * 1000), + pendingRetryAt: new Date(Date.now() + 30 * 60 * 1000), // already set + }); + const result = await runWithTimers(monitor); + + expect(result.status).toBe("error"); + const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt instanceof Date); + expect(retrySetCall).toBeUndefined(); + }); + + it("does NOT set pendingRetryAt when next normal check is within 45 minutes", async () => { + vi.spyOn(globalThis, "fetch").mockRejectedValueOnce(new Error("Network failure")); + + // hourly monitor checked 30 minutes ago — next normal check in ~30 min (< 45 min) + const monitor = makeMonitor({ + frequency: "hourly", + lastChecked: new Date(Date.now() - 30 * 60 * 1000), + }); + const result = await runWithTimers(monitor); + + expect(result.status).toBe("error"); + const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt instanceof Date); + expect(retrySetCall).toBeUndefined(); + }); + + it("clears pendingRetryAt (null) on successful check via storage.updateMonitor", async () => { + const html = `$5.00`; + vi.spyOn(globalThis, "fetch").mockResolvedValueOnce( + new Response(html, { status: 200 
}) + ); + + const monitor = makeMonitor({ + currentValue: "$5.00", + pendingRetryAt: new Date(Date.now() + 30 * 60 * 1000), + }); + await runWithTimers(monitor); + + expect(mockStorage.updateMonitor).toHaveBeenCalledWith( + 1, + expect.objectContaining({ + pendingRetryAt: null, + }) + ); + }); +}); diff --git a/server/services/scraper.ts b/server/services/scraper.ts index 3201c4a..0188707 100644 --- a/server/services/scraper.ts +++ b/server/services/scraper.ts @@ -1344,6 +1344,7 @@ export async function checkMonitor(monitor: Monitor): Promise<{ lastStatus: finalStatus, lastError: null, consecutiveFailures: 0, + pendingRetryAt: null, }); } catch (dbError) { // Retry once after a short delay for transient DB errors. @@ -1355,6 +1356,7 @@ export async function checkMonitor(monitor: Monitor): Promise<{ lastStatus: finalStatus, lastError: null, consecutiveFailures: 0, + pendingRetryAt: null, }); } catch (retryError) { // Both attempts failed. Transient DB errors (connection drops) are @@ -1465,6 +1467,36 @@ export async function checkMonitor(monitor: Monitor): Promise<{ console.error(`[Scraper] handleMonitorFailure threw for monitor ${monitor.id}:`, failureErr); } + // Self-heal: schedule a single auto-retry for transient errors + // Check both raw error patterns and sanitized user-facing messages + if ( + finalStatus === "error" && + monitor.active && + !monitor.pendingRetryAt && + !/ENOTFOUND|certificate|ssl|tls|SSRF blocked|Could not resolve|SSL\/TLS error|URL is not allowed/i.test(finalError ?? "") + ) { + const frequencyMinutes = monitor.frequency === "hourly" ? 60 : 1440; + const minsUntilNormal = + ((monitor.lastChecked ? new Date(monitor.lastChecked).getTime() : 0) + + frequencyMinutes * 60 * 1000 + - Date.now()) / (60 * 1000); + + if (minsUntilNormal > 45) { + try { + const retryAt = new Date(Date.now() + 35 * 60 * 1000); + await db.update(monitors) + .set({ pendingRetryAt: retryAt }) + .where(eq(monitors.id, monitor.id)); + console.log(`[AutoRetry] Monitor ${monitor.id} — retry scheduled at ${retryAt.toISOString()}`); + } catch (err) { + console.error(`[AutoRetry] Failed to set pendingRetryAt for monitor ${monitor.id}:`, + err instanceof Error ? err.message : err); + } + } else { + console.log(`[AutoRetry] Monitor ${monitor.id} — skipped (next normal check in ${Math.round(minsUntilNormal)} min)`); + } + } + return { changed: false, currentValue: oldValue, @@ -1502,6 +1534,35 @@ export async function checkMonitor(monitor: Monitor): Promise<{ console.error(`[Scraper] handleMonitorFailure threw in outer catch for monitor ${monitor.id}:`, failureErr); } + // Self-heal: schedule a single auto-retry for transient errors + // Check both raw error patterns and sanitized user-facing messages + if ( + monitor.active && + !monitor.pendingRetryAt && + !/ENOTFOUND|certificate|ssl|tls|SSRF blocked|Could not resolve|SSL\/TLS error|URL is not allowed/i.test(userMessage) + ) { + const frequencyMinutes = monitor.frequency === "hourly" ? 60 : 1440; + const minsUntilNormal = + ((monitor.lastChecked ? 
new Date(monitor.lastChecked).getTime() : 0) + + frequencyMinutes * 60 * 1000 + - Date.now()) / (60 * 1000); + + if (minsUntilNormal > 45) { + try { + const retryAt = new Date(Date.now() + 35 * 60 * 1000); + await db.update(monitors) + .set({ pendingRetryAt: retryAt }) + .where(eq(monitors.id, monitor.id)); + console.log(`[AutoRetry] Monitor ${monitor.id} — retry scheduled at ${retryAt.toISOString()}`); + } catch (err) { + console.error(`[AutoRetry] Failed to set pendingRetryAt for monitor ${monitor.id}:`, + err instanceof Error ? err.message : err); + } + } else { + console.log(`[AutoRetry] Monitor ${monitor.id} — skipped (next normal check in ${Math.round(minsUntilNormal)} min)`); + } + } + return { changed: false, currentValue: monitor.currentValue, diff --git a/shared/schema.ts b/shared/schema.ts index 3777025..10fae55 100644 --- a/shared/schema.ts +++ b/shared/schema.ts @@ -27,6 +27,7 @@ export const monitors = pgTable("monitors", { pauseReason: text("pause_reason"), healthAlertSentAt: timestamp("health_alert_sent_at"), lastHealthyAt: timestamp("last_healthy_at"), + pendingRetryAt: timestamp("pending_retry_at"), createdAt: timestamp("created_at").defaultNow().notNull(), }); @@ -476,6 +477,7 @@ export const insertMonitorSchema = createInsertSchema(monitors).omit({ pauseReason: true, healthAlertSentAt: true, lastHealthyAt: true, + pendingRetryAt: true, createdAt: true }); From 51feeada5f1f286efe6228f329601013302181a4 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 1 Apr 2026 08:05:36 +0000 Subject: [PATCH 2/3] fix: address skeptic review findings for auto-retry - Extract maybeScheduleAutoRetry helper to eliminate duplication - Check handleMonitorFailure paused return to skip retry for paused monitors - Fix stale lastChecked in imminence calculation (use current time) - Move manual Check Now pendingRetryAt clear before checkMonitor call - Await DB update in scheduler finally block instead of fire-and-forget - Add test for auto-paused monitor skipping retry https://claude.ai/code/session_013FQG661RB6D6wMie2REkL6 --- server/routes.ts | 11 +-- server/services/scheduler.test.ts | 6 +- server/services/scheduler.ts | 15 ++-- server/services/scraper.test.ts | 24 ++++++- server/services/scraper.ts | 110 +++++++++++++++--------------- 5 files changed, 93 insertions(+), 73 deletions(-) diff --git a/server/routes.ts b/server/routes.ts index 28398f0..143dc57 100644 --- a/server/routes.ts +++ b/server/routes.ts @@ -586,14 +586,17 @@ export async function registerRoutes( if (!existing) return res.status(404).json({ message: "Not found" }); if (String(existing.userId) !== String(req.user.claims.sub)) return res.status(403).json({ message: "Forbidden" }); - const result = await checkMonitor(existing); - - // Clear any pending auto-retry since the user triggered a manual check + // Clear any pending auto-retry before the manual check to prevent + // a narrow race where the scheduler cron fires a duplicate check. await db.update(monitors) .set({ pendingRetryAt: null }) .where(eq(monitors.id, id)) - .catch(() => {}); // best-effort + .catch((err: unknown) => { + console.error(`[AutoRetry] Failed to clear pendingRetryAt for monitor ${id}:`, + err instanceof Error ? 
err.message : err); + }); + const result = await checkMonitor(existing); res.json(result); } catch (error: any) { console.error("[Check Monitor] Error:", error.message); diff --git a/server/services/scheduler.test.ts b/server/services/scheduler.test.ts index 56ae7ed..b385944 100644 --- a/server/services/scheduler.test.ts +++ b/server/services/scheduler.test.ts @@ -65,10 +65,8 @@ vi.mock("../db", () => ({ update: vi.fn().mockReturnValue({ set: (...args: any[]) => { mockDbUpdateSet(...args); - const whereFn = vi.fn().mockReturnValue({ - catch: vi.fn().mockReturnValue(Promise.resolve()), - then: (resolve: any) => Promise.resolve().then(resolve), - }); + const whereResult = Promise.resolve(); + const whereFn = vi.fn().mockReturnValue(whereResult); return { where: whereFn }; }, }), diff --git a/server/services/scheduler.ts b/server/services/scheduler.ts index 673dd63..47d05ab 100644 --- a/server/services/scheduler.ts +++ b/server/services/scheduler.ts @@ -134,13 +134,14 @@ async function runCheckWithLimit(monitor: Parameters[0]): P } finally { activeChecks--; if (hadPendingRetry) { - db.update(monitors) - .set({ pendingRetryAt: null }) - .where(eq(monitors.id, monitor.id)) - .catch((err: unknown) => { - console.error(`[AutoRetry] Failed to clear pendingRetryAt for monitor ${monitor.id}:`, - err instanceof Error ? err.message : err); - }); + try { + await db.update(monitors) + .set({ pendingRetryAt: null }) + .where(eq(monitors.id, monitor.id)); + } catch (err: unknown) { + console.error(`[AutoRetry] Failed to clear pendingRetryAt for monitor ${monitor.id}:`, + err instanceof Error ? err.message : err); + } } } } diff --git a/server/services/scraper.test.ts b/server/services/scraper.test.ts index b7d2711..5368ad0 100644 --- a/server/services/scraper.test.ts +++ b/server/services/scraper.test.ts @@ -6447,16 +6447,36 @@ describe("auto-retry scheduling (pendingRetryAt)", () => { expect(retrySetCall).toBeUndefined(); }); - it("does NOT set pendingRetryAt when next normal check is within 45 minutes", async () => { + it("sets pendingRetryAt for hourly monitors (next check > 45 min after failure)", async () => { vi.spyOn(globalThis, "fetch").mockRejectedValueOnce(new Error("Network failure")); - // hourly monitor checked 30 minutes ago — next normal check in ~30 min (< 45 min) + // hourly monitor — after failure, handleMonitorFailure updates lastChecked to now, + // so next normal check is 60 min away (> 45 min threshold) const monitor = makeMonitor({ frequency: "hourly", lastChecked: new Date(Date.now() - 30 * 60 * 1000), }); const result = await runWithTimers(monitor); + expect(result.status).toBe("error"); + const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt instanceof Date); + expect(retrySetCall).toBeDefined(); + }); + + it("does NOT set pendingRetryAt when monitor was just auto-paused", async () => { + vi.spyOn(globalThis, "fetch").mockRejectedValueOnce(new Error("Network failure")); + + // Simulate a monitor at the pause threshold — handleMonitorFailure returns paused: true + // The mockDbUpdateChain returns active: false to indicate the monitor was paused + mockDbUpdateChain(5, false); // returnedActive = false → paused + + const monitor = makeMonitor({ + frequency: "daily", + lastChecked: new Date(Date.now() - 2 * 60 * 60 * 1000), + consecutiveFailures: 4, + }); + const result = await runWithTimers(monitor); + expect(result.status).toBe("error"); const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt instanceof Date); expect(retrySetCall).toBeUndefined(); 
diff --git a/server/services/scraper.ts b/server/services/scraper.ts index 0188707..99f69ac 100644 --- a/server/services/scraper.ts +++ b/server/services/scraper.ts @@ -511,6 +511,51 @@ export function classifyOuterError(error: unknown): { userMessage: string; logCo return { userMessage: "Failed to fetch or parse the page. Verify the URL is accessible and the selector is correct.", logContext: "unclassified error" }; } +/** Permanent error patterns that should never trigger auto-retry. */ +const PERMANENT_ERROR_RE = /ENOTFOUND|certificate|ssl|tls|SSRF blocked|Could not resolve|SSL\/TLS error|URL is not allowed/i; + +/** + * Schedule a single auto-retry 35 minutes from now for transient scrape errors. + * Skips if the monitor was just paused, has a pending retry, the error is permanent, + * or the next normal check is imminent (within 45 minutes). + */ +async function maybeScheduleAutoRetry( + monitor: Monitor, + errorMessage: string, + wasPaused: boolean, +): Promise { + if ( + !monitor.active || + wasPaused || + monitor.pendingRetryAt || + PERMANENT_ERROR_RE.test(errorMessage) + ) { + return; + } + + const frequencyMinutes = monitor.frequency === "hourly" ? 60 : 1440; + // Use Date.now() as the effective lastChecked since handleMonitorFailure + // already updated it in the DB — avoids using the stale in-memory value. + const minsUntilNormal = frequencyMinutes - 0; // just checked → full interval ahead + // More precisely: since we just failed, lastChecked was just set to now, + // so the next normal check is ~frequencyMinutes from now. + + if (minsUntilNormal > 45) { + try { + const retryAt = new Date(Date.now() + 35 * 60 * 1000); + await db.update(monitors) + .set({ pendingRetryAt: retryAt }) + .where(eq(monitors.id, monitor.id)); + console.log(`[AutoRetry] Monitor ${monitor.id} — retry scheduled at ${retryAt.toISOString()}`); + } catch (err) { + console.error(`[AutoRetry] Failed to set pendingRetryAt for monitor ${monitor.id}:`, + err instanceof Error ? err.message : err); + } + } else { + console.log(`[AutoRetry] Monitor ${monitor.id} — skipped (next normal check in ${Math.round(minsUntilNormal)} min)`); + } +} + async function recordMetric( monitorId: number, stage: string, @@ -1461,40 +1506,17 @@ export async function checkMonitor(monitor: Monitor): Promise<{ error: null }; } else { + let wasPaused = false; try { - await handleMonitorFailure(monitor, finalStatus, finalError!, browserlessInfraFailure); + const result = await handleMonitorFailure(monitor, finalStatus, finalError!, browserlessInfraFailure); + wasPaused = result.paused; } catch (failureErr) { console.error(`[Scraper] handleMonitorFailure threw for monitor ${monitor.id}:`, failureErr); } // Self-heal: schedule a single auto-retry for transient errors - // Check both raw error patterns and sanitized user-facing messages - if ( - finalStatus === "error" && - monitor.active && - !monitor.pendingRetryAt && - !/ENOTFOUND|certificate|ssl|tls|SSRF blocked|Could not resolve|SSL\/TLS error|URL is not allowed/i.test(finalError ?? "") - ) { - const frequencyMinutes = monitor.frequency === "hourly" ? 60 : 1440; - const minsUntilNormal = - ((monitor.lastChecked ? 
new Date(monitor.lastChecked).getTime() : 0) - + frequencyMinutes * 60 * 1000 - - Date.now()) / (60 * 1000); - - if (minsUntilNormal > 45) { - try { - const retryAt = new Date(Date.now() + 35 * 60 * 1000); - await db.update(monitors) - .set({ pendingRetryAt: retryAt }) - .where(eq(monitors.id, monitor.id)); - console.log(`[AutoRetry] Monitor ${monitor.id} — retry scheduled at ${retryAt.toISOString()}`); - } catch (err) { - console.error(`[AutoRetry] Failed to set pendingRetryAt for monitor ${monitor.id}:`, - err instanceof Error ? err.message : err); - } - } else { - console.log(`[AutoRetry] Monitor ${monitor.id} — skipped (next normal check in ${Math.round(minsUntilNormal)} min)`); - } + if (finalStatus === "error") { + await maybeScheduleAutoRetry(monitor, finalError ?? "", wasPaused); } return { @@ -1528,40 +1550,16 @@ export async function checkMonitor(monitor: Monitor): Promise<{ ).catch(() => {}); } + let wasPaused = false; try { - await handleMonitorFailure(monitor, "error", userMessage, false); + const result = await handleMonitorFailure(monitor, "error", userMessage, false); + wasPaused = result.paused; } catch (failureErr) { console.error(`[Scraper] handleMonitorFailure threw in outer catch for monitor ${monitor.id}:`, failureErr); } // Self-heal: schedule a single auto-retry for transient errors - // Check both raw error patterns and sanitized user-facing messages - if ( - monitor.active && - !monitor.pendingRetryAt && - !/ENOTFOUND|certificate|ssl|tls|SSRF blocked|Could not resolve|SSL\/TLS error|URL is not allowed/i.test(userMessage) - ) { - const frequencyMinutes = monitor.frequency === "hourly" ? 60 : 1440; - const minsUntilNormal = - ((monitor.lastChecked ? new Date(monitor.lastChecked).getTime() : 0) - + frequencyMinutes * 60 * 1000 - - Date.now()) / (60 * 1000); - - if (minsUntilNormal > 45) { - try { - const retryAt = new Date(Date.now() + 35 * 60 * 1000); - await db.update(monitors) - .set({ pendingRetryAt: retryAt }) - .where(eq(monitors.id, monitor.id)); - console.log(`[AutoRetry] Monitor ${monitor.id} — retry scheduled at ${retryAt.toISOString()}`); - } catch (err) { - console.error(`[AutoRetry] Failed to set pendingRetryAt for monitor ${monitor.id}:`, - err instanceof Error ? 
err.message : err); - } - } else { - console.log(`[AutoRetry] Monitor ${monitor.id} — skipped (next normal check in ${Math.round(minsUntilNormal)} min)`); - } - } + await maybeScheduleAutoRetry(monitor, userMessage, wasPaused); return { changed: false, From 739020ec586e8fbd4e63b6307710f5df5d6f63a2 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 1 Apr 2026 11:09:20 +0000 Subject: [PATCH 3/3] =?UTF-8?q?fix:=20address=20CodeRabbit=20review=20?= =?UTF-8?q?=E2=80=94=20permanent=20HTTP=20errors=20and=20SSRF=20test?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add HTTP 4xx permanent error patterns (401, 403, 404, 410) to PERMANENT_ERROR_RE so monitors failing with permanent HTTP status codes don't schedule unnecessary auto-retries - Add test: SSRF-blocked URLs don't trigger auto-retry - Add test: HTTP 404 permanent errors don't trigger auto-retry https://claude.ai/code/session_013FQG661RB6D6wMie2REkL6 --- server/services/scraper.test.ts | 31 +++++++++++++++++++++++++++++++ server/services/scraper.ts | 6 ++++-- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/server/services/scraper.test.ts b/server/services/scraper.test.ts index 5368ad0..8056e59 100644 --- a/server/services/scraper.test.ts +++ b/server/services/scraper.test.ts @@ -6432,6 +6432,37 @@ describe("auto-retry scheduling (pendingRetryAt)", () => { expect(retrySetCall).toBeUndefined(); }); + it("does NOT set pendingRetryAt when SSRF validation blocks the URL", async () => { + vi.spyOn(globalThis, "fetch").mockRejectedValueOnce(new Error("SSRF blocked: URL is not allowed")); + + const monitor = makeMonitor({ + frequency: "daily", + lastChecked: new Date(Date.now() - 2 * 60 * 60 * 1000), + }); + const result = await runWithTimers(monitor); + + expect(result.status).toBe("error"); + const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt instanceof Date); + expect(retrySetCall).toBeUndefined(); + }); + + it("does NOT set pendingRetryAt for permanent HTTP 404 errors", async () => { + // Simulate a 404 response — the scraper classifies this as a permanent error + vi.spyOn(globalThis, "fetch").mockResolvedValueOnce( + new Response("Not Found", { status: 404 }) + ); + + const monitor = makeMonitor({ + frequency: "daily", + lastChecked: new Date(Date.now() - 2 * 60 * 60 * 1000), + }); + const result = await runWithTimers(monitor); + + expect(result.status).toBe("error"); + const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt instanceof Date); + expect(retrySetCall).toBeUndefined(); + }); + it("does NOT set pendingRetryAt when pendingRetryAt is already set on monitor", async () => { vi.spyOn(globalThis, "fetch").mockRejectedValueOnce(new Error("Network failure")); diff --git a/server/services/scraper.ts b/server/services/scraper.ts index 99f69ac..3719cf1 100644 --- a/server/services/scraper.ts +++ b/server/services/scraper.ts @@ -511,8 +511,10 @@ export function classifyOuterError(error: unknown): { userMessage: string; logCo return { userMessage: "Failed to fetch or parse the page. Verify the URL is accessible and the selector is correct.", logContext: "unclassified error" }; } -/** Permanent error patterns that should never trigger auto-retry. */ -const PERMANENT_ERROR_RE = /ENOTFOUND|certificate|ssl|tls|SSRF blocked|Could not resolve|SSL\/TLS error|URL is not allowed/i; +/** Permanent error patterns that should never trigger auto-retry. 
+ * Includes sanitized messages from classifyHttpStatus for permanent HTTP errors
+ * (401, 403, 404, 410, other 4xx except 429 which is transient). */
+const PERMANENT_ERROR_RE = /ENOTFOUND|certificate|ssl|tls|SSRF blocked|Could not resolve|SSL\/TLS error|URL is not allowed|Access denied.*HTTP 40[13]|Page not found.*HTTP 404|Page no longer exists.*HTTP 410|rejected the request.*HTTP 4/i;
 
 /**
  * Schedule a single auto-retry 35 minutes from now for transient scrape errors.