diff --git a/server/routes.ts b/server/routes.ts index 7cb5d2b..143dc57 100644 --- a/server/routes.ts +++ b/server/routes.ts @@ -19,7 +19,7 @@ import { ErrorLogger } from "./services/logger"; import { notificationTablesExist, channelTablesExist } from "./services/notificationReady"; import { BrowserlessUsageTracker, getMonthResetDate } from "./services/browserlessTracker"; import { ResendUsageTracker, getResendResetDate } from "./services/resendTracker"; -import { errorLogs, monitorMetrics } from "@shared/schema"; +import { errorLogs, monitorMetrics, monitors } from "@shared/schema"; import { generalRateLimiter, createMonitorRateLimiter, @@ -248,6 +248,7 @@ export async function registerRoutes( pauseReason: null, healthAlertSentAt: null, lastHealthyAt: null, + pendingRetryAt: null, createdAt: new Date() }; @@ -585,6 +586,16 @@ export async function registerRoutes( if (!existing) return res.status(404).json({ message: "Not found" }); if (String(existing.userId) !== String(req.user.claims.sub)) return res.status(403).json({ message: "Forbidden" }); + // Clear any pending auto-retry before the manual check to prevent + // a narrow race where the scheduler cron fires a duplicate check. + await db.update(monitors) + .set({ pendingRetryAt: null }) + .where(eq(monitors.id, id)) + .catch((err: unknown) => { + console.error(`[AutoRetry] Failed to clear pendingRetryAt for monitor ${id}:`, + err instanceof Error ? err.message : err); + }); + const result = await checkMonitor(existing); res.json(result); } catch (error: any) { diff --git a/server/services/scheduler.test.ts b/server/services/scheduler.test.ts index 720ba38..b385944 100644 --- a/server/services/scheduler.test.ts +++ b/server/services/scheduler.test.ts @@ -8,6 +8,7 @@ const { mockGetAllActiveMonitors, mockCleanupPollutedValues, mockDbExecute, + mockDbUpdateSet, cronCallbacks, mockMonitorsNeedingRetry, mockDeliverWebhook, @@ -16,6 +17,7 @@ const { mockGetAllActiveMonitors: vi.fn().mockResolvedValue([]), mockCleanupPollutedValues: vi.fn().mockResolvedValue(undefined), mockDbExecute: vi.fn().mockResolvedValue({ rowCount: 0 }), + mockDbUpdateSet: vi.fn(), cronCallbacks: {} as Record Promise>>, mockMonitorsNeedingRetry: new Set(), mockDeliverWebhook: vi.fn().mockResolvedValue({ success: true, statusCode: 200 }), @@ -60,6 +62,14 @@ vi.mock("./logger", () => ({ vi.mock("../db", () => ({ db: { execute: (...args: any[]) => mockDbExecute(...args), + update: vi.fn().mockReturnValue({ + set: (...args: any[]) => { + mockDbUpdateSet(...args); + const whereResult = Promise.resolve(); + const whereFn = vi.fn().mockReturnValue(whereResult); + return { where: whereFn }; + }, + }), }, })); @@ -163,6 +173,7 @@ function makeMonitor(overrides: Partial = {}): Monitor { pauseReason: null, healthAlertSentAt: null, lastHealthyAt: null, + pendingRetryAt: null, createdAt: new Date(), ...overrides, }; @@ -399,6 +410,75 @@ describe("startScheduler", () => { resolver!(); await Promise.resolve(); }); + + // ----------------------------------------------------------------------- + // auto-retry scheduler pickup (pendingRetryAt) + // ----------------------------------------------------------------------- + + it("triggers check for monitor with pendingRetryAt <= now", async () => { + const monitor = makeMonitor({ + frequency: "hourly", + lastChecked: new Date(Date.now() - 30 * 60 * 1000), // 30 min ago — not normally due + pendingRetryAt: new Date(Date.now() - 1000), // 1 second in the past + }); + mockGetAllActiveMonitors.mockResolvedValueOnce([monitor]); + + await startScheduler(); + await runCron("* * * * *"); + await vi.advanceTimersByTimeAsync(31000); + + expect(mockCheckMonitor).toHaveBeenCalledWith(monitor); + }); + + it("does NOT trigger check for monitor with pendingRetryAt in the future", async () => { + const monitor = makeMonitor({ + frequency: "hourly", + lastChecked: new Date(Date.now() - 30 * 60 * 1000), // 30 min ago — not normally due + pendingRetryAt: new Date(Date.now() + 30 * 60 * 1000), // 30 min in the future + }); + mockGetAllActiveMonitors.mockResolvedValueOnce([monitor]); + + await startScheduler(); + await runCron("* * * * *"); + await vi.advanceTimersByTimeAsync(31000); + + expect(mockCheckMonitor).not.toHaveBeenCalled(); + }); + + it("clears pendingRetryAt after retry fires (success path)", async () => { + const monitor = makeMonitor({ + frequency: "hourly", + lastChecked: new Date(Date.now() - 30 * 60 * 1000), + pendingRetryAt: new Date(Date.now() - 1000), + }); + mockGetAllActiveMonitors.mockResolvedValueOnce([monitor]); + + await startScheduler(); + await runCron("* * * * *"); + await vi.advanceTimersByTimeAsync(31000); + + expect(mockCheckMonitor).toHaveBeenCalledWith(monitor); + // The finally block should clear pendingRetryAt + expect(mockDbUpdateSet).toHaveBeenCalledWith({ pendingRetryAt: null }); + }); + + it("clears pendingRetryAt after retry fires (failure path)", async () => { + mockCheckMonitor.mockRejectedValueOnce(new Error("Scrape failed")); + const monitor = makeMonitor({ + frequency: "hourly", + lastChecked: new Date(Date.now() - 30 * 60 * 1000), + pendingRetryAt: new Date(Date.now() - 1000), + }); + mockGetAllActiveMonitors.mockResolvedValueOnce([monitor]); + + await startScheduler(); + await runCron("* * * * *"); + await vi.advanceTimersByTimeAsync(31000); + + expect(mockCheckMonitor).toHaveBeenCalledWith(monitor); + // Even on failure, pendingRetryAt should be cleared + expect(mockDbUpdateSet).toHaveBeenCalledWith({ pendingRetryAt: null }); + }); }); describe("concurrency limiting (runCheckWithLimit)", () => { @@ -1290,3 +1370,4 @@ describe("webhook retry cumulative backoff", () => { })); }); }); + diff --git a/server/services/scheduler.ts b/server/services/scheduler.ts index 2195f1e..47d05ab 100644 --- a/server/services/scheduler.ts +++ b/server/services/scheduler.ts @@ -10,7 +10,8 @@ import { ensureMonitorConditionsTable } from "./ensureTables"; import { processAutomatedCampaigns } from "./automatedCampaigns"; import { isTransientDbError } from "../utils/dbErrors"; import { db } from "../db"; -import { sql } from "drizzle-orm"; +import { eq, sql } from "drizzle-orm"; +import { monitors } from "@shared/schema"; // Keep below DB pool max (3, see db.ts) to leave headroom for cron jobs and // API requests. Browser POOL_MAX is 1 (browserPool.ts), so the second @@ -110,9 +111,17 @@ async function runCheckWithLimit(monitor: Parameters[0]): P console.debug(`[Scheduler] Concurrency limit reached, deferring monitor ${monitor.id}`); return false; } + + const hadPendingRetry = !!( + monitor.pendingRetryAt && new Date(monitor.pendingRetryAt) <= new Date() + ); + activeChecks++; try { await checkMonitor(monitor); + if (hadPendingRetry) { + console.log(`[AutoRetry] Monitor ${monitor.id} — retry completed`); + } return true; } catch (error) { await ErrorLogger.error("scheduler", `"${monitor.name}" — scheduled check failed. This is usually a temporary issue. If it persists, verify the URL is still valid and the selector matches the page.`, error instanceof Error ? error : null, { @@ -124,6 +133,16 @@ async function runCheckWithLimit(monitor: Parameters[0]): P return true; } finally { activeChecks--; + if (hadPendingRetry) { + try { + await db.update(monitors) + .set({ pendingRetryAt: null }) + .where(eq(monitors.id, monitor.id)); + } catch (err: unknown) { + console.error(`[AutoRetry] Failed to clear pendingRetryAt for monitor ${monitor.id}:`, + err instanceof Error ? err.message : err); + } + } } } @@ -218,6 +237,11 @@ export async function startScheduler() { } } + // Auto-retry: fire if pendingRetryAt window has elapsed + if (!shouldCheck && monitor.pendingRetryAt && new Date(monitor.pendingRetryAt) <= now) { + shouldCheck = true; + } + if (shouldCheck) { const jitterMs = Math.floor(Math.random() * 30000); trackTimeout(() => { diff --git a/server/services/scraper.test.ts b/server/services/scraper.test.ts index 1fd55bc..8056e59 100644 --- a/server/services/scraper.test.ts +++ b/server/services/scraper.test.ts @@ -161,6 +161,7 @@ function makeMonitor(overrides: Partial = {}): Monitor { pauseReason: null, healthAlertSentAt: null, lastHealthyAt: null, + pendingRetryAt: null, createdAt: new Date(), ...overrides, }; @@ -6280,3 +6281,255 @@ describe("updateLastHealthyAt on success", () => { } }); }); + +// --------------------------------------------------------------------------- +// auto-retry scheduling (pendingRetryAt) +// --------------------------------------------------------------------------- +describe("auto-retry scheduling (pendingRetryAt)", () => { + const mockStorage = storage as unknown as { + updateMonitor: ReturnType; + addMonitorChange: ReturnType; + getUser: ReturnType; + }; + const mockDb = db as unknown as { + insert: ReturnType; + update: ReturnType; + }; + + // Track calls to db.update().set() so we can inspect pendingRetryAt writes + let dbUpdateSetCalls: any[]; + + function mockDbUpdateChain(returnedFailureCount: number, returnedActive = true) { + dbUpdateSetCalls = []; + const returningFn = vi.fn().mockResolvedValue([{ + consecutiveFailures: returnedFailureCount, + active: returnedActive, + }]); + const whereFn = vi.fn().mockReturnValue({ returning: returningFn }); + const setFn = vi.fn().mockImplementation((args: any) => { + dbUpdateSetCalls.push(args); + return { where: whereFn, returning: returningFn }; + }); + mockDb.update.mockReturnValue({ set: setFn }); + return { setFn, whereFn, returningFn }; + } + + beforeEach(() => { + vi.useFakeTimers(); + vi.clearAllMocks(); + delete process.env.BROWSERLESS_TOKEN; + mockDbUpdateChain(1, true); + mockStorage.getUser.mockResolvedValue({ id: "user1", tier: "free" }); + }); + + afterEach(() => { + vi.useRealTimers(); + vi.restoreAllMocks(); + }); + + async function runWithTimers(monitor: Monitor) { + const promise = checkMonitor(monitor); + await vi.advanceTimersByTimeAsync(3000); + return promise; + } + + it("sets pendingRetryAt ~35 min after a transient 'error' result", async () => { + vi.spyOn(globalThis, "fetch").mockRejectedValueOnce(new Error("Network failure")); + + // daily monitor checked 2 hours ago — next normal check is ~22h away (> 45 min) + const monitor = makeMonitor({ + frequency: "daily", + lastChecked: new Date(Date.now() - 2 * 60 * 60 * 1000), + }); + const result = await runWithTimers(monitor); + + expect(result.status).toBe("error"); + // Should have set pendingRetryAt via db.update + const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt !== undefined && c.pendingRetryAt !== null); + expect(retrySetCall).toBeDefined(); + const retryAt = retrySetCall.pendingRetryAt as Date; + // Should be ~35 minutes from now + const diffMinutes = (retryAt.getTime() - Date.now()) / (60 * 1000); + expect(diffMinutes).toBeGreaterThan(34); + expect(diffMinutes).toBeLessThan(36); + }); + + it("does NOT set pendingRetryAt for 'blocked' status", async () => { + const html = `Access DeniedForbidden`; + vi.spyOn(globalThis, "fetch").mockResolvedValueOnce( + new Response(html, { status: 200 }) + ); + + const monitor = makeMonitor({ + frequency: "daily", + lastChecked: new Date(Date.now() - 2 * 60 * 60 * 1000), + }); + const result = await runWithTimers(monitor); + + expect(result.status).toBe("blocked"); + const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt instanceof Date); + expect(retrySetCall).toBeUndefined(); + }); + + it("does NOT set pendingRetryAt for 'selector_missing' status", async () => { + const html = `

No match

`; + vi.spyOn(globalThis, "fetch") + .mockResolvedValueOnce(new Response(html, { status: 200 })) + .mockResolvedValueOnce(new Response(html, { status: 200 })); + + const monitor = makeMonitor({ + selector: ".missing", + frequency: "daily", + lastChecked: new Date(Date.now() - 2 * 60 * 60 * 1000), + }); + const result = await runWithTimers(monitor); + + expect(result.status).toBe("selector_missing"); + const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt instanceof Date); + expect(retrySetCall).toBeUndefined(); + }); + + it("does NOT set pendingRetryAt when error contains ENOTFOUND", async () => { + vi.spyOn(globalThis, "fetch").mockRejectedValueOnce(new Error("getaddrinfo ENOTFOUND example.invalid")); + + const monitor = makeMonitor({ + frequency: "daily", + lastChecked: new Date(Date.now() - 2 * 60 * 60 * 1000), + }); + const result = await runWithTimers(monitor); + + expect(result.status).toBe("error"); + const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt instanceof Date); + expect(retrySetCall).toBeUndefined(); + }); + + it("does NOT set pendingRetryAt when error contains 'certificate'", async () => { + vi.spyOn(globalThis, "fetch").mockRejectedValueOnce(new Error("certificate has expired")); + + const monitor = makeMonitor({ + frequency: "daily", + lastChecked: new Date(Date.now() - 2 * 60 * 60 * 1000), + }); + const result = await runWithTimers(monitor); + + expect(result.status).toBe("error"); + const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt instanceof Date); + expect(retrySetCall).toBeUndefined(); + }); + + it("does NOT set pendingRetryAt when monitor.active is false", async () => { + vi.spyOn(globalThis, "fetch").mockRejectedValueOnce(new Error("Network failure")); + + const monitor = makeMonitor({ + active: false, + frequency: "daily", + lastChecked: new Date(Date.now() - 2 * 60 * 60 * 1000), + }); + const result = await runWithTimers(monitor); + + expect(result.status).toBe("error"); + const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt instanceof Date); + expect(retrySetCall).toBeUndefined(); + }); + + it("does NOT set pendingRetryAt when SSRF validation blocks the URL", async () => { + vi.spyOn(globalThis, "fetch").mockRejectedValueOnce(new Error("SSRF blocked: URL is not allowed")); + + const monitor = makeMonitor({ + frequency: "daily", + lastChecked: new Date(Date.now() - 2 * 60 * 60 * 1000), + }); + const result = await runWithTimers(monitor); + + expect(result.status).toBe("error"); + const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt instanceof Date); + expect(retrySetCall).toBeUndefined(); + }); + + it("does NOT set pendingRetryAt for permanent HTTP 404 errors", async () => { + // Simulate a 404 response — the scraper classifies this as a permanent error + vi.spyOn(globalThis, "fetch").mockResolvedValueOnce( + new Response("Not Found", { status: 404 }) + ); + + const monitor = makeMonitor({ + frequency: "daily", + lastChecked: new Date(Date.now() - 2 * 60 * 60 * 1000), + }); + const result = await runWithTimers(monitor); + + expect(result.status).toBe("error"); + const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt instanceof Date); + expect(retrySetCall).toBeUndefined(); + }); + + it("does NOT set pendingRetryAt when pendingRetryAt is already set on monitor", async () => { + vi.spyOn(globalThis, "fetch").mockRejectedValueOnce(new Error("Network failure")); + + const monitor = makeMonitor({ + frequency: "daily", + lastChecked: new Date(Date.now() - 2 * 60 * 60 * 1000), + pendingRetryAt: new Date(Date.now() + 30 * 60 * 1000), // already set + }); + const result = await runWithTimers(monitor); + + expect(result.status).toBe("error"); + const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt instanceof Date); + expect(retrySetCall).toBeUndefined(); + }); + + it("sets pendingRetryAt for hourly monitors (next check > 45 min after failure)", async () => { + vi.spyOn(globalThis, "fetch").mockRejectedValueOnce(new Error("Network failure")); + + // hourly monitor — after failure, handleMonitorFailure updates lastChecked to now, + // so next normal check is 60 min away (> 45 min threshold) + const monitor = makeMonitor({ + frequency: "hourly", + lastChecked: new Date(Date.now() - 30 * 60 * 1000), + }); + const result = await runWithTimers(monitor); + + expect(result.status).toBe("error"); + const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt instanceof Date); + expect(retrySetCall).toBeDefined(); + }); + + it("does NOT set pendingRetryAt when monitor was just auto-paused", async () => { + vi.spyOn(globalThis, "fetch").mockRejectedValueOnce(new Error("Network failure")); + + // Simulate a monitor at the pause threshold — handleMonitorFailure returns paused: true + // The mockDbUpdateChain returns active: false to indicate the monitor was paused + mockDbUpdateChain(5, false); // returnedActive = false → paused + + const monitor = makeMonitor({ + frequency: "daily", + lastChecked: new Date(Date.now() - 2 * 60 * 60 * 1000), + consecutiveFailures: 4, + }); + const result = await runWithTimers(monitor); + + expect(result.status).toBe("error"); + const retrySetCall = dbUpdateSetCalls.find((c: any) => c.pendingRetryAt instanceof Date); + expect(retrySetCall).toBeUndefined(); + }); + + it("clears pendingRetryAt (null) on successful check via storage.updateMonitor", async () => { + const html = `$5.00`; + vi.spyOn(globalThis, "fetch").mockResolvedValueOnce( + new Response(html, { status: 200 }) + ); + + const monitor = makeMonitor({ + currentValue: "$5.00", + pendingRetryAt: new Date(Date.now() + 30 * 60 * 1000), + }); + await runWithTimers(monitor); + + expect(mockStorage.updateMonitor).toHaveBeenCalledWith( + 1, + expect.objectContaining({ + pendingRetryAt: null, + }) + ); + }); +}); diff --git a/server/services/scraper.ts b/server/services/scraper.ts index 3201c4a..3719cf1 100644 --- a/server/services/scraper.ts +++ b/server/services/scraper.ts @@ -511,6 +511,53 @@ export function classifyOuterError(error: unknown): { userMessage: string; logCo return { userMessage: "Failed to fetch or parse the page. Verify the URL is accessible and the selector is correct.", logContext: "unclassified error" }; } +/** Permanent error patterns that should never trigger auto-retry. + * Includes sanitized messages from classifyHttpStatus for permanent HTTP errors + * (401, 403, 404, 410, other 4xx except 429 which is transient). */ +const PERMANENT_ERROR_RE = /ENOTFOUND|certificate|ssl|tls|SSRF blocked|Could not resolve|SSL\/TLS error|URL is not allowed|Access denied.*HTTP 40[13]|Page not found.*HTTP 404|Page no longer exists.*HTTP 410|rejected the request.*HTTP 4/i; + +/** + * Schedule a single auto-retry 35 minutes from now for transient scrape errors. + * Skips if the monitor was just paused, has a pending retry, the error is permanent, + * or the next normal check is imminent (within 45 minutes). + */ +async function maybeScheduleAutoRetry( + monitor: Monitor, + errorMessage: string, + wasPaused: boolean, +): Promise { + if ( + !monitor.active || + wasPaused || + monitor.pendingRetryAt || + PERMANENT_ERROR_RE.test(errorMessage) + ) { + return; + } + + const frequencyMinutes = monitor.frequency === "hourly" ? 60 : 1440; + // Use Date.now() as the effective lastChecked since handleMonitorFailure + // already updated it in the DB — avoids using the stale in-memory value. + const minsUntilNormal = frequencyMinutes - 0; // just checked → full interval ahead + // More precisely: since we just failed, lastChecked was just set to now, + // so the next normal check is ~frequencyMinutes from now. + + if (minsUntilNormal > 45) { + try { + const retryAt = new Date(Date.now() + 35 * 60 * 1000); + await db.update(monitors) + .set({ pendingRetryAt: retryAt }) + .where(eq(monitors.id, monitor.id)); + console.log(`[AutoRetry] Monitor ${monitor.id} — retry scheduled at ${retryAt.toISOString()}`); + } catch (err) { + console.error(`[AutoRetry] Failed to set pendingRetryAt for monitor ${monitor.id}:`, + err instanceof Error ? err.message : err); + } + } else { + console.log(`[AutoRetry] Monitor ${monitor.id} — skipped (next normal check in ${Math.round(minsUntilNormal)} min)`); + } +} + async function recordMetric( monitorId: number, stage: string, @@ -1344,6 +1391,7 @@ export async function checkMonitor(monitor: Monitor): Promise<{ lastStatus: finalStatus, lastError: null, consecutiveFailures: 0, + pendingRetryAt: null, }); } catch (dbError) { // Retry once after a short delay for transient DB errors. @@ -1355,6 +1403,7 @@ export async function checkMonitor(monitor: Monitor): Promise<{ lastStatus: finalStatus, lastError: null, consecutiveFailures: 0, + pendingRetryAt: null, }); } catch (retryError) { // Both attempts failed. Transient DB errors (connection drops) are @@ -1459,12 +1508,19 @@ export async function checkMonitor(monitor: Monitor): Promise<{ error: null }; } else { + let wasPaused = false; try { - await handleMonitorFailure(monitor, finalStatus, finalError!, browserlessInfraFailure); + const result = await handleMonitorFailure(monitor, finalStatus, finalError!, browserlessInfraFailure); + wasPaused = result.paused; } catch (failureErr) { console.error(`[Scraper] handleMonitorFailure threw for monitor ${monitor.id}:`, failureErr); } + // Self-heal: schedule a single auto-retry for transient errors + if (finalStatus === "error") { + await maybeScheduleAutoRetry(monitor, finalError ?? "", wasPaused); + } + return { changed: false, currentValue: oldValue, @@ -1496,12 +1552,17 @@ export async function checkMonitor(monitor: Monitor): Promise<{ ).catch(() => {}); } + let wasPaused = false; try { - await handleMonitorFailure(monitor, "error", userMessage, false); + const result = await handleMonitorFailure(monitor, "error", userMessage, false); + wasPaused = result.paused; } catch (failureErr) { console.error(`[Scraper] handleMonitorFailure threw in outer catch for monitor ${monitor.id}:`, failureErr); } + // Self-heal: schedule a single auto-retry for transient errors + await maybeScheduleAutoRetry(monitor, userMessage, wasPaused); + return { changed: false, currentValue: monitor.currentValue, diff --git a/shared/schema.ts b/shared/schema.ts index 3777025..10fae55 100644 --- a/shared/schema.ts +++ b/shared/schema.ts @@ -27,6 +27,7 @@ export const monitors = pgTable("monitors", { pauseReason: text("pause_reason"), healthAlertSentAt: timestamp("health_alert_sent_at"), lastHealthyAt: timestamp("last_healthy_at"), + pendingRetryAt: timestamp("pending_retry_at"), createdAt: timestamp("created_at").defaultNow().notNull(), }); @@ -476,6 +477,7 @@ export const insertMonitorSchema = createInsertSchema(monitors).omit({ pauseReason: true, healthAlertSentAt: true, lastHealthyAt: true, + pendingRetryAt: true, createdAt: true });