From 62e4a1b3c2be6164a2e504b71fb6c37f9487a0b6 Mon Sep 17 00:00:00 2001
From: Recoup Agent <agent@recoupable.com>
Date: Mon, 23 Mar 2026 17:47:18 +0000
Subject: [PATCH 1/4] =?UTF-8?q?feat:=20add=20agent-day=20task=20=E2=80=94?=
 =?UTF-8?q?=20autonomous=20Sunday=20feature=20implementation?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a new `agent-day` scheduled task that runs every Sunday at 10 AM ET
and implements a new feature end-to-end without human intervention:

1. Fetches recent commits from all 11 submodule repos via GitHub API
2. Uses Claude (Anthropic API) to plan the next most valuable feature
3. Triggers `coding-agent` to implement it and open PRs
4. Reviews each PR: waits for CI checks, assesses review comments with Claude
5. Applies valid feedback via `update-pr` task (up to 3 iterations per PR)
6. Tests Vercel preview deployments (health check for api/chat repos)
7. Merges approved PRs via GitHub API (squash merge)
8. Posts a summary to Slack channel #C08HN8RKJHZ

New utilities:
- src/github/fetchRecentSubmoduleCommits.ts
- src/github/waitForPRChecks.ts
- src/github/fetchPRReviews.ts
- src/github/mergePR.ts
- src/github/getVercelPreviewUrl.ts
- src/slack/postToSlackChannel.ts
- src/ai/generateFeaturePrompt.ts
- src/ai/assessPRFeedback.ts

Schema changes (backward compatible):
- codingAgentSchema: callbackThreadId is now optional
- updatePRSchema: callbackThreadId is now optional

Both coding-agent and update-pr tasks skip the Slack callback when
callbackThreadId is not provided (used when triggered programmatically).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/ai/__tests__/assessPRFeedback.test.ts     |  95 +++++++++
 .../__tests__/generateFeaturePrompt.test.ts   |  72 +++++++
 src/ai/assessPRFeedback.ts                    | 110 ++++++++++
 src/ai/generateFeaturePrompt.ts               | 108 ++++++++++
 src/github/__tests__/fetchPRReviews.test.ts   |  70 +++++++
 .../fetchRecentSubmoduleCommits.test.ts       |  78 ++++++++
 src/github/__tests__/mergePR.test.ts          |  49 +++++
 src/github/fetchPRReviews.ts                  |  84 ++++++++
 src/github/fetchRecentSubmoduleCommits.ts     |  56 ++++++
 src/github/getVercelPreviewUrl.ts             |  74 +++++++
 src/github/mergePR.ts                         |  39 ++++
 src/github/waitForPRChecks.ts                 |  90 +++++++++
 src/schemas/codingAgentSchema.ts              |   2 +-
 src/schemas/updatePRSchema.ts                 |   2 +-
 .../__tests__/postToSlackChannel.test.ts      |  60 ++++++
 src/slack/postToSlackChannel.ts               |  36 ++++
 src/tasks/agentDayTask.ts                     | 188 ++++++++++++++++++
 src/tasks/codingAgentTask.ts                  |  24 ++-
 src/tasks/updatePRTask.ts                     |  20 +-
 19 files changed, 1235 insertions(+), 22 deletions(-)
 create mode 100644 src/ai/__tests__/assessPRFeedback.test.ts
 create mode 100644 src/ai/__tests__/generateFeaturePrompt.test.ts
 create mode 100644 src/ai/assessPRFeedback.ts
 create mode 100644 src/ai/generateFeaturePrompt.ts
 create mode 100644 src/github/__tests__/fetchPRReviews.test.ts
 create mode 100644 src/github/__tests__/fetchRecentSubmoduleCommits.test.ts
 create mode 100644 src/github/__tests__/mergePR.test.ts
 create mode 100644 src/github/fetchPRReviews.ts
 create mode 100644 src/github/fetchRecentSubmoduleCommits.ts
 create mode 100644 src/github/getVercelPreviewUrl.ts
 create mode 100644 src/github/mergePR.ts
 create mode 100644 src/github/waitForPRChecks.ts
 create mode 100644 src/slack/__tests__/postToSlackChannel.test.ts
 create mode 100644 src/slack/postToSlackChannel.ts
 create mode 100644 src/tasks/agentDayTask.ts

diff --git a/src/ai/__tests__/assessPRFeedback.test.ts b/src/ai/__tests__/assessPRFeedback.test.ts
new file mode 100644
index 0000000..b99862d
--- /dev/null
+++ b/src/ai/__tests__/assessPRFeedback.test.ts
@@ -0,0 +1,95 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+
+vi.mock("@trigger.dev/sdk/v3", () => ({
+  logger: { log: vi.fn(), warn: vi.fn(), error: vi.fn() },
+}));
+
+const mockFetch = vi.fn();
+vi.stubGlobal("fetch", mockFetch);
+
+const { assessPRFeedback } = await import("../assessPRFeedback");
+
+const noFeedback = { reviews: [], comments: [] };
+const withFeedback = {
+  reviews: [
+    {
+      author: "reviewer",
+      body: "Missing error handling in the route handler.",
+      state: "CHANGES_REQUESTED",
+      submittedAt: "2026-03-01T12:00:00Z",
+    },
+  ],
+  comments: [],
+};
+
+beforeEach(() => {
+  vi.clearAllMocks();
+  process.env.ANTHROPIC_API_KEY = "test-key";
+});
+
+describe("assessPRFeedback", () => {
+  it("returns no actionable feedback when reviews and comments are empty", async () => {
+    const result = await assessPRFeedback("recoupable/api", "Build feature X", noFeedback);
+
+    expect(result.hasActionableFeedback).toBe(false);
+    expect(mockFetch).not.toHaveBeenCalled();
+  });
+
+  it("returns actionable feedback parsed from Claude response", async () => {
+    mockFetch.mockResolvedValueOnce({
+      ok: true,
+      json: async () => ({
+        content: [
+          {
+            type: "text",
+            text: JSON.stringify({
+              hasActionableFeedback: true,
+              feedbackSummary: "Add error handling",
+              implementation: "Wrap the handler in a try/catch and return 500 on error",
+            }),
+          },
+        ],
+      }),
+    });
+
+    const result = await assessPRFeedback(
+      "recoupable/api",
+      "Build feature X",
+      withFeedback,
+    );
+
+    expect(result.hasActionableFeedback).toBe(true);
+    expect(result.feedbackSummary).toBe("Add error handling");
+    expect(result.implementation).toContain("try/catch");
+  });
+
+  it("returns no actionable feedback when API key is missing", async () => {
+    delete process.env.ANTHROPIC_API_KEY;
+
+    const result = await assessPRFeedback("recoupable/api", "Build feature X", withFeedback);
+
+    expect(result.hasActionableFeedback).toBe(false);
+    expect(mockFetch).not.toHaveBeenCalled();
+  });
+
+  it("returns no actionable feedback when API call fails", async () => {
+    mockFetch.mockResolvedValueOnce({ ok: false, status: 500 });
+
+    const result = await assessPRFeedback("recoupable/api", "Feature", withFeedback);
+
+    expect(result.hasActionableFeedback).toBe(false);
+  });
+
+  it("returns no actionable feedback when JSON parsing fails", async () => {
+    mockFetch.mockResolvedValueOnce({
+      ok: true,
+      json: async () => ({
+        content: [{ type: "text", text: "not valid json" }],
+      }),
+    });
+
+    const result = await assessPRFeedback("recoupable/api", "Feature", withFeedback);
+
+    expect(result.hasActionableFeedback).toBe(false);
+  });
+});
diff --git a/src/ai/__tests__/generateFeaturePrompt.test.ts b/src/ai/__tests__/generateFeaturePrompt.test.ts
new file mode 100644
index 0000000..c5c76fc
--- /dev/null
+++ b/src/ai/__tests__/generateFeaturePrompt.test.ts
@@ -0,0 +1,72 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+
+vi.mock("@trigger.dev/sdk/v3", () => ({
+  logger: { log: vi.fn(), warn: vi.fn(), error: vi.fn() },
+}));
+
+const mockFetch = vi.fn();
+vi.stubGlobal("fetch", mockFetch);
+
+const { generateFeaturePrompt } = await import("../generateFeaturePrompt");
+
+const mockCommits = [
+  {
+    submodule: "api",
+    repo: "recoupable/api",
+    commits: [
+      { sha: "abc1234", message: "feat: add privy logins endpoint", author: "Dev", date: "2026-03-01T00:00:00Z" },
+    ],
+  },
+];
+
+beforeEach(() => {
+  vi.clearAllMocks();
+  process.env.ANTHROPIC_API_KEY = "test-key";
+});
+
+describe("generateFeaturePrompt", () => {
+  it("returns the Claude-generated prompt on success", async () => {
+    mockFetch.mockResolvedValueOnce({
+      ok: true,
+      json: async () => ({
+        content: [{ type: "text", text: "Build a feature X in the api submodule." }],
+      }),
+    });
+
+    const result = await generateFeaturePrompt(mockCommits);
+
+    expect(result).toBe("Build a feature X in the api submodule.");
+    expect(mockFetch).toHaveBeenCalledWith(
+      "https://api.anthropic.com/v1/messages",
+      expect.objectContaining({
+        method: "POST",
+        headers: expect.objectContaining({ "x-api-key": "test-key" }),
+      }),
+    );
+  });
+
+  it("returns fallback prompt when API key is missing", async () => {
+    delete process.env.ANTHROPIC_API_KEY;
+
+    const result = await generateFeaturePrompt(mockCommits);
+
+    expect(result).toContain("PROGRESS.md");
+    expect(mockFetch).not.toHaveBeenCalled();
+  });
+
+  it("returns fallback prompt when Anthropic API call fails", async () => {
+    mockFetch.mockResolvedValueOnce({ ok: false, status: 500 });
+
+    const result = await generateFeaturePrompt(mockCommits);
+
+    expect(result).toContain("PROGRESS.md");
+  });
+
+  it("returns fallback prompt when fetch throws", async () => {
+    mockFetch.mockRejectedValueOnce(new Error("Network error"));
+
+    const result = await generateFeaturePrompt(mockCommits);
+
+    expect(result).toContain("PROGRESS.md");
+  });
+});
diff --git a/src/ai/assessPRFeedback.ts b/src/ai/assessPRFeedback.ts
new file mode 100644
index 0000000..9d5c430
--- /dev/null
+++ b/src/ai/assessPRFeedback.ts
@@ -0,0 +1,110 @@
+import { logger } from "@trigger.dev/sdk/v3";
+import type { PRFeedback } from "../github/fetchPRReviews";
+
+export interface FeedbackAssessment {
+  hasActionableFeedback: boolean;
+  feedbackSummary: string;
+  implementation: string;
+}
+
+/**
+ * Uses the Anthropic API to assess PR review feedback and determine
+ * whether any changes should be implemented by the coding agent.
+ *
+ * Ignores automated bot comments and approvals without substantive feedback.
+ * Returns a structured assessment with what (if anything) should be changed.
+ */
+export async function assessPRFeedback(
+  repo: string,
+  featureDescription: string,
+  feedback: PRFeedback,
+): Promise<FeedbackAssessment> {
+  const noFeedback: FeedbackAssessment = {
+    hasActionableFeedback: false,
+    feedbackSummary: "No feedback",
+    implementation: "",
+  };
+
+  if (feedback.reviews.length === 0 && feedback.comments.length === 0) {
+    return noFeedback;
+  }
+
+  const apiKey = process.env.ANTHROPIC_API_KEY;
+
+  if (!apiKey) {
+    logger.warn("Missing ANTHROPIC_API_KEY — skipping feedback assessment");
+    return noFeedback;
+  }
+
+  const reviewsText = feedback.reviews
+    .map((r) => `Review by ${r.author} (${r.state}): ${r.body}`)
+    .join("\n");
+
+  const commentsText = feedback.comments
+    .map((c) => `Comment by ${c.author} on ${c.path}: ${c.body}`)
+    .join("\n");
+
+  const feedbackText = [reviewsText, commentsText].filter(Boolean).join("\n\n");
+
+  try {
+    const response = await fetch("https://api.anthropic.com/v1/messages", {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        "x-api-key": apiKey,
+        "anthropic-version": "2023-06-01",
+      },
+      body: JSON.stringify({
+        model: "claude-sonnet-4-5",
+        max_tokens: 512,
+        messages: [
+          {
+            role: "user",
+            content: [
+              `You are reviewing PR feedback for the repo "${repo}".`,
+              ``,
+              `Feature that was implemented: ${featureDescription.slice(0, 500)}`,
+              ``,
+              `PR feedback received:`,
+              feedbackText,
+              ``,
+              `Determine if there is actionable feedback that the coding agent should implement.`,
+              `Ignore: automated bot comments, approval messages, "LGTM", CI failure notices.`,
+              `Focus on: code quality issues, bugs, missing functionality, style violations.`,
+              ``,
+              `Respond with JSON only (no markdown):`,
+              `{"hasActionableFeedback": boolean, "feedbackSummary": "brief summary", "implementation": "specific changes to make, or empty string"}`,
+            ].join("\n"),
+          },
+        ],
+      }),
+    });
+
+    if (!response.ok) {
+      logger.error("Anthropic API error assessing PR feedback", { status: response.status });
+      return noFeedback;
+    }
+
+    const data = (await response.json()) as {
+      content: Array<{ type: string; text: string }>;
+    };
+
+    const text = data.content.find((c) => c.type === "text")?.text?.trim() ?? "";
+
+    try {
+      const parsed = JSON.parse(text) as FeedbackAssessment;
+      logger.log("PR feedback assessment complete", {
+        repo,
+        hasActionableFeedback: parsed.hasActionableFeedback,
+        summary: parsed.feedbackSummary,
+      });
+      return parsed;
+    } catch {
+      logger.warn("Failed to parse feedback assessment JSON", { text: text.slice(0, 200) });
+      return noFeedback;
+    }
+  } catch (error) {
+    logger.error("Failed to assess PR feedback", { error });
+    return noFeedback;
+  }
+}
diff --git a/src/ai/generateFeaturePrompt.ts b/src/ai/generateFeaturePrompt.ts
new file mode 100644
index 0000000..b5908ee
--- /dev/null
+++ b/src/ai/generateFeaturePrompt.ts
@@ -0,0 +1,108 @@
+import { logger } from "@trigger.dev/sdk/v3";
+import type { SubmoduleCommit } from "../github/fetchRecentSubmoduleCommits";
+
+const SYSTEM_PROMPT = `You are a senior software engineer on the Recoupable platform — a music industry management tool for record labels and artist managers.
+
+The platform has these main components:
+- chat: Next.js frontend where music managers chat with their AI agent
+- api: Backend API (Next.js) with AI/MCP tools, Supabase DB, Slack bot integration
+- tasks: Trigger.dev background jobs (pulse emails, content creation, coding agent)
+- admin: Internal admin dashboard (Next.js)
+- cli: Command-line interface for power users
+- docs: API documentation (Mintlify)
+
+Your task: analyze recent commits and propose the single most valuable small feature to implement next.
+
+Rules:
+- Pick something that builds naturally on recent work
+- Keep it focused — a single, shippable improvement
+- Favor real user value (music managers need to manage artists, track metrics, send communications)
+- DO NOT suggest refactors, tests, or documentation updates
+
+Respond with ONLY an implementation prompt for an AI coding agent. The prompt should:
+- Say exactly what to build and in which submodule(s)
+- Reference specific files/routes/components when relevant
+- Include clear acceptance criteria
+- NOT ask for planning or approval — just direct the agent to implement it`;
+
+/**
+ * Uses the Anthropic API to generate an actionable feature implementation prompt
+ * based on the recent commit history across monorepo submodules.
+ *
+ * Falls back to a generic improvement prompt if the API call fails.
+ */
+export async function generateFeaturePrompt(
+  recentCommits: SubmoduleCommit[],
+): Promise<string> {
+  const apiKey = process.env.ANTHROPIC_API_KEY;
+
+  if (!apiKey) {
+    logger.warn("Missing ANTHROPIC_API_KEY — using fallback feature prompt");
+    return getFallbackPrompt();
+  }
+
+  const commitsContext = recentCommits
+    .map(
+      ({ submodule, commits }) =>
+        `### ${submodule}\n${commits.map((c) => `- ${c.sha} ${c.message} (${c.date.slice(0, 10)})`).join("\n")}`,
+    )
+    .join("\n\n");
+
+  const userMessage = `Here are the most recent commits across the Recoupable monorepo:
+
+${commitsContext}
+
+Based on this recent work, write a specific implementation prompt for an AI coding agent to implement the next most valuable feature.`;
+
+  try {
+    const response = await fetch("https://api.anthropic.com/v1/messages", {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        "x-api-key": apiKey,
+        "anthropic-version": "2023-06-01",
+      },
+      body: JSON.stringify({
+        model: "claude-sonnet-4-5",
+        max_tokens: 1024,
+        system: SYSTEM_PROMPT,
+        messages: [{ role: "user", content: userMessage }],
+      }),
+    });
+
+    if (!response.ok) {
+      logger.error("Anthropic API error generating feature prompt", { status: response.status });
+      return getFallbackPrompt();
+    }
+
+    const data = (await response.json()) as {
+      content: Array<{ type: string; text: string }>;
+    };
+
+    const text = data.content.find((c) => c.type === "text")?.text?.trim();
+
+    if (!text) {
+      logger.warn("Empty response from Anthropic API");
+      return getFallbackPrompt();
+    }
+
+    logger.log("Generated Agent Day feature prompt", { preview: text.slice(0, 200) });
+    return text;
+  } catch (error) {
+    logger.error("Failed to generate feature prompt", { error });
+    return getFallbackPrompt();
+  }
+}
+
+function getFallbackPrompt(): string {
+  return [
+    "Read PROGRESS_USAGE.md and PROGRESS.md in the mono repo codebase first.",
+    "",
+    "Review the last 10 commits across the api, chat, admin, and tasks submodules.",
+    "Identify the single most impactful small improvement that builds on recent work",
+    "— a bug fix, a missing endpoint, a UI polish, or a small new feature.",
+    "",
+    "Implement it end-to-end (API route + frontend if needed), write any relevant tests,",
+    "then update PROGRESS.md with what you built.",
+  ].join("\n");
+}
diff --git a/src/github/__tests__/fetchPRReviews.test.ts b/src/github/__tests__/fetchPRReviews.test.ts
new file mode 100644
index 0000000..f7a1952
--- /dev/null
+++ b/src/github/__tests__/fetchPRReviews.test.ts
@@ -0,0 +1,70 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+
+vi.mock("@trigger.dev/sdk/v3", () => ({
+  logger: { warn: vi.fn() },
+}));
+
+const mockFetch = vi.fn();
+vi.stubGlobal("fetch", mockFetch);
+
+const { fetchPRReviews } = await import("../fetchPRReviews");
+
+beforeEach(() => {
+  vi.clearAllMocks();
+  process.env.GITHUB_TOKEN = "test-token";
+});
+
+describe("fetchPRReviews", () => {
+  it("returns reviews and inline comments", async () => {
+    const mockReview = {
+      user: { login: "reviewer" },
+      body: "Please fix the error handling.",
+      state: "CHANGES_REQUESTED",
+      submitted_at: "2026-03-01T12:00:00Z",
+    };
+    const mockComment = {
+      user: { login: "reviewer" },
+      body: "This variable name is unclear.",
+      path: "lib/api/handler.ts",
+      created_at: "2026-03-01T12:05:00Z",
+    };
+
+    mockFetch
+      .mockResolvedValueOnce({ ok: true, json: async () => [mockReview] })
+      .mockResolvedValueOnce({ ok: true, json: async () => [mockComment] });
+
+    const result = await fetchPRReviews("recoupable/api", 42);
+
+    expect(result.reviews).toHaveLength(1);
+    expect(result.reviews[0].author).toBe("reviewer");
+    expect(result.reviews[0].body).toBe("Please fix the error handling.");
+    expect(result.reviews[0].state).toBe("CHANGES_REQUESTED");
+
+    expect(result.comments).toHaveLength(1);
+    expect(result.comments[0].author).toBe("reviewer");
+    expect(result.comments[0].path).toBe("lib/api/handler.ts");
+  });
+
+  it("filters out reviews with empty bodies", async () => {
+    const mockReview = { user: { login: "bot" }, body: "", state: "APPROVED", submitted_at: "" };
+
+    mockFetch
+      .mockResolvedValueOnce({ ok: true, json: async () => [mockReview] })
+      .mockResolvedValueOnce({ ok: true, json: async () => [] });
+
+    const result = await fetchPRReviews("recoupable/api", 1);
+
+    expect(result.reviews).toHaveLength(0);
+  });
+
+  it("returns empty arrays when fetches fail", async () => {
+    mockFetch
+      .mockResolvedValueOnce({ ok: false, status: 403 })
+      .mockResolvedValueOnce({ ok: false, status: 403 });
+
+    const result = await fetchPRReviews("recoupable/api", 99);
+
+    expect(result.reviews).toEqual([]);
+    expect(result.comments).toEqual([]);
+  });
+});
diff --git a/src/github/__tests__/fetchRecentSubmoduleCommits.test.ts b/src/github/__tests__/fetchRecentSubmoduleCommits.test.ts
new file mode 100644
index 0000000..4c4ee7c
--- /dev/null
+++ b/src/github/__tests__/fetchRecentSubmoduleCommits.test.ts
@@ -0,0 +1,78 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+
+vi.mock("@trigger.dev/sdk/v3", () => ({
+  logger: { warn: vi.fn() },
+}));
+
+const mockFetch = vi.fn();
+vi.stubGlobal("fetch", mockFetch);
+
+const { fetchRecentSubmoduleCommits } = await import("../fetchRecentSubmoduleCommits");
+
+beforeEach(() => {
+  vi.clearAllMocks();
+  process.env.GITHUB_TOKEN = "test-token";
+});
+
+describe("fetchRecentSubmoduleCommits", () => {
+  it("fetches commits for all submodules and formats them", async () => {
+    const mockCommit = {
+      sha: "abc1234def5678",
+      commit: {
+        message: "feat: add new feature\n\nLonger description",
+        author: { name: "Dev", date: "2026-03-01T10:00:00Z" },
+      },
+    };
+
+    mockFetch.mockResolvedValue({
+      ok: true,
+      json: async () => [mockCommit],
+    });
+
+    const results = await fetchRecentSubmoduleCommits();
+
+    expect(results.length).toBeGreaterThan(0);
+
+    const apiResult = results.find((r) => r.submodule === "api");
+    expect(apiResult).toBeDefined();
+    expect(apiResult!.repo).toBe("recoupable/api");
+    expect(apiResult!.commits[0].sha).toBe("abc1234"); // first 7 chars
+    expect(apiResult!.commits[0].message).toBe("feat: add new feature"); // first line only
+    expect(apiResult!.commits[0].author).toBe("Dev");
+  });
+
+  it("skips repos where the fetch fails", async () => {
+    mockFetch
+      .mockResolvedValueOnce({ ok: false, status: 404 }) // api fails
+      .mockResolvedValue({ ok: true, json: async () => [] }); // rest succeed
+
+    const results = await fetchRecentSubmoduleCommits();
+
+    const apiResult = results.find((r) => r.submodule === "api");
+    expect(apiResult).toBeUndefined();
+  });
+
+  it("skips repos where fetch throws", async () => {
+    mockFetch.mockRejectedValueOnce(new Error("Network error"));
+    mockFetch.mockResolvedValue({ ok: true, json: async () => [] });
+
+    const results = await fetchRecentSubmoduleCommits();
+    // Should not throw, just skip the errored repo
+    expect(Array.isArray(results)).toBe(true);
+  });
+
+  it("uses Authorization header with GITHUB_TOKEN", async () => {
+    mockFetch.mockResolvedValue({ ok: true, json: async () => [] });
+
+    await fetchRecentSubmoduleCommits();
+
+    expect(mockFetch).toHaveBeenCalledWith(
+      expect.any(String),
+      expect.objectContaining({
+        headers: expect.objectContaining({
+          Authorization: "token test-token",
+        }),
+      }),
+    );
+  });
+});
diff --git a/src/github/__tests__/mergePR.test.ts b/src/github/__tests__/mergePR.test.ts
new file mode 100644
index 0000000..9b015b2
--- /dev/null
+++ b/src/github/__tests__/mergePR.test.ts
@@ -0,0 +1,49 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+
+vi.mock("@trigger.dev/sdk/v3", () => ({
+  logger: { log: vi.fn(), error: vi.fn() },
+}));
+
+const mockFetch = vi.fn();
+vi.stubGlobal("fetch", mockFetch);
+
+const { mergePR } = await import("../mergePR");
+
+beforeEach(() => {
+  vi.clearAllMocks();
+  process.env.GITHUB_TOKEN = "test-token";
+});
+
+describe("mergePR", () => {
+  it("merges the PR and returns true on success", async () => {
+    mockFetch.mockResolvedValueOnce({ ok: true });
+
+    const result = await mergePR("recoupable/api", 42);
+
+    expect(result).toBe(true);
+    expect(mockFetch).toHaveBeenCalledWith(
+      "https://api.github.com/repos/recoupable/api/pulls/42/merge",
+      expect.objectContaining({
+        method: "PUT",
+        headers: expect.objectContaining({
+          Authorization: "token test-token",
+        }),
+      }),
+    );
+
+    const body = JSON.parse(mockFetch.mock.calls[0][1].body);
+    expect(body.merge_method).toBe("squash");
+  });
+
+  it("returns false when merge fails", async () => {
+    mockFetch.mockResolvedValueOnce({
+      ok: false,
+      status: 405,
+      text: async () => "Pull Request is not mergeable",
+    });
+
+    const result = await mergePR("recoupable/api", 42);
+
+    expect(result).toBe(false);
+  });
+});
diff --git a/src/github/fetchPRReviews.ts b/src/github/fetchPRReviews.ts
new file mode 100644
index 0000000..a376a01
--- /dev/null
+++ b/src/github/fetchPRReviews.ts
@@ -0,0 +1,84 @@
+import { logger } from "@trigger.dev/sdk/v3";
+
+export interface PRReview {
+  author: string;
+  body: string;
+  state: string;
+  submittedAt: string;
+}
+
+export interface PRLineComment {
+  author: string;
+  body: string;
+  path: string;
+  createdAt: string;
+}
+
+export interface PRFeedback {
+  reviews: PRReview[];
+  comments: PRLineComment[];
+}
+
+/**
+ * Fetches review bodies and inline comments from a GitHub PR.
+ *
+ * @param repo - GitHub repo in "owner/repo" format
+ * @param prNumber - The PR number
+ */
+export async function fetchPRReviews(repo: string, prNumber: number): Promise<PRFeedback> {
+  const token = process.env.GITHUB_TOKEN;
+  const headers = {
+    Authorization: `token ${token}`,
+    Accept: "application/vnd.github.v3+json",
+  };
+
+  const [reviewsRes, commentsRes] = await Promise.all([
+    fetch(`https://api.github.com/repos/${repo}/pulls/${prNumber}/reviews`, { headers }),
+    fetch(`https://api.github.com/repos/${repo}/pulls/${prNumber}/comments`, { headers }),
+  ]);
+
+  const reviews: PRReview[] = [];
+  const comments: PRLineComment[] = [];
+
+  if (reviewsRes.ok) {
+    const data = (await reviewsRes.json()) as Array<{
+      user: { login: string };
+      body: string;
+      state: string;
+      submitted_at: string;
+    }>;
+    reviews.push(
+      ...data
+        .filter((r) => r.body?.trim())
+        .map((r) => ({
+          author: r.user.login,
+          body: r.body,
+          state: r.state,
+          submittedAt: r.submitted_at,
+        })),
+    );
+  } else {
+    logger.warn(`Failed to fetch reviews for PR #${prNumber} in ${repo}`);
+  }
+
+  if (commentsRes.ok) {
+    const data = (await commentsRes.json()) as Array<{
+      user: { login: string };
+      body: string;
+      path: string;
+      created_at: string;
+    }>;
+    comments.push(
+      ...data.map((c) => ({
+        author: c.user.login,
+        body: c.body,
+        path: c.path,
+        createdAt: c.created_at,
+      })),
+    );
+  } else {
+    logger.warn(`Failed to fetch comments for PR #${prNumber} in ${repo}`);
+  }
+
+  return { reviews, comments };
+}
diff --git a/src/github/fetchRecentSubmoduleCommits.ts b/src/github/fetchRecentSubmoduleCommits.ts
new file mode 100644
index 0000000..7bcfc26
--- /dev/null
+++ b/src/github/fetchRecentSubmoduleCommits.ts
@@ -0,0 +1,56 @@
+import { logger } from "@trigger.dev/sdk/v3";
+import { SUBMODULE_CONFIG } from "../sandboxes/submoduleConfig";
+
+export interface SubmoduleCommit {
+  submodule: string;
+  repo: string;
+  commits: { sha: string; message: string; author: string; date: string }[];
+}
+
+/**
+ * Fetches the 5 most recent commits from each submodule repo via the GitHub REST API.
+ * Skips repos where the request fails (e.g. missing token or rate limit).
+ */
+export async function fetchRecentSubmoduleCommits(): Promise<SubmoduleCommit[]> {
+  const token = process.env.GITHUB_TOKEN;
+  const results: SubmoduleCommit[] = [];
+
+  for (const [submodule, { repo }] of Object.entries(SUBMODULE_CONFIG)) {
+    try {
+      const response = await fetch(
+        `https://api.github.com/repos/${repo}/commits?per_page=5`,
+        {
+          headers: {
+            Authorization: `token ${token}`,
+            Accept: "application/vnd.github.v3+json",
+          },
+        },
+      );
+
+      if (!response.ok) {
+        logger.warn(`Failed to fetch commits for ${repo}`, { status: response.status });
+        continue;
+      }
+
+      const data = (await response.json()) as Array<{
+        sha: string;
+        commit: { message: string; author: { name: string; date: string } };
+      }>;
+
+      results.push({
+        submodule,
+        repo,
+        commits: data.map((c) => ({
+          sha: c.sha.slice(0, 7),
+          message: c.commit.message.split("\n")[0],
+          author: c.commit.author.name,
+          date: c.commit.author.date,
+        })),
+      });
+    } catch (error) {
+      logger.warn(`Error fetching commits for ${repo}`, { error });
+    }
+  }
+
+  return results;
+}
diff --git a/src/github/getVercelPreviewUrl.ts b/src/github/getVercelPreviewUrl.ts
new file mode 100644
index 0000000..a7a8f72
--- /dev/null
+++ b/src/github/getVercelPreviewUrl.ts
@@ -0,0 +1,74 @@
+import { logger } from "@trigger.dev/sdk/v3";
+
+/**
+ * Finds the Vercel preview deployment URL for a PR by inspecting
+ * check runs created by the Vercel GitHub integration.
+ * Returns null if no Vercel deployment is found.
+ *
+ * @param repo - GitHub repo in "owner/repo" format
+ * @param prNumber - The PR number
+ */
+export async function getVercelPreviewUrl(
+  repo: string,
+  prNumber: number,
+): Promise<string | null> {
+  const token = process.env.GITHUB_TOKEN;
+  const headers = {
+    Authorization: `token ${token}`,
+    Accept: "application/vnd.github.v3+json",
+  };
+
+  const prRes = await fetch(`https://api.github.com/repos/${repo}/pulls/${prNumber}`, {
+    headers,
+  });
+
+  if (!prRes.ok) return null;
+
+  const pr = (await prRes.json()) as { head: { sha: string } };
+
+  const checksRes = await fetch(
+    `https://api.github.com/repos/${repo}/commits/${pr.head.sha}/check-runs`,
+    { headers },
+  );
+
+  if (!checksRes.ok) return null;
+
+  const checksData = (await checksRes.json()) as {
+    check_runs: Array<{ name: string; details_url: string | null; conclusion: string | null }>;
+  };
+
+  const vercelCheck = checksData.check_runs.find(
+    (c) => c.name.toLowerCase().includes("vercel") && c.details_url?.includes("vercel.app"),
+  );
+
+  if (!vercelCheck?.details_url) return null;
+
+  // Vercel details_url is the inspect URL: https://vercel.com/...
+  // Extract the preview deployment URL from the deployment status instead
+  const statusRes = await fetch(
+    `https://api.github.com/repos/${repo}/commits/${pr.head.sha}/statuses`,
+    { headers },
+  );
+
+  if (!statusRes.ok) return null;
+
+  const statuses = (await statusRes.json()) as Array<{
+    context: string;
+    target_url: string;
+    state: string;
+  }>;
+
+  const vercelStatus = statuses.find(
+    (s) =>
+      s.context.toLowerCase().includes("vercel") &&
+      s.state === "success" &&
+      s.target_url?.includes("vercel.app"),
+  );
+
+  if (vercelStatus?.target_url) {
+    logger.log(`Found Vercel preview URL for PR #${prNumber}`, { url: vercelStatus.target_url });
+    return vercelStatus.target_url;
+  }
+
+  return null;
+}
diff --git a/src/github/mergePR.ts b/src/github/mergePR.ts
new file mode 100644
index 0000000..5768b70
--- /dev/null
+++ b/src/github/mergePR.ts
@@ -0,0 +1,39 @@
+import { logger } from "@trigger.dev/sdk/v3";
+
+/**
+ * Merges a GitHub PR using the squash merge strategy.
+ * Returns true if the merge was successful, false otherwise.
+ *
+ * @param repo - GitHub repo in "owner/repo" format
+ * @param prNumber - The PR number to merge
+ */
+export async function mergePR(repo: string, prNumber: number): Promise<boolean> {
+  const token = process.env.GITHUB_TOKEN;
+
+  const response = await fetch(
+    `https://api.github.com/repos/${repo}/pulls/${prNumber}/merge`,
+    {
+      method: "PUT",
+      headers: {
+        Authorization: `token ${token}`,
+        Accept: "application/vnd.github.v3+json",
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({
+        merge_method: "squash",
+      }),
+    },
+  );
+
+  if (!response.ok) {
+    const errorText = await response.text();
+    logger.error(`Failed to merge PR #${prNumber} in ${repo}`, {
+      status: response.status,
+      error: errorText,
+    });
+    return false;
+  }
+
+  logger.log(`Successfully merged PR #${prNumber} in ${repo}`);
+  return true;
+}
diff --git a/src/github/waitForPRChecks.ts b/src/github/waitForPRChecks.ts
new file mode 100644
index 0000000..12feff7
--- /dev/null
+++ b/src/github/waitForPRChecks.ts
@@ -0,0 +1,90 @@
+import { logger, wait } from "@trigger.dev/sdk/v3";
+
+export interface PRCheckResult {
+  allPassed: boolean;
+  failedChecks: string[];
+  pendingChecks: string[];
+}
+
+/**
+ * Polls the GitHub API until all check runs on a PR complete or the timeout is reached.
+ * Returns whether all checks passed and a list of any failed check names.
+ *
+ * @param repo - GitHub repo in "owner/repo" format
+ * @param prNumber - The PR number
+ * @param maxWaitMs - How long to wait before giving up (default 15 minutes)
+ */
+export async function waitForPRChecks(
+  repo: string,
+  prNumber: number,
+  maxWaitMs: number = 15 * 60 * 1000,
+): Promise<PRCheckResult> {
+  const token = process.env.GITHUB_TOKEN;
+  const headers = {
+    Authorization: `token ${token}`,
+    Accept: "application/vnd.github.v3+json",
+  };
+  const startTime = Date.now();
+
+  // Get the head SHA from the PR
+  const prRes = await fetch(`https://api.github.com/repos/${repo}/pulls/${prNumber}`, {
+    headers,
+  });
+
+  if (!prRes.ok) {
+    logger.warn(`Failed to fetch PR #${prNumber} from ${repo}`, { status: prRes.status });
+    return { allPassed: false, failedChecks: [], pendingChecks: ["fetch-failed"] };
+  }
+
+  const pr = (await prRes.json()) as { head: { sha: string } };
+  const headSha = pr.head.sha;
+
+  while (Date.now() - startTime < maxWaitMs) {
+    const checksRes = await fetch(
+      `https://api.github.com/repos/${repo}/commits/${headSha}/check-runs`,
+      { headers },
+    );
+
+    if (!checksRes.ok) {
+      logger.warn(`Failed to fetch check runs for ${repo}/${headSha}`);
+      return { allPassed: false, failedChecks: [], pendingChecks: ["fetch-failed"] };
+    }
+
+    const checksData = (await checksRes.json()) as {
+      check_runs: Array<{ name: string; status: string; conclusion: string | null }>;
+    };
+
+    const checks = checksData.check_runs;
+
+    if (checks.length > 0) {
+      const pending = checks.filter((c) => c.status !== "completed");
+      const failed = checks.filter(
+        (c) =>
+          c.status === "completed" &&
+          c.conclusion !== "success" &&
+          c.conclusion !== "skipped" &&
+          c.conclusion !== "neutral",
+      );
+
+      if (pending.length === 0) {
+        logger.log(`All checks complete for PR #${prNumber}`, {
+          total: checks.length,
+          failed: failed.length,
+        });
+        return {
+          allPassed: failed.length === 0,
+          failedChecks: failed.map((c) => c.name),
+          pendingChecks: [],
+        };
+      }
+
+      logger.log(`Waiting for ${pending.length} checks on PR #${prNumber}`, {
+        pending: pending.map((c) => c.name),
+      });
+    }
+
+    await wait.for({ seconds: 30 });
+  }
+
+  return { allPassed: false, failedChecks: [], pendingChecks: ["timeout"] };
+}
diff --git a/src/schemas/codingAgentSchema.ts b/src/schemas/codingAgentSchema.ts
index 150289f..b098f02 100644
--- a/src/schemas/codingAgentSchema.ts
+++ b/src/schemas/codingAgentSchema.ts
@@ -2,7 +2,7 @@ import { z } from "zod";
 
 export const codingAgentPayloadSchema = z.object({
   prompt: z.string().min(1, "prompt is required"),
-  callbackThreadId: z.string().min(1, "callbackThreadId is required"),
+  callbackThreadId: z.string().optional(),
 });
 
 export type CodingAgentPayload = z.infer<typeof codingAgentPayloadSchema>;
diff --git a/src/schemas/updatePRSchema.ts b/src/schemas/updatePRSchema.ts
index 50484f9..d6e7bcc 100644
--- a/src/schemas/updatePRSchema.ts
+++ b/src/schemas/updatePRSchema.ts
@@ -5,7 +5,7 @@ export const updatePRPayloadSchema = z.object({
   snapshotId: z.string().min(1, "snapshotId is required"),
   branch: z.string().min(1, "branch is required"),
   repo: z.string().min(1, "repo is required"),
-  callbackThreadId: z.string().min(1, "callbackThreadId is required"),
+  callbackThreadId: z.string().optional(),
 });
 
 export type UpdatePRPayload = z.infer<typeof updatePRPayloadSchema>;
diff --git a/src/slack/__tests__/postToSlackChannel.test.ts b/src/slack/__tests__/postToSlackChannel.test.ts
new file mode 100644
index 0000000..1e7d23d
--- /dev/null
+++ b/src/slack/__tests__/postToSlackChannel.test.ts
@@ -0,0 +1,60 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+
+vi.mock("@trigger.dev/sdk/v3", () => ({
+  logger: { log: vi.fn(), error: vi.fn() },
+}));
+
+const mockFetch = vi.fn();
+vi.stubGlobal("fetch", mockFetch);
+
+const { postToSlackChannel } = await import("../postToSlackChannel");
+
+beforeEach(() => {
+  vi.clearAllMocks();
+  process.env.SLACK_BOT_TOKEN = "xoxb-test-token";
+});
+
+describe("postToSlackChannel", () => {
+  it("posts a message to the correct channel and returns true", async () => {
+    mockFetch.mockResolvedValueOnce({ json: async () => ({ ok: true }) });
+
+    const result = await postToSlackChannel("C08HN8RKJHZ", "Hello, world!");
+
+    expect(result).toBe(true);
+    expect(mockFetch).toHaveBeenCalledWith(
+      "https://slack.com/api/chat.postMessage",
+      expect.objectContaining({
+        method: "POST",
+        headers: expect.objectContaining({
+          Authorization: "Bearer xoxb-test-token",
+          "Content-Type": "application/json",
+        }),
+      }),
+    );
+
+    const body = JSON.parse(mockFetch.mock.calls[0][1].body);
+    expect(body.channel).toBe("C08HN8RKJHZ");
+    expect(body.text).toBe("Hello, world!");
+  });
+
+  it("returns false when Slack API returns ok: false", async () => {
+    mockFetch.mockResolvedValueOnce({
+      json: async () => ({ ok: false, error: "channel_not_found" }),
+    });
+
+    const result = await postToSlackChannel("CINVALID", "Test");
+
+    expect(result).toBe(false);
+  });
+
+  it("returns false and logs error when SLACK_BOT_TOKEN is missing", async () => {
+    delete process.env.SLACK_BOT_TOKEN;
+    const { logger } = await import("@trigger.dev/sdk/v3");
+
+    const result = await postToSlackChannel("C08HN8RKJHZ", "Test");
+
+    expect(result).toBe(false);
+    expect(mockFetch).not.toHaveBeenCalled();
+    expect(logger.error).toHaveBeenCalledWith(expect.stringContaining("SLACK_BOT_TOKEN"));
+  });
+});
diff --git a/src/slack/postToSlackChannel.ts b/src/slack/postToSlackChannel.ts
new file mode 100644
index 0000000..543e503
--- /dev/null
+++ b/src/slack/postToSlackChannel.ts
@@ -0,0 +1,36 @@
+import { logger } from "@trigger.dev/sdk/v3";
+
+/**
+ * Posts a plain-text message to a Slack channel using the bot token.
+ * Returns true if the message was sent successfully.
+ *
+ * @param channelId - The Slack channel ID (e.g. "C08HN8RKJHZ")
+ * @param text - The message text (supports Slack mrkdwn formatting)
+ */
+export async function postToSlackChannel(channelId: string, text: string): Promise<boolean> {
+  const token = process.env.SLACK_BOT_TOKEN;
+
+  if (!token) {
+    logger.error("Missing SLACK_BOT_TOKEN — cannot post to Slack");
+    return false;
+  }
+
+  const response = await fetch("https://slack.com/api/chat.postMessage", {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+      Authorization: `Bearer ${token}`,
+    },
+    body: JSON.stringify({ channel: channelId, text }),
+  });
+
+  const data = (await response.json()) as { ok: boolean; error?: string };
+
+  if (!data.ok) {
+    logger.error("Failed to post message to Slack", { channel: channelId, error: data.error });
+    return false;
+  }
+
+  logger.log("Posted message to Slack", { channel: channelId });
+  return true;
+}
diff --git a/src/tasks/agentDayTask.ts b/src/tasks/agentDayTask.ts
new file mode 100644
index 0000000..b5d3d1e
--- /dev/null
+++ b/src/tasks/agentDayTask.ts
@@ -0,0 +1,188 @@
+import { logger, schedules, wait } from "@trigger.dev/sdk/v3";
+import { codingAgentTask } from "./codingAgentTask";
+import { updatePRTask } from "./updatePRTask";
+import { fetchRecentSubmoduleCommits } from "../github/fetchRecentSubmoduleCommits";
+import { generateFeaturePrompt } from "../ai/generateFeaturePrompt";
+import { waitForPRChecks } from "../github/waitForPRChecks";
+import { fetchPRReviews } from "../github/fetchPRReviews";
+import { assessPRFeedback } from "../ai/assessPRFeedback";
+import { getVercelPreviewUrl } from "../github/getVercelPreviewUrl";
+import { mergePR } from "../github/mergePR";
+import { postToSlackChannel } from "../slack/postToSlackChannel";
+import { logStep } from "../sandboxes/logStep";
+
+const AGENT_DAY_SLACK_CHANNEL = "C08HN8RKJHZ";
+const MAX_REVIEW_ITERATIONS = 3;
+const PR_CHECK_TIMEOUT_MS = 15 * 60 * 1000; // 15 minutes
+
+/**
+ * Scheduled task that runs every Sunday at 10 AM ET and autonomously
+ * implements a new feature end-to-end:
+ *
+ * 1. Gathers recent commits across all submodules
+ * 2. Uses Claude to plan a focused feature based on recent work
+ * 3. Triggers the coding-agent task to implement it and open PRs
+ * 4. Reviews each PR: waits for checks, assesses human feedback, applies fixes
+ * 5. Tests Vercel preview deployments (api/chat repos)
+ * 6. Merges approved PRs
+ * 7. Posts a summary to the #dev Slack channel
+ */
+export const agentDayTask = schedules.task({
+  id: "agent-day",
+  cron: { pattern: "0 10 * * 0", timezone: "America/New_York" }, // 10 AM ET every Sunday
+  maxDuration: 60 * 120, // 2 hours
+  run: async (payload) => {
+    logStep("Agent Day task started", true, {
+      timestamp: payload.timestamp,
+      date: new Date(payload.timestamp).toDateString(),
+    });
+
+    // Step 1: Gather recent commits to understand what has been built recently
+    logStep("Fetching recent commits from all submodules");
+    const recentCommits = await fetchRecentSubmoduleCommits();
+    logStep("Recent commits fetched", true, { submoduleCount: recentCommits.length });
+
+    // Step 2: Use Claude to plan the next feature based on recent work
+    logStep("Generating feature prompt from recent commits");
+    const featurePrompt = await generateFeaturePrompt(recentCommits);
+    logStep("Feature prompt generated", true, { preview: featurePrompt.slice(0, 300) });
+
+    // Step 3 & 4: Trigger the coding agent to implement the feature and open PRs
+    logStep("Triggering coding agent");
+    const codingResult = await codingAgentTask.triggerAndWait({ prompt: featurePrompt });
+
+    if (!codingResult.ok) {
+      logger.error("Coding agent task failed", { error: codingResult.error });
+      await postToSlackChannel(
+        AGENT_DAY_SLACK_CHANNEL,
+        `🤖 *Agent Day — ${new Date(payload.timestamp).toDateString()}*\n\nCoding agent failed. Check Trigger.dev for details.`,
+      );
+      return { success: false, error: "Coding agent failed" };
+    }
+
+    const { branch, snapshotId, prs } = codingResult.output;
+    logStep("Coding agent completed", true, { branch, prCount: prs.length, prs });
+
+    if (prs.length === 0) {
+      await postToSlackChannel(
+        AGENT_DAY_SLACK_CHANNEL,
+        `🤖 *Agent Day — ${new Date(payload.timestamp).toDateString()}*\n\nNo changes were made — the agent found nothing to implement.`,
+      );
+      return { success: true, prs: [], featurePrompt };
+    }
+
+    // Step 5: Review each PR — wait for checks, implement feedback, test preview
+    const mergedPRs: typeof prs = [];
+    let currentSnapshotId = snapshotId;
+
+    for (const pr of prs) {
+      logStep(`Reviewing PR #${pr.number} in ${pr.repo}`);
+
+      // Step 5a: Wait for all CI checks to complete
+      logStep(`Waiting for checks on PR #${pr.number}`);
+      const checkResult = await waitForPRChecks(pr.repo, pr.number, PR_CHECK_TIMEOUT_MS);
+      logStep(`Checks complete for PR #${pr.number}`, true, {
+        allPassed: checkResult.allPassed,
+        failedChecks: checkResult.failedChecks,
+      });
+
+      let reviewSnapshotId = currentSnapshotId;
+
+      // Step 5b-5c: PR review loop — assess feedback and implement if needed
+      for (let iteration = 0; iteration < MAX_REVIEW_ITERATIONS; iteration++) {
+        logStep(`Review iteration ${iteration + 1} for PR #${pr.number}`);
+
+        const feedback = await fetchPRReviews(pr.repo, pr.number);
+        const assessment = await assessPRFeedback(pr.repo, featurePrompt, feedback);
+
+        logStep(`Feedback assessed for PR #${pr.number}`, true, {
+          hasActionableFeedback: assessment.hasActionableFeedback,
+          summary: assessment.feedbackSummary,
+        });
+
+        if (!assessment.hasActionableFeedback) {
+          break; // Nothing to address — move on
+        }
+
+        // Step 5c: Apply the feedback via the update-pr task
+        logStep(`Applying feedback for PR #${pr.number}: ${assessment.feedbackSummary}`);
+        const updateResult = await updatePRTask.triggerAndWait({
+          feedback: assessment.implementation,
+          snapshotId: reviewSnapshotId,
+          branch,
+          repo: pr.repo,
+        });
+
+        if (!updateResult.ok) {
+          logger.warn(`Failed to apply feedback for PR #${pr.number}`, {
+            error: updateResult.error,
+          });
+          break;
+        }
+
+        reviewSnapshotId = updateResult.output.snapshotId;
+        currentSnapshotId = reviewSnapshotId;
+
+        // Wait for the new commit to be picked up by CI before re-checking
+        await wait.for({ seconds: 30 });
+      }
+
+      // Step 5d: Test Vercel preview deployment for api and chat repos
+      if (pr.repo === "recoupable/api" || pr.repo === "recoupable/chat") {
+        const previewUrl = await getVercelPreviewUrl(pr.repo, pr.number);
+
+        if (previewUrl) {
+          logStep(`Testing Vercel preview for PR #${pr.number}`, true, { previewUrl });
+          try {
+            const healthRes = await fetch(`${previewUrl}/api/health`, {
+              signal: AbortSignal.timeout(10_000),
+            });
+            logStep(`Preview health check for PR #${pr.number}`, true, {
+              url: `${previewUrl}/api/health`,
+              status: healthRes.status,
+            });
+          } catch (error) {
+            logger.warn(`Preview health check failed for PR #${pr.number}`, { error });
+          }
+        } else {
+          logStep(`No Vercel preview URL found for PR #${pr.number}`);
+        }
+      }
+
+      // Step 6: Merge the PR
+      logStep(`Merging PR #${pr.number} in ${pr.repo}`);
+      const merged = await mergePR(pr.repo, pr.number);
+
+      if (merged) {
+        mergedPRs.push(pr);
+        logStep(`Merged PR #${pr.number}`, true, { url: pr.url });
+      } else {
+        logger.warn(`Could not merge PR #${pr.number} in ${pr.repo}`);
+      }
+    }
+
+    // Step 7: Post summary to Slack
+    const date = new Date(payload.timestamp).toDateString();
+    const prLines = mergedPRs.map((pr) => `• <${pr.url}|${pr.repo} #${pr.number}>`).join("\n");
+    const unmergedCount = prs.length - mergedPRs.length;
+
+    const slackLines = [
+      `🤖 *Agent Day — ${date}*`,
+      ``,
+      `Implemented and merged *${mergedPRs.length}* PR${mergedPRs.length !== 1 ? "s" : ""}:`,
+      prLines || "_(none)_",
+      unmergedCount > 0 ? `\n_${unmergedCount} PR(s) could not be merged automatically._` : "",
+      ``,
+      `*Feature:* ${featurePrompt.slice(0, 280)}${featurePrompt.length > 280 ? "…" : ""}`,
+    ];
+
+    await postToSlackChannel(AGENT_DAY_SLACK_CHANNEL, slackLines.filter(Boolean).join("\n"));
+
+    logStep("Agent Day task completed", true, {
+      mergedPRs: mergedPRs.length,
+      totalPRs: prs.length,
+    });
+
+    return { success: true, prs: mergedPRs, featurePrompt };
+  },
+});
diff --git a/src/tasks/codingAgentTask.ts b/src/tasks/codingAgentTask.ts
index 18e1f92..69cc72e 100644
--- a/src/tasks/codingAgentTask.ts
+++ b/src/tasks/codingAgentTask.ts
@@ -64,17 +64,19 @@ export const codingAgentTask = schemaTask({
       logStep("Taking snapshot");
       const { snapshotId } = await sandbox.snapshot();
 
-      const callbackPayload = {
-        threadId: callbackThreadId,
-        status: (prs.length > 0 ? "pr_created" : "no_changes") as "pr_created" | "no_changes",
-        branch,
-        snapshotId,
-        prs,
-        stdout: agentResult.stdout,
-        stderr: agentResult.stderr,
-      };
-      logStep("Notifying bot", true, callbackPayload);
-      await notifyCodingAgentCallback(callbackPayload);
+      if (callbackThreadId) {
+        const callbackPayload = {
+          threadId: callbackThreadId,
+          status: (prs.length > 0 ? "pr_created" : "no_changes") as "pr_created" | "no_changes",
+          branch,
+          snapshotId,
+          prs,
+          stdout: agentResult.stdout,
+          stderr: agentResult.stderr,
+        };
+        logStep("Notifying bot", true, callbackPayload);
+        await notifyCodingAgentCallback(callbackPayload);
+      }
 
       metadata.set("currentStep", "Complete");
 
diff --git a/src/tasks/updatePRTask.ts b/src/tasks/updatePRTask.ts
index 8aa1464..f608b33 100644
--- a/src/tasks/updatePRTask.ts
+++ b/src/tasks/updatePRTask.ts
@@ -69,15 +69,17 @@ export const updatePRTask = schemaTask({
       logStep("Taking new snapshot");
       const newSnapshot = await sandbox.snapshot();
 
-      const callbackPayload = {
-        threadId: callbackThreadId,
-        status: "updated" as const,
-        snapshotId: newSnapshot.snapshotId,
-        stdout: agentResult.stdout,
-        stderr: agentResult.stderr,
-      };
-      logStep("Notifying bot", true, callbackPayload);
-      await notifyCodingAgentCallback(callbackPayload);
+      if (callbackThreadId) {
+        const callbackPayload = {
+          threadId: callbackThreadId,
+          status: "updated" as const,
+          snapshotId: newSnapshot.snapshotId,
+          stdout: agentResult.stdout,
+          stderr: agentResult.stderr,
+        };
+        logStep("Notifying bot", true, callbackPayload);
+        await notifyCodingAgentCallback(callbackPayload);
+      }
 
       metadata.set("currentStep", "Complete");
 

From e47574d5a8ddbb0fb09024f83e1bb93b48aad72c Mon Sep 17 00:00:00 2001
From: Sweets Sweetman <sweetmantech@gmail.com>
Date: Tue, 24 Mar 2026 09:05:18 -0500
Subject: [PATCH 2/4] refactor: use Claude Code sandbox for AI reasoning,
 replace logger.log with logStep

Addresses PR review feedback:
- Replace raw Anthropic API calls in assessPRFeedback and generateFeaturePrompt
  with runClaudeCodeAgent in a Vercel sandbox
- Replace logger.log with shared logStep function in waitForPRChecks and
  postToSlackChannel
- Create dedicated AI reasoning sandbox in agentDayTask with proper cleanup

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/ai/__tests__/assessPRFeedback.test.ts     | 73 ++++++++--------
 .../__tests__/generateFeaturePrompt.test.ts   | 62 ++++++++------
 src/ai/assessPRFeedback.ts                    | 83 ++++++++-----------
 src/ai/generateFeaturePrompt.ts               | 60 +++++---------
 src/github/waitForPRChecks.ts                 | 11 +--
 .../__tests__/postToSlackChannel.test.ts      |  5 ++
 src/slack/postToSlackChannel.ts               |  3 +-
 src/tasks/agentDayTask.ts                     | 17 +++-
 8 files changed, 158 insertions(+), 156 deletions(-)

diff --git a/src/ai/__tests__/assessPRFeedback.test.ts b/src/ai/__tests__/assessPRFeedback.test.ts
index b99862d..a03a420 100644
--- a/src/ai/__tests__/assessPRFeedback.test.ts
+++ b/src/ai/__tests__/assessPRFeedback.test.ts
@@ -2,13 +2,22 @@ import { describe, it, expect, vi, beforeEach } from "vitest";
 
 vi.mock("@trigger.dev/sdk/v3", () => ({
   logger: { log: vi.fn(), warn: vi.fn(), error: vi.fn() },
+  metadata: { set: vi.fn(), append: vi.fn() },
 }));
 
-const mockFetch = vi.fn();
-vi.stubGlobal("fetch", mockFetch);
+const mockRunClaudeCodeAgent = vi.fn();
+vi.mock("../../sandboxes/runClaudeCodeAgent", () => ({
+  runClaudeCodeAgent: (...args: unknown[]) => mockRunClaudeCodeAgent(...args),
+}));
+
+vi.mock("../../sandboxes/logStep", () => ({
+  logStep: vi.fn(),
+}));
 
 const { assessPRFeedback } = await import("../assessPRFeedback");
 
+const mockSandbox = {} as any;
+
 const noFeedback = { reviews: [], comments: [] };
 const withFeedback = {
   reviews: [
@@ -24,35 +33,29 @@ const withFeedback = {
 
 beforeEach(() => {
   vi.clearAllMocks();
-  process.env.ANTHROPIC_API_KEY = "test-key";
 });
 
 describe("assessPRFeedback", () => {
   it("returns no actionable feedback when reviews and comments are empty", async () => {
-    const result = await assessPRFeedback("recoupable/api", "Build feature X", noFeedback);
+    const result = await assessPRFeedback(mockSandbox, "recoupable/api", "Build feature X", noFeedback);
 
     expect(result.hasActionableFeedback).toBe(false);
-    expect(mockFetch).not.toHaveBeenCalled();
+    expect(mockRunClaudeCodeAgent).not.toHaveBeenCalled();
   });
 
-  it("returns actionable feedback parsed from Claude response", async () => {
-    mockFetch.mockResolvedValueOnce({
-      ok: true,
-      json: async () => ({
-        content: [
-          {
-            type: "text",
-            text: JSON.stringify({
-              hasActionableFeedback: true,
-              feedbackSummary: "Add error handling",
-              implementation: "Wrap the handler in a try/catch and return 500 on error",
-            }),
-          },
-        ],
+  it("returns actionable feedback parsed from Claude Code response", async () => {
+    mockRunClaudeCodeAgent.mockResolvedValueOnce({
+      exitCode: 0,
+      stdout: JSON.stringify({
+        hasActionableFeedback: true,
+        feedbackSummary: "Add error handling",
+        implementation: "Wrap the handler in a try/catch and return 500 on error",
       }),
+      stderr: "",
     });
 
     const result = await assessPRFeedback(
+      mockSandbox,
       "recoupable/api",
       "Build feature X",
       withFeedback,
@@ -63,32 +66,34 @@ describe("assessPRFeedback", () => {
     expect(result.implementation).toContain("try/catch");
   });
 
-  it("returns no actionable feedback when API key is missing", async () => {
-    delete process.env.ANTHROPIC_API_KEY;
+  it("returns no actionable feedback when Claude Code exits non-zero", async () => {
+    mockRunClaudeCodeAgent.mockResolvedValueOnce({
+      exitCode: 1,
+      stdout: "",
+      stderr: "error",
+    });
 
-    const result = await assessPRFeedback("recoupable/api", "Build feature X", withFeedback);
+    const result = await assessPRFeedback(mockSandbox, "recoupable/api", "Feature", withFeedback);
 
     expect(result.hasActionableFeedback).toBe(false);
-    expect(mockFetch).not.toHaveBeenCalled();
   });
 
-  it("returns no actionable feedback when API call fails", async () => {
-    mockFetch.mockResolvedValueOnce({ ok: false, status: 500 });
+  it("returns no actionable feedback when JSON parsing fails", async () => {
+    mockRunClaudeCodeAgent.mockResolvedValueOnce({
+      exitCode: 0,
+      stdout: "not valid json",
+      stderr: "",
+    });
 
-    const result = await assessPRFeedback("recoupable/api", "Feature", withFeedback);
+    const result = await assessPRFeedback(mockSandbox, "recoupable/api", "Feature", withFeedback);
 
     expect(result.hasActionableFeedback).toBe(false);
   });
 
-  it("returns no actionable feedback when JSON parsing fails", async () => {
-    mockFetch.mockResolvedValueOnce({
-      ok: true,
-      json: async () => ({
-        content: [{ type: "text", text: "not valid json" }],
-      }),
-    });
+  it("returns no actionable feedback when runClaudeCodeAgent throws", async () => {
+    mockRunClaudeCodeAgent.mockRejectedValueOnce(new Error("Sandbox error"));
 
-    const result = await assessPRFeedback("recoupable/api", "Feature", withFeedback);
+    const result = await assessPRFeedback(mockSandbox, "recoupable/api", "Feature", withFeedback);
 
     expect(result.hasActionableFeedback).toBe(false);
   });
diff --git a/src/ai/__tests__/generateFeaturePrompt.test.ts b/src/ai/__tests__/generateFeaturePrompt.test.ts
index c5c76fc..95c2b3b 100644
--- a/src/ai/__tests__/generateFeaturePrompt.test.ts
+++ b/src/ai/__tests__/generateFeaturePrompt.test.ts
@@ -2,13 +2,22 @@ import { describe, it, expect, vi, beforeEach } from "vitest";
 
 vi.mock("@trigger.dev/sdk/v3", () => ({
   logger: { log: vi.fn(), warn: vi.fn(), error: vi.fn() },
+  metadata: { set: vi.fn(), append: vi.fn() },
 }));
 
-const mockFetch = vi.fn();
-vi.stubGlobal("fetch", mockFetch);
+const mockRunClaudeCodeAgent = vi.fn();
+vi.mock("../../sandboxes/runClaudeCodeAgent", () => ({
+  runClaudeCodeAgent: (...args: unknown[]) => mockRunClaudeCodeAgent(...args),
+}));
+
+vi.mock("../../sandboxes/logStep", () => ({
+  logStep: vi.fn(),
+}));
 
 const { generateFeaturePrompt } = await import("../generateFeaturePrompt");
 
+const mockSandbox = {} as any;
+
 const mockCommits = [
   {
     submodule: "api",
@@ -21,51 +30,56 @@ const mockCommits = [
 
 beforeEach(() => {
   vi.clearAllMocks();
-  process.env.ANTHROPIC_API_KEY = "test-key";
 });
 
 describe("generateFeaturePrompt", () => {
-  it("returns the Claude-generated prompt on success", async () => {
-    mockFetch.mockResolvedValueOnce({
-      ok: true,
-      json: async () => ({
-        content: [{ type: "text", text: "Build a feature X in the api submodule." }],
-      }),
+  it("returns the Claude Code generated prompt on success", async () => {
+    mockRunClaudeCodeAgent.mockResolvedValueOnce({
+      exitCode: 0,
+      stdout: "Build a feature X in the api submodule.",
+      stderr: "",
     });
 
-    const result = await generateFeaturePrompt(mockCommits);
+    const result = await generateFeaturePrompt(mockSandbox, mockCommits);
 
     expect(result).toBe("Build a feature X in the api submodule.");
-    expect(mockFetch).toHaveBeenCalledWith(
-      "https://api.anthropic.com/v1/messages",
+    expect(mockRunClaudeCodeAgent).toHaveBeenCalledWith(
+      mockSandbox,
       expect.objectContaining({
-        method: "POST",
-        headers: expect.objectContaining({ "x-api-key": "test-key" }),
+        label: "Generate feature prompt",
+        message: expect.stringContaining("abc1234"),
       }),
     );
   });
 
-  it("returns fallback prompt when API key is missing", async () => {
-    delete process.env.ANTHROPIC_API_KEY;
+  it("returns fallback prompt when Claude Code exits non-zero", async () => {
+    mockRunClaudeCodeAgent.mockResolvedValueOnce({
+      exitCode: 1,
+      stdout: "",
+      stderr: "error",
+    });
 
-    const result = await generateFeaturePrompt(mockCommits);
+    const result = await generateFeaturePrompt(mockSandbox, mockCommits);
 
     expect(result).toContain("PROGRESS.md");
-    expect(mockFetch).not.toHaveBeenCalled();
   });
 
-  it("returns fallback prompt when Anthropic API call fails", async () => {
-    mockFetch.mockResolvedValueOnce({ ok: false, status: 500 });
+  it("returns fallback prompt when Claude Code returns empty stdout", async () => {
+    mockRunClaudeCodeAgent.mockResolvedValueOnce({
+      exitCode: 0,
+      stdout: "",
+      stderr: "",
+    });
 
-    const result = await generateFeaturePrompt(mockCommits);
+    const result = await generateFeaturePrompt(mockSandbox, mockCommits);
 
     expect(result).toContain("PROGRESS.md");
   });
 
-  it("returns fallback prompt when fetch throws", async () => {
-    mockFetch.mockRejectedValueOnce(new Error("Network error"));
+  it("returns fallback prompt when runClaudeCodeAgent throws", async () => {
+    mockRunClaudeCodeAgent.mockRejectedValueOnce(new Error("Sandbox error"));
 
-    const result = await generateFeaturePrompt(mockCommits);
+    const result = await generateFeaturePrompt(mockSandbox, mockCommits);
 
     expect(result).toContain("PROGRESS.md");
   });
diff --git a/src/ai/assessPRFeedback.ts b/src/ai/assessPRFeedback.ts
index 9d5c430..a57c202 100644
--- a/src/ai/assessPRFeedback.ts
+++ b/src/ai/assessPRFeedback.ts
@@ -1,5 +1,7 @@
-import { logger } from "@trigger.dev/sdk/v3";
+import type { Sandbox } from "@vercel/sandbox";
 import type { PRFeedback } from "../github/fetchPRReviews";
+import { runClaudeCodeAgent } from "../sandboxes/runClaudeCodeAgent";
+import { logStep } from "../sandboxes/logStep";
 
 export interface FeedbackAssessment {
   hasActionableFeedback: boolean;
@@ -8,13 +10,14 @@ export interface FeedbackAssessment {
 }
 
 /**
- * Uses the Anthropic API to assess PR review feedback and determine
+ * Uses Claude Code in a sandbox to assess PR review feedback and determine
  * whether any changes should be implemented by the coding agent.
  *
  * Ignores automated bot comments and approvals without substantive feedback.
  * Returns a structured assessment with what (if anything) should be changed.
  */
 export async function assessPRFeedback(
+  sandbox: Sandbox,
   repo: string,
   featureDescription: string,
   feedback: PRFeedback,
@@ -29,13 +32,6 @@ export async function assessPRFeedback(
     return noFeedback;
   }
 
-  const apiKey = process.env.ANTHROPIC_API_KEY;
-
-  if (!apiKey) {
-    logger.warn("Missing ANTHROPIC_API_KEY — skipping feedback assessment");
-    return noFeedback;
-  }
-
   const reviewsText = feedback.reviews
     .map((r) => `Review by ${r.author} (${r.state}): ${r.body}`)
     .join("\n");
@@ -46,65 +42,52 @@ export async function assessPRFeedback(
 
   const feedbackText = [reviewsText, commentsText].filter(Boolean).join("\n\n");
 
+  const message = [
+    `You are reviewing PR feedback for the repo "${repo}".`,
+    ``,
+    `Feature that was implemented: ${featureDescription.slice(0, 500)}`,
+    ``,
+    `PR feedback received:`,
+    feedbackText,
+    ``,
+    `Determine if there is actionable feedback that the coding agent should implement.`,
+    `Ignore: automated bot comments, approval messages, "LGTM", CI failure notices.`,
+    `Focus on: code quality issues, bugs, missing functionality, style violations.`,
+    ``,
+    `Respond with JSON only (no markdown):`,
+    `{"hasActionableFeedback": boolean, "feedbackSummary": "brief summary", "implementation": "specific changes to make, or empty string"}`,
+  ].join("\n");
+
   try {
-    const response = await fetch("https://api.anthropic.com/v1/messages", {
-      method: "POST",
-      headers: {
-        "Content-Type": "application/json",
-        "x-api-key": apiKey,
-        "anthropic-version": "2023-06-01",
-      },
-      body: JSON.stringify({
-        model: "claude-sonnet-4-5",
-        max_tokens: 512,
-        messages: [
-          {
-            role: "user",
-            content: [
-              `You are reviewing PR feedback for the repo "${repo}".`,
-              ``,
-              `Feature that was implemented: ${featureDescription.slice(0, 500)}`,
-              ``,
-              `PR feedback received:`,
-              feedbackText,
-              ``,
-              `Determine if there is actionable feedback that the coding agent should implement.`,
-              `Ignore: automated bot comments, approval messages, "LGTM", CI failure notices.`,
-              `Focus on: code quality issues, bugs, missing functionality, style violations.`,
-              ``,
-              `Respond with JSON only (no markdown):`,
-              `{"hasActionableFeedback": boolean, "feedbackSummary": "brief summary", "implementation": "specific changes to make, or empty string"}`,
-            ].join("\n"),
-          },
-        ],
-      }),
+    const result = await runClaudeCodeAgent(sandbox, {
+      label: `Assess PR feedback for ${repo}`,
+      message,
     });
 
-    if (!response.ok) {
-      logger.error("Anthropic API error assessing PR feedback", { status: response.status });
+    if (result.exitCode !== 0) {
+      logStep("Claude Code failed to assess PR feedback", false, {
+        exitCode: result.exitCode,
+        stderr: result.stderr.slice(-500),
+      });
       return noFeedback;
     }
 
-    const data = (await response.json()) as {
-      content: Array<{ type: string; text: string }>;
-    };
-
-    const text = data.content.find((c) => c.type === "text")?.text?.trim() ?? "";
+    const text = result.stdout.trim();
 
     try {
       const parsed = JSON.parse(text) as FeedbackAssessment;
-      logger.log("PR feedback assessment complete", {
+      logStep("PR feedback assessment complete", false, {
         repo,
         hasActionableFeedback: parsed.hasActionableFeedback,
         summary: parsed.feedbackSummary,
       });
       return parsed;
     } catch {
-      logger.warn("Failed to parse feedback assessment JSON", { text: text.slice(0, 200) });
+      logStep("Failed to parse feedback assessment JSON", false, { text: text.slice(0, 200) });
       return noFeedback;
     }
   } catch (error) {
-    logger.error("Failed to assess PR feedback", { error });
+    logStep("Failed to assess PR feedback", false, { error: String(error) });
     return noFeedback;
   }
 }
diff --git a/src/ai/generateFeaturePrompt.ts b/src/ai/generateFeaturePrompt.ts
index b5908ee..e1e45a9 100644
--- a/src/ai/generateFeaturePrompt.ts
+++ b/src/ai/generateFeaturePrompt.ts
@@ -1,7 +1,9 @@
-import { logger } from "@trigger.dev/sdk/v3";
+import type { Sandbox } from "@vercel/sandbox";
 import type { SubmoduleCommit } from "../github/fetchRecentSubmoduleCommits";
+import { runClaudeCodeAgent } from "../sandboxes/runClaudeCodeAgent";
+import { logStep } from "../sandboxes/logStep";
 
-const SYSTEM_PROMPT = `You are a senior software engineer on the Recoupable platform — a music industry management tool for record labels and artist managers.
+const SYSTEM_CONTEXT = `You are a senior software engineer on the Recoupable platform — a music industry management tool for record labels and artist managers.
 
 The platform has these main components:
 - chat: Next.js frontend where music managers chat with their AI agent
@@ -26,21 +28,15 @@ Respond with ONLY an implementation prompt for an AI coding agent. The prompt sh
 - NOT ask for planning or approval — just direct the agent to implement it`;
 
 /**
- * Uses the Anthropic API to generate an actionable feature implementation prompt
+ * Uses Claude Code in a sandbox to generate an actionable feature implementation prompt
  * based on the recent commit history across monorepo submodules.
  *
- * Falls back to a generic improvement prompt if the API call fails.
+ * Falls back to a generic improvement prompt if the sandbox call fails.
  */
 export async function generateFeaturePrompt(
+  sandbox: Sandbox,
   recentCommits: SubmoduleCommit[],
 ): Promise<string> {
-  const apiKey = process.env.ANTHROPIC_API_KEY;
-
-  if (!apiKey) {
-    logger.warn("Missing ANTHROPIC_API_KEY — using fallback feature prompt");
-    return getFallbackPrompt();
-  }
-
   const commitsContext = recentCommits
     .map(
       ({ submodule, commits }) =>
@@ -48,48 +44,34 @@ export async function generateFeaturePrompt(
     )
     .join("\n\n");
 
-  const userMessage = `Here are the most recent commits across the Recoupable monorepo:
+  const message = `${SYSTEM_CONTEXT}
+
+Here are the most recent commits across the Recoupable monorepo:
 
 ${commitsContext}
 
 Based on this recent work, write a specific implementation prompt for an AI coding agent to implement the next most valuable feature.`;
 
   try {
-    const response = await fetch("https://api.anthropic.com/v1/messages", {
-      method: "POST",
-      headers: {
-        "Content-Type": "application/json",
-        "x-api-key": apiKey,
-        "anthropic-version": "2023-06-01",
-      },
-      body: JSON.stringify({
-        model: "claude-sonnet-4-5",
-        max_tokens: 1024,
-        system: SYSTEM_PROMPT,
-        messages: [{ role: "user", content: userMessage }],
-      }),
+    const result = await runClaudeCodeAgent(sandbox, {
+      label: "Generate feature prompt",
+      message,
     });
 
-    if (!response.ok) {
-      logger.error("Anthropic API error generating feature prompt", { status: response.status });
-      return getFallbackPrompt();
-    }
-
-    const data = (await response.json()) as {
-      content: Array<{ type: string; text: string }>;
-    };
-
-    const text = data.content.find((c) => c.type === "text")?.text?.trim();
+    const text = result.stdout.trim();
 
-    if (!text) {
-      logger.warn("Empty response from Anthropic API");
+    if (result.exitCode !== 0 || !text) {
+      logStep("Claude Code failed to generate feature prompt", false, {
+        exitCode: result.exitCode,
+        stderr: result.stderr.slice(-500),
+      });
       return getFallbackPrompt();
     }
 
-    logger.log("Generated Agent Day feature prompt", { preview: text.slice(0, 200) });
+    logStep("Generated Agent Day feature prompt", false, { preview: text.slice(0, 200) });
     return text;
   } catch (error) {
-    logger.error("Failed to generate feature prompt", { error });
+    logStep("Failed to generate feature prompt", false, { error: String(error) });
     return getFallbackPrompt();
   }
 }
diff --git a/src/github/waitForPRChecks.ts b/src/github/waitForPRChecks.ts
index 12feff7..143b47b 100644
--- a/src/github/waitForPRChecks.ts
+++ b/src/github/waitForPRChecks.ts
@@ -1,4 +1,5 @@
-import { logger, wait } from "@trigger.dev/sdk/v3";
+import { wait } from "@trigger.dev/sdk/v3";
+import { logStep } from "../sandboxes/logStep";
 
 export interface PRCheckResult {
   allPassed: boolean;
@@ -32,7 +33,7 @@ export async function waitForPRChecks(
   });
 
   if (!prRes.ok) {
-    logger.warn(`Failed to fetch PR #${prNumber} from ${repo}`, { status: prRes.status });
+    logStep(`Failed to fetch PR #${prNumber} from ${repo}`, false, { status: prRes.status });
     return { allPassed: false, failedChecks: [], pendingChecks: ["fetch-failed"] };
   }
 
@@ -46,7 +47,7 @@ export async function waitForPRChecks(
     );
 
     if (!checksRes.ok) {
-      logger.warn(`Failed to fetch check runs for ${repo}/${headSha}`);
+      logStep(`Failed to fetch check runs for ${repo}/${headSha}`, false);
       return { allPassed: false, failedChecks: [], pendingChecks: ["fetch-failed"] };
     }
 
@@ -67,7 +68,7 @@ export async function waitForPRChecks(
       );
 
       if (pending.length === 0) {
-        logger.log(`All checks complete for PR #${prNumber}`, {
+        logStep(`All checks complete for PR #${prNumber}`, false, {
           total: checks.length,
           failed: failed.length,
         });
@@ -78,7 +79,7 @@ export async function waitForPRChecks(
         };
       }
 
-      logger.log(`Waiting for ${pending.length} checks on PR #${prNumber}`, {
+      logStep(`Waiting for ${pending.length} checks on PR #${prNumber}`, false, {
         pending: pending.map((c) => c.name),
       });
     }
diff --git a/src/slack/__tests__/postToSlackChannel.test.ts b/src/slack/__tests__/postToSlackChannel.test.ts
index 1e7d23d..aa886ed 100644
--- a/src/slack/__tests__/postToSlackChannel.test.ts
+++ b/src/slack/__tests__/postToSlackChannel.test.ts
@@ -2,6 +2,11 @@ import { describe, it, expect, vi, beforeEach } from "vitest";
 
 vi.mock("@trigger.dev/sdk/v3", () => ({
   logger: { log: vi.fn(), error: vi.fn() },
+  metadata: { set: vi.fn(), append: vi.fn() },
+}));
+
+vi.mock("../../sandboxes/logStep", () => ({
+  logStep: vi.fn(),
 }));
 
 const mockFetch = vi.fn();
diff --git a/src/slack/postToSlackChannel.ts b/src/slack/postToSlackChannel.ts
index 543e503..6806c42 100644
--- a/src/slack/postToSlackChannel.ts
+++ b/src/slack/postToSlackChannel.ts
@@ -1,4 +1,5 @@
 import { logger } from "@trigger.dev/sdk/v3";
+import { logStep } from "../sandboxes/logStep";
 
 /**
  * Posts a plain-text message to a Slack channel using the bot token.
@@ -31,6 +32,6 @@ export async function postToSlackChannel(channelId: string, text: string): Promi
     return false;
   }
 
-  logger.log("Posted message to Slack", { channel: channelId });
+  logStep("Posted message to Slack", false, { channel: channelId });
   return true;
 }
diff --git a/src/tasks/agentDayTask.ts b/src/tasks/agentDayTask.ts
index b5d3d1e..57da4d5 100644
--- a/src/tasks/agentDayTask.ts
+++ b/src/tasks/agentDayTask.ts
@@ -10,6 +10,8 @@ import { getVercelPreviewUrl } from "../github/getVercelPreviewUrl";
 import { mergePR } from "../github/mergePR";
 import { postToSlackChannel } from "../slack/postToSlackChannel";
 import { logStep } from "../sandboxes/logStep";
+import { getOrCreateSandbox } from "../sandboxes/getOrCreateSandbox";
+import { CODING_AGENT_ACCOUNT_ID } from "../consts";
 
 const AGENT_DAY_SLACK_CHANNEL = "C08HN8RKJHZ";
 const MAX_REVIEW_ITERATIONS = 3;
@@ -37,14 +39,19 @@ export const agentDayTask = schedules.task({
       date: new Date(payload.timestamp).toDateString(),
     });
 
+    // Create a sandbox for AI reasoning (feature planning + feedback assessment)
+    logStep("Creating sandbox for AI reasoning");
+    const { sandbox: aiSandbox } = await getOrCreateSandbox(CODING_AGENT_ACCOUNT_ID);
+
+    try {
     // Step 1: Gather recent commits to understand what has been built recently
     logStep("Fetching recent commits from all submodules");
     const recentCommits = await fetchRecentSubmoduleCommits();
     logStep("Recent commits fetched", true, { submoduleCount: recentCommits.length });
 
-    // Step 2: Use Claude to plan the next feature based on recent work
+    // Step 2: Use Claude Code in sandbox to plan the next feature based on recent work
     logStep("Generating feature prompt from recent commits");
-    const featurePrompt = await generateFeaturePrompt(recentCommits);
+    const featurePrompt = await generateFeaturePrompt(aiSandbox, recentCommits);
     logStep("Feature prompt generated", true, { preview: featurePrompt.slice(0, 300) });
 
     // Step 3 & 4: Trigger the coding agent to implement the feature and open PRs
@@ -93,7 +100,7 @@ export const agentDayTask = schedules.task({
         logStep(`Review iteration ${iteration + 1} for PR #${pr.number}`);
 
         const feedback = await fetchPRReviews(pr.repo, pr.number);
-        const assessment = await assessPRFeedback(pr.repo, featurePrompt, feedback);
+        const assessment = await assessPRFeedback(aiSandbox, pr.repo, featurePrompt, feedback);
 
         logStep(`Feedback assessed for PR #${pr.number}`, true, {
           hasActionableFeedback: assessment.hasActionableFeedback,
@@ -184,5 +191,9 @@ export const agentDayTask = schedules.task({
     });
 
     return { success: true, prs: mergedPRs, featurePrompt };
+    } finally {
+      logStep("Stopping AI reasoning sandbox", false);
+      await aiSandbox.stop();
+    }
   },
 });

From 321e516598042a0f5880d84e1b48fce75fb719ac Mon Sep 17 00:00:00 2001
From: Sweets Sweetman <sweetmantech@gmail.com>
Date: Tue, 24 Mar 2026 09:36:23 -0500
Subject: [PATCH 3/4] refactor: extract getFallbackPrompt to its own file (SRP)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/ai/generateFeaturePrompt.ts | 14 +-------------
 src/ai/getFallbackPrompt.ts     | 16 ++++++++++++++++
 2 files changed, 17 insertions(+), 13 deletions(-)
 create mode 100644 src/ai/getFallbackPrompt.ts

diff --git a/src/ai/generateFeaturePrompt.ts b/src/ai/generateFeaturePrompt.ts
index e1e45a9..3f08499 100644
--- a/src/ai/generateFeaturePrompt.ts
+++ b/src/ai/generateFeaturePrompt.ts
@@ -2,6 +2,7 @@ import type { Sandbox } from "@vercel/sandbox";
 import type { SubmoduleCommit } from "../github/fetchRecentSubmoduleCommits";
 import { runClaudeCodeAgent } from "../sandboxes/runClaudeCodeAgent";
 import { logStep } from "../sandboxes/logStep";
+import { getFallbackPrompt } from "./getFallbackPrompt";
 
 const SYSTEM_CONTEXT = `You are a senior software engineer on the Recoupable platform — a music industry management tool for record labels and artist managers.
 
@@ -75,16 +76,3 @@ Based on this recent work, write a specific implementation prompt for an AI codi
     return getFallbackPrompt();
   }
 }
-
-function getFallbackPrompt(): string {
-  return [
-    "Read PROGRESS_USAGE.md and PROGRESS.md in the mono repo codebase first.",
-    "",
-    "Review the last 10 commits across the api, chat, admin, and tasks submodules.",
-    "Identify the single most impactful small improvement that builds on recent work",
-    "— a bug fix, a missing endpoint, a UI polish, or a small new feature.",
-    "",
-    "Implement it end-to-end (API route + frontend if needed), write any relevant tests,",
-    "then update PROGRESS.md with what you built.",
-  ].join("\n");
-}
diff --git a/src/ai/getFallbackPrompt.ts b/src/ai/getFallbackPrompt.ts
new file mode 100644
index 0000000..5053314
--- /dev/null
+++ b/src/ai/getFallbackPrompt.ts
@@ -0,0 +1,16 @@
+/**
+ * Returns a generic feature implementation prompt used as a fallback
+ * when Claude Code fails to generate a specific one from recent commits.
+ */
+export function getFallbackPrompt(): string {
+  return [
+    "Read PROGRESS_USAGE.md and PROGRESS.md in the mono repo codebase first.",
+    "",
+    "Review the last 10 commits across the api, chat, admin, and tasks submodules.",
+    "Identify the single most impactful small improvement that builds on recent work",
+    "— a bug fix, a missing endpoint, a UI polish, or a small new feature.",
+    "",
+    "Implement it end-to-end (API route + frontend if needed), write any relevant tests,",
+    "then update PROGRESS.md with what you built.",
+  ].join("\n");
+}

From da902900c554ced2cfede7306c09d9bcde4f2a3a Mon Sep 17 00:00:00 2001
From: Sweets Sweetman <sweetmantech@gmail.com>
Date: Tue, 24 Mar 2026 10:03:53 -0500
Subject: [PATCH 4/4] feat: replace auto-merge with Telegram notification for
 human review

Instead of automatically merging PRs, agent-day now sends a Telegram
message with PR links so a human can review and merge manually.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/tasks/agentDayTask.ts                     | 49 +++++++------
 .../__tests__/sendTelegramMessage.test.ts     | 71 +++++++++++++++++++
 src/telegram/sendTelegramMessage.ts           | 42 +++++++++++
 3 files changed, 141 insertions(+), 21 deletions(-)
 create mode 100644 src/telegram/__tests__/sendTelegramMessage.test.ts
 create mode 100644 src/telegram/sendTelegramMessage.ts

diff --git a/src/tasks/agentDayTask.ts b/src/tasks/agentDayTask.ts
index 57da4d5..3c19d1c 100644
--- a/src/tasks/agentDayTask.ts
+++ b/src/tasks/agentDayTask.ts
@@ -7,8 +7,8 @@ import { waitForPRChecks } from "../github/waitForPRChecks";
 import { fetchPRReviews } from "../github/fetchPRReviews";
 import { assessPRFeedback } from "../ai/assessPRFeedback";
 import { getVercelPreviewUrl } from "../github/getVercelPreviewUrl";
-import { mergePR } from "../github/mergePR";
 import { postToSlackChannel } from "../slack/postToSlackChannel";
+import { sendTelegramMessage } from "../telegram/sendTelegramMessage";
 import { logStep } from "../sandboxes/logStep";
 import { getOrCreateSandbox } from "../sandboxes/getOrCreateSandbox";
 import { CODING_AGENT_ACCOUNT_ID } from "../consts";
@@ -26,7 +26,7 @@ const PR_CHECK_TIMEOUT_MS = 15 * 60 * 1000; // 15 minutes
  * 3. Triggers the coding-agent task to implement it and open PRs
  * 4. Reviews each PR: waits for checks, assesses human feedback, applies fixes
  * 5. Tests Vercel preview deployments (api/chat repos)
- * 6. Merges approved PRs
+ * 6. Sends Telegram notification with PR links for human review/merge
  * 7. Posts a summary to the #dev Slack channel
  */
 export const agentDayTask = schedules.task({
@@ -79,7 +79,7 @@ export const agentDayTask = schedules.task({
     }
 
     // Step 5: Review each PR — wait for checks, implement feedback, test preview
-    const mergedPRs: typeof prs = [];
+    const reviewedPRs: typeof prs = [];
     let currentSnapshotId = snapshotId;
 
     for (const pr of prs) {
@@ -156,29 +156,36 @@ export const agentDayTask = schedules.task({
         }
       }
 
-      // Step 6: Merge the PR
-      logStep(`Merging PR #${pr.number} in ${pr.repo}`);
-      const merged = await mergePR(pr.repo, pr.number);
-
-      if (merged) {
-        mergedPRs.push(pr);
-        logStep(`Merged PR #${pr.number}`, true, { url: pr.url });
-      } else {
-        logger.warn(`Could not merge PR #${pr.number} in ${pr.repo}`);
-      }
+      reviewedPRs.push(pr);
     }
 
-    // Step 7: Post summary to Slack
+    // Step 6: Send Telegram notification with PR links for human review/merge
     const date = new Date(payload.timestamp).toDateString();
-    const prLines = mergedPRs.map((pr) => `• <${pr.url}|${pr.repo} #${pr.number}>`).join("\n");
-    const unmergedCount = prs.length - mergedPRs.length;
+    const prLines = reviewedPRs
+      .map((pr) => `• <a href="${pr.url}">${pr.repo} #${pr.number}</a>`)
+      .join("\n");
+
+    const telegramMessage = [
+      `🤖 <b>Agent Day — ${date}</b>`,
+      ``,
+      `${reviewedPRs.length} PR${reviewedPRs.length !== 1 ? "s" : ""} ready for review:`,
+      prLines,
+      ``,
+      `<b>Feature:</b> ${featurePrompt.slice(0, 280)}${featurePrompt.length > 280 ? "…" : ""}`,
+    ].join("\n");
+
+    await sendTelegramMessage(telegramMessage);
+
+    // Step 7: Post summary to Slack
+    const slackPrLines = reviewedPRs
+      .map((pr) => `• <${pr.url}|${pr.repo} #${pr.number}>`)
+      .join("\n");
 
     const slackLines = [
       `🤖 *Agent Day — ${date}*`,
       ``,
-      `Implemented and merged *${mergedPRs.length}* PR${mergedPRs.length !== 1 ? "s" : ""}:`,
-      prLines || "_(none)_",
-      unmergedCount > 0 ? `\n_${unmergedCount} PR(s) could not be merged automatically._` : "",
+      `Opened *${reviewedPRs.length}* PR${reviewedPRs.length !== 1 ? "s" : ""} for review:`,
+      slackPrLines || "_(none)_",
       ``,
       `*Feature:* ${featurePrompt.slice(0, 280)}${featurePrompt.length > 280 ? "…" : ""}`,
     ];
@@ -186,11 +193,11 @@ export const agentDayTask = schedules.task({
     await postToSlackChannel(AGENT_DAY_SLACK_CHANNEL, slackLines.filter(Boolean).join("\n"));
 
     logStep("Agent Day task completed", true, {
-      mergedPRs: mergedPRs.length,
+      reviewedPRs: reviewedPRs.length,
       totalPRs: prs.length,
     });
 
-    return { success: true, prs: mergedPRs, featurePrompt };
+    return { success: true, prs: reviewedPRs, featurePrompt };
     } finally {
       logStep("Stopping AI reasoning sandbox", false);
       await aiSandbox.stop();
diff --git a/src/telegram/__tests__/sendTelegramMessage.test.ts b/src/telegram/__tests__/sendTelegramMessage.test.ts
new file mode 100644
index 0000000..48791e3
--- /dev/null
+++ b/src/telegram/__tests__/sendTelegramMessage.test.ts
@@ -0,0 +1,71 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+
+vi.mock("@trigger.dev/sdk/v3", () => ({
+  logger: { log: vi.fn(), error: vi.fn() },
+  metadata: { set: vi.fn(), append: vi.fn() },
+}));
+
+vi.mock("../../sandboxes/logStep", () => ({
+  logStep: vi.fn(),
+}));
+
+const mockFetch = vi.fn();
+vi.stubGlobal("fetch", mockFetch);
+
+const { sendTelegramMessage } = await import("../sendTelegramMessage");
+
+beforeEach(() => {
+  vi.clearAllMocks();
+  process.env.TELEGRAM_BOT_TOKEN = "123:ABC";
+  process.env.TELEGRAM_CHAT_ID = "456";
+});
+
+describe("sendTelegramMessage", () => {
+  it("sends a message and returns true on success", async () => {
+    mockFetch.mockResolvedValueOnce({ json: async () => ({ ok: true }) });
+
+    const result = await sendTelegramMessage("Hello!");
+
+    expect(result).toBe(true);
+    expect(mockFetch).toHaveBeenCalledWith(
+      "https://api.telegram.org/bot123:ABC/sendMessage",
+      expect.objectContaining({
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+      }),
+    );
+
+    const body = JSON.parse(mockFetch.mock.calls[0][1].body);
+    expect(body.chat_id).toBe("456");
+    expect(body.text).toBe("Hello!");
+    expect(body.parse_mode).toBe("HTML");
+  });
+
+  it("returns false when Telegram API returns ok: false", async () => {
+    mockFetch.mockResolvedValueOnce({
+      json: async () => ({ ok: false, description: "Bad Request" }),
+    });
+
+    const result = await sendTelegramMessage("Test");
+
+    expect(result).toBe(false);
+  });
+
+  it("returns false when TELEGRAM_BOT_TOKEN is missing", async () => {
+    delete process.env.TELEGRAM_BOT_TOKEN;
+
+    const result = await sendTelegramMessage("Test");
+
+    expect(result).toBe(false);
+    expect(mockFetch).not.toHaveBeenCalled();
+  });
+
+  it("returns false when TELEGRAM_CHAT_ID is missing", async () => {
+    delete process.env.TELEGRAM_CHAT_ID;
+
+    const result = await sendTelegramMessage("Test");
+
+    expect(result).toBe(false);
+    expect(mockFetch).not.toHaveBeenCalled();
+  });
+});
diff --git a/src/telegram/sendTelegramMessage.ts b/src/telegram/sendTelegramMessage.ts
new file mode 100644
index 0000000..87c97d4
--- /dev/null
+++ b/src/telegram/sendTelegramMessage.ts
@@ -0,0 +1,42 @@
+import { logger } from "@trigger.dev/sdk/v3";
+import { logStep } from "../sandboxes/logStep";
+
+/**
+ * Sends a message to a Telegram chat using the Bot API.
+ * Returns true if the message was sent successfully.
+ *
+ * @param text - The message text (supports Telegram MarkdownV2 or HTML)
+ * @param parseMode - Optional parse mode ("MarkdownV2" or "HTML")
+ */
+export async function sendTelegramMessage(
+  text: string,
+  parseMode: "MarkdownV2" | "HTML" = "HTML",
+): Promise<boolean> {
+  const token = process.env.TELEGRAM_BOT_TOKEN;
+  const chatId = process.env.TELEGRAM_CHAT_ID;
+
+  if (!token || !chatId) {
+    logger.error("Missing TELEGRAM_BOT_TOKEN or TELEGRAM_CHAT_ID");
+    return false;
+  }
+
+  const response = await fetch(`https://api.telegram.org/bot${token}/sendMessage`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      chat_id: chatId,
+      text,
+      parse_mode: parseMode,
+    }),
+  });
+
+  const data = (await response.json()) as { ok: boolean; description?: string };
+
+  if (!data.ok) {
+    logger.error("Failed to send Telegram message", { error: data.description });
+    return false;
+  }
+
+  logStep("Sent Telegram notification", false);
+  return true;
+}