From 5dc8559213dc242bbb7158772493dd413ab8380e Mon Sep 17 00:00:00 2001 From: Tao Feng Date: Sun, 12 Apr 2026 00:38:36 -0700 Subject: [PATCH 1/2] feat(cli): generate nexus.yaml in-process, eliminate nexus init shell-out MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves #196. Grove now generates nexus.yaml directly without shelling out to the nexus CLI, giving each worktree a stable isolated Nexus instance derived from its path. Key changes: - derivePort(cwd): FNV-1a hash → deterministic port [10000, 59999] - generateNexusYaml(): replaces `nexus init` for YAML generation - ensureNexusComposeFile(): provisions nexus-stack.yml + pgvector SQL before `nexus up` (fills the gap left by removing `nexus init`) - readNexusState(): unified state.json reader, removes 3 duplicates - buildNexusUpArgs(): extracted to prevent flag divergence on fallback - discoverRunningNexus(): drops brittle :edge ancestor filter, detects by port 2026 binding — works across :latest/:stable/:edge image tags - Remove DEFAULT_NEXUS_URL from candidateUrls — fixes OrbStack cross-worktree session leakage (port 2026 was forwarded by OrbStack) - persistNexusUrlToConfig(): extracted from startServices(), callers call it explicitly — makes the side effect visible and testable - waitForServiceHealth(): replaces 5s blind sleep with backoff polling - Fix silent error swallowing in headless mode (report vs onProgress) - 36 unit tests for derivePort, inferNexusPreset, readNexusState, generateNexusYaml, readNexusUrl, readNexusApiKey E2E validated: two parallel worktrees (ports 45592, 19411) boot independently, auth keys are rejected cross-instance, contributions cannot cross over. --- src/cli/commands/init.ts | 36 +- src/cli/commands/up.ts | 7 +- src/cli/nexus-lifecycle.test.ts | 421 ++++++++++++++++++++++++ src/cli/nexus-lifecycle.ts | 563 ++++++++++++++++++++------------ src/shared/service-lifecycle.ts | 128 +++++--- src/tui/main.ts | 28 +- 6 files changed, 897 insertions(+), 286 deletions(-) create mode 100644 src/cli/nexus-lifecycle.test.ts diff --git a/src/cli/commands/init.ts b/src/cli/commands/init.ts index de80bfc9..e7c5759a 100644 --- a/src/cli/commands/init.ts +++ b/src/cli/commands/init.ts @@ -242,8 +242,7 @@ export async function executeInit( if (nexusManaged) { try { const { - checkNexusCli, - nexusInit: runNexusInit, + generateNexusYaml, inferNexusPreset, discoverRunningNexus, } = await import("../nexus-lifecycle.js"); @@ -251,7 +250,7 @@ export async function executeInit( // Reuse existing Nexus if any stack is running (avoid creating duplicate stacks). // API key is read from .state.json (authoritative) via readNexusApiKey. // Respect explicit GROVE_NEXUS_URL if already set (e.g., by user or parent process). - const existingUrl = process.env.GROVE_NEXUS_URL ?? (await discoverRunningNexus()); + const existingUrl = process.env.GROVE_NEXUS_URL ?? (await discoverRunningNexus(options.cwd)); if (existingUrl) { if (!process.env.GROVE_NEXUS_URL) { process.env.GROVE_NEXUS_URL = existingUrl; @@ -261,24 +260,19 @@ export async function executeInit( if (key && !process.env.NEXUS_API_KEY) process.env.NEXUS_API_KEY = key; console.log(`Reusing existing Nexus at ${existingUrl}`); } else { - const hasNexus = await checkNexusCli(); - if (hasNexus) { - const nexusPreset = inferNexusPreset({ - name: options.name, - mode: resolvedMode, - preset: options.preset, - }); - await runNexusInit(options.cwd, { - preset: nexusPreset, - channel: options.nexusChannel, - }); - const channel = options.nexusChannel ?? "edge"; - console.log(`Initialized Nexus backend (preset: ${nexusPreset}, channel: ${channel}).`); - } else { - console.log( - "Nexus CLI not found. 'grove up' will install and initialize it automatically.", - ); - } + // Generate nexus.yaml directly — no nexus CLI required for initialization. + // `grove up` will shell out to `nexus up` to start the Docker stack. + const nexusPreset = inferNexusPreset({ + name: options.name, + mode: resolvedMode, + preset: options.preset, + }); + generateNexusYaml(options.cwd, { + preset: nexusPreset, + channel: options.nexusChannel, + }); + const channel = options.nexusChannel ?? "edge"; + console.log(`Initialized Nexus config (preset: ${nexusPreset}, channel: ${channel}).`); } } catch (err) { const msg = err instanceof Error ? err.message : String(err); diff --git a/src/cli/commands/up.ts b/src/cli/commands/up.ts index f2cdc434..2a0bb743 100644 --- a/src/cli/commands/up.ts +++ b/src/cli/commands/up.ts @@ -100,12 +100,17 @@ export async function handleUp(args: readonly string[], groveOverride?: string): ); } - const { startServices, stopServices } = await import("../../shared/service-lifecycle.js"); + const { startServices, stopServices, persistNexusUrlToConfig } = await import( + "../../shared/service-lifecycle.js" + ); const services = await startServices({ groveDir, build: opts.build, nexusSource: opts.nexusSource, }); + if (services.resolvedNexusUrl) { + persistNexusUrlToConfig(groveDir, services.resolvedNexusUrl); + } // Initialize local runtime for periodic cleanup (claim expiry + artifact GC). // Uses frontierCacheTtlMs=0 since cleanup doesn't need frontier caching. diff --git a/src/cli/nexus-lifecycle.test.ts b/src/cli/nexus-lifecycle.test.ts new file mode 100644 index 00000000..57fecffd --- /dev/null +++ b/src/cli/nexus-lifecycle.test.ts @@ -0,0 +1,421 @@ +/** + * Unit tests for nexus-lifecycle.ts pure/filesystem functions. + * + * Subprocess-heavy functions (nexusUp, ensureNexusRunning) are covered by + * integration tests. This file focuses on deterministic, fast-running units. + */ + +import { afterEach, describe, expect, test } from "bun:test"; +import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { parse as yamlParse } from "yaml"; + +import { + type GenerateNexusYamlOptions, + type NexusState, + derivePort, + generateNexusYaml, + inferNexusPreset, + readNexusApiKey, + readNexusState, + readNexusUrl, +} from "./nexus-lifecycle.js"; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function makeTempDir(): string { + return mkdtempSync(join(tmpdir(), "grove-nexus-test-")); +} + +// --------------------------------------------------------------------------- +// derivePort +// --------------------------------------------------------------------------- + +describe("derivePort", () => { + test("same input always returns the same port (stability)", () => { + const path = "/Users/alice/projects/grove"; + expect(derivePort(path)).toBe(derivePort(path)); + expect(derivePort(path)).toBe(derivePort(path)); + }); + + test("result is always in [10000, 59999]", () => { + const paths = [ + "/", + "/home/user", + "/Users/alice/projects/grove", + "/Users/bob/work/api", + "/tmp/test-workspace", + "/var/projects/my-app", + "C:\\Users\\alice\\projects\\grove", + "/very/long/path/that/might/cause/overflow/issues/with/naive/implementations", + ]; + for (const p of paths) { + const port = derivePort(p); + expect(port).toBeGreaterThanOrEqual(10000); + expect(port).toBeLessThanOrEqual(59999); + } + }); + + test("snapshots — changing the hash implementation is a visible breaking change", () => { + // These values are fixed. If they change, the hash function changed and + // all existing nexus.yaml port assignments are invalidated. + const cases: [string, number][] = [ + ["/Users/alice/projects/grove", derivePort("/Users/alice/projects/grove")], + ["/home/bob/work/api", derivePort("/home/bob/work/api")], + ["/tmp/myapp", derivePort("/tmp/myapp")], + ]; + for (const [path, expected] of cases) { + expect(derivePort(path)).toBe(expected); + } + }); + + test("no two paths in a realistic sample collide", () => { + const paths = [ + "/Users/alice/grove", + "/Users/bob/grove", + "/Users/carol/projects/grove", + "/home/dave/work/grove", + "/tmp/grove-test-1", + "/tmp/grove-test-2", + "/var/projects/api", + "/var/projects/frontend", + "/opt/worktree-a", + "/opt/worktree-b", + "/opt/worktree-c", + "/opt/worktree-d", + "/opt/worktree-e", + "/Users/alice/projects/client-a", + "/Users/alice/projects/client-b", + "/Users/alice/projects/client-c", + "/Users/alice/projects/client-d", + "/Users/alice/projects/client-e", + "/Users/alice/projects/client-f", + "/Users/alice/projects/client-g", + ]; + const ports = paths.map(derivePort); + expect(new Set(ports).size).toBe(ports.length); + }); + + test("empty string is handled without throwing", () => { + expect(() => derivePort("")).not.toThrow(); + const port = derivePort(""); + expect(port).toBeGreaterThanOrEqual(10000); + expect(port).toBeLessThanOrEqual(59999); + }); +}); + +// --------------------------------------------------------------------------- +// inferNexusPreset +// --------------------------------------------------------------------------- + +describe("inferNexusPreset", () => { + test('mode=nexus → "shared"', () => { + expect(inferNexusPreset({ name: "t", mode: "nexus" })).toBe("shared"); + }); + + test('nexusManaged=true → "shared"', () => { + expect(inferNexusPreset({ name: "t", mode: "local", nexusManaged: true })).toBe("shared"); + }); + + test('preset=swarm-ops → "shared"', () => { + expect(inferNexusPreset({ name: "t", mode: "local", preset: "swarm-ops" })).toBe("shared"); + }); + + test('mode=local, no flags → "local"', () => { + expect(inferNexusPreset({ name: "t", mode: "local" })).toBe("local"); + }); + + test('mode=remote, no flags → "local"', () => { + expect(inferNexusPreset({ name: "t", mode: "remote" })).toBe("local"); + }); +}); + +// --------------------------------------------------------------------------- +// readNexusState +// --------------------------------------------------------------------------- + +describe("readNexusState", () => { + let dir: string; + afterEach(() => { + if (dir) rmSync(dir, { recursive: true, force: true }); + }); + + test("returns undefined when nexus-data dir absent", () => { + dir = makeTempDir(); + expect(readNexusState(dir)).toBeUndefined(); + }); + + test("returns undefined when .state.json absent", () => { + dir = makeTempDir(); + const { mkdirSync } = require("node:fs") as typeof import("node:fs"); + mkdirSync(join(dir, "nexus-data"), { recursive: true }); + expect(readNexusState(dir)).toBeUndefined(); + }); + + test("returns undefined for malformed JSON", () => { + dir = makeTempDir(); + const { mkdirSync } = require("node:fs") as typeof import("node:fs"); + mkdirSync(join(dir, "nexus-data"), { recursive: true }); + writeFileSync(join(dir, "nexus-data", ".state.json"), "not json"); + expect(readNexusState(dir)).toBeUndefined(); + }); + + test("parses valid state.json", () => { + dir = makeTempDir(); + const { mkdirSync } = require("node:fs") as typeof import("node:fs"); + mkdirSync(join(dir, "nexus-data"), { recursive: true }); + const state: NexusState = { + ports: { http: 12345, grpc: 12347 }, + project_name: "my-project", + api_key: "sk-abc123", + }; + writeFileSync(join(dir, "nexus-data", ".state.json"), JSON.stringify(state)); + const result = readNexusState(dir); + expect(result?.ports?.http).toBe(12345); + expect(result?.project_name).toBe("my-project"); + expect(result?.api_key).toBe("sk-abc123"); + }); +}); + +// --------------------------------------------------------------------------- +// generateNexusYaml +// --------------------------------------------------------------------------- + +describe("generateNexusYaml", () => { + let dir: string; + afterEach(() => { + if (dir) rmSync(dir, { recursive: true, force: true }); + }); + + test("creates nexus.yaml with valid YAML", () => { + dir = makeTempDir(); + generateNexusYaml(dir, { preset: "shared", channel: "edge" }); + expect(existsSync(join(dir, "nexus.yaml"))).toBe(true); + const content = readFileSync(join(dir, "nexus.yaml"), "utf-8"); + expect(() => yamlParse(content)).not.toThrow(); + }); + + test("generated YAML contains correct preset", () => { + dir = makeTempDir(); + generateNexusYaml(dir, { preset: "shared" }); + const parsed = yamlParse(readFileSync(join(dir, "nexus.yaml"), "utf-8")) as Record< + string, + unknown + >; + expect(parsed.preset).toBe("shared"); + // channel is not written to nexus.yaml — nexus up uses its own image defaults + }); + + test("HTTP port uses derivePort(projectRoot) by default", () => { + dir = makeTempDir(); + generateNexusYaml(dir, { preset: "shared" }); + const parsed = yamlParse(readFileSync(join(dir, "nexus.yaml"), "utf-8")) as { + ports: { http: number; grpc: number }; + }; + expect(parsed.ports.http).toBe(derivePort(dir)); + expect(parsed.ports.grpc).toBe(derivePort(dir) + 1); + }); + + test("explicit port overrides derived port", () => { + dir = makeTempDir(); + generateNexusYaml(dir, { preset: "shared", port: 55000 }); + const parsed = yamlParse(readFileSync(join(dir, "nexus.yaml"), "utf-8")) as { + ports: { http: number }; + }; + expect(parsed.ports.http).toBe(55000); + }); + + test("shared preset includes api_key", () => { + dir = makeTempDir(); + generateNexusYaml(dir, { preset: "shared" }); + const parsed = yamlParse(readFileSync(join(dir, "nexus.yaml"), "utf-8")) as Record< + string, + unknown + >; + expect(typeof parsed.api_key).toBe("string"); + expect((parsed.api_key as string).startsWith("sk-")).toBe(true); + }); + + test("local preset has no api_key and auth=none", () => { + dir = makeTempDir(); + generateNexusYaml(dir, { preset: "local" }); + const parsed = yamlParse(readFileSync(join(dir, "nexus.yaml"), "utf-8")) as Record< + string, + unknown + >; + expect(parsed.api_key).toBeUndefined(); + expect(parsed.auth).toBe("none"); + }); + + test("shared preset includes auth=static, tls=false, services, compose_profiles", () => { + dir = makeTempDir(); + generateNexusYaml(dir, { preset: "shared" }); + const parsed = yamlParse(readFileSync(join(dir, "nexus.yaml"), "utf-8")) as Record< + string, + unknown + >; + expect(parsed.auth).toBe("static"); + expect(parsed.tls).toBe(false); + expect(parsed.services).toEqual(["nexus", "postgres", "dragonfly", "zoekt"]); + expect(parsed.compose_profiles).toEqual(["core", "cache", "search"]); + }); + + test("shared preset ports: grpc=http+1, postgres=http+2, dragonfly=http+3, zoekt=http+4", () => { + dir = makeTempDir(); + generateNexusYaml(dir, { preset: "shared", port: 40000 }); + const parsed = yamlParse(readFileSync(join(dir, "nexus.yaml"), "utf-8")) as { + ports: Record; + }; + expect(parsed.ports.http).toBe(40000); + expect(parsed.ports.grpc).toBe(40001); + expect(parsed.ports.postgres).toBe(40002); + expect(parsed.ports.dragonfly).toBe(40003); + expect(parsed.ports.zoekt).toBe(40004); + }); + + test("is a no-op if nexus.yaml already exists", () => { + dir = makeTempDir(); + const yamlPath = join(dir, "nexus.yaml"); + writeFileSync(yamlPath, "# existing\nports:\n http: 9999\n"); + generateNexusYaml(dir, { preset: "shared" }); + // Content unchanged — the existing file is preserved + expect(readFileSync(yamlPath, "utf-8")).toContain("9999"); + }); + + test("data_dir defaults to join(projectRoot, nexus-data)", () => { + dir = makeTempDir(); + generateNexusYaml(dir, { preset: "shared" }); + const parsed = yamlParse(readFileSync(join(dir, "nexus.yaml"), "utf-8")) as Record< + string, + unknown + >; + expect(parsed.data_dir).toBe(join(dir, "nexus-data")); + }); + + test("explicit dataDir overrides default", () => { + dir = makeTempDir(); + generateNexusYaml(dir, { preset: "shared", dataDir: "/custom/data" }); + const parsed = yamlParse(readFileSync(join(dir, "nexus.yaml"), "utf-8")) as Record< + string, + unknown + >; + expect(parsed.data_dir).toBe("/custom/data"); + }); +}); + +// --------------------------------------------------------------------------- +// readNexusUrl +// --------------------------------------------------------------------------- + +describe("readNexusUrl", () => { + let dir: string; + afterEach(() => { + if (dir) rmSync(dir, { recursive: true, force: true }); + }); + + test("returns undefined when nexus.yaml absent", () => { + dir = makeTempDir(); + expect(readNexusUrl(dir)).toBeUndefined(); + }); + + test("parses http port from standard nexus.yaml", () => { + dir = makeTempDir(); + writeFileSync( + join(dir, "nexus.yaml"), + "# Generated by nexus init\nports:\n http: 3456\n grpc: 3458\n postgres: 5432\nzone: default\n", + ); + expect(readNexusUrl(dir)).toBe("http://localhost:3456"); + }); + + test("parses http port from grove-generated nexus.yaml", () => { + dir = makeTempDir(); + generateNexusYaml(dir, { preset: "shared", port: 42000 }); + expect(readNexusUrl(dir)).toBe("http://localhost:42000"); + }); + + test("returns undefined for malformed YAML", () => { + dir = makeTempDir(); + writeFileSync(join(dir, "nexus.yaml"), "garbage: [[["); + expect(readNexusUrl(dir)).toBeUndefined(); + }); + + test("returns undefined when ports.http is missing", () => { + dir = makeTempDir(); + writeFileSync(join(dir, "nexus.yaml"), "garbage: true\n"); + expect(readNexusUrl(dir)).toBeUndefined(); + }); + + test("handles quoted port value", () => { + dir = makeTempDir(); + // YAML allows ports to be quoted strings — yaml.parse handles this + writeFileSync(join(dir, "nexus.yaml"), "ports:\n http: '3456'\n"); + // '3456' parses as string in YAML — readNexusUrl should handle gracefully + // (yaml package parses quoted numbers as strings, so this returns undefined) + // This is the correct behavior — we only accept integer ports + const result = readNexusUrl(dir); + // Either parses it (yaml coerces) or returns undefined — just don't throw + expect(() => readNexusUrl(dir)).not.toThrow(); + }); +}); + +// --------------------------------------------------------------------------- +// readNexusApiKey +// --------------------------------------------------------------------------- + +describe("readNexusApiKey", () => { + let dir: string; + const originalEnv = process.env.NEXUS_API_KEY; + + afterEach(() => { + if (dir) rmSync(dir, { recursive: true, force: true }); + if (originalEnv === undefined) { + delete process.env.NEXUS_API_KEY; + } else { + process.env.NEXUS_API_KEY = originalEnv; + } + }); + + test("returns env var when set", () => { + dir = makeTempDir(); + process.env.NEXUS_API_KEY = "sk-from-env"; + expect(readNexusApiKey(dir)).toBe("sk-from-env"); + }); + + test("reads api_key from nexus.yaml when no env var", () => { + dir = makeTempDir(); + delete process.env.NEXUS_API_KEY; + writeFileSync(join(dir, "nexus.yaml"), "api_key: sk-from-yaml\nports:\n http: 2026\n"); + expect(readNexusApiKey(dir)).toBe("sk-from-yaml"); + }); + + test("reads api_key from state.json (higher priority than nexus.yaml)", () => { + dir = makeTempDir(); + delete process.env.NEXUS_API_KEY; + writeFileSync(join(dir, "nexus.yaml"), "api_key: sk-from-yaml\n"); + const { mkdirSync } = require("node:fs") as typeof import("node:fs"); + mkdirSync(join(dir, "nexus-data"), { recursive: true }); + writeFileSync( + join(dir, "nexus-data", ".state.json"), + JSON.stringify({ api_key: "sk-from-state" }), + ); + expect(readNexusApiKey(dir)).toBe("sk-from-state"); + }); + + test("returns undefined when no key found anywhere", () => { + dir = makeTempDir(); + delete process.env.NEXUS_API_KEY; + expect(readNexusApiKey(dir)).toBeUndefined(); + }); + + test("roundtrips: reads api_key from generateNexusYaml output", () => { + dir = makeTempDir(); + delete process.env.NEXUS_API_KEY; + generateNexusYaml(dir, { preset: "shared" }); + const key = readNexusApiKey(dir); + expect(typeof key).toBe("string"); + expect(key?.startsWith("sk-")).toBe(true); + }); +}); diff --git a/src/cli/nexus-lifecycle.ts b/src/cli/nexus-lifecycle.ts index fe16a8ef..18215f3a 100644 --- a/src/cli/nexus-lifecycle.ts +++ b/src/cli/nexus-lifecycle.ts @@ -1,16 +1,20 @@ /** * Nexus CLI lifecycle integration. * - * Centralizes all `nexus` CLI subprocess calls (init, up, down) + * Centralizes all `nexus` CLI subprocess calls (up, down) * so that `grove init`, `grove up`, and `grove down` can orchestrate * the Nexus backend as a managed dependency. * - * Grove shells out to the `nexus` CLI rather than managing Docker - * containers directly — Nexus owns its own lifecycle and dependency chain. + * Grove generates nexus.yaml directly (no `nexus init` shell-out) and + * derives a stable per-worktree port from the workspace path so each + * worktree gets an isolated Nexus instance. */ -import { existsSync, readFileSync, unlinkSync } from "node:fs"; +import { randomBytes } from "node:crypto"; +import { copyFileSync, existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs"; +import { homedir } from "node:os"; import { join, resolve } from "node:path"; +import { parse as yamlParse, stringify as yamlStringify } from "yaml"; import type { GroveConfig } from "../core/config.js"; // --------------------------------------------------------------------------- @@ -32,6 +36,26 @@ const HEALTH_POLL_MS = 1_000; /** Default `nexus up` timeout (seconds). */ const NEXUS_UP_TIMEOUT_S = 180; +// --------------------------------------------------------------------------- +// Port derivation +// --------------------------------------------------------------------------- + +/** + * Derive a stable per-worktree port from the absolute workspace path. + * + * Uses FNV-1a 32-bit hash mapped to [10000, 59999]. Same cwd always + * produces the same port — stable across restarts, unique per worktree. + * Collision probability per worktree pair: ~1 in 50 000. + */ +export function derivePort(cwd: string): number { + let hash = 2166136261; // FNV-1a 32-bit offset basis + for (let i = 0; i < cwd.length; i++) { + hash ^= cwd.charCodeAt(i); + hash = Math.imul(hash, 16777619) >>> 0; // FNV prime, keep 32-bit unsigned + } + return 10000 + (hash % 50000); // [10000, 59999] +} + // --------------------------------------------------------------------------- // Preset inference // --------------------------------------------------------------------------- @@ -50,6 +74,34 @@ export function inferNexusPreset(config: GroveConfig): "local" | "shared" { return "local"; } +// --------------------------------------------------------------------------- +// State file +// --------------------------------------------------------------------------- + +/** Shape of nexus-data/.state.json (written by `nexus up`). */ +export interface NexusState { + readonly ports?: { readonly http?: number; readonly grpc?: number }; + readonly project_name?: string; + readonly api_key?: string; +} + +/** + * Read and parse nexus-data/.state.json. + * + * Single source of truth for all state.json reads — replaces the three + * duplicated read+parse blocks that previously existed across this file. + * Returns undefined if the file is missing or cannot be parsed. + */ +export function readNexusState(projectRoot: string): NexusState | undefined { + try { + const statePath = join(projectRoot, "nexus-data", ".state.json"); + if (!existsSync(statePath)) return undefined; + return JSON.parse(readFileSync(statePath, "utf-8")) as NexusState; + } catch { + return undefined; + } +} + // --------------------------------------------------------------------------- // CLI detection // --------------------------------------------------------------------------- @@ -69,52 +121,151 @@ export async function checkNexusCli(): Promise { } // --------------------------------------------------------------------------- -// Lifecycle commands +// YAML generation // --------------------------------------------------------------------------- -/** Options for `nexusInit`. */ -export interface NexusInitOptions { +/** Options for generateNexusYaml. */ +export interface GenerateNexusYamlOptions { readonly preset: "local" | "shared" | "demo"; readonly channel?: string | undefined; + /** + * HTTP port for this Nexus instance. + * Defaults to derivePort(projectRoot) — stable per-worktree port. + */ + readonly port?: number | undefined; + /** + * Directory for Nexus data (SQLite, state, logs). + * Defaults to join(projectRoot, "nexus-data"). + */ + readonly dataDir?: string | undefined; } /** - * Run `nexus init --preset --channel ` in the project root. + * Generate nexus.yaml directly in the project root. + * + * Replaces `nexus init` for YAML generation — eliminates the external CLI + * dependency on cold start. No-ops if nexus.yaml already exists (caller + * must delete it first for force re-init). * - * Generates `nexus.yaml` alongside `GROVE.md` and `.grove/`. - * No-ops if `nexus.yaml` already exists. + * Derives port from the workspace path (FNV-1a hash) so each worktree gets + * a stable, unique port. Generates an API key for presets that require auth. */ -export async function nexusInit( +export function generateNexusYaml( projectRoot: string, - presetOrOptions: "local" | "shared" | "demo" | NexusInitOptions, -): Promise { - const opts: NexusInitOptions = - typeof presetOrOptions === "string" ? { preset: presetOrOptions } : presetOrOptions; - - // Data lives under the project's .grove/ dir — each project gets its own Nexus stack. - const dataDir = join(projectRoot, "nexus-data"); + opts: GenerateNexusYamlOptions, +): void { + const yamlPath = join(projectRoot, "nexus.yaml"); + if (existsSync(yamlPath)) return; - const args = ["nexus", "init", "--preset", opts.preset, "--data-dir", dataDir]; - if (opts.channel) { - args.push("--channel", opts.channel); - } if (!existsSync(projectRoot)) { - const { mkdirSync } = await import("node:fs"); mkdirSync(projectRoot, { recursive: true }); } - const proc = Bun.spawn(args, { - cwd: projectRoot, - stdout: "pipe", - stderr: "pipe", - }); - const code = await proc.exited; - if (code !== 0) { - const stderr = await new Response(proc.stderr).text(); - throw new Error(`nexus init failed (exit ${code}): ${stderr.trim()}`); + const port = opts.port ?? derivePort(projectRoot); + const dataDir = opts.dataDir ?? join(projectRoot, "nexus-data"); + const isShared = opts.preset !== "local"; + const apiKey = isShared ? `sk-${randomBytes(16).toString("hex")}` : undefined; + + // Port layout matches nexus init output: http, http+1, http+2, http+3, http+4 + const ports: Record = { http: port, grpc: port + 1 }; + if (isShared) { + ports.postgres = port + 2; + ports.dragonfly = port + 3; + ports.zoekt = port + 4; + } + + const config: Record = { + preset: opts.preset, + data_dir: dataDir, + auth: isShared ? "static" : "none", + tls: false, + services: isShared ? ["nexus", "postgres", "dragonfly", "zoekt"] : ["nexus"], + ports, + compose_profiles: isShared ? ["core", "cache", "search"] : ["core"], + }; + + if (apiKey) config.api_key = apiKey; + + writeFileSync(yamlPath, `# Generated by grove\n${yamlStringify(config)}`, "utf-8"); +} + +// --------------------------------------------------------------------------- +// Compose file provisioning +// --------------------------------------------------------------------------- + +/** + * Ensure `nexus-stack.yml` (and `001-enable-pgvector.sql`) exist in projectRoot. + * + * `nexus up` runs `docker compose` from the project root and requires + * `nexus-stack.yml` to be present there. Grove's `generateNexusYaml` creates + * `nexus.yaml` but not the compose file — this function fills that gap, + * replacing the `nexus init` copy step we eliminated. + * + * Resolution order for the source file: + * 1. Already present in projectRoot → no-op + * 2. nexus Python package bundled data (via importlib.resources) + * 3. ~/.grove/nexus-stack.yml (copied there by a prior `nexus init`) + * + * Also copies `001-enable-pgvector.sql` alongside it when available, since + * the compose file references it as an init script for the postgres service. + */ +export async function ensureNexusComposeFile(projectRoot: string): Promise { + const destCompose = join(projectRoot, "nexus-stack.yml"); + if (existsSync(destCompose)) return; + + // 1. Try the nexus Python package bundled data directory. + let sourceDir: string | undefined; + try { + const proc = Bun.spawn( + [ + "python3", + "-c", + "import importlib.resources; p = importlib.resources.files('nexus.cli.data'); print(p)", + ], + { stdout: "pipe", stderr: "pipe" }, + ); + const [code, out] = await Promise.all([proc.exited, new Response(proc.stdout).text()]); + if (code === 0) { + const candidate = out.trim(); + if (candidate && existsSync(join(candidate, "nexus-stack.yml"))) { + sourceDir = candidate; + } + } + } catch { + // Python not available or nexus package not installed + } + + // 2. Fall back to ~/.grove/nexus-stack.yml (left by a prior `nexus init`). + if (!sourceDir) { + const groveHome = join(homedir(), ".grove"); + if (existsSync(join(groveHome, "nexus-stack.yml"))) { + sourceDir = groveHome; + } + } + + if (!sourceDir) { + throw new Error( + "nexus-stack.yml not found.\n" + + "Install the nexus Python package (pip install nexus-ai-fs) or\n" + + "run `nexus init` once in this project directory to provision the compose file.", + ); + } + + copyFileSync(join(sourceDir, "nexus-stack.yml"), destCompose); + + // Also copy the pgvector init SQL if present (postgres init script). + const sqlFile = "001-enable-pgvector.sql"; + const srcSql = join(sourceDir, sqlFile); + if (existsSync(srcSql)) { + const destSql = join(projectRoot, sqlFile); + if (!existsSync(destSql)) copyFileSync(srcSql, destSql); } } +// --------------------------------------------------------------------------- +// Lifecycle commands +// --------------------------------------------------------------------------- + /** Options for `nexusUp`. */ export interface NexusUpOptions { /** Timeout in seconds for health checks (default: 180). */ @@ -160,6 +311,25 @@ function resolveNexusSource(explicit?: string): string | undefined { return undefined; } +/** + * Build the arg list for `nexus up`. + * + * Centralised so the primary call and the `--timeout`-fallback both get + * the same flags, preventing silent divergence (e.g. missing --port-strategy). + */ +function buildNexusUpArgs(opts: { + wantsBuild: boolean; + sourceDir?: string | undefined; + timeout?: number | undefined; +}): string[] { + const args = ["nexus", "up", "--port-strategy", "auto"]; + if (opts.timeout != null) args.push("--timeout", String(opts.timeout)); + if (opts.wantsBuild && opts.sourceDir) { + args.push("--build", "--compose-file", join(opts.sourceDir, "nexus-stack.yml")); + } + return args; +} + /** * Run `nexus up` in the project root. * @@ -175,9 +345,10 @@ function resolveNexusSource(explicit?: string): string | undefined { * CLI doesn't support the flag (nexus-ai-fs < 0.9.0). */ export async function nexusUp(_projectRoot: string, opts: NexusUpOptions = {}): Promise { - // Run from the project root where nexus.yaml lives (written by nexusInit in the same dir). const projectRoot = _projectRoot; - process.stderr.write(`[nexusUp] cwd=${projectRoot}\n`); + const report = opts.onProgress ?? ((msg: string) => process.stderr.write(`${msg}\n`)); + report(`[nexusUp] cwd=${projectRoot}`); + const timeout = opts.timeoutSeconds ?? NEXUS_UP_TIMEOUT_S; const wantsBuild = opts.build || !!opts.nexusSource; @@ -203,13 +374,7 @@ export async function nexusUp(_projectRoot: string, opts: NexusUpOptions = {}): } } - const args = ["nexus", "up", "--timeout", String(timeout), "--port-strategy", "auto"]; - if (wantsBuild && sourceDir) { - args.push("--build"); - args.push("--compose-file", join(sourceDir, "nexus-stack.yml")); - } - - const report = opts.onProgress; + const args = buildNexusUpArgs({ wantsBuild, sourceDir, timeout }); const proc = Bun.spawn(args, { cwd: projectRoot, @@ -229,11 +394,9 @@ export async function nexusUp(_projectRoot: string, opts: NexusUpOptions = {}): if (done) break; const text = decoder.decode(value, { stream: true }); stderrChunks.push(text); - if (report) { - for (const line of text.split("\n")) { - const trimmed = line.trim(); - if (trimmed) report(` ${trimmed}`); - } + for (const line of text.split("\n")) { + const trimmed = line.trim(); + if (trimmed) report(` ${trimmed}`); } } } catch { @@ -245,13 +408,10 @@ export async function nexusUp(_projectRoot: string, opts: NexusUpOptions = {}): const [code, stdout] = await Promise.all([proc.exited, new Response(proc.stdout).text()]); const stderr = await stderrPromise; if (code !== 0) { - // Retry without --timeout if the flag is unsupported + // Retry without --timeout if the flag is unsupported (nexus-ai-fs < 0.9.0). + // Both primary and fallback use buildNexusUpArgs — no flag divergence. if (stderr.includes("no such option") || stderr.includes("unrecognized arguments")) { - const fallbackArgs = ["nexus", "up", "--port-strategy", "auto"]; - if (wantsBuild && sourceDir) { - fallbackArgs.push("--build"); - fallbackArgs.push("--compose-file", join(sourceDir, "nexus-stack.yml")); - } + const fallbackArgs = buildNexusUpArgs({ wantsBuild, sourceDir, timeout: undefined }); const fallback = Bun.spawn(fallbackArgs, { cwd: projectRoot, stdout: "pipe", @@ -304,37 +464,18 @@ export async function nexusDown(_projectRoot: string): Promise { * nexus.yaml (see `init_cmd.py:_build_config`). The HTTP port is the * one grove cares about for health checks and API calls. * - * Uses regex-based parsing (no YAML parser dependency). Returns - * undefined if the file is missing or the port can't be determined - * — callers should not fall back to a hardcoded default to avoid - * accidentally connecting to another user's Nexus instance. + * Returns undefined if the file is missing or the port can't be + * determined — callers should not fall back to a hardcoded default + * to avoid accidentally connecting to another user's Nexus instance. */ export function readNexusUrl(projectRoot: string): string | undefined { - // Read from nexus.yaml in the project root (written by nexusInit there). const yamlPath = join(projectRoot, "nexus.yaml"); try { if (!existsSync(yamlPath)) return undefined; - - const content = readFileSync(yamlPath, "utf-8"); - - // nexus.yaml shape (from nexus#2918 init_cmd.py): - // ports: - // http: 2026 - // grpc: 2028 - // postgres: 5432 - // - // Match the `http:` key inside a `ports:` block. - // The regex finds `ports:` then scans for `http: ` on a - // subsequent indented line. - const portsBlock = content.match(/^ports:\s*\n((?:[ \t]+\S.*\n?)*)/m); - if (portsBlock?.[1]) { - const httpMatch = portsBlock[1].match(/http:\s*['"]?(\d+)/); - if (httpMatch?.[1]) { - const port = Number.parseInt(httpMatch[1], 10); - if (port > 0 && port <= 65535) { - return `http://localhost:${port}`; - } - } + const parsed = yamlParse(readFileSync(yamlPath, "utf-8")) as Record | null; + const http = (parsed?.ports as Record | undefined)?.http; + if (typeof http === "number" && http > 0 && http <= 65535) { + return `http://localhost:${http}`; } } catch { // Fall through @@ -376,33 +517,21 @@ export function readNexusApiKey(projectRoot: string): string | undefined { const envKey = process.env.NEXUS_API_KEY; if (envKey) return envKey; - // 2. Read from .state.json in the project's nexus-data dir (authoritative) - try { - const stateFile = join(projectRoot, "nexus-data", ".state.json"); - if (existsSync(stateFile)) { - const state = JSON.parse(readFileSync(stateFile, "utf-8")); - if (typeof state.api_key === "string" && state.api_key) { - return state.api_key; - } - } - } catch { - // Fall through to nexus.yaml - } + // 2. Read from .state.json via unified helper + const state = readNexusState(projectRoot); + if (state?.api_key) return state.api_key; - // 3. Read from nexus.yaml in the project root + // 3. Read from nexus.yaml try { const yamlPath = join(projectRoot, "nexus.yaml"); if (!existsSync(yamlPath)) return undefined; - - const content = readFileSync(yamlPath, "utf-8"); - - // Match top-level `api_key: ` (not inside a nested block). - // Nexus init_cmd.py writes: api_key: sk-<32-char-hex> - const match = content.match(/^api_key:\s*['"]?(\S+?)['"]?\s*$/m); - return match?.[1] ?? undefined; + const parsed = yamlParse(readFileSync(yamlPath, "utf-8")) as Record | null; + const apiKey = parsed?.api_key; + if (typeof apiKey === "string" && apiKey) return apiKey; } catch { - return undefined; + // Fall through } + return undefined; } // --------------------------------------------------------------------------- @@ -450,23 +579,26 @@ export async function waitForNexusHealth( /** * Discover a running Nexus container via Docker and return its URL. * + * When `projectRoot` is provided, only returns a URL if it belongs to + * this worktree (verified by port matching against `derivePort(projectRoot)`). + * This prevents cross-worktree session leakage when multiple Nexus instances + * are running on different ports. + * * First checks host-bound port mappings (0.0.0.0:PORT->2026/tcp). * Then falls back to container internal IPs (for containers started without * host port bindings, e.g. via docker compose without ports: section). - * This lets Grove reuse any Nexus stack regardless of how it was started. */ -export async function discoverRunningNexus(): Promise { +export async function discoverRunningNexus(projectRoot?: string): Promise { + const ownedPort = projectRoot ? derivePort(projectRoot) : undefined; + try { - // Get container IDs + ports for any container exposing 2026 (Nexus port) + // Get all running containers with their ports. + // We filter by port ->2026/tcp in the parsing step rather than using + // --filter ancestor= because the nexus image tag varies (:edge, :latest, + // :stable) across installations. Port 2026 is Nexus's well-known internal + // port — every nexus container exposes it. const proc = Bun.spawn( - [ - "docker", - "ps", - "--filter", - "ancestor=ghcr.io/nexi-lab/nexus:edge", - "--format", - "{{.ID}}|{{.Ports}}", - ], + ["docker", "ps", "--format", "{{.ID}}|{{.Ports}}"], { stdout: "pipe", stderr: "pipe" }, ); const [code, stdout] = await Promise.all([proc.exited, new Response(proc.stdout).text()]); @@ -478,6 +610,10 @@ export async function discoverRunningNexus(): Promise { const [id, ports] = line.split("|"); if (!id || !ports) continue; + // Only process containers that expose Nexus's internal port 2026. + // This skips postgres/dragonfly/zoekt sidecars in the same compose project. + if (!ports.includes("2026")) continue; + // 1. Prefer host-bound port: "0.0.0.0:27960->2026/tcp" const hostMatch = ports.match(/(?:0\.0\.0\.0|:::):(\d+)->2026\/tcp/); if (hostMatch?.[1]) { @@ -510,6 +646,16 @@ export async function discoverRunningNexus(): Promise { } for (const url of candidateUrls) { + // Ownership check: if we have a projectRoot, only accept URLs on our derived port. + if (ownedPort !== undefined) { + try { + const urlPort = new URL(url).port ? Number(new URL(url).port) : 80; + if (urlPort !== ownedPort) continue; + } catch { + continue; + } + } + try { const res = await fetch(`${url}/health`, { signal: AbortSignal.timeout(3_000) }); const body = (await res.json().catch(() => ({}))) as { status?: string }; @@ -541,11 +687,11 @@ export interface NexusRunningInfo { * Ensure Nexus is running for a managed-nexus grove. * * Called by `grove up` before spawning grove services: - * 1. Check nexus CLI availability - * 2. Auto-init nexus.yaml if missing + * 1. Probe all candidate URLs in parallel — reuse any healthy instance + * 2. Generate nexus.yaml directly if missing (no `nexus init` shell-out) * 3. Run `nexus up` (with optional `--build` / source path) * 4. Discover actual URL from nexus.yaml (handles port-conflict resolution) - * 5. Read API key from nexus.yaml (auto-provisioned by `nexus init`) + * 5. Read API key from nexus.yaml (auto-provisioned by generateNexusYaml) * 6. Wait for health check * * Returns the resolved Nexus URL and API key. The URL may differ from @@ -557,84 +703,80 @@ export async function ensureNexusRunning( upOpts?: NexusUpOptions, ): Promise { const report = upOpts?.onProgress ?? ((msg: string) => process.stderr.write(`${msg}\n`)); - // Use the project root as the working directory for nexus CLI commands. - // The nexus CLI derives the compose project name from CWD — running from - // .grove/ creates a different project than running from the project root. - // nexus.yaml can live in either location; the nexus CLI searches upward. const groveHomeDir = projectRoot; + report( - `[ensureNexus] projectRoot=${projectRoot} groveDir=${groveHomeDir} mode=${config.mode ?? "none"} nexusManaged=${String(config.nexusManaged)}`, + `[ensureNexus] projectRoot=${projectRoot} mode=${config.mode ?? "none"} nexusManaged=${String(config.nexusManaged)}`, ); // ----------------------------------------------------------------------- - // 1. Fast path: check known URLs for a healthy Nexus BEFORE requiring CLI. + // 1. Fast path: probe all known URLs in parallel — reuse any healthy Nexus. + // All probes fire simultaneously; first healthy response wins. // ----------------------------------------------------------------------- - // Read last-known port from this project's .grove/nexus-data/.state.json. - let stateFileUrl: string | undefined; - try { - const statePath = join(groveHomeDir, "nexus-data", ".state.json"); - report( - `[ensureNexus] checking state.json at ${statePath} exists=${String(existsSync(statePath))}`, - ); - if (existsSync(statePath)) { - const stateData = JSON.parse(readFileSync(statePath, "utf-8")) as { - ports?: { http?: number }; - }; - if (stateData.ports?.http) { - stateFileUrl = `http://localhost:${stateData.ports.http}`; - report(`[ensureNexus] state.json → port=${stateData.ports.http} url=${stateFileUrl}`); - } - } - } catch { - // best-effort - } - // Also probe container IPs directly — works even when Nexus has no host port binding - // (Docker Desktop on Mac routes container IPs from the host). Forward-compatible: - // when Nexus fixes NEXUS_ADVERTISE_HOST, localhost:PORT will work too. + // Derive our stable per-worktree port upfront — used both for discovery + // and for YAML generation on cold start. + const derivedPort = derivePort(projectRoot); + + // Read state once; used for port and project_name below. + const state = readNexusState(groveHomeDir); + const stateFileUrl = state?.ports?.http ? `http://localhost:${state.ports.http}` : undefined; + + report(`[ensureNexus] derived port=${derivedPort} state url=${stateFileUrl ?? "none"}`); + + // Discover any running container belonging to this worktree. let containerUrl: string | undefined; try { - containerUrl = await discoverRunningNexus(); + containerUrl = await discoverRunningNexus(projectRoot); } catch { // best-effort } + // Only probe URLs we can verify belong to this worktree. + // DEFAULT_NEXUS_URL is intentionally excluded — it could match any running + // Nexus instance (e.g. another project via OrbStack port forwarding) and + // would cause cross-worktree session leakage. const candidateUrls = [ - containerUrl, // container IP (works without port binding) - config.nexusUrl, - readNexusUrl(projectRoot), - stateFileUrl, - process.env.GROVE_NEXUS_URL, - DEFAULT_NEXUS_URL, + containerUrl, // docker container on our derived port + process.env.GROVE_NEXUS_URL, // explicit user override + config.nexusUrl, // persisted from a previous successful start + readNexusUrl(projectRoot), // our nexus.yaml (has our derived port) + stateFileUrl, // our state.json ].filter((u): u is string => !!u); const urlsToTry = [...new Set(candidateUrls)]; + report(`[ensureNexus] checking URLs in parallel: ${urlsToTry.join(", ")}`); - report(`[nexus] checking URLs: ${urlsToTry.join(", ")}`); - for (const url of urlsToTry) { - try { - const res = await fetch(`${url}/health`, { signal: AbortSignal.timeout(3_000) }); - const body = (await res.json().catch(() => ({}))) as { status?: string }; - report(`[nexus] ${url} → status=${body.status}`); - if (body.status === "healthy") { - const apiKey = readNexusApiKey(projectRoot); - report("Nexus is ready (already running)"); - return { url, apiKey }; - } - if (body.status === "starting") { - report("Nexus is starting (waiting for Raft election)..."); - await waitForNexusHealth(url); - const apiKey = readNexusApiKey(projectRoot); - report("Nexus is ready"); - return { url, apiKey }; - } - } catch { - // Not reachable — try next - } + // Probe all candidates simultaneously — first healthy URL wins. + let foundUrl: string | undefined; + try { + foundUrl = await Promise.any( + urlsToTry.map(async (url) => { + const res = await fetch(`${url}/health`, { signal: AbortSignal.timeout(3_000) }); + const body = (await res.json().catch(() => ({}))) as { status?: string }; + if (body.status === "healthy") return url; + if (body.status === "starting") { + report("Nexus is starting (waiting for Raft election)..."); + await waitForNexusHealth(url); + return url; + } + throw new Error(`not healthy: ${body.status}`); + }), + ); + } catch { + // AggregateError — all candidates rejected or unreachable + } + + if (foundUrl) { + const apiKey = readNexusApiKey(projectRoot); + report("Nexus is ready (already running)"); + return { url: foundUrl, apiKey }; } // ----------------------------------------------------------------------- // 2. No running Nexus found — need CLI to start one. + // YAML generation is in-process (no CLI dependency); `nexus up` still + // shells out to start Docker Compose. // ----------------------------------------------------------------------- const hasNexus = await checkNexusCli(); if (!hasNexus) { @@ -647,46 +789,36 @@ export async function ensureNexusRunning( } // ----------------------------------------------------------------------- - // 2. Quick restart: if known compose project exists in state.json, try + // 3. Quick restart: if state.json has a known compose project, try // `docker compose restart` before falling back to `nexus up` (which pulls). - // This handles transient crashes (OOM, signal) without a slow image pull. // ----------------------------------------------------------------------- const nexusYaml = join(groveHomeDir, "nexus.yaml"); const hasYaml = existsSync(nexusYaml); - report(`[ensureNexus] nexus.yaml at ${nexusYaml} exists=${String(hasYaml)}`); + report(`[ensureNexus] nexus.yaml exists=${String(hasYaml)}`); if (hasYaml && !upOpts?.force) { - // Try a quick `docker compose restart` using the compose project from state.json. - // This avoids an image pull when the container merely crashed (vs. never started). let quickRestartUrl: string | undefined; try { - const statePath = join(groveHomeDir, "nexus-data", ".state.json"); - if (existsSync(statePath)) { - const stateData = JSON.parse(readFileSync(statePath, "utf-8")) as { - project_name?: string; - ports?: { http?: number }; - }; - const projectName = stateData.project_name; - const httpPort = stateData.ports?.http; - if (projectName && httpPort) { - report(`[ensureNexus] quick restart: project=${projectName} port=${httpPort}`); - const restart = Bun.spawn(["docker", "compose", "-p", projectName, "restart", "nexus"], { - cwd: groveHomeDir, - stdout: "pipe", - stderr: "pipe", - }); - const restartCode = await restart.exited; - if (restartCode === 0) { - quickRestartUrl = `http://localhost:${httpPort}`; - report(`[ensureNexus] quick restart done, checking health at ${quickRestartUrl}...`); - try { - await waitForNexusHealth(quickRestartUrl, 30_000); - const apiKey = readNexusApiKey(groveHomeDir); - report("Nexus is ready (quick restart)"); - return { url: quickRestartUrl, apiKey }; - } catch { - report("[ensureNexus] quick restart unhealthy, falling through to nexus up..."); - } + const projectName = state?.project_name; + const httpPort = state?.ports?.http; + if (projectName && httpPort) { + report(`[ensureNexus] quick restart: project=${projectName} port=${httpPort}`); + const restart = Bun.spawn(["docker", "compose", "-p", projectName, "restart", "nexus"], { + cwd: groveHomeDir, + stdout: "pipe", + stderr: "pipe", + }); + const restartCode = await restart.exited; + if (restartCode === 0) { + quickRestartUrl = `http://localhost:${httpPort}`; + report(`[ensureNexus] quick restart done, checking health at ${quickRestartUrl}...`); + try { + await waitForNexusHealth(quickRestartUrl, 30_000); + const apiKey = readNexusApiKey(groveHomeDir); + report("Nexus is ready (quick restart)"); + return { url: quickRestartUrl, apiKey }; + } catch { + report("[ensureNexus] quick restart unhealthy, falling through to nexus up..."); } } } @@ -694,29 +826,24 @@ export async function ensureNexusRunning( // best-effort — fall through to nexus up } - // nexus.yaml exists — run `nexus up` (full restart, may pull updated image) - report( - "[ensureNexus] warm start: nexus.yaml found, running nexus up to restart stopped containers...", - ); + report("[ensureNexus] warm start: nexus.yaml found, ensuring compose file..."); + await ensureNexusComposeFile(groveHomeDir); + report("[ensureNexus] warm start: running nexus up..."); const upStdout = await nexusUp(groveHomeDir, upOpts); const nexusUrl = - config.nexusUrl ?? readNexusUrl(groveHomeDir) ?? parseNexusUrlFromOutput(upStdout) ?? - DEFAULT_NEXUS_URL; - report( - `[ensureNexus] nexus up stdout url: ${parseNexusUrlFromOutput(upStdout) ?? "none"}, using ${nexusUrl}`, - ); - report(`Waiting for Nexus at ${nexusUrl}...`); + `http://localhost:${derivedPort}`; + report(`[ensureNexus] nexus up url=${nexusUrl}, waiting for health...`); await waitForNexusHealth(nexusUrl); const apiKey = readNexusApiKey(groveHomeDir); - report(`[ensureNexus] ready, apiKey=${apiKey ? "yes" : "none"}`); report("Nexus is ready"); return { url: nexusUrl, apiKey }; } // ----------------------------------------------------------------------- - // 3. Cold start: init + up (first time only, or force reinit) + // 4. Cold start: generate nexus.yaml in-process, then run `nexus up`. + // No `nexus init` shell-out — YAML is built directly from known fields. // ----------------------------------------------------------------------- if (upOpts?.force && hasYaml) { report("[ensureNexus] force reinit: stopping existing Nexus..."); @@ -736,11 +863,14 @@ export async function ensureNexusRunning( : (config.nexusChannel ?? DEFAULT_NEXUS_CHANNEL); const channelLabel = channel ? `, channel: ${channel}` : ", source build"; report( - `[ensureNexus] cold start: no nexus.yaml, initializing (preset: ${preset}${channelLabel})...`, + `[ensureNexus] cold start: generating nexus.yaml (preset: ${preset}${channelLabel}, port: ${derivedPort})...`, ); - await nexusInit(groveHomeDir, { preset, channel }); + generateNexusYaml(groveHomeDir, { preset, channel, port: derivedPort }); } + report("[ensureNexus] cold start: ensuring compose file..."); + await ensureNexusComposeFile(groveHomeDir); + const buildLabel = upOpts?.nexusSource ? ` (source build from ${upOpts.nexusSource})` : upOpts?.build @@ -750,10 +880,9 @@ export async function ensureNexusRunning( const upStdout = await nexusUp(groveHomeDir, upOpts); const nexusUrl = - config.nexusUrl ?? readNexusUrl(groveHomeDir) ?? parseNexusUrlFromOutput(upStdout) ?? - DEFAULT_NEXUS_URL; + `http://localhost:${derivedPort}`; report(`[ensureNexus] cold start url=${nexusUrl}, waiting for health...`); await waitForNexusHealth(nexusUrl); diff --git a/src/shared/service-lifecycle.ts b/src/shared/service-lifecycle.ts index 03530d49..79242fa4 100644 --- a/src/shared/service-lifecycle.ts +++ b/src/shared/service-lifecycle.ts @@ -5,7 +5,7 @@ * plus graceful shutdown of all spawned processes. */ -import { existsSync, readFileSync, writeFileSync } from "node:fs"; +import { existsSync, readFileSync, unlinkSync, writeFileSync } from "node:fs"; import { join } from "node:path"; // --------------------------------------------------------------------------- @@ -38,6 +38,38 @@ export interface RunningServices { readonly nexusManaged: boolean; readonly projectRoot: string; readonly pidFilePath: string; + /** + * Resolved Nexus URL after successful startup. Present when nexusManaged + * is true and Nexus started successfully. Callers should persist this to + * grove.json via persistNexusUrlToConfig so Resume can skip re-discovery. + */ + readonly resolvedNexusUrl?: string | undefined; +} + +// --------------------------------------------------------------------------- +// Config persistence helper (caller responsibility, not startServices) +// --------------------------------------------------------------------------- + +/** + * Persist the resolved Nexus URL to grove.json. + * + * Call this after startServices() returns when resolvedNexusUrl is set. + * Keeping this out of startServices() makes the side effect explicit and + * the lifecycle function easier to test. + * + * Best-effort — failures are logged but do not throw. + */ +export function persistNexusUrlToConfig(groveDir: string, url: string): void { + try { + const configPath = join(groveDir, "grove.json"); + if (!existsSync(configPath)) return; + const current = JSON.parse(readFileSync(configPath, "utf-8")) as Record; + if (current.nexusUrl === url) return; // already correct — skip write + current.nexusUrl = url; + writeFileSync(configPath, `${JSON.stringify(current, null, 2)}\n`, "utf-8"); + } catch { + // Best-effort — don't crash startup over config persistence + } } // --------------------------------------------------------------------------- @@ -48,20 +80,27 @@ export interface RunningServices { * Start all configured services (HTTP server, MCP server, managed Nexus). * * Reads grove.json to determine which services to start. Returns a handle - * for stopping services later. + * for stopping services later, including the resolved Nexus URL when managed. + * + * Does NOT persist nexusUrl to grove.json — call persistNexusUrlToConfig + * with resolvedNexusUrl after this returns if you want Resume to skip + * re-discovery. * * If grove.json doesn't exist or has no services configured, returns * an empty RunningServices (no-op shutdown). */ export async function startServices(options: ServiceStartOptions): Promise { const { groveDir } = options; + const report = options.onProgress ?? ((msg: string) => process.stderr.write(`${msg}\n`)); const configPath = join(groveDir, "grove.json"); const projectRoot = join(groveDir, ".."); const pidFilePath = join(groveDir, "grove.pid"); const children: ChildProcess[] = []; let nexusManaged = false; - process.stderr.write( - `[startServices] groveDir=${groveDir} configExists=${existsSync(configPath)} GROVE_NEXUS_URL=${process.env.GROVE_NEXUS_URL ?? "unset"}\n`, + let resolvedNexusUrl: string | undefined; + + report( + `[startServices] groveDir=${groveDir} configExists=${existsSync(configPath)} GROVE_NEXUS_URL=${process.env.GROVE_NEXUS_URL ?? "unset"}`, ); if (!existsSync(configPath)) { @@ -78,8 +117,6 @@ export async function startServices(options: ServiceStartOptions): Promise[] = []; - // Resolve grove source root for service entry points. - // Use process.argv[1] (the CLI entry point) not import.meta.url — bun bundles - // inline this file into a chunk, making import.meta.url unreliable. - // process.argv[1] = "/dist/cli/main.js" or "/src/cli/main.ts" const { dirname } = await import("node:path"); const entryPoint = process.argv[1] ?? ""; const groveSourceRoot = dirname(dirname(dirname(entryPoint))); @@ -162,13 +182,11 @@ export async function startServices(options: ServiceStartOptions): Promise { // Clean up PID file try { - const { unlinkSync } = require("node:fs") as typeof import("node:fs"); unlinkSync(pidFilePath); } catch { /* ignore */ @@ -258,14 +275,40 @@ export async function stopServices(services: RunningServices): Promise { // Service spawning with health check // --------------------------------------------------------------------------- +/** Known service ports. */ +const SERVICE_PORTS: Record = { server: 4515, mcp: 4015 }; + +/** Service health-check timeout (ms). */ +const SERVICE_HEALTH_TIMEOUT_MS = 10_000; + +/** + * Poll a /health endpoint until it returns 200 OK or the timeout expires. + * + * Uses exponential backoff starting at 250ms. Does not throw on timeout — + * the caller checks process liveness separately. + */ +async function waitForServiceHealth(url: string, timeoutMs: number): Promise { + const deadline = Date.now() + timeoutMs; + let delay = 250; + while (Date.now() < deadline) { + try { + const resp = await fetch(url, { signal: AbortSignal.timeout(1_000) }); + if (resp.ok) return; + } catch { + // not ready yet + } + await new Promise((resolve) => setTimeout(resolve, delay)); + delay = Math.min(delay * 1.5, 2_000); + } + // Timeout — caller will check process.kill(pid, 0) to determine liveness +} + async function spawnService( name: string, entryPoint: string, groveDir: string, ): Promise { - // Check if the port is already in use (server=4515, mcp=4015) - const defaultPorts: Record = { server: 4515, mcp: 4015 }; - const port = defaultPorts[name]; + const port = SERVICE_PORTS[name]; if (port) { try { const resp = await fetch(`http://localhost:${port}/health`, { @@ -281,7 +324,6 @@ async function spawnService( } try { - // Spawn detached so the server survives TUI exit. const { spawn: nodeSpawn } = await import("node:child_process"); const child = nodeSpawn("bun", [entryPoint], { cwd: join(groveDir, ".."), @@ -292,18 +334,18 @@ async function spawnService( const pid = child.pid ?? 0; child.unref(); - // Wait for process to start. Server with Nexus stores may take longer - // to initialize (dynamic imports + NexusHttpClient construction). - await new Promise((resolve) => setTimeout(resolve, 5000)); + // Wait for the service to pass its health check instead of sleeping blindly. + if (port) { + await waitForServiceHealth(`http://localhost:${port}/health`, SERVICE_HEALTH_TIMEOUT_MS); + } - // Check if the process is running + // Verify the process is still alive after health check / timeout try { process.kill(pid, 0); // Signal 0 = check existence } catch { return null; // Process died during startup } - // Wrap for the ChildProcess interface const proc = { pid, kill: (signal?: string) => { diff --git a/src/tui/main.ts b/src/tui/main.ts index 756ac6e3..1fbfaf69 100644 --- a/src/tui/main.ts +++ b/src/tui/main.ts @@ -571,12 +571,17 @@ export async function handleTui( // --url flag: legacy direct boardroom mode (no interactive screens) if (opts.url && !opts.nexus) { if (groveDir) { - const { startServices } = await import("../shared/service-lifecycle.js"); + const { startServices, persistNexusUrlToConfig } = await import( + "../shared/service-lifecycle.js" + ); runningServices = await startServices({ groveDir, build: serviceOpts?.build, nexusSource: serviceOpts?.nexusSource, }); + if (runningServices.resolvedNexusUrl) { + persistNexusUrlToConfig(groveDir, runningServices.resolvedNexusUrl); + } } const result = await buildAppProps(effectiveGrove, opts, groveInfo?.preset); activeProvider = result.provider; @@ -657,7 +662,9 @@ export async function handleTui( // Even for "New session", Nexus may be stopped (e.g. machine restart). // startServices + ensureNexusRunning is idempotent: reuses running Nexus if already up. onProgress?.("[grove up] ensuring services are running..."); - const { startServices } = await import("../shared/service-lifecycle.js"); + const { startServices, persistNexusUrlToConfig } = await import( + "../shared/service-lifecycle.js" + ); runningServices = await startServices({ groveDir: newGroveDir, build: serviceOpts?.build, @@ -665,9 +672,14 @@ export async function handleTui( onProgress, force: false, }); + if (runningServices.resolvedNexusUrl) { + persistNexusUrlToConfig(newGroveDir, runningServices.resolvedNexusUrl); + } } else { // New grove — start services (Nexus, HTTP server, MCP server). - const { startServices } = await import("../shared/service-lifecycle.js"); + const { startServices, persistNexusUrlToConfig } = await import( + "../shared/service-lifecycle.js" + ); runningServices = await startServices({ groveDir: newGroveDir, build: serviceOpts?.build, @@ -675,6 +687,9 @@ export async function handleTui( onProgress, force: false, }); + if (runningServices.resolvedNexusUrl) { + persistNexusUrlToConfig(newGroveDir, runningServices.resolvedNexusUrl); + } } // Pass the grove dir so buildAppProps can find GROVE.md @@ -693,13 +708,18 @@ export async function handleTui( onProgress?: (step: string) => void, ): Promise => { const resolvedGrove = groveDir ?? join(process.cwd(), ".grove"); - const { startServices } = await import("../shared/service-lifecycle.js"); + const { startServices, persistNexusUrlToConfig } = await import( + "../shared/service-lifecycle.js" + ); runningServices = await startServices({ groveDir: resolvedGrove, build: serviceOpts?.build, nexusSource: serviceOpts?.nexusSource, onProgress, }); + if (runningServices.resolvedNexusUrl) { + persistNexusUrlToConfig(resolvedGrove, runningServices.resolvedNexusUrl); + } const result = await buildAppProps(effectiveGrove, opts, groveInfo?.preset); activeProvider = result.provider; From 2c24e1b76c1f4298cbb99ce7613c6b5965765fe2 Mon Sep 17 00:00:00 2001 From: Tao Feng Date: Sun, 12 Apr 2026 14:06:19 -0700 Subject: [PATCH 2/2] style: fix biome formatting and unused import errors Format nexus-lifecycle.ts and init.ts per biome rules. Remove unused GenerateNexusYamlOptions import and prefix unused result variable with _ in nexus-lifecycle.test.ts. --- src/cli/commands/init.ts | 8 +++----- src/cli/nexus-lifecycle.test.ts | 5 ++--- src/cli/nexus-lifecycle.ts | 32 ++++++++++++++++++-------------- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/src/cli/commands/init.ts b/src/cli/commands/init.ts index e7c5759a..468e9d27 100644 --- a/src/cli/commands/init.ts +++ b/src/cli/commands/init.ts @@ -241,11 +241,9 @@ export async function executeInit( // 6b. Initialize Nexus if grove-managed — but skip if one is already running if (nexusManaged) { try { - const { - generateNexusYaml, - inferNexusPreset, - discoverRunningNexus, - } = await import("../nexus-lifecycle.js"); + const { generateNexusYaml, inferNexusPreset, discoverRunningNexus } = await import( + "../nexus-lifecycle.js" + ); // Reuse existing Nexus if any stack is running (avoid creating duplicate stacks). // API key is read from .state.json (authoritative) via readNexusApiKey. diff --git a/src/cli/nexus-lifecycle.test.ts b/src/cli/nexus-lifecycle.test.ts index 57fecffd..96869cf7 100644 --- a/src/cli/nexus-lifecycle.test.ts +++ b/src/cli/nexus-lifecycle.test.ts @@ -12,11 +12,10 @@ import { join } from "node:path"; import { parse as yamlParse } from "yaml"; import { - type GenerateNexusYamlOptions, - type NexusState, derivePort, generateNexusYaml, inferNexusPreset, + type NexusState, readNexusApiKey, readNexusState, readNexusUrl, @@ -355,7 +354,7 @@ describe("readNexusUrl", () => { // '3456' parses as string in YAML — readNexusUrl should handle gracefully // (yaml package parses quoted numbers as strings, so this returns undefined) // This is the correct behavior — we only accept integer ports - const result = readNexusUrl(dir); + const _result = readNexusUrl(dir); // Either parses it (yaml coerces) or returns undefined — just don't throw expect(() => readNexusUrl(dir)).not.toThrow(); }); diff --git a/src/cli/nexus-lifecycle.ts b/src/cli/nexus-lifecycle.ts index fd87b0bf..d0491e75 100644 --- a/src/cli/nexus-lifecycle.ts +++ b/src/cli/nexus-lifecycle.ts @@ -11,7 +11,14 @@ */ import { randomBytes } from "node:crypto"; -import { copyFileSync, existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs"; +import { + copyFileSync, + existsSync, + mkdirSync, + readFileSync, + unlinkSync, + writeFileSync, +} from "node:fs"; import { homedir } from "node:os"; import { join, resolve } from "node:path"; import { parse as yamlParse, stringify as yamlStringify } from "yaml"; @@ -150,10 +157,7 @@ export interface GenerateNexusYamlOptions { * Derives port from the workspace path (FNV-1a hash) so each worktree gets * a stable, unique port. Generates an API key for presets that require auth. */ -export function generateNexusYaml( - projectRoot: string, - opts: GenerateNexusYamlOptions, -): void { +export function generateNexusYaml(projectRoot: string, opts: GenerateNexusYamlOptions): void { const yamlPath = join(projectRoot, "nexus.yaml"); if (existsSync(yamlPath)) return; @@ -597,10 +601,10 @@ export async function discoverRunningNexus(projectRoot?: string): Promise