diff --git a/.gitignore b/.gitignore index c9f093fe..cdf1cdf8 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,5 @@ bin/ .gstack/ supabase/.temp/ .claude/skills/ +.claude/settings.local.json .idea diff --git a/CHANGELOG.md b/CHANGELOG.md index 1537e621..b6ed801f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ All notable changes to GBrain will be documented in this file. +## [0.10.3] - 2026-04-17 + +### Fixed + +- **Compiled `gbrain` binaries can actually open your brain now.** Every PGLite command (`list`, `search`, `doctor`, anything that touches the DB) was crashing with `Extension bundle not found: file:///.../vector.tar.gz` in compiled mode. Bun's `--compile` bundles JavaScript but silently drops data files referenced via `new URL("...", import.meta.url)` — and PGLite needs five of them (pgvector tarball, pg_trgm tarball, pglite.wasm, initdb.wasm, pglite.data). Source mode (`bun run src/cli.ts`) worked, so the gap hid in plain sight until someone actually ran the shipped binary. Fix embeds all five via `with { type: 'file' }`, materializes the extension tarballs to `$TMPDIR/gbrain-pglite-ext/` (PGLite's stream loader can't traverse Bun's virtual FS), and hands the WASM modules + `pglite.data` Blob directly to `PGlite.create()` so URL resolution is bypassed entirely. New E2E test `test/e2e/compiled-binary.test.ts` compiles a fresh binary, runs `init` + `list` + `search` against a scratch dir, and fails loudly if any embedded asset drops out of the compile again. + ## [0.10.2] - 2026-04-17 ### Security — Wave 3 (9 vulnerabilities closed) diff --git a/TODOS.md b/TODOS.md index 2ebac62f..cfb051dc 100644 --- a/TODOS.md +++ b/TODOS.md @@ -22,15 +22,17 @@ ### Fix `bun build --compile` WASM embedding for PGLite **What:** Submit PR to oven-sh/bun fixing WASM file embedding in `bun build --compile` (issue oven-sh/bun#15032). -**Why:** PGLite's WASM files (~3MB) can't be embedded in the compiled binary. 
Users who install via `bun install -g gbrain` are fine (WASM resolves from node_modules), but the compiled binary can't use PGLite. Jarred Sumner (Bun founder, YC W22) would likely be receptive. +**Status:** Worked around in v0.10.3 — `src/core/pglite-engine.ts` embeds tarballs and WASM via `with { type: 'file' }` and passes them through `PGlite.create()`'s `pgliteWasmModule` / `initdbWasmModule` / `fsBundle` options. Compiled binaries work. An upstream Bun fix would let us delete that workaround and let other WASM packages Just Work. -**Pros:** Single-binary distribution includes PGLite. No sidecar files needed. +**Why:** The workaround is 80 lines of glue. An upstream fix would generalize to every package that uses `new URL("./asset.ext", import.meta.url)` for data files, not just PGLite. Jarred Sumner (Bun founder, YC W22) would likely be receptive. + +**Pros:** Single-binary distribution stays zero-config for the next WASM-dependent package we pull in. **Cons:** Requires understanding Bun's bundler internals. May be a large PR. **Context:** Issue has been open since Nov 2024. The root cause is that `bun build --compile` generates virtual filesystem paths (`/$bunfs/root/...`) that PGLite can't resolve. Multiple users have reported this. A fix would benefit any WASM-dependent package, not just PGLite. -**Depends on:** PGLite engine shipping (to have a real use case for the PR). +**Depends on:** PGLite engine shipping (to have a real use case for the PR). DONE. ### ChatGPT MCP support (OAuth 2.1) **What:** Add OAuth 2.1 with Dynamic Client Registration to the self-hosted MCP server so ChatGPT can connect. 
diff --git a/VERSION b/VERSION index 5eef0f10..a3f5a8ed 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.10.2 +0.10.3 diff --git a/package.json b/package.json index 70eea76b..edfd036b 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "gbrain", - "version": "0.10.2", + "version": "0.10.3", "description": "Postgres-native personal knowledge brain with hybrid RAG search", "type": "module", "main": "src/core/index.ts", diff --git a/src/core/pglite-engine.ts b/src/core/pglite-engine.ts index cc1ca310..89c2312e 100644 --- a/src/core/pglite-engine.ts +++ b/src/core/pglite-engine.ts @@ -2,6 +2,20 @@ import { PGlite } from '@electric-sql/pglite'; import { vector } from '@electric-sql/pglite/vector'; import { pg_trgm } from '@electric-sql/pglite/contrib/pg_trgm'; import type { Transaction } from '@electric-sql/pglite'; +import { pathToFileURL } from 'node:url'; +// Embed extension tarballs into the compiled binary. PGLite's extensions +// resolve bundlePath via `new URL("../vector.tar.gz", import.meta.url)` in +// node_modules, which `bun build --compile` does NOT auto-embed — the binary +// crashes at runtime with "Extension bundle not found". These imports force +// Bun to embed the files and return a path we can feed back to PGLite. 
+import vectorBundlePath from '../../node_modules/@electric-sql/pglite/dist/vector.tar.gz' with { type: 'file' };
+import pgTrgmBundlePath from '../../node_modules/@electric-sql/pglite/dist/pg_trgm.tar.gz' with { type: 'file' };
+import pgliteWasmPath from '../../node_modules/@electric-sql/pglite/dist/pglite.wasm' with { type: 'file' };
+import initdbWasmPath from '../../node_modules/@electric-sql/pglite/dist/initdb.wasm' with { type: 'file' };
+import pgliteDataPath from '../../node_modules/@electric-sql/pglite/dist/pglite.data' with { type: 'file' };
+import { readFileSync, writeFileSync, mkdirSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
 import type { BrainEngine } from './engine.ts';
 import { MAX_SEARCH_LIMIT, clampSearchLimit } from './engine.ts';
 import { runMigrations } from './migrate.ts';
@@ -23,6 +37,69 @@ import { validateSlug, contentHash, rowToPage, rowToChunk, rowToSearchResult } f
 
 type PGLiteDB = PGlite;
 
+// Returns a copy of a PGLite extension whose setup() result has `bundlePath`
+// replaced by a file:// URL for the on-disk tarball at `bundlePath`; every
+// other field of the extension and of its setup() result passes through.
+// NOTE(review): the `<T extends …>` and `Parameters<…>` type arguments were
+// reconstructed after being stripped from this patch — confirm against the repo.
+function withEmbeddedBundle<T extends { setup: (...args: any[]) => Promise<{ bundlePath: URL }> }>(
+  ext: T,
+  bundlePath: string,
+): T {
+  const url = pathToFileURL(bundlePath);
+  const setup = async (...args: Parameters<T['setup']>) => ({ ...(await ext.setup(...args)), bundlePath: url });
+  return { ...ext, setup } as T;
+}
+
+// Extension tarballs must live on the real filesystem because PGLite loads them
+// via fs.createReadStream + zlib.createGunzip, which cannot traverse Bun's
+// embedded virtual FS. Copy the embedded bytes to tmpdir once per process.
+let _materializedExtPaths: { vector: string; pg_trgm: string } | null = null;
+
+// Copies the embedded extension tarballs to <tmpdir>/gbrain-pglite-ext/ and
+// returns their real-filesystem paths; the result is cached for later calls.
+function materializeExtensions() {
+  if (_materializedExtPaths) return _materializedExtPaths;
+  const dir = join(tmpdir(), 'gbrain-pglite-ext');
+  mkdirSync(dir, { recursive: true });
+  const copyOut = (name: string, src: string): string => {
+    const out = join(dir, name);
+    const bytes = readFileSync(src);
+    // Rewrite only when the copy on disk is missing, unreadable, or differs: a
+    // blind rewrite truncates a file that a concurrent gbrain may be streaming.
+    let upToDate = false;
+    try { upToDate = readFileSync(out).equals(bytes); } catch { /* missing or unreadable */ }
+    if (!upToDate) writeFileSync(out, bytes);
+    return out;
+  };
+  _materializedExtPaths = {
+    vector: copyOut('vector.tar.gz', vectorBundlePath),
+    pg_trgm: copyOut('pg_trgm.tar.gz', pgTrgmBundlePath),
+  };
+  return _materializedExtPaths;
+}
+
+const EXT_PATHS = materializeExtensions();
+const VECTOR_EXT = withEmbeddedBundle(vector, EXT_PATHS.vector);
+const PG_TRGM_EXT = withEmbeddedBundle(pg_trgm, EXT_PATHS.pg_trgm);
+
+let _cachedCoreAssets: {
+  pgliteWasmModule: WebAssembly.Module;
+  initdbWasmModule: WebAssembly.Module;
+  fsBundle: Blob;
+} | null = null;
+
+// Reads and compiles the embedded PGLite WASM modules and FS bundle once; later calls reuse the cache.
+function loadCoreAssets() {
+  if (_cachedCoreAssets) return _cachedCoreAssets;
+  _cachedCoreAssets = {
+    pgliteWasmModule: new WebAssembly.Module(readFileSync(pgliteWasmPath)),
+    initdbWasmModule: new WebAssembly.Module(readFileSync(initdbWasmPath)),
+    fsBundle: new Blob([new Uint8Array(readFileSync(pgliteDataPath))]),
+  };
+  return _cachedCoreAssets;
+}
+
 export class PGLiteEngine implements BrainEngine {
   private _db: PGLiteDB | null = null;
   private _lock: LockHandle | null = null;
@@ -43,9 +120,13 @@ export class PGLiteEngine implements BrainEngine {
       throw new Error('Could not acquire PGLite lock. 
Another gbrain process is using the database.');
     }
 
+    const { pgliteWasmModule, initdbWasmModule, fsBundle } = loadCoreAssets();
     this._db = await PGlite.create({
       dataDir,
-      extensions: { vector, pg_trgm },
+      extensions: { vector: VECTOR_EXT, pg_trgm: PG_TRGM_EXT },
+      pgliteWasmModule,
+      initdbWasmModule,
+      fsBundle,
     });
   }
 
diff --git a/test/e2e/compiled-binary.test.ts b/test/e2e/compiled-binary.test.ts
new file mode 100644
index 00000000..474fed53
--- /dev/null
+++ b/test/e2e/compiled-binary.test.ts
@@ -0,0 +1,92 @@
+/**
+ * E2E Compiled Binary Test — Tier 1 (no API keys, no network)
+ *
+ * Regression guard for the Bun `--compile` asset embedding bug:
+ * PGLite references its WASM, data, and extension tarballs via
+ * `new URL("./pglite.wasm", import.meta.url)` etc. inside node_modules.
+ * Bun's compiler bundles JS modules but does NOT auto-embed arbitrary
+ * data files referenced this way, so the stock compile produces a binary
+ * that crashes at runtime with "Extension bundle not found".
+ *
+ * src/core/pglite-engine.ts fixes this by importing each asset with
+ * `with { type: 'file' }`, materializing extension tarballs to tmpdir,
+ * and feeding the core WASM/data modules directly to PGLite.
+ *
+ * This test compiles the binary, runs it against a scratch config dir,
+ * and asserts zero extension errors + clean list output. It catches
+ * any regression that silently drops an asset from the compile.
+ *
+ * Run: bun test test/e2e/compiled-binary.test.ts
+ */
+
+import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
+import { spawnSync } from 'node:child_process';
+import { mkdtempSync, rmSync } from 'node:fs';
+import { tmpdir, platform } from 'node:os';
+import { join } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+// Resolve via fileURLToPath rather than URL#pathname: it percent-decodes the
+// path (e.g. spaces) and yields a native Windows path without manual fix-ups.
+const REPO_ROOT = fileURLToPath(new URL('../..', import.meta.url));
+const BINARY = join(REPO_ROOT, 'bin', platform() === 'win32' ? 'gbrain-compiled-test.exe' : 'gbrain-compiled-test');
+
+// Scratch home directory for the binary; created in beforeAll, removed in afterAll.
+let tmpHome: string;
+
+// Compiles a fresh binary into bin/, then runs `gbrain init` with
+// HOME/USERPROFILE pointed at a throwaway directory. Any failure aborts the suite.
+beforeAll(() => {
+  const build = spawnSync('bun', ['build', '--compile', '--outfile', BINARY, 'src/cli.ts'], {
+    cwd: REPO_ROOT, encoding: 'utf-8', timeout: 120_000,
+  });
+  if (build.status !== 0) throw new Error(`Compile failed: ${failureText(build)}`);
+
+  tmpHome = mkdtempSync(join(tmpdir(), 'gbrain-compiled-test-'));
+  const init = runBinary(['init'], 60_000);
+  if (init.status !== 0) {
+    throw new Error(`gbrain init failed (status=${init.status}): ${failureText(init)}`);
+  }
+}, 180_000);
+
+afterAll(() => {
+  if (tmpHome) rmSync(tmpHome, { recursive: true, force: true });
+  try { rmSync(BINARY, { force: true }); } catch { /* best-effort */ }
+});
+
+// Runs the compiled binary with HOME/USERPROFILE pointed at the scratch dir.
+// `timeout` is in milliseconds and defaults to the 30s the tests use.
+function runBinary(args: string[], timeout = 30_000) {
+  // homedir() on Windows reads USERPROFILE; on Unix, HOME.
+  const env = { ...process.env, HOME: tmpHome, USERPROFILE: tmpHome };
+  return spawnSync(BINARY, args, { encoding: 'utf-8', timeout, env });
+}
+
+// Explains a failed spawnSync: the spawn-level error (command not found,
+// timeout) when there is one, followed by whatever the process printed.
+function failureText(r: { error?: Error; stderr: string | null; stdout: string | null }): string {
+  return [r.error?.message, r.stderr || r.stdout].filter(Boolean).join('\n') || '(no output)';
+}
+
+describe('compiled binary: PGLite asset embedding', () => {
+  // Per-test timeout has to cover the first-run initdb (PGLite bootstraps
+  // a fresh cluster, takes ~5-10s on a cold machine).
+  test('list against fresh PGLite brain runs clean', () => {
+    const result = runBinary(['list', '-n', '5']);
+    const output = `${result.stdout || ''}${result.stderr || ''}`;
+    const banned = [ // substrings that must not appear in stdout or stderr
+      'Extension bundle not found', 'Failed to fetch extension',
+      'pglite.data', 'pglite.wasm', 'initdb.wasm',
+    ];
+    for (const marker of banned) expect(output).not.toContain(marker);
+    expect(result.status).toBe(0);
+  }, 30_000);
+
+  test('search against existing PGLite brain runs clean', () => {
+    const result = runBinary(['search', 'anything']);
+    const output = `${result.stdout || ''}${result.stderr || ''}`;
+    const banned = ['Extension bundle not found', 'Failed to fetch extension'];
+    for (const marker of banned) expect(output).not.toContain(marker);
+    expect(result.status).toBe(0);
+  }, 30_000);
+});