diff --git a/packages/runtime-worker/src/adapters/typeberry.ts b/packages/runtime-worker/src/adapters/typeberry.ts index 288c594..7c3f573 100644 --- a/packages/runtime-worker/src/adapters/typeberry.ts +++ b/packages/runtime-worker/src/adapters/typeberry.ts @@ -55,12 +55,19 @@ function buildMemory( return builder.finalize(heapStart, heapEnd); } +// Typeberry fault status code (maps to "fault" via status-map) +const TYPEBERRY_STATUS_FAULT = 2; + /** Typeberry PVM interpreter wrapper implementing SyncPvmInterpreter. */ export class TypeberrySyncInterpreter implements SyncPvmInterpreter { private adapter: pvm.DebuggerAdapter; private storedProgram: Uint8Array | null = null; private storedInitialState: InitialMachineState | null = null; private storedLoadContext: ProgramLoadContext | undefined = undefined; + // Workaround for typeberry assertion errors in fault-handling code paths + // (e.g. getStartPageIndex using signed << on high addresses). + // When set, overrides getStatus() to report fault instead of ok. 
+ private assertionFaultStatus: number | null = null; constructor() { this.adapter = new DebuggerAdapter(); @@ -74,6 +81,7 @@ export class TypeberrySyncInterpreter implements SyncPvmInterpreter { this.storedProgram = program; this.storedInitialState = initialState; this.storedLoadContext = loadContext; + this.assertionFaultStatus = null; this.doLoad(program, initialState, loadContext); } @@ -108,6 +116,7 @@ export class TypeberrySyncInterpreter implements SyncPvmInterpreter { if (!this.storedProgram || !this.storedInitialState) { throw new Error("Cannot reset: no program has been loaded"); } + this.assertionFaultStatus = null; this.doLoad( this.storedProgram, this.storedInitialState, @@ -116,15 +125,31 @@ export class TypeberrySyncInterpreter implements SyncPvmInterpreter { } step(n: number): { finished: boolean } { - if (n === 1) { - const running = this.adapter.nextStep(); + try { + if (n === 1) { + const running = this.adapter.nextStep(); + return { finished: !running }; + } + const running = this.adapter.nSteps(n); return { finished: !running }; + } catch (err) { + // Typeberry has a known bug where its fault-handling code path throws + // assertion errors (e.g. getStartPageIndex uses signed << on high + // addresses, producing negative values that fail tryAsMemoryIndex). + // The instruction itself was correctly identified as a fault, but the + // error-reporting code crashed. Treat this as a fault. 
+ if (err instanceof Error && err.message.includes("Assertion failure")) { + this.assertionFaultStatus = TYPEBERRY_STATUS_FAULT; + return { finished: true }; + } + throw err; } - const running = this.adapter.nSteps(n); - return { finished: !running }; } getStatus(): number { + if (this.assertionFaultStatus !== null) { + return this.assertionFaultStatus; + } return this.adapter.getStatus() as number; } diff --git a/packages/runtime-worker/src/index.test.ts b/packages/runtime-worker/src/index.test.ts index 1f1527e..d82a710 100644 --- a/packages/runtime-worker/src/index.test.ts +++ b/packages/runtime-worker/src/index.test.ts @@ -770,3 +770,122 @@ describe("SPI loads", () => { expect(status1).toBe(status2); }); }); + +// ===== Doom diagnostic: compare Typeberry and Ananas ===== +describe("Doom program: Typeberry vs Ananas", () => { + let typeberry: TypeberrySyncInterpreter; + let ananas: AnanasSyncInterpreter; + let doomBytes: Uint8Array; + + const doomState: InitialMachineState = { + pc: 0, + gas: 1_000_000n, + registers: Array.from({ length: 13 }, () => 0n), + pageMap: [], + memoryChunks: [], + }; + + beforeAll(async () => { + typeberry = new TypeberrySyncInterpreter(); + const api = await initAnanas(); + ananas = new AnanasSyncInterpreter(api); + doomBytes = readFixture("doom.bin"); + }); + + it("initial state after load matches", () => { + typeberry.load(doomBytes, doomState); + ananas.load(doomBytes, doomState); + + const tbPc = typeberry.getPc(); + const anPc = ananas.getPc(); + const tbGas = typeberry.getGas(); + const anGas = ananas.getGas(); + const tbStatus = typeberry.getStatus(); + const anStatus = ananas.getStatus(); + const tbRegs = uint8ToRegs(typeberry.getRegisters()); + const anRegs = uint8ToRegs(ananas.getRegisters()); + + console.log("=== After load ==="); + console.log(`TB PC: ${tbPc}, AN PC: ${anPc}`); + console.log(`TB Gas: ${tbGas}, AN Gas: ${anGas}`); + console.log(`TB Status: ${tbStatus}, AN Status: ${anStatus}`); + console.log(`TB Regs: 
[${tbRegs.join(",")}]`); + console.log(`AN Regs: [${anRegs.join(",")}]`); + + expect(anPc).toBe(tbPc); + expect(anGas).toBe(tbGas); + expect(anStatus).toBe(tbStatus); + expect(anRegs).toEqual(tbRegs); + }); + + it("state after 1 step matches", () => { + typeberry.load(doomBytes, doomState); + ananas.load(doomBytes, doomState); + + typeberry.step(1); + ananas.step(1); + + const tbPc = typeberry.getPc(); + const anPc = ananas.getPc(); + const tbGas = typeberry.getGas(); + const anGas = ananas.getGas(); + const tbStatus = typeberry.getStatus(); + const anStatus = ananas.getStatus(); + const tbRegs = uint8ToRegs(typeberry.getRegisters()); + const anRegs = uint8ToRegs(ananas.getRegisters()); + + console.log("=== After 1 step ==="); + console.log(`TB PC: ${tbPc}, AN PC: ${anPc}`); + console.log(`TB Gas: ${tbGas}, AN Gas: ${anGas}`); + console.log(`TB Status: ${tbStatus}, AN Status: ${anStatus}`); + console.log(`TB Regs: [${tbRegs.join(",")}]`); + console.log(`AN Regs: [${anRegs.join(",")}]`); + + expect(anPc).toBe(tbPc); + expect(anGas).toBe(tbGas); + expect(anStatus).toBe(tbStatus); + expect(anRegs).toEqual(tbRegs); + }); + + it("both PVMs agree on fault after store to unmapped memory", () => { + typeberry.load(doomBytes, doomState); + ananas.load(doomBytes, doomState); + + // Step until one terminates (doom faults on step 2 due to unmapped memory store) + for (let i = 0; i < 10; i++) { + const tbResult = typeberry.step(1); + const anResult = ananas.step(1); + + const tbStatus = mapStatus(typeberry.getStatus()); + const anStatus = mapStatus(ananas.getStatus()); + + expect(tbResult.finished).toBe(anResult.finished); + expect(tbStatus).toBe(anStatus); + + if (tbResult.finished || anResult.finished) { + // Both should agree it's a fault + expect(tbStatus).toBe("fault"); + expect(anStatus).toBe("fault"); + break; + } + } + }); + + it("typeberry handles assertion fault gracefully on nSteps", () => { + typeberry.load(doomBytes, doomState); + + // nSteps should not throw — 
should return finished and fault status + const result = typeberry.step(100); + expect(result.finished).toBe(true); + expect(mapStatus(typeberry.getStatus())).toBe("fault"); + }); + + it("reset clears assertion fault status", () => { + typeberry.load(doomBytes, doomState); + typeberry.step(100); // triggers fault + expect(mapStatus(typeberry.getStatus())).toBe("fault"); + + typeberry.reset(); + expect(mapStatus(typeberry.getStatus())).toBe("ok"); + }); +}); diff --git a/spec/005-runtime-worker-and-adapters.md b/spec/005-runtime-worker-and-adapters.md index bc08a3a..6fda9ee 100644 --- a/spec/005-runtime-worker-and-adapters.md +++ b/spec/005-runtime-worker-and-adapters.md @@ -234,6 +234,12 @@ This is NOT `dest, src1, src2` order. The `addU32(first, second, result)` functi After every `resetGenericWithMemory()` or `resetJAM()`, Ananas requires calling `setNextProgramCounter(pc)`, `setGasLeft(gas)`, and `nextStep()` to prime the interpreter state. Without this, the first step result will be incorrect. +### Typeberry assertion error on high-address faults + +Typeberry's `getStartPageIndex()` in `memory/memory-utils.js` uses JavaScript's `<<` operator to page-align addresses, which converts the result to a signed 32-bit integer. For addresses >= `0x80000000`, the page-aligned result has bit 31 set, making it negative and failing `tryAsMemoryIndex`. This crash occurs in the **fault-handling code path** (not the address calculation itself), so the instruction was correctly identified as a fault but the error-reporting code throws. Programs that store to unmapped high addresses trigger this. + +**Workaround**: `TypeberrySyncInterpreter.step()` catches assertion errors and sets a synthetic fault status (code 2). The synthetic status is cleared on `load()` and `reset()`. See sprint-48 for full details. + ## Acceptance Criteria - `mapStatus()` covers all six known statuses and throws on unknown codes. 
@@ -249,6 +255,7 @@ After every `resetGenericWithMemory()` or `resetJAM()`, Ananas requires calling - `WorkerBridge` applies real `setPc`, `setGas`, `setRegisters`, `getMemory`, `setMemory`, and `reset` through the worker protocol. - `WorkerBridge` rejects stalled commands with `TimeoutError`. - Worker command handler returns structured error responses when interpreter throws. +- Typeberry and Ananas produce the same fault status for the doom binary (assertion fault workaround). - `npm run build -w packages/runtime-worker` succeeds. - `npm test -w packages/runtime-worker` succeeds. - `npm run build` succeeds for the workspace. diff --git a/spec/ui/sprint-48-typeberry-assertion-fault-workaround.md b/spec/ui/sprint-48-typeberry-assertion-fault-workaround.md new file mode 100644 index 0000000..5e7e353 --- /dev/null +++ b/spec/ui/sprint-48-typeberry-assertion-fault-workaround.md @@ -0,0 +1,114 @@ +# Sprint 48 — Typeberry Assertion Fault Workaround + +Status: Implemented + +## Goal + +Fix the doom example (and any large program that accesses high memory addresses) producing different results between Typeberry and Ananas. Typeberry crashes with a JavaScript assertion error on programs that fault at high memory addresses, while Ananas correctly reports a `fault` status. Add a defensive workaround in the Typeberry adapter so both PVMs agree. 
+ +## Prior Sprint Dependencies + +- Spec 005: runtime-worker and PVM adapters (defines the `SyncPvmInterpreter` contract and adapter responsibilities) +- Sprint 39: Ananas PVM support (Ananas adapter and dual-PVM execution) +- Sprint 25: divergence detection (UI depends on both PVMs agreeing on status) + +## Root Cause + +The bug is in `@typeberry/lib`'s `getStartPageIndex()` function in `memory/memory-utils.js`: + +```javascript +// BUG: << converts result to signed 32-bit integer +export function getStartPageIndex(address) { + return tryAsMemoryIndex((address >>> PAGE_SIZE_SHIFT) << PAGE_SIZE_SHIFT); +} +``` + +JavaScript's `<<` operator converts its result to a **signed 32-bit integer**. For page-aligned addresses at or above `0x80000000`, the result has bit 31 set, which makes the signed interpretation negative. `tryAsMemoryIndex` then rejects the negative value with an assertion error. + +The sister function `getStartPageIndexFromPageNumber` in the same file correctly applies `>>> 0` to convert back to unsigned: + +```javascript +// CORRECT: >>> 0 converts back to unsigned 32-bit +export function getStartPageIndexFromPageNumber(pageNumber) { + return tryAsMemoryIndex((pageNumber << PAGE_SIZE_SHIFT) >>> 0); +} +``` + +### Trigger Sequence (doom example) + +1. Doom's first instruction loads `r1 = 0xFFFFFFFFFFFFFFF8` (-8 as signed 64-bit). +2. The second instruction is `storeIndU64` using `r1` as a base address. +3. The computed store address (lower 32 bits: `0xFFFFFFF8` + immediate offset) targets unmapped memory. +4. `StoreOps.store()` correctly identifies the access as a fault. +5. In the **fault-handling code path** (not the address calculation), `getStartPageIndex(tryAsMemoryIndex(storeResult.error.address))` is called to compute the page index for the error report. +6. `getStartPageIndex` aligns the error address to a page boundary: `(address >>> 12) << 12`, i.e. `0xFFFFF << 12` for an address in the last page, which is `0xFFFFF000` as unsigned but `-4096` as signed 32-bit. +7. 
`tryAsMemoryIndex(-4096)` throws `Assertion failure: Incorrect memory index: -4096!`. + +### Impact + +- **Ananas**: returns `status = 2 (fault)` — correct behavior per PVM spec. +- **Typeberry**: throws an unrecoverable JavaScript exception — the debugger marks the session as `"failed"` instead of `"terminated"`. +- The UI divergence panel shows different lifecycles rather than matching fault statuses. +- Any program that stores to an address in the upper half of the 32-bit address space (>= 0x80000000) and faults will trigger this bug. + +## What Works After This Sprint + +### 1. Typeberry Assertion Fault Recovery + +`TypeberrySyncInterpreter.step()` catches assertion errors thrown by the Typeberry library during execution. When an assertion failure is caught: + +- The interpreter records a synthetic fault status (`TYPEBERRY_STATUS_FAULT = 2`). +- `step()` returns `{ finished: true }` instead of propagating the exception. +- `getStatus()` returns the synthetic fault code (2) instead of the stale ok code (255). + +This only catches errors whose message includes `"Assertion failure"` — all other errors propagate normally. + +### 2. Status Override Lifecycle + +The synthetic fault status is stored in a private `assertionFaultStatus` field: + +- Set to `TYPEBERRY_STATUS_FAULT` when an assertion error is caught during `step()`. +- Cleared to `null` on `load()` and `reset()`, restoring normal `getStatus()` delegation to the underlying adapter. +- `getStatus()` checks `assertionFaultStatus` first; if non-null, returns it instead of querying the adapter. + +### 3. Both PVMs Agree on Doom Execution + +After loading the doom binary with default initial state (pc=0, gas=1M, no page map): + +- **Step 1**: Both PVMs execute successfully. PC=3, Gas=999999, r1=0xFFFFFFFFFFFFFFF8. +- **Step 2**: Both PVMs report `fault` status and `finished=true`. The program terminates because the `storeIndU64` targets unmapped memory. +- No JavaScript exceptions escape from either adapter. 
+ +## Files Changed + +| File | Changes | +|------|---------| +| `packages/runtime-worker/src/adapters/typeberry.ts` | Added `assertionFaultStatus` field, try/catch in `step()`, status override in `getStatus()`, clearing in `load()` and `reset()` | +| `packages/runtime-worker/src/index.test.ts` | Added "Doom program: Typeberry vs Ananas" test suite (5 tests): initial state match, 1-step match, fault agreement, nSteps graceful handling, reset clears fault status | + +## Implementation Notes + +- The workaround is scoped to assertion errors only. If Typeberry throws a non-assertion error (e.g. out of memory, invalid program), it propagates normally. +- The doom binary loaded as `generic_pvm` with no page map will always fault on step 2 because there are no writable memory pages. To actually run doom, the content pipeline would need to extract or provide appropriate page maps. This sprint does not change the loading behavior — it only ensures both PVMs agree on the fault outcome. +- This is a workaround for an upstream bug in `@typeberry/lib`. The fix in the library itself would be a one-line change: appending `>>> 0` to the shifted value in `getStartPageIndex()`, as `getStartPageIndexFromPageNumber()` already does. Consider reporting upstream and removing the workaround when a fixed version is published. + +## Acceptance Criteria + +- Typeberry does not throw JavaScript exceptions when stepping the doom binary. +- Both PVMs report `fault` status after doom's second instruction. +- Both PVMs report identical PC, gas, status, and registers after loading doom with default state. +- Both PVMs report identical PC, gas, status, and registers after 1 step of doom. +- `TypeberrySyncInterpreter.step(100)` on doom returns `{ finished: true }` without throwing. +- `mapStatus(typeberry.getStatus())` returns `"fault"` after the assertion recovery. +- `reset()` clears the assertion fault status; `getStatus()` returns `"ok"` (255) after reset. +- All 55 pre-existing runtime-worker tests continue to pass. 
+- Full workspace test suite (683 tests across 28 files) passes. + +## Verification + +```bash +npm run build -w packages/runtime-worker +npm test -w packages/runtime-worker +npm run build +npm test +```