diff --git a/README.md b/README.md index 0cd8317..4e93fbc 100644 --- a/README.md +++ b/README.md @@ -128,7 +128,7 @@ cd ../llamaindex-basic && npm install && npm run start # 🔌 LangChain-Style Usage ```ts -import { mountPack } from '@knolo/core'; +import { mountPack } from '@knolo/core/node'; import { KnoLoRetriever } from '@knolo/langchain'; const pack = await mountPack({ src: './dist/knowledge.knolo' }); @@ -149,7 +149,7 @@ for (const doc of docs) { # 🦙 LlamaIndex-Style Usage ```ts -import { mountPack } from '@knolo/core'; +import { mountPack } from '@knolo/core/node'; import { KnoLoRetriever } from '@knolo/llamaindex'; const pack = await mountPack({ src: './dist/knowledge.knolo' }); @@ -165,6 +165,28 @@ for (const hit of nodes) { --- +# 📱 Expo / React Native Mounting + +Use the runtime-safe entrypoint (`@knolo/core`) and pass URL/bytes. +For local filesystem paths in Node.js, use `@knolo/core/node`. + +```ts +import { mountPack } from '@knolo/core'; + +const ab = await (await fetch(PACK_URL)).arrayBuffer(); +const pack = await mountPack({ src: new Uint8Array(ab) }); +``` + +Node-only local path usage: + +```ts +import { mountPack } from '@knolo/core/node'; + +const pack = await mountPack({ src: './dist/knowledge.knolo' }); +``` + +--- + # 🔀 Hybrid Retrieval (Optional) Lexical-first. Semantic rerank second. diff --git a/packages/core/README.md b/packages/core/README.md index 5249bb9..4a19fb8 100644 --- a/packages/core/README.md +++ b/packages/core/README.md @@ -91,19 +91,30 @@ You can write it to disk or store it in object storage. 
## 2️⃣ Mount a Pack +### Node.js (local path convenience) + ```ts -import { mountPack } from "@knolo/core"; +import { mountPack } from "@knolo/core/node"; const pack = await mountPack({ src: "./dist/knowledge.knolo" }); ``` +### React Native / Expo (URL or bytes) + +```ts +import { mountPack } from "@knolo/core"; + +const ab = await (await fetch(PACK_URL)).arrayBuffer(); +const pack = await mountPack({ src: new Uint8Array(ab) }); +``` + You can mount from: -* File path +* URL string (runtime-safe entry) * Buffer / Uint8Array -* Remote fetch response +* Local file path in Node via `@knolo/core/node` * Object storage download Mount-time validation ensures: diff --git a/packages/core/package.json b/packages/core/package.json index 6ca2167..a2f6501 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -19,8 +19,14 @@ ], "exports": { ".": { + "react-native": "./dist/index.js", + "browser": "./dist/index.js", "import": "./dist/index.js", "types": "./dist/index.d.ts" + }, + "./node": { + "import": "./dist/node.js", + "types": "./dist/node.d.ts" } }, "sideEffects": false, @@ -28,9 +34,10 @@ "build": "tsc -p tsconfig.json", "prepublishOnly": "npm run build", "smoke": "node scripts/smoke.mjs", - "test": "npm run build && node scripts/test.mjs", - "format": "prettier --write src/agent.ts src/pack.ts src/builder.ts src/index.ts scripts/test.mjs ../../README.md", - "format:check": "prettier --check src/agent.ts src/pack.ts src/builder.ts src/index.ts scripts/test.mjs ../../README.md" + "test": "npm run build && node scripts/check-runtime-no-node.mjs && node scripts/test.mjs", + "format": "prettier --write src/agent.ts src/pack.ts src/pack.runtime.ts src/pack.node.ts src/node.ts src/builder.ts src/index.ts scripts/test.mjs scripts/check-runtime-no-node.mjs ../../README.md README.md", + "format:check": "prettier --check src/agent.ts src/pack.ts src/pack.runtime.ts src/pack.node.ts src/node.ts src/builder.ts src/index.ts scripts/test.mjs 
scripts/check-runtime-no-node.mjs ../../README.md README.md", + "check:runtime-no-node": "node scripts/check-runtime-no-node.mjs" }, "devDependencies": { "@types/node": "^20.11.0", diff --git a/packages/core/scripts/check-runtime-no-node.mjs b/packages/core/scripts/check-runtime-no-node.mjs new file mode 100644 index 0000000..ad971ce --- /dev/null +++ b/packages/core/scripts/check-runtime-no-node.mjs @@ -0,0 +1,17 @@ +import assert from 'node:assert/strict'; +import { readFile } from 'node:fs/promises'; +import { fileURLToPath } from 'node:url'; + +const distIndexPath = fileURLToPath(new URL('../dist/index.js', import.meta.url)); +const runtimeBundle = await readFile(distIndexPath, 'utf8'); + +const forbidden = ['node:fs', 'fs/promises', 'node:path']; +for (const token of forbidden) { + assert.equal( + runtimeBundle.includes(token), + false, + `Runtime entry must not include Node stdlib reference: ${token}` + ); +} + +console.log('Runtime bundle contains no Node stdlib specifiers.'); diff --git a/packages/core/scripts/test.mjs b/packages/core/scripts/test.mjs index c62db32..1b6cece 100644 --- a/packages/core/scripts/test.mjs +++ b/packages/core/scripts/test.mjs @@ -31,6 +31,7 @@ import { validateRouteDecisionV1, selectAgentIdFromRouteDecisionV1, } from '../dist/index.js'; +import { mountPack as mountPackNode } from '../dist/node.js'; const execFileAsync = promisify(execFile); @@ -409,20 +410,20 @@ async function testMountPackFromLocalPathAndFileUrl() { try { await writeFile(packPath, bytes); - const fromPath = await mountPack({ src: packPath }); + const fromPath = await mountPackNode({ src: packPath }); const pathHits = query(fromPath, 'local path loading', { topK: 1 }); assert.equal( pathHits[0]?.source, 'local-doc', - 'expected mountPack to load plain filesystem paths' + 'expected @knolo/core/node mountPack to load plain filesystem paths' ); - const fromFileUrl = await mountPack({ src: pathToFileURL(packPath).href }); + const fromFileUrl = await mountPackNode({ 
src: pathToFileURL(packPath).href }); const fileUrlHits = query(fromFileUrl, 'local path loading', { topK: 1 }); assert.equal( fileUrlHits[0]?.source, 'local-doc', - 'expected mountPack to load file:// URLs' + 'expected @knolo/core/node mountPack to load file:// URLs' ); } finally { await rm(tmpDir, { recursive: true, force: true }); @@ -1251,7 +1252,7 @@ async function testCliEmbedsAgentsFromDirectory() { agentsDir, ]); - const pack = await mountPack({ src: outPath }); + const pack = await mountPackNode({ src: outPath }); assert.deepEqual( listAgents(pack), ['backend.agent', 'mobile.agent'], diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index a00b488..b1d0fa7 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -1,5 +1,5 @@ // src/index.ts -export { mountPack, hasSemantic } from './pack.js'; +export { mountPack, hasSemantic } from './pack.runtime.js'; export { query, lexConfidence, @@ -23,7 +23,7 @@ export { validateAgentRegistry, validateAgentDefinition, } from './agent.js'; -export type { MountOptions, PackMeta, Pack } from './pack.js'; +export type { MountOptions, PackMeta, Pack } from './pack.runtime.js'; export type { QueryOptions, Hit } from './query.js'; export type { ContextPatch } from './patch.js'; export type { BuildInputDoc, BuildPackOptions } from './builder.js'; diff --git a/packages/core/src/node.ts b/packages/core/src/node.ts new file mode 100644 index 0000000..d2162ca --- /dev/null +++ b/packages/core/src/node.ts @@ -0,0 +1,2 @@ +export { mountPack, hasSemantic } from './pack.node.js'; +export type { MountOptions, PackMeta, Pack } from './pack.node.js'; diff --git a/packages/core/src/pack.node.ts b/packages/core/src/pack.node.ts new file mode 100644 index 0000000..e821ce2 --- /dev/null +++ b/packages/core/src/pack.node.ts @@ -0,0 +1,42 @@ +import { mountPackFromBuffer, toArrayBuffer } from './pack.runtime.js'; +import type { MountOptions, Pack } from './pack.runtime.js'; +export { hasSemantic } from 
'./pack.runtime.js'; +export type { MountOptions, PackMeta, Pack } from './pack.runtime.js'; + +export async function mountPack(opts: MountOptions): Promise<Pack> { + const buf = await resolveToBuffer(opts.src); + return mountPackFromBuffer(buf); +} + +async function resolveToBuffer(src: MountOptions['src']): Promise<ArrayBuffer> { + if (typeof src === 'string') { + if (isLikelyLocalPath(src)) { + const { readFile } = await import('node:fs/promises'); + const filePath = src.startsWith('file://') + ? decodeURIComponent(new URL(src).pathname) + : src; + const data = await readFile(filePath); + return data.buffer.slice( + data.byteOffset, + data.byteOffset + data.byteLength + ); + } + const res = await fetch(src); + return await res.arrayBuffer(); + } + return toArrayBuffer(src); +} + +function isLikelyLocalPath(value: string): boolean { + if (value.startsWith('file://')) return true; + if ( + value.startsWith('./') || + value.startsWith('../') || + value.startsWith('/') || + value.startsWith('~') + ) + return true; + if (/^[A-Za-z]:[\\/]/.test(value)) return true; + if (/^[A-Za-z][A-Za-z\d+.-]*:/.test(value)) return false; + return true; +} diff --git a/packages/core/src/pack.runtime.ts b/packages/core/src/pack.runtime.ts new file mode 100644 index 0000000..f1f91cc --- /dev/null +++ b/packages/core/src/pack.runtime.ts @@ -0,0 +1,203 @@ +/* + * pack.runtime.ts + * + * Runtime-safe pack mounting for browser and React Native environments. + * No Node stdlib imports are allowed in this module. 
+ */ + +import { getTextDecoder } from './utils/utf8.js'; +import type { AgentRegistry } from './agent.js'; +import { validateAgentRegistry } from './agent.js'; + +export type MountOptions = { src: string | ArrayBufferLike | Uint8Array }; + +export type PackMeta = { + version: number; + stats: { docs: number; blocks: number; terms: number; avgBlockLen?: number }; + agents?: AgentRegistry; +}; + +export type Pack = { + meta: PackMeta; + lexicon: Map<string, number>; + postings: Uint32Array; + blocks: string[]; + headings?: (string | null)[]; + docIds?: (string | null)[]; + namespaces?: (string | null)[]; + blockTokenLens?: number[]; + semantic?: { + version: 1; + modelId: string; + dims: number; + encoding: 'int8_l2norm'; + perVectorScale: boolean; + vecs: Int8Array; + scales?: Uint16Array; + }; +}; + +export function hasSemantic(pack: Pack): boolean { + return Boolean( + pack.semantic && pack.semantic.dims > 0 && pack.semantic.vecs.length > 0 + ); +} + +export async function mountPack(opts: MountOptions): Promise<Pack> { + const buf = await resolveToBuffer(opts.src); + return mountPackFromBuffer(buf); +} + +export function mountPackFromBuffer(buf: ArrayBuffer): Pack { + const dv = new DataView(buf); + const dec = getTextDecoder(); + let offset = 0; + + const metaLen = dv.getUint32(offset, true); + offset += 4; + const metaJson = dec.decode(new Uint8Array(buf, offset, metaLen)); + offset += metaLen; + const meta: PackMeta = JSON.parse(metaJson); + if (meta.agents) { + validateAgentRegistry(meta.agents); + } + + const lexLen = dv.getUint32(offset, true); + offset += 4; + const lexJson = dec.decode(new Uint8Array(buf, offset, lexLen)); + offset += lexLen; + const lexEntries: Array<[string, number]> = JSON.parse(lexJson); + const lexicon = new Map(lexEntries); + + const postCount = dv.getUint32(offset, true); + offset += 4; + const postings = new Uint32Array(postCount); + for (let i = 0; i < postCount; i++) { + postings[i] = dv.getUint32(offset, true); + offset += 4; + } + + const blocksLen 
= dv.getUint32(offset, true); + offset += 4; + const blocksJson = dec.decode(new Uint8Array(buf, offset, blocksLen)); + offset += blocksLen; + const parsed = JSON.parse(blocksJson); + + let blocks: string[] = []; + let headings: (string | null)[] | undefined; + let docIds: (string | null)[] | undefined; + let namespaces: (string | null)[] | undefined; + let blockTokenLens: number[] | undefined; + + if (Array.isArray(parsed) && parsed.length && typeof parsed[0] === 'string') { + blocks = parsed as string[]; + } else if (Array.isArray(parsed)) { + blocks = []; + headings = []; + docIds = []; + namespaces = []; + blockTokenLens = []; + for (const it of parsed) { + if (it && typeof it === 'object') { + blocks.push(String(it.text ?? '')); + headings.push(it.heading ?? null); + docIds.push(it.docId ?? null); + namespaces.push(it.namespace ?? null); + blockTokenLens.push(typeof it.len === 'number' ? it.len : 0); + } else { + blocks.push(String(it ?? '')); + headings.push(null); + docIds.push(null); + namespaces.push(null); + blockTokenLens.push(0); + } + } + } + + let semantic: Pack['semantic']; + if (offset < buf.byteLength) { + const semLen = dv.getUint32(offset, true); + offset += 4; + const semJson = dec.decode(new Uint8Array(buf, offset, semLen)); + offset += semLen; + const sem = JSON.parse(semJson); + + const semBlobLen = dv.getUint32(offset, true); + offset += 4; + const semBlob = new Uint8Array(buf, offset, semBlobLen); + semantic = parseSemanticSection(sem, semBlob); + } + + return { + meta, + lexicon, + postings, + blocks, + headings, + docIds, + namespaces, + blockTokenLens, + semantic, + }; +} + +function parseSemanticSection(sem: any, blob: Uint8Array): Pack['semantic'] { + const vectors = sem?.blocks?.vectors; + const scales = sem?.blocks?.scales; + + const vecs = new Int8Array( + blob.buffer, + blob.byteOffset + Number(vectors?.byteOffset ?? 0), + Number(vectors?.length ?? 
0) + ); + + let scaleView: Uint16Array | undefined; + if (scales) { + const scaleLen = Number(scales.length ?? 0); + const scaleOffset = Number(scales.byteOffset ?? 0); + const dv = new DataView( + blob.buffer, + blob.byteOffset + scaleOffset, + scaleLen * 2 + ); + scaleView = new Uint16Array(scaleLen); + for (let i = 0; i < scaleLen; i++) { + scaleView[i] = dv.getUint16(i * 2, true); + } + } + + return { + version: 1, + modelId: String(sem?.modelId ?? ''), + dims: Number(sem?.dims ?? 0), + encoding: 'int8_l2norm', + perVectorScale: Boolean(sem?.perVectorScale), + vecs, + scales: scaleView, + }; +} + +async function resolveToBuffer(src: MountOptions['src']): Promise<ArrayBuffer> { + if (typeof src === 'string') { + try { + const res = await fetch(src); + return await res.arrayBuffer(); + } catch { + throw new Error( + 'mountPack({src: string}) expects a URL in React Native. For local files, load bytes in your app and pass Uint8Array/ArrayBuffer.' + ); + } + } + return toArrayBuffer(src); +} + +export function toArrayBuffer(src: ArrayBufferLike | Uint8Array): ArrayBuffer { + if (src instanceof Uint8Array) { + if (src.byteOffset === 0 && src.byteLength === src.buffer.byteLength) { + return src.buffer as ArrayBuffer; + } + const copy = src.slice(); + return copy.buffer as ArrayBuffer; + } + return src as ArrayBuffer; +} diff --git a/packages/core/src/pack.ts b/packages/core/src/pack.ts index df96abb..6082dc2 100644 --- a/packages/core/src/pack.ts +++ b/packages/core/src/pack.ts @@ -1,232 +1,2 @@ -/* - * pack.ts - * - * Mount `.knolo` packs across Node, browsers, and RN/Expo. Tolerant of: - * - blocks as string[] (v1) or object[] with { text, heading?, docId?, namespace?, len? } - * - meta.stats.avgBlockLen (optional) - * Includes RN/Expo-safe TextDecoder via ponyfill. 
- */ - -import { getTextDecoder } from './utils/utf8.js'; -import type { AgentRegistry } from './agent.js'; -import { validateAgentRegistry } from './agent.js'; - -export type MountOptions = { src: string | ArrayBufferLike | Uint8Array }; - -export type PackMeta = { - version: number; - stats: { docs: number; blocks: number; terms: number; avgBlockLen?: number }; - agents?: AgentRegistry; -}; - -export type Pack = { - meta: PackMeta; - lexicon: Map<string, number>; - postings: Uint32Array; - blocks: string[]; - headings?: (string | null)[]; - docIds?: (string | null)[]; - namespaces?: (string | null)[]; - blockTokenLens?: number[]; - semantic?: { - version: 1; - modelId: string; - dims: number; - encoding: 'int8_l2norm'; - perVectorScale: boolean; - vecs: Int8Array; - scales?: Uint16Array; - }; -}; - -export function hasSemantic(pack: Pack): boolean { - return Boolean( - pack.semantic && pack.semantic.dims > 0 && pack.semantic.vecs.length > 0 - ); -} - -export async function mountPack(opts: MountOptions): Promise<Pack> { - const buf = await resolveToBuffer(opts.src); - const dv = new DataView(buf); - const dec = getTextDecoder(); - let offset = 0; - - // meta - const metaLen = dv.getUint32(offset, true); - offset += 4; - const metaJson = dec.decode(new Uint8Array(buf, offset, metaLen)); - offset += metaLen; - const meta: PackMeta = JSON.parse(metaJson); - if (meta.agents) { - validateAgentRegistry(meta.agents); - } - - // lexicon - const lexLen = dv.getUint32(offset, true); - offset += 4; - const lexJson = dec.decode(new Uint8Array(buf, offset, lexLen)); - offset += lexLen; - const lexEntries: Array<[string, number]> = JSON.parse(lexJson); - const lexicon = new Map(lexEntries); - - // postings - const postCount = dv.getUint32(offset, true); - offset += 4; - const postings = new Uint32Array(postCount); - for (let i = 0; i < postCount; i++) { - postings[i] = dv.getUint32(offset, true); - offset += 4; - } - - // blocks (v1: string[]; v2/v3: {text, heading?, docId?, namespace?, len?}[]) - 
const blocksLen = dv.getUint32(offset, true); - offset += 4; - const blocksJson = dec.decode(new Uint8Array(buf, offset, blocksLen)); - offset += blocksLen; - const parsed = JSON.parse(blocksJson); - - let blocks: string[] = []; - let headings: (string | null)[] | undefined; - let docIds: (string | null)[] | undefined; - let namespaces: (string | null)[] | undefined; - let blockTokenLens: number[] | undefined; - - if (Array.isArray(parsed) && parsed.length && typeof parsed[0] === 'string') { - // v1 - blocks = parsed as string[]; - } else if (Array.isArray(parsed)) { - blocks = []; - headings = []; - docIds = []; - namespaces = []; - blockTokenLens = []; - for (const it of parsed) { - if (it && typeof it === 'object') { - blocks.push(String(it.text ?? '')); - headings.push(it.heading ?? null); - docIds.push(it.docId ?? null); - namespaces.push(it.namespace ?? null); - blockTokenLens.push(typeof it.len === 'number' ? it.len : 0); - } else { - blocks.push(String(it ?? '')); - headings.push(null); - docIds.push(null); - namespaces.push(null); - blockTokenLens.push(0); - } - } - } else { - blocks = []; - } - - let semantic: Pack['semantic']; - if (offset < buf.byteLength) { - const semLen = dv.getUint32(offset, true); - offset += 4; - const semJson = dec.decode(new Uint8Array(buf, offset, semLen)); - offset += semLen; - const sem = JSON.parse(semJson); - - const semBlobLen = dv.getUint32(offset, true); - offset += 4; - const semBlob = new Uint8Array(buf, offset, semBlobLen); - semantic = parseSemanticSection(sem, semBlob); - } - - return { - meta, - lexicon, - postings, - blocks, - headings, - docIds, - namespaces, - blockTokenLens, - semantic, - }; -} - -function parseSemanticSection(sem: any, blob: Uint8Array): Pack['semantic'] { - const vectors = sem?.blocks?.vectors; - const scales = sem?.blocks?.scales; - - const vecs = new Int8Array( - blob.buffer, - blob.byteOffset + Number(vectors?.byteOffset ?? 0), - Number(vectors?.length ?? 
0) - ); - - let scaleView: Uint16Array | undefined; - if (scales) { - const scaleLen = Number(scales.length ?? 0); - const scaleOffset = Number(scales.byteOffset ?? 0); - const dv = new DataView( - blob.buffer, - blob.byteOffset + scaleOffset, - scaleLen * 2 - ); - scaleView = new Uint16Array(scaleLen); - for (let i = 0; i < scaleLen; i++) { - scaleView[i] = dv.getUint16(i * 2, true); - } - } - - return { - version: 1, - modelId: String(sem?.modelId ?? ''), - dims: Number(sem?.dims ?? 0), - encoding: 'int8_l2norm', - perVectorScale: Boolean(sem?.perVectorScale), - vecs, - scales: scaleView, - }; -} - -async function resolveToBuffer(src: MountOptions['src']): Promise<ArrayBuffer> { - if (typeof src === 'string') { - if (isNodeRuntime() && isLikelyLocalPath(src)) { - return await readLocalFileAsBuffer(src); - } - const res = await fetch(src); - return await res.arrayBuffer(); - } - if (src instanceof Uint8Array) { - if (src.byteOffset === 0 && src.byteLength === src.buffer.byteLength) { - return src.buffer as ArrayBuffer; - } - const copy = src.slice(); - return copy.buffer as ArrayBuffer; - } - return src as ArrayBuffer; -} - -function isNodeRuntime(): boolean { - const p = (globalThis as { process?: { versions?: { node?: unknown } } }) - .process; - return !!p?.versions?.node; -} - -function isLikelyLocalPath(value: string): boolean { - if (value.startsWith('file://')) return true; - if ( - value.startsWith('./') || - value.startsWith('../') || - value.startsWith('/') || - value.startsWith('~') - ) - return true; - if (/^[A-Za-z]:[\\/]/.test(value)) return true; // Windows absolute path - if (/^[A-Za-z][A-Za-z\d+.-]*:/.test(value)) return false; // URL scheme - return true; // plain relative path like "knowledge.knolo" -} - -async function readLocalFileAsBuffer( - pathOrFileUrl: string -): Promise<ArrayBuffer> { - const { readFile } = await import('node:fs/promises'); - const filePath = pathOrFileUrl.startsWith('file://') - ? 
decodeURIComponent(new URL(pathOrFileUrl).pathname) - : pathOrFileUrl; - const data = await readFile(filePath); - return data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength); -} +export { mountPack, hasSemantic } from './pack.runtime.js'; +export type { MountOptions, PackMeta, Pack } from './pack.runtime.js';