From 0064e3939b0445142157ff27d02ffb4abee0f979 Mon Sep 17 00:00:00 2001 From: Developer Date: Wed, 8 Apr 2026 07:00:07 +0800 Subject: [PATCH] fix: defer index creation on empty/insufficient tables Add row count guard before attempting vector/FTS index creation to prevent noisy error messages when LanceDB tables have insufficient training data (< 256 rows for IVF/PQ indices). Changes: - Add MIN_ROWS_FOR_INDEX constant (256) for LanceDB training data requirement - Add countRows() check before createIndex() in both createVectorIndexWithRetry and createFtsIndexWithRetry - Silently defer index creation with info log instead of 3 retry errors - Add countRows to LanceTable type definition - Add 5 test cases for empty/insufficient row scenarios Fixes #70 --- src/store.ts | 17 ++++++ test/unit/index-race-condition.test.ts | 84 ++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) diff --git a/src/store.ts b/src/store.ts index 7cfea0e..43236b0 100644 --- a/src/store.ts +++ b/src/store.ts @@ -16,6 +16,7 @@ type LanceTable = { addColumns(transforms: Array<{ name: string; valueSql: string }>): Promise; delete(filter: string): Promise; schema(): Promise<{ fields: Array<{ name: string }> }>; + countRows(filter?: string): Promise; query(): { where(expr: string): ReturnType; select(columns: string[]): ReturnType; @@ -69,6 +70,7 @@ export function storeFastCosine(a: number[], b: number[], normA: number, normB: } export class MemoryStore { + private static readonly MIN_ROWS_FOR_INDEX = 256; private lancedb: LanceModule | null = null; private connection: LanceConnection | null = null; private table: LanceTable | null = null; @@ -2069,6 +2071,13 @@ export class MemoryStore { return; } + const rowCount = await table.countRows(); + if (rowCount < MemoryStore.MIN_ROWS_FOR_INDEX) { + console.log(`[store] Deferring vector index creation: ${rowCount} rows found (need ≥ ${MemoryStore.MIN_ROWS_FOR_INDEX})`); + this.indexState.vector = false; + return; + } + let lastErrorMsg = ""; for 
(let attempt = 0; attempt < maxRetries; attempt++) { @@ -2129,6 +2138,14 @@ export class MemoryStore { return; } + const rowCount = await table.countRows(); + if (rowCount < MemoryStore.MIN_ROWS_FOR_INDEX) { + console.log(`[store] Deferring FTS index creation: ${rowCount} rows found (need ≥ ${MemoryStore.MIN_ROWS_FOR_INDEX})`); + this.indexState.fts = false; + this.indexState.ftsError = `Insufficient data: ${rowCount} rows (need ≥ ${MemoryStore.MIN_ROWS_FOR_INDEX})`; + return; + } + let lastErrorMsg = ""; for (let attempt = 0; attempt < maxRetries; attempt++) { diff --git a/test/unit/index-race-condition.test.ts b/test/unit/index-race-condition.test.ts index dfdd625..bf346c2 100644 --- a/test/unit/index-race-condition.test.ts +++ b/test/unit/index-race-condition.test.ts @@ -8,6 +8,7 @@ interface MockTable { add(rows: unknown[]): Promise; delete(filter: string): Promise; schema(): Promise<{ fields: Array<{ name: string }> }>; + countRows(filter?: string): Promise; query(): { where(expr: string): ReturnType; select(columns: string[]): ReturnType; @@ -29,6 +30,7 @@ function makeMockTable(overrides: Partial = {}): MockTable { async add() {}, async delete() {}, async schema() { return { fields: [] }; }, + async countRows() { return 1000; }, query() { return q; }, }; return { ...base, ...overrides }; @@ -149,3 +151,85 @@ test("createFtsIndexWithRetry: final-pass check adopts index created by concurre assert.strictEqual(indexState.fts, true, "FTS index should be adopted via final-pass check after all retries exhausted"); assert.strictEqual(indexState.ftsError, "", "ftsError should be cleared when adopted via final-pass"); }); + +test("createVectorIndexWithRetry: empty table defers index creation silently", async () => { + const store = makeStore(); + + const table = makeMockTable({ + async listIndices() { return []; }, + async countRows() { return 0; }, + }); + + const internal = asInternal(store); + await (internal.createVectorIndexWithRetry as (t: MockTable) => 
Promise).call(store, table); + + const indexState = internal.indexState as { vector: boolean }; + assert.strictEqual(indexState.vector, false, "vector index should be deferred on empty table"); +}); + +test("createVectorIndexWithRetry: insufficient rows defers index creation", async () => { + const store = makeStore(); + + const table = makeMockTable({ + async listIndices() { return []; }, + async countRows() { return 100; }, + }); + + const internal = asInternal(store); + await (internal.createVectorIndexWithRetry as (t: MockTable) => Promise).call(store, table); + + const indexState = internal.indexState as { vector: boolean }; + assert.strictEqual(indexState.vector, false, "vector index should be deferred when rows < 256"); +}); + +test("createVectorIndexWithRetry: sufficient rows attempts index creation", async () => { + const store = makeStore(); + + let createIndexCalled = false; + const table = makeMockTable({ + async listIndices() { return []; }, + async countRows() { return 300; }, + async createIndex() { + createIndexCalled = true; + }, + }); + + const internal = asInternal(store); + await (internal.createVectorIndexWithRetry as (t: MockTable) => Promise).call(store, table); + + assert.ok(createIndexCalled, "createIndex should be called when rows >= 256"); + const indexState = internal.indexState as { vector: boolean }; + assert.strictEqual(indexState.vector, true, "vector index should be created successfully"); +}); + +test("createFtsIndexWithRetry: empty table defers index creation with error message", async () => { + const store = makeStore(); + + const table = makeMockTable({ + async listIndices() { return []; }, + async countRows() { return 0; }, + }); + + const internal = asInternal(store); + await (internal.createFtsIndexWithRetry as (t: MockTable) => Promise).call(store, table); + + const indexState = internal.indexState as { fts: boolean; ftsError: string }; + assert.strictEqual(indexState.fts, false, "FTS index should be deferred on empty 
table"); + assert.ok(indexState.ftsError.includes("Insufficient data"), "ftsError should contain insufficient data message"); +}); + +test("createFtsIndexWithRetry: insufficient rows defers index creation", async () => { + const store = makeStore(); + + const table = makeMockTable({ + async listIndices() { return []; }, + async countRows() { return 50; }, + }); + + const internal = asInternal(store); + await (internal.createFtsIndexWithRetry as (t: MockTable) => Promise).call(store, table); + + const indexState = internal.indexState as { fts: boolean; ftsError: string }; + assert.strictEqual(indexState.fts, false, "FTS index should be deferred when rows < 256"); + assert.ok(indexState.ftsError.includes("50 rows"), "ftsError should include row count"); +});