From 6b5165bc3b98d171ca3a910c2b2882a9ad3b1581 Mon Sep 17 00:00:00 2001 From: Khalid Saidi Date: Sat, 28 Feb 2026 19:25:59 -0800 Subject: [PATCH] feat(api): add reachable freshness filter for search/top --- apps/api/src/app.ts | 35 ++++++- apps/api/src/rag/search.ts | 9 ++ apps/api/tests/api.test.ts | 185 ++++++++++++++++++++++++++++++++++ apps/mcp-remote/src/index.ts | 12 ++- docs/DEPLOYMENT.md | 4 + packages/mcp-local/src/cli.ts | 12 ++- packages/shared/src/index.ts | 1 + 7 files changed, 249 insertions(+), 9 deletions(-) diff --git a/apps/api/src/app.ts b/apps/api/src/app.ts index d0bf9b6..8c9d88f 100644 --- a/apps/api/src/app.ts +++ b/apps/api/src/app.ts @@ -433,8 +433,8 @@ function agentCard(baseUrl: string) { }, // So HTTP-only agents can call the API directly without installing the MCP apiEndpoints: { - search: { method: 'GET', path: '/rag/search', params: ['q', 'limit', 'hasRemote', 'reachable', 'citations', 'localOnly', 'minScore', 'categories', 'serverKind'] }, - top: { method: 'GET', path: '/rag/top', params: ['limit', 'minScore', 'hasRemote', 'reachable', 'localOnly', 'categories', 'serverKind'] }, + search: { method: 'GET', path: '/rag/search', params: ['q', 'limit', 'hasRemote', 'reachable', 'reachableMaxAgeHours', 'citations', 'localOnly', 'minScore', 'categories', 'serverKind'] }, + top: { method: 'GET', path: '/rag/top', params: ['limit', 'minScore', 'hasRemote', 'reachable', 'reachableMaxAgeHours', 'localOnly', 'categories', 'serverKind'] }, install: { method: 'GET', path: '/rag/install', params: ['name'] }, stats: { method: 'GET', path: '/rag/stats', params: [] }, listServers: { method: 'GET', path: '/v0.1/servers', params: ['limit', 'cursor'] }, @@ -471,6 +471,7 @@ const RagSearchQuerySchema = z.object({ registryType: z.string().optional(), hasRemote: z.enum(['true', 'false']).optional(), reachable: z.enum(['true', 'false']).optional(), + reachableMaxAgeHours: z.coerce.number().int().min(1).max(8760).optional(), citations: z.enum(['true', 
'false']).optional(), localOnly: z.enum(['true', 'false']).optional(), serverKind: z.enum(['retriever', 'evaluator', 'indexer', 'router', 'other']).optional() @@ -482,6 +483,7 @@ const RagTopQuerySchema = z.object({ minScore: z.coerce.number().int().min(0).max(100).optional(), hasRemote: z.enum(['true', 'false']).optional(), reachable: z.enum(['true', 'false']).optional(), + reachableMaxAgeHours: z.coerce.number().int().min(1).max(8760).optional(), localOnly: z.enum(['true', 'false']).optional(), serverKind: z.enum(['retriever', 'evaluator', 'indexer', 'router', 'other']).optional() }); @@ -1228,6 +1230,7 @@ export async function buildApp(params: { env: Env; store: RegistryStore }) { const registryType = (query.registryType ?? '').trim() || undefined; const hasRemote = parseOptionalBool(query.hasRemote); const reachable = parseOptionalBool(query.reachable); + const reachableMaxAgeHours = reachable === true ? query.reachableMaxAgeHours : undefined; const citations = parseOptionalBool(query.citations); const localOnly = parseOptionalBool(query.localOnly); const filters: RagFilters = RagFiltersSchema.parse({ @@ -1237,6 +1240,7 @@ export async function buildApp(params: { env: Env; store: RegistryStore }) { registryType, hasRemote, reachable, + reachableMaxAgeHours, citations, localOnly, serverKind: query.serverKind @@ -1255,7 +1259,16 @@ export async function buildApp(params: { env: Env; store: RegistryStore }) { return { query: q, results: results.map(mapRagHit), - metadata: { count: results.length } + metadata: { + count: results.length, + ...(reachableMaxAgeHours != null + ? { + filters: { + reachableMaxAgeHours + } + } + : {}) + } }; }); @@ -1264,11 +1277,14 @@ export async function buildApp(params: { env: Env; store: RegistryStore }) { if (!query) return; const limit = query.limit ?? 25; + const reachable = parseOptionalBool(query.reachable); + const reachableMaxAgeHours = reachable === true ? 
query.reachableMaxAgeHours : undefined; const filters: RagFilters = RagFiltersSchema.parse({ categories: parseCategories(query.categories), minScore: query.minScore ?? 10, hasRemote: parseOptionalBool(query.hasRemote), - reachable: parseOptionalBool(query.reachable), + reachable, + reachableMaxAgeHours, localOnly: parseOptionalBool(query.localOnly), serverKind: query.serverKind ?? 'retriever' }); @@ -1276,7 +1292,16 @@ export async function buildApp(params: { env: Env; store: RegistryStore }) { const results = await params.store.searchRagTop({ limit, filters }); return { results: results.map(mapRagHit), - metadata: { count: results.length } + metadata: { + count: results.length, + ...(reachableMaxAgeHours != null + ? { + filters: { + reachableMaxAgeHours + } + } + : {}) + } }; }); diff --git a/apps/api/src/rag/search.ts b/apps/api/src/rag/search.ts index 36df38c..0d79d56 100644 --- a/apps/api/src/rag/search.ts +++ b/apps/api/src/rag/search.ts @@ -194,6 +194,15 @@ function passesFilters(item: RagSearchItem, filters: RagFilters | undefined) { if (filters.reachable === true) { if (enrichment?.reachable !== true) return false; } + if (filters.reachable === true && filters.reachableMaxAgeHours != null) { + const checkedAtRaw = (enrichment as any)?.reachableCheckedAt; + if (typeof checkedAtRaw !== 'string' || !checkedAtRaw) return false; + const checkedAtMs = Date.parse(checkedAtRaw); + if (!Number.isFinite(checkedAtMs)) return false; + const maxAgeMs = filters.reachableMaxAgeHours * 3_600_000; + const ageMs = Date.now() - checkedAtMs; + if (ageMs > maxAgeMs) return false; + } if (filters.citations === true) { if (enrichment?.citations !== true) return false; } diff --git a/apps/api/tests/api.test.ts b/apps/api/tests/api.test.ts index ca80d86..ea5c47b 100644 --- a/apps/api/tests/api.test.ts +++ b/apps/api/tests/api.test.ts @@ -752,6 +752,105 @@ test('rag search exposes reachability metadata fields', async () => { await app.close(); }); +test('rag search 
reachableMaxAgeHours keeps only fresh reachable results and leaves reachable=true unchanged when omitted', async () => { + const store = new InMemoryStore(); + const nowMs = Date.now(); + const freshCheckedAt = new Date(nowMs - 1 * 3_600_000).toISOString(); + const staleCheckedAt = new Date(nowMs - 30 * 3_600_000).toISOString(); + + await store.upsertServerVersion({ + runId: 'run_test', + at: new Date(), + server: { + name: 'example/freshness-fresh', + version: '1.0.0', + description: 'freshness retriever', + remotes: [{ type: 'streamable-http', url: 'https://example.com/fresh' }] + }, + official: { isLatest: true, updatedAt: new Date().toISOString(), publishedAt: new Date().toISOString() }, + ragmap: { + categories: ['rag'], + ragScore: 80, + reasons: ['test'], + keywords: ['freshness'], + hasRemote: true, + reachable: true, + reachableCheckedAt: freshCheckedAt, + serverKind: 'retriever' + }, + hidden: false + }); + await store.upsertServerVersion({ + runId: 'run_test', + at: new Date(), + server: { + name: 'example/freshness-stale', + version: '1.0.0', + description: 'freshness retriever', + remotes: [{ type: 'streamable-http', url: 'https://example.com/stale' }] + }, + official: { isLatest: true, updatedAt: new Date().toISOString(), publishedAt: new Date().toISOString() }, + ragmap: { + categories: ['rag'], + ragScore: 70, + reasons: ['test'], + keywords: ['freshness'], + hasRemote: true, + reachable: true, + reachableCheckedAt: staleCheckedAt, + serverKind: 'retriever' + }, + hidden: false + }); + await store.upsertServerVersion({ + runId: 'run_test', + at: new Date(), + server: { + name: 'example/freshness-missing-checked-at', + version: '1.0.0', + description: 'freshness retriever', + remotes: [{ type: 'streamable-http', url: 'https://example.com/missing' }] + }, + official: { isLatest: true, updatedAt: new Date().toISOString(), publishedAt: new Date().toISOString() }, + ragmap: { + categories: ['rag'], + ragScore: 60, + reasons: ['test'], + keywords: 
['freshness'], + hasRemote: true, + reachable: true, + serverKind: 'retriever' + }, + hidden: false + }); + + const app = await buildApp({ env, store }); + const withFreshness = await app.inject({ + method: 'GET', + url: '/rag/search?q=freshness&hasRemote=true&reachable=true&reachableMaxAgeHours=24&limit=50' + }); + assert.equal(withFreshness.statusCode, 200); + const withFreshnessBody = withFreshness.json() as any; + assert.equal(withFreshnessBody.metadata.count, 1); + assert.equal(withFreshnessBody.metadata.filters.reachableMaxAgeHours, 24); + assert.equal(withFreshnessBody.results[0].name, 'example/freshness-fresh'); + + const withoutFreshness = await app.inject({ + method: 'GET', + url: '/rag/search?q=freshness&hasRemote=true&reachable=true&limit=50' + }); + assert.equal(withoutFreshness.statusCode, 200); + const withoutFreshnessBody = withoutFreshness.json() as any; + const names = withoutFreshnessBody.results.map((result: any) => result.name); + assert.equal(withoutFreshnessBody.metadata.count, 3); + assert.equal(names.includes('example/freshness-fresh'), true); + assert.equal(names.includes('example/freshness-stale'), true); + assert.equal(names.includes('example/freshness-missing-checked-at'), true); + assert.equal(withoutFreshnessBody.metadata.filters, undefined); + + await app.close(); +}); + test('rag top returns non-empty recommended retrievers with default filters', async () => { const store = new InMemoryStore(); await store.upsertServerVersion({ @@ -778,6 +877,92 @@ test('rag top returns non-empty recommended retrievers with default filters', as await app.close(); }); +test('rag top applies reachableMaxAgeHours when reachable=true and echoes the filter', async () => { + const store = new InMemoryStore(); + const nowMs = Date.now(); + const freshCheckedAt = new Date(nowMs - 1 * 3_600_000).toISOString(); + const staleCheckedAt = new Date(nowMs - 30 * 3_600_000).toISOString(); + + await store.upsertServerVersion({ + runId: 'run_test', + at: new Date(), 
+ server: { + name: 'example/top-fresh', + version: '0.1.0', + description: 'retrieval semantic search rag server', + remotes: [{ type: 'streamable-http', url: 'https://example.com/top-fresh' }] + }, + official: { isLatest: true, updatedAt: new Date().toISOString(), publishedAt: new Date().toISOString() }, + ragmap: { + categories: ['rag'], + ragScore: 90, + reasons: ['test'], + keywords: ['top'], + serverKind: 'retriever', + hasRemote: true, + reachable: true, + reachableCheckedAt: freshCheckedAt + }, + hidden: false + }); + await store.upsertServerVersion({ + runId: 'run_test', + at: new Date(), + server: { + name: 'example/top-stale', + version: '0.1.0', + description: 'retrieval semantic search rag server', + remotes: [{ type: 'streamable-http', url: 'https://example.com/top-stale' }] + }, + official: { isLatest: true, updatedAt: new Date().toISOString(), publishedAt: new Date().toISOString() }, + ragmap: { + categories: ['rag'], + ragScore: 80, + reasons: ['test'], + keywords: ['top'], + serverKind: 'retriever', + hasRemote: true, + reachable: true, + reachableCheckedAt: staleCheckedAt + }, + hidden: false + }); + await store.upsertServerVersion({ + runId: 'run_test', + at: new Date(), + server: { + name: 'example/top-missing-checked-at', + version: '0.1.0', + description: 'retrieval semantic search rag server', + remotes: [{ type: 'streamable-http', url: 'https://example.com/top-missing' }] + }, + official: { isLatest: true, updatedAt: new Date().toISOString(), publishedAt: new Date().toISOString() }, + ragmap: { + categories: ['rag'], + ragScore: 70, + reasons: ['test'], + keywords: ['top'], + serverKind: 'retriever', + hasRemote: true, + reachable: true + }, + hidden: false + }); + + const app = await buildApp({ env, store }); + const withFreshness = await app.inject({ + method: 'GET', + url: '/rag/top?reachable=true&reachableMaxAgeHours=24&minScore=0&serverKind=retriever&limit=50' + }); + assert.equal(withFreshness.statusCode, 200); + const 
withFreshnessBody = withFreshness.json() as any; + assert.equal(withFreshnessBody.metadata.count, 1); + assert.equal(withFreshnessBody.metadata.filters.reachableMaxAgeHours, 24); + assert.equal(withFreshnessBody.results[0].name, 'example/top-fresh'); + + await app.close(); +}); + test('rag install returns copy-ready config object', async () => { const store = new InMemoryStore(); await store.upsertServerVersion({ diff --git a/apps/mcp-remote/src/index.ts b/apps/mcp-remote/src/index.ts index 4398af0..0747745 100644 --- a/apps/mcp-remote/src/index.ts +++ b/apps/mcp-remote/src/index.ts @@ -61,13 +61,14 @@ function registerTools(server: McpServer) { registryType: z.string().min(1).optional(), hasRemote: z.boolean().optional(), reachable: z.boolean().optional(), + reachableMaxAgeHours: z.number().int().min(1).max(8760).optional(), citations: z.boolean().optional(), localOnly: z.boolean().optional(), serverKind: z.enum(['retriever', 'evaluator', 'indexer', 'router', 'other']).optional(), limit: z.number().int().min(1).max(50).optional() } }, - async ({ query, categories, minScore, transport, registryType, hasRemote, reachable, citations, localOnly, serverKind, limit }) => { + async ({ query, categories, minScore, transport, registryType, hasRemote, reachable, reachableMaxAgeHours, citations, localOnly, serverKind, limit }) => { const response = await apiGet('/rag/search', { q: query ?? 'rag', limit: String(limit ?? 10), @@ -77,6 +78,9 @@ function registerTools(server: McpServer) { ...(registryType ? { registryType } : {}), ...(hasRemote !== undefined ? { hasRemote: String(hasRemote) } : {}), ...(reachable !== undefined ? { reachable: String(reachable) } : {}), + ...(reachable === true && reachableMaxAgeHours != null + ? { reachableMaxAgeHours: String(reachableMaxAgeHours) } + : {}), ...(citations !== undefined ? { citations: String(citations) } : {}), ...(localOnly !== undefined ? { localOnly: String(localOnly) } : {}), ...(serverKind ? 
{ serverKind } : {}) @@ -99,18 +103,22 @@ function registerTools(server: McpServer) { minScore: z.number().int().min(0).max(100).optional(), hasRemote: z.boolean().optional(), reachable: z.boolean().optional(), + reachableMaxAgeHours: z.number().int().min(1).max(8760).optional(), localOnly: z.boolean().optional(), serverKind: z.enum(['retriever', 'evaluator', 'indexer', 'router', 'other']).optional(), limit: z.number().int().min(1).max(50).optional() } }, - async ({ categories, minScore, hasRemote, reachable, localOnly, serverKind, limit }) => { + async ({ categories, minScore, hasRemote, reachable, reachableMaxAgeHours, localOnly, serverKind, limit }) => { const response = await apiGet('/rag/top', { limit: String(limit ?? 25), ...(categories && categories.length ? { categories: categories.join(',') } : {}), ...(minScore != null ? { minScore: String(minScore) } : {}), ...(hasRemote !== undefined ? { hasRemote: String(hasRemote) } : {}), ...(reachable !== undefined ? { reachable: String(reachable) } : {}), + ...(reachable === true && reachableMaxAgeHours != null + ? { reachableMaxAgeHours: String(reachableMaxAgeHours) } + : {}), ...(localOnly !== undefined ? { localOnly: String(localOnly) } : {}), ...(serverKind ? { serverKind } : {}) }); diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md index 1e4277e..e447369 100644 --- a/docs/DEPLOYMENT.md +++ b/docs/DEPLOYMENT.md @@ -97,6 +97,10 @@ For repository workflows that call `/internal/*` routes: - Reachability probes now cover both `streamable-http` and `sse` remotes: - `streamable-http`: `HEAD` probe, with `GET` fallback. - `sse`: short `GET` with `Accept: text/event-stream`, then immediate body cancel so checks do not hang on streaming responses. +- Search freshness filter: + - `/rag/search` and `/rag/top` support `reachableMaxAgeHours`. + - `reachable=true&reachableMaxAgeHours=24` means "reachable and checked within the last 24 hours." 
+ - When `reachableMaxAgeHours` is set together with `reachable=true`, entries with a missing or unparseable `reachableCheckedAt` are excluded; without `reachable=true` the parameter is ignored. - `/rag/install` now emits remote configs for both `streamable-http` and `sse` endpoints. Note: SSE support depends on the MCP host/client; Ragmap only emits the correct transport config. - `/rag/install` also emits `claudeDesktopNote` so UIs can clarify that Claude Desktop remote MCP servers may need to be added via the Connectors UI. diff --git a/packages/mcp-local/src/cli.ts b/packages/mcp-local/src/cli.ts index f34d5c7..a5e22f6 100644 --- a/packages/mcp-local/src/cli.ts +++ b/packages/mcp-local/src/cli.ts @@ -38,13 +38,14 @@ server.registerTool( registryType: z.string().min(1).optional(), hasRemote: z.boolean().optional(), reachable: z.boolean().optional(), + reachableMaxAgeHours: z.number().int().min(1).max(8760).optional(), citations: z.boolean().optional(), localOnly: z.boolean().optional(), serverKind: z.enum(['retriever', 'evaluator', 'indexer', 'router', 'other']).optional(), limit: z.number().int().min(1).max(50).optional() } }, - async ({ query, categories, minScore, transport, registryType, hasRemote, reachable, citations, localOnly, serverKind, limit }) => { + async ({ query, categories, minScore, transport, registryType, hasRemote, reachable, reachableMaxAgeHours, citations, localOnly, serverKind, limit }) => { const response = await apiGet('/rag/search', { q: query ?? 'rag', limit: String(limit ?? 10), @@ -54,6 +55,9 @@ server.registerTool( ...(registryType ? { registryType } : {}), ...(hasRemote !== undefined ? { hasRemote: String(hasRemote) } : {}), ...(reachable !== undefined ? { reachable: String(reachable) } : {}), + ...(reachable === true && reachableMaxAgeHours != null + ? { reachableMaxAgeHours: String(reachableMaxAgeHours) } + : {}), ...(citations !== undefined ? { citations: String(citations) } : {}), ...(localOnly !== undefined ? { localOnly: String(localOnly) } : {}), ...(serverKind ? 
{ serverKind } : {}) @@ -74,18 +78,22 @@ server.registerTool( minScore: z.number().int().min(0).max(100).optional(), hasRemote: z.boolean().optional(), reachable: z.boolean().optional(), + reachableMaxAgeHours: z.number().int().min(1).max(8760).optional(), localOnly: z.boolean().optional(), serverKind: z.enum(['retriever', 'evaluator', 'indexer', 'router', 'other']).optional(), limit: z.number().int().min(1).max(50).optional() } }, - async ({ categories, minScore, hasRemote, reachable, localOnly, serverKind, limit }) => { + async ({ categories, minScore, hasRemote, reachable, reachableMaxAgeHours, localOnly, serverKind, limit }) => { const response = await apiGet('/rag/top', { limit: String(limit ?? 25), ...(categories && categories.length ? { categories: categories.join(',') } : {}), ...(minScore != null ? { minScore: String(minScore) } : {}), ...(hasRemote !== undefined ? { hasRemote: String(hasRemote) } : {}), ...(reachable !== undefined ? { reachable: String(reachable) } : {}), + ...(reachable === true && reachableMaxAgeHours != null + ? { reachableMaxAgeHours: String(reachableMaxAgeHours) } + : {}), ...(localOnly !== undefined ? { localOnly: String(localOnly) } : {}), ...(serverKind ? { serverKind } : {}) }); diff --git a/packages/shared/src/index.ts b/packages/shared/src/index.ts index ee053b3..9124680 100644 --- a/packages/shared/src/index.ts +++ b/packages/shared/src/index.ts @@ -87,6 +87,7 @@ export const RagFiltersSchema = z registryType: z.string().optional(), hasRemote: z.boolean().optional(), reachable: z.boolean().optional(), + reachableMaxAgeHours: z.number().int().min(1).max(8760).optional(), citations: z.boolean().optional(), localOnly: z.boolean().optional(), serverKind: ServerKindSchema.optional()