From 8d7195b70ff70e8a2d5103e649d71bdc670006f0 Mon Sep 17 00:00:00 2001
From: voidcommit-afk <strucker08@gmail.com>
Date: Sat, 14 Mar 2026 19:49:14 +0530
Subject: [PATCH 1/4] feat(retrieval): optimize chat pipeline and prompt
 grounding

---
 app/api/chat/route.ts |  526 ++++++++++++----
 lib/bible-fetch.ts    |  130 +++-
 lib/cache.ts          |  126 +++-
 lib/feature-flags.ts  |   13 +
 lib/llm-fallback.ts   |  217 +++----
 lib/prompts.ts        |  279 +++++++--
 lib/query-utils.ts    |  220 ++++++-
 lib/retrieval.ts      | 1317 +++++++++++++++++++++++++++++++++++------
 8 files changed, 2363 insertions(+), 465 deletions(-)
 create mode 100644 lib/feature-flags.ts
diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts
index 49f95a5..0bb40d9 100644
--- a/app/api/chat/route.ts
+++ b/app/api/chat/route.ts
@@ -1,12 +1,13 @@
 import { simulateReadableStream, streamText } from 'ai';
-import type { UIMessage } from 'ai';
+import { createHash, randomUUID } from 'crypto';
 import { retrieveContextForQuery } from '@/lib/retrieval';
-import { buildContextPrompt, SYSTEM_PROMPT } from '@/lib/prompts';
+import { buildCitationWhitelist, buildContextPrompt, expandCitationReference, SYSTEM_PROMPT } from '@/lib/prompts';
 import { buildCacheKey, getCachedResponse, setCachedResponse } from '@/lib/cache';
 import { generateWithFallback } from '@/lib/llm-fallback';
 import { validateDataIntegrity } from '@/lib/validate-data';
 import type { VerseContext } from '@/lib/bible-fetch';
 import { redis } from '@/lib/redis';
+import { ENABLE_RETRIEVAL_DEBUG } from '@/lib/feature-flags';
 
 // export const runtime = 'edge';
 
@@ -35,10 +36,58 @@ type NormalizedChatResponse = {
   };
 };
 
-const DEBUG_LLM = process.env.DEBUG_LLM === '1';
+type LatencyMetricName = // timing buckets reported under `metrics` in the chat_request_latency log line
+  | 'cache_lookup_ms'
+  | 'retrieve_total_ms'
+  | 'embed_ms'
+  | 'vector_ms'
+  | 'fetch_verses_db_ms'
+  | 'fetch_verses_api_ms'
+  | 'enrich_ms'
+  | 'prompt_build_ms'
+  | 'llm_ms'
+  | 'post_normalize_ms'
+  | 'total_ms';
+
+type LatencyMetrics = Record<LatencyMetricName, number>; // one millisecond duration per bucket
+
+type CacheLookupResult = { // one cache probe result produced by findPreferredCachedResponse
+  cacheKey: string;
+  modelKey: string;
+  response: Awaited<ReturnType<typeof getCachedResponse>>;
+};
+
+type PipelineExecutionResult = { // everything executeUncachedPipeline hands back to POST
+  normalizedResponse: NormalizedChatResponse;
+  finalPrompt: string;
+  preferredChunks?: string[]; // generation.chunks, forwarded to streamTextFromContent
+  fallbackUsed: boolean; // true when the model used differs from PRIMARY_MODEL_USED
+  finalFallback: boolean; // generation flagged finalFallback, or the model id is 'context-only'
+  pipelineMetrics: Partial<LatencyMetrics>; // only the buckets the pipeline actually measured
+};
+
+type ModelHistoryMessage = { // conversation turn given to buildPrompt and hashed into the in-flight dedup key
+  role: 'system' | 'assistant' | 'user';
+  content: string;
+};
+
+const DEBUG_LLM = ENABLE_RETRIEVAL_DEBUG; // gates debugLog output and the context_utilization diagnostics
 const RATE_LIMIT_WINDOW_SECONDS = 60;
 const RATE_LIMIT_MAX_REQUESTS = 60;
 const RATE_LIMIT_WARN_THRESHOLD = 50;
+const EMPTY_LATENCY_METRICS: LatencyMetrics = { // all-zero template; createLatencyMetrics() hands out copies of it
+  cache_lookup_ms: 0,
+  retrieve_total_ms: 0,
+  embed_ms: 0,
+  vector_ms: 0,
+  fetch_verses_db_ms: 0,
+  fetch_verses_api_ms: 0,
+  enrich_ms: 0,
+  prompt_build_ms: 0,
+  llm_ms: 0,
+  post_normalize_ms: 0,
+  total_ms: 0,
+};
 const RATE_LIMIT_SCRIPT = `
 local current = redis.call("INCR", KEYS[1])
 if current == 1 then
@@ -46,6 +95,7 @@ if current == 1 then
 end
 return current
 `;
+const inflightRequests = new Map<string, Promise<PipelineExecutionResult>>(); // module-level map of running pipelines so identical concurrent requests await one run; NOTE(review): the dedup key omits the caller's API key — confirm sharing across keys is intended
 
 function debugLog(...args: unknown[]) {
   if (DEBUG_LLM) {
@@ -53,6 +103,71 @@ function debugLog(...args: unknown[]) {
   }
 }
 
+function roundLatencyMs(durationMs: number): number { // rounds a millisecond duration to two decimal places
+  return Number.parseFloat(durationMs.toFixed(2));
+}
+
+function createLatencyMetrics(): LatencyMetrics { // fresh all-zero copy, so the shared template is never mutated
+  return Object.assign({}, EMPTY_LATENCY_METRICS);
+}
+
+function addLatencyMetric(metrics: LatencyMetrics, metric: LatencyMetricName, durationMs: number): void {
+  metrics[metric] = roundLatencyMs(durationMs + metrics[metric]); // accumulate into the bucket, then re-round
+}
+
+function setLatencyMetric(metrics: LatencyMetrics, metric: LatencyMetricName, durationMs: number): void {
+  Object.assign(metrics, { [metric]: roundLatencyMs(durationMs) }); // overwrite the bucket with the rounded value
+}
+
+function normalizeInflightQuery(query: string): string { // trim, collapse whitespace runs to one space, lowercase
+  return query.trim().split(/\s+/).join(' ').toLowerCase();
+}
+
+function buildInflightRequestKey(query: string, translation: string, model: string): string {
+  return [normalizeInflightQuery(query), translation, model].join('\u0000'); // NUL-separated key parts
+}
+
+function hashModelHistory(modelHistory: ModelHistoryMessage[]): string { // SHA-256 hex digest of the JSON-serialized history
+  return createHash('sha256').update(JSON.stringify(modelHistory), 'utf8').digest('hex');
+}
+
+function buildInflightRequestKeyWithHistory(
+  query: string,
+  translation: string,
+  model: string,
+  modelHistory: ModelHistoryMessage[]
+): string {
+  return [buildInflightRequestKey(query, translation, model), hashModelHistory(modelHistory)].join('\u0000'); // base key + history hash
+}
+
+async function findPreferredCachedResponse(
+  query: string,
+  translation: string
+): Promise<CacheLookupResult | null> {
+  // Probe the cache for every candidate model concurrently. CACHE_MODEL_CANDIDATES
+  // order decides which hit wins, because Promise.all keeps the input order.
+  const lookups: Array<Promise<CacheLookupResult>> = [];
+  for (const modelKey of CACHE_MODEL_CANDIDATES) {
+    const keyInput = { query, translation, model: modelKey };
+    // The key is reported back to the caller alongside the cached payload.
+    const cacheKey = buildCacheKey(keyInput);
+    lookups.push(
+      getCachedResponse(keyInput).then((response) => ({
+        modelKey,
+        cacheKey,
+        response,
+      }))
+    );
+  }
+
+  const settled = await Promise.all(lookups);
+  for (const entry of settled) {
+    // The first entry that carries a non-empty stored response is the preferred one.
+    if (entry.response?.response) return entry;
+  }
+  return null;
+}
+
 function isValidIPv4(value: string): boolean {
   const parts = value.split('.');
   if (parts.length !== 4) return false;
@@ -177,32 +292,9 @@ function normalizeTranslation(_input: string | null | undefined): string {
   return validTranslations.includes(upper) ? upper : 'BSB';
 }
 
-function getMessageText(message: UIMessage | undefined): string {
-  if (!message) return '';
-  const msg = message as any;
-
-  if (typeof msg.content === 'string') return msg.content;
-  if (typeof msg.text === 'string') return msg.text;
-
-  if (Array.isArray(msg.content)) {
-    return msg.content
-      .map((part: any) => (typeof part === 'string' ? part : part.text || part.value || ''))
-      .join('');
-  }
-
-  if (Array.isArray(msg.parts)) {
-    return msg.parts
-      .map((part: any) => part.text || part.value || (part.type === 'text' ? part.text : ''))
-      .join('');
-  }
-
-  return '';
-}
-
 function buildPrompt(
   finalPrompt: string,
-  history: Array<{ role: 'system' | 'assistant' | 'user'; content: string }>,
-  query: string
+  history: Array<{ role: 'system' | 'assistant' | 'user'; content: string }>
 ): string {
   const historyLines = history
     .filter((message) => message.role !== 'system')
@@ -210,10 +302,121 @@ function buildPrompt(
     .join('\n');
 
   if (historyLines.trim()) {
-    return `${finalPrompt}\n\nConversation so far:\n${historyLines}\n\nUser: ${query}`;
+    return `${finalPrompt}\n\nCONVERSATION HISTORY\n${historyLines}`;
+  }
+
+  return finalPrompt;
+}
+
+function normalizeCitationToken(citation: string): string {
+  // Trim, strip trailing punctuation/brackets, then collapse whitespace runs to single spaces.
+  const trimmed = citation.trim();
+  const withoutTrailingPunctuation = trimmed.replace(/[()[\],.;:!?]+$/g, '');
+  return withoutTrailingPunctuation.replace(/\s+/g, ' ');
+}
+
+function buildCitationWhitelistSet(verses: VerseContext[]): Set<string> {
+  // Normalize every whitelisted citation and lowercase it so membership checks are
+  // case-insensitive; entries that normalize to an empty string are dropped.
+  const normalizedEntries = Array.from(buildCitationWhitelist(verses))
+    .map((entry) => normalizeCitationToken(entry))
+    .filter((entry) => entry.length > 0)
+    .map((entry) => entry.toLowerCase());
+
+  return new Set<string>(normalizedEntries);
+}
+
+function extractCitations(content: string): string[] {
+  // Matches a book token (abbreviation, numbered book, or capitalized words) followed by chapter:verse and an optional range.
+  // NOTE(review): a capitalized word directly before the book ("In Romans 8:28") is captured as part of the match.
+  const citationPattern = /(?<![1-3]\s)\b(?:[1-3][A-Z]{2}|[A-Z]{2,3}|[1-3]\s+[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*|[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\s+\d+:\d+(?:[-–]\d+)?\b/g;
+  return (content.match(citationPattern) ?? []).map((found) => normalizeCitationToken(found));
+}
+
+function isAllowedCitation(citation: string, whitelist: Set<string>): boolean {
+  // extractCitations can glue a capitalized sentence word onto the book ("In Romans 8:28"); retry without such non-ordinal words.
+  let candidate = normalizeCitationToken(citation);
+  for (let tried = ''; candidate !== tried; candidate = candidate.replace(/^(?!(?:First|Second|Third)\s)[A-Z][a-z]+\s+(?=[A-Z])/, '')) {
+    if (whitelist.has(candidate.toLowerCase()) || whitelist.has(expandCitationReference(candidate).toLowerCase())) return true;
+    tried = candidate;
+  }
+  return candidate === ''; // an empty token is not a citation, so it stays allowed (nothing to scrub)
+}
+
+function scrubInvalidCitations(content: string, verses: VerseContext[]): string {
+  // Removes citations that are not backed by `verses`; with an empty whitelist the content is returned untouched.
+  const whitelist = buildCitationWhitelistSet(verses);
+  if (whitelist.size === 0) return content;
+
+  const invalidCitations = Array.from(
+    new Set(extractCitations(content).filter((citation) => !isAllowedCitation(citation, whitelist)))
+  );
+  if (invalidCitations.length === 0) return content;
+
+  let sanitized = content;
+  for (const citation of invalidCitations) {
+    // Tokens are whitespace-normalized, so accept any whitespace run where the token has a space.
+    const escaped = citation.replace(/[.*+?^${}()|[\]\\]/g, '\\$&').replace(/ /g, '\\s+');
+    // Guard both edges so an invalid "John 3:16" never eats part of a valid "1 John 3:16"
+    // or "John 3:16-18" (mirrors the lookbehind and range handling in extractCitations).
+    const guarded = `(?<![1-3]\\s)\\b${escaped}\\b(?![-–]\\d)`;
+    // Drop whole (...) / [...] groups that contain the citation, then any bare occurrence.
+    sanitized = sanitized
+      .replace(new RegExp(`\\((?:[^()]*?)${guarded}(?:[^()]*?)\\)`, 'g'), '')
+      .replace(new RegExp(`\\[(?:[^\\]]*?)${guarded}(?:[^\\]]*?)\\]`, 'g'), '')
+      .replace(new RegExp(guarded, 'g'), '');
   }
 
-  return `${finalPrompt}\n\nUser: ${query}`;
+  sanitized = sanitized // tidy the gaps left behind by the removed citations
+    .replace(/[ \t]{2,}/g, ' ')
+    .replace(/\n{3,}/g, '\n\n')
+    .replace(/\s+([,.;:!?])/g, '$1')
+    .trim();
+
+  console.info(JSON.stringify({
+    event: 'citation_whitelist_enforced',
+    removedCitations: invalidCitations,
+    allowedCitations: Array.from(whitelist),
+  }));
+
+  return sanitized;
+}
+
+function logContextUtilizationDiagnostics(
+  content: string,
+  verses: VerseContext[],
+  options?: {
+    requestId?: string;
+    modelUsed?: string | null;
+    cacheHit?: boolean;
+  }
+): void {
+  // Diagnostics only: nothing is emitted unless DEBUG_LLM is enabled.
+  if (!DEBUG_LLM) return;
+
+  const retrievedWhitelist = buildCitationWhitelistSet(verses);
+  // Distinct citations in the answer whose expanded, lowercased form is in the retrieved whitelist.
+  const citedWhitelist = new Set<string>();
+  for (const rawCitation of extractCitations(content)) {
+    const expandedKey = expandCitationReference(normalizeCitationToken(rawCitation)).toLowerCase();
+    if (retrievedWhitelist.has(expandedKey)) citedWhitelist.add(expandedKey);
+  }
+
+  const retrievedCount = retrievedWhitelist.size;
+  const citedCount = citedWhitelist.size;
+  // Ratio rounded to two decimals; reported as 0 when nothing was retrieved.
+  const citationUtilization = retrievedCount === 0 ? 0 : Number((citedCount / retrievedCount).toFixed(2));
+
+  const payload = {
+    event: 'context_utilization',
+    requestId: options?.requestId,
+    modelUsed: options?.modelUsed,
+    cacheHit: options?.cacheHit ?? false,
+    retrieved_count: retrievedCount,
+    cited_count: citedCount,
+    citation_utilization: citationUtilization,
+  };
+  console.info(JSON.stringify(payload));
 }
 
 function ensureFallbackBanner(
@@ -392,7 +595,106 @@ async function streamTextFromContent(
   });
 }
 
+async function executeUncachedPipeline(options: {
+  query: string;
+  requestedTranslation: string;
+  groqApiKey?: string;
+  modelHistory: ModelHistoryMessage[];
+  requestId: string;
+}): Promise<PipelineExecutionResult> {
+  const pipelineMetrics: Partial<LatencyMetrics> = {};
+  // Cache-miss path: retrieve verses, build the prompt, generate, normalize, then cache. Stage timings land in pipelineMetrics.
+  const retrieveStartedAt = performance.now();
+  const verses = await retrieveContextForQuery(options.query, options.requestedTranslation, options.groqApiKey, {
+    requestId: options.requestId,
+    onMetric: (metric, durationMs) => {
+      pipelineMetrics[metric] = roundLatencyMs((pipelineMetrics[metric] || 0) + durationMs);
+    },
+  });
+  pipelineMetrics.retrieve_total_ms = roundLatencyMs(performance.now() - retrieveStartedAt);
+  // `context` is the prompt with the SYSTEM_PROMPT prefix removed (when present); `prompt` adds the conversation history.
+  const promptBuildStartedAt = performance.now();
+  const finalPrompt = buildContextPrompt(options.query, verses, options.requestedTranslation);
+  const context = finalPrompt.startsWith(SYSTEM_PROMPT)
+    ? finalPrompt.slice(SYSTEM_PROMPT.length).trim()
+    : finalPrompt;
+  const prompt = buildPrompt(finalPrompt, options.modelHistory);
+  pipelineMetrics.prompt_build_ms = roundLatencyMs(performance.now() - promptBuildStartedAt);
+  // llm_ms is whatever duration generateWithFallback reports through onTiming.
+  const generation = await generateWithFallback(prompt, {
+    maxTokens: 2048,
+    temperature: 0.1,
+    apiKey: options.groqApiKey,
+    onTiming: (durationMs) => {
+      pipelineMetrics.llm_ms = roundLatencyMs(durationMs);
+    },
+  });
+  // Post-processing: normalizeResponseContent, then scrubInvalidCitations, then ensureFallbackBanner.
+  const postNormalizeStartedAt = performance.now();
+  const normalizedModelUsed = normalizeModelId(generation.modelUsed);
+  const fallbackUsed = normalizedModelUsed !== PRIMARY_MODEL_USED;
+  const finalFallback = generation.finalFallback === true || normalizedModelUsed === 'context-only';
+  const normalizedContent = scrubInvalidCitations(
+    normalizeResponseContent(generation.content, verses),
+    verses
+  );
+  const streamedContent = ensureFallbackBanner(
+    normalizedContent,
+    normalizedModelUsed,
+    fallbackUsed,
+    finalFallback
+  );
+  const normalizedResponse: NormalizedChatResponse = {
+    content: streamedContent,
+    modelUsed: normalizedModelUsed,
+    verses,
+    metadata: {
+      translation: options.requestedTranslation,
+    },
+  };
+  logContextUtilizationDiagnostics(normalizedResponse.content, normalizedResponse.verses, {
+    requestId: options.requestId,
+    modelUsed: normalizedResponse.modelUsed,
+    cacheHit: false,
+  });
+  pipelineMetrics.post_normalize_ms = roundLatencyMs(performance.now() - postNormalizeStartedAt);
+  // Cache only answers that have verses and lack the "No supporting passages found" marker; the write is awaited before returning.
+  if (verses.length > 0 && !/No supporting passages found/i.test(normalizedResponse.content)) {
+    await setCachedResponse(
+      {
+        query: options.query,
+        translation: options.requestedTranslation,
+        model: normalizedModelUsed,
+      },
+      {
+        verses,
+        context,
+        prompt: finalPrompt,
+        response: normalizedResponse.content,
+        modelUsed: normalizedResponse.modelUsed,
+      }
+    );
+  }
+
+  return {
+    normalizedResponse,
+    finalPrompt,
+    preferredChunks: generation.chunks,
+    fallbackUsed,
+    finalFallback,
+    pipelineMetrics,
+  };
+}
+
 export async function POST(req: Request) {
+  const requestId = randomUUID();
+  const requestStartedAt = performance.now();
+  const latencyMetrics = createLatencyMetrics();
+  let statusCode = 200;
+  let translationForLog = 'unknown';
+  let cacheHit = false;
+  let modelUsedForLog: string | null = null;
+
   try {
     await dataValidationPromise;
     const { messages, translation, customApiKey } = await req.json();
@@ -464,12 +766,14 @@ export async function POST(req: Request) {
     }
 
     if (!lastUserMessage) {
-      return new Response('Missing user query', { status: 400 });
+      statusCode = 400;
+      return new Response('Missing user query', { status: statusCode });
     }
 
     const query = typeof lastUserMessage.content === 'string' ? lastUserMessage.content.trim() : '';
     if (!query) {
-      return new Response('Missing user query', { status: 400 });
+      statusCode = 400;
+      return new Response('Missing user query', { status: statusCode });
     }
 
     const rawTranslation =
@@ -477,6 +781,7 @@ export async function POST(req: Request) {
         ? translation
         : queryTranslation || headerTranslation;
     const requestedTranslation = normalizeTranslation(rawTranslation);
+    translationForLog = requestedTranslation;
     console.log(`Translation switched to ${requestedTranslation}`);
     debugLog('Using translation:', requestedTranslation);
 
@@ -493,9 +798,10 @@ export async function POST(req: Request) {
             rateLimitWarning = `Approaching rate limit (${count}/${RATE_LIMIT_MAX_REQUESTS} req/min)`;
           }
           if (count > RATE_LIMIT_MAX_REQUESTS) {
+            statusCode = 429;
             return new Response(JSON.stringify({ 
               error: 'Rate limit exceeded (60 req/min). Try again in 60s.' 
-            }), { status: 429, headers: { 'Content-Type': 'application/json' } });
+            }), { status: statusCode, headers: { 'Content-Type': 'application/json' } });
           }
         }
       } else {
@@ -513,22 +819,13 @@ export async function POST(req: Request) {
 
     let cached = null as Awaited<ReturnType<typeof getCachedResponse>>;
     let cachedKey: string | null = null;
-    for (const modelKey of CACHE_MODEL_CANDIDATES) {
-      const cacheKey = buildCacheKey({
-        query,
-        translation: requestedTranslation,
-        model: modelKey,
-      });
-      cached = await getCachedResponse({
-        query,
-        translation: requestedTranslation,
-        model: modelKey,
-      });
-      if (cached?.response) {
-        cachedKey = cacheKey;
-        break;
-      }
+    const cacheLookupStartedAt = performance.now();
+    const preferredCachedResult = await findPreferredCachedResponse(query, requestedTranslation);
+    if (preferredCachedResult) {
+      cached = preferredCachedResult.response;
+      cachedKey = preferredCachedResult.cacheKey;
     }
+    setLatencyMetric(latencyMetrics, 'cache_lookup_ms', performance.now() - cacheLookupStartedAt);
 
     if (cached?.response && /No supporting passages found/i.test(cached.response)) {
       cached = null;
@@ -536,17 +833,23 @@ export async function POST(req: Request) {
     }
 
     if (cached?.response) {
+      cacheHit = true;
       debugLog('Cache HIT – returning stored response', cachedKey);
       const cachedModelUsed = normalizeModelId(cached.modelUsed);
       const fallbackUsed = cachedModelUsed !== PRIMARY_MODEL_USED;
       const finalFallback = cachedModelUsed === 'context-only';
-      const normalizedCachedContent = normalizeResponseContent(cached.response, cached.verses || []);
+      const postNormalizeStartedAt = performance.now();
+      const normalizedCachedContent = scrubInvalidCitations(
+        normalizeResponseContent(cached.response, cached.verses || []),
+        cached.verses || []
+      );
       const cachedStreamedContent = ensureFallbackBanner(
         normalizedCachedContent,
         cachedModelUsed,
         fallbackUsed,
         finalFallback
       );
+      setLatencyMetric(latencyMetrics, 'post_normalize_ms', performance.now() - postNormalizeStartedAt);
       const cachedResponse: NormalizedChatResponse = {
         content: cachedStreamedContent,
         modelUsed: cachedModelUsed,
@@ -555,6 +858,12 @@ export async function POST(req: Request) {
           translation: requestedTranslation,
         },
       };
+      logContextUtilizationDiagnostics(cachedResponse.content, cachedResponse.verses, {
+        requestId,
+        modelUsed: cachedResponse.modelUsed,
+        cacheHit: true,
+      });
+      modelUsedForLog = cachedResponse.modelUsed;
 
       const cachedResult = await streamTextFromContent(cachedResponse.content, [
         { role: 'system', content: cached.prompt },
@@ -570,7 +879,7 @@ export async function POST(req: Request) {
         cachedHeaders['x-rate-limit-warning'] = rateLimitWarning;
       }
 
-      return cachedResult.toUIMessageStreamResponse({
+      const response = cachedResult.toUIMessageStreamResponse({
         headers: Object.keys(cachedHeaders).length > 0 ? cachedHeaders : undefined,
         messageMetadata: ({ part }) => {
           if (part.type === 'start' || part.type === 'finish') {
@@ -585,6 +894,8 @@ export async function POST(req: Request) {
           return undefined;
         },
       });
+      statusCode = response.status;
+      return response;
     }
 
     const missKey = buildCacheKey({
@@ -594,46 +905,44 @@ export async function POST(req: Request) {
     });
     debugLog('Cache MISS – proceeding to LLM', missKey);
 
-    // RAG Retrieval
-    const verses = await retrieveContextForQuery(query, requestedTranslation, groqApiKey);
-
-    // Build context-aware prompt
-    const finalPrompt = buildContextPrompt(query, verses, requestedTranslation);
-    const context = finalPrompt.startsWith(SYSTEM_PROMPT)
-      ? finalPrompt.slice(SYSTEM_PROMPT.length).trim()
-      : finalPrompt;
-
-    const prompt = buildPrompt(finalPrompt, modelHistory, query);
-
-    const generation = await generateWithFallback(prompt, {
-      maxTokens: 2048,
-      temperature: 0.1,
-      apiKey: groqApiKey,
-    } as any);
-
-    const normalizedModelUsed = normalizeModelId(generation.modelUsed);
-    const fallbackUsed = normalizedModelUsed !== PRIMARY_MODEL_USED;
-    const finalFallback = generation.finalFallback === true || normalizedModelUsed === 'context-only';
-    const normalizedContent = normalizeResponseContent(generation.content, verses);
-    const streamedContent = ensureFallbackBanner(
-      normalizedContent,
-      normalizedModelUsed,
-      fallbackUsed,
-      finalFallback
+    const inflightKey = buildInflightRequestKeyWithHistory(
+      query,
+      requestedTranslation,
+      PRIMARY_MODEL_USED,
+      modelHistory
     );
-    const normalizedResponse: NormalizedChatResponse = {
-      content: streamedContent,
-      modelUsed: normalizedModelUsed,
-      verses,
-      metadata: {
-        translation: requestedTranslation,
-      },
-    };
-    debugLog(`Model selected for response: ${normalizedModelUsed}`);
+    let pipelinePromise = inflightRequests.get(inflightKey);
+    if (pipelinePromise) {
+      debugLog('In-flight dedup HIT – awaiting active pipeline', inflightKey);
+    } else {
+      debugLog('In-flight dedup MISS – starting pipeline', inflightKey);
+      pipelinePromise = executeUncachedPipeline({
+        query,
+        requestedTranslation,
+        groqApiKey,
+        modelHistory,
+        requestId,
+      }).finally(() => {
+        if (inflightRequests.get(inflightKey) === pipelinePromise) {
+          inflightRequests.delete(inflightKey);
+        }
+      });
+      inflightRequests.set(inflightKey, pipelinePromise);
+    }
+
+    const pipelineResult = await pipelinePromise;
+    for (const [metric, durationMs] of Object.entries(pipelineResult.pipelineMetrics) as Array<[LatencyMetricName, number]>) {
+      setLatencyMetric(latencyMetrics, metric, durationMs);
+    }
+    const normalizedResponse = pipelineResult.normalizedResponse;
+    const fallbackUsed = pipelineResult.fallbackUsed;
+    const finalFallback = pipelineResult.finalFallback;
+    modelUsedForLog = normalizedResponse.modelUsed;
+    debugLog(`Model selected for response: ${normalizedResponse.modelUsed}`);
 
     const responseInit = {
       headers: {
-        ...(fallbackUsed ? { 'x-model-used': normalizedModelUsed } : {}),
+        ...(fallbackUsed ? { 'x-model-used': normalizedResponse.modelUsed } : {}),
         ...(rateLimitWarning ? { 'x-rate-limit-warning': rateLimitWarning } : {}),
       },
       messageMetadata: ({ part }: { part: { type: string } }) => {
@@ -648,53 +957,46 @@ export async function POST(req: Request) {
         }
         return undefined;
       },
-      onFinish: async ({ responseMessage, isAborted }: { responseMessage: UIMessage; isAborted: boolean }) => {
-        if (isAborted) return;
-        const text = getMessageText(responseMessage);
-        if (!text) return;
-        if (verses.length === 0 || /No supporting passages found/i.test(text)) {
-          return;
-        }
-        await setCachedResponse(
-          {
-            query,
-            translation: requestedTranslation,
-            model: normalizedModelUsed,
-          },
-          {
-            verses,
-            context,
-            prompt: finalPrompt,
-            response: text,
-            modelUsed: normalizedResponse.modelUsed
-          }
-        );
-      }
     };
 
     const fallbackResult = await streamTextFromContent(
       normalizedResponse.content,
       [
-        { role: 'system', content: finalPrompt },
+        { role: 'system', content: pipelineResult.finalPrompt },
         ...modelHistory,
         { role: 'user', content: query }
       ] as Array<{ role: string; content: string }>,
-      generation.chunks
+      pipelineResult.preferredChunks
     );
 
-    return fallbackResult.toUIMessageStreamResponse(responseInit);
+    const response = fallbackResult.toUIMessageStreamResponse(responseInit);
+    statusCode = response.status;
+    return response;
   } catch (e: unknown) {
     console.error('API Error:', e);
     const error = e as Error;
     const errorMsg = error?.message?.toLowerCase() || '';
     if (errorMsg.includes('429') || errorMsg.includes('rate limit')) {
+      statusCode = 429;
       return new Response(JSON.stringify({ 
         error: 'Rate limit exceeded. The shared resource is currently overloaded. Please wait a moment or provide your own API key in the settings.' 
-      }), { status: 429, headers: { 'Content-Type': 'application/json' } });
+      }), { status: statusCode, headers: { 'Content-Type': 'application/json' } });
     }
 
+    statusCode = 500;
     return new Response(JSON.stringify({ 
       error: 'An unexpected error occurred while processing your request.' 
-    }), { status: 500, headers: { 'Content-Type': 'application/json' } });
+    }), { status: statusCode, headers: { 'Content-Type': 'application/json' } });
+  } finally {
+    setLatencyMetric(latencyMetrics, 'total_ms', performance.now() - requestStartedAt);
+    console.info(JSON.stringify({
+      event: 'chat_request_latency',
+      requestId,
+      statusCode,
+      translation: translationForLog,
+      cacheHit,
+      modelUsed: modelUsedForLog,
+      metrics: latencyMetrics,
+    }));
   }
 }
diff --git a/lib/bible-fetch.ts b/lib/bible-fetch.ts
index d3d4662..13cdc83 100644
--- a/lib/bible-fetch.ts
+++ b/lib/bible-fetch.ts
@@ -8,6 +8,96 @@ export type VerseContext = {
   openGnt?: string;
 };
 
+const EXTERNAL_VERSE_FETCH_TIMEOUT_MS = 1500; // default per-attempt abort timeout
+const EXTERNAL_VERSE_FETCH_TOTAL_BUDGET_MS = 2000; // default cap across all attempts and backoff sleeps
+const EXTERNAL_VERSE_FETCH_MAX_RETRIES = 1; // default number of retries after the first attempt
+const EXTERNAL_VERSE_FETCH_BACKOFF_MS = 150; // default base backoff delay, doubled on each further attempt
+
+type ExternalFetchBudgetOptions = { // tuning knobs for fetchExternalWithTimeoutBudget
+  label: string; // identifies the call in warning logs
+  timeoutMs?: number; // per-attempt timeout; defaults to EXTERNAL_VERSE_FETCH_TIMEOUT_MS
+  totalBudgetMs?: number; // overall time budget; defaults to EXTERNAL_VERSE_FETCH_TOTAL_BUDGET_MS
+  maxRetries?: number; // retries after the first attempt; defaults to EXTERNAL_VERSE_FETCH_MAX_RETRIES
+  retryBackoffMs?: number; // base backoff delay; defaults to EXTERNAL_VERSE_FETCH_BACKOFF_MS
+};
+
+function isRetryableStatus(status: number): boolean { // 408, 425, 429 and every 5xx-or-higher status count as retryable
+  return status >= 500 || [408, 425, 429].includes(status);
+}
+
+function sleep(ms: number): Promise<void> { // resolves after roughly `ms` milliseconds
+  return new Promise<void>((done) => { setTimeout(done, ms); });
+}
+
+export async function fetchExternalWithTimeoutBudget(
+  url: string,
+  init: RequestInit = {},
+  options: ExternalFetchBudgetOptions
+): Promise<Response | null> {
+  const timeoutMs = options.timeoutMs ?? EXTERNAL_VERSE_FETCH_TIMEOUT_MS;
+  const totalBudgetMs = options.totalBudgetMs ?? EXTERNAL_VERSE_FETCH_TOTAL_BUDGET_MS;
+  const maxRetries = options.maxRetries ?? EXTERNAL_VERSE_FETCH_MAX_RETRIES;
+  const retryBackoffMs = options.retryBackoffMs ?? EXTERNAL_VERSE_FETCH_BACKOFF_MS;
+  const startedAt = Date.now();
+  let lastError: unknown = null;
+  let lastResponse: Response | null = null;
+  // Fetch `url` with a per-attempt timeout, retrying thrown errors and retryable statuses within an overall time budget.
+  for (let attempt = 0; attempt <= maxRetries; attempt += 1) {
+    const elapsedMs = Date.now() - startedAt;
+    const remainingBudgetMs = totalBudgetMs - elapsedMs;
+    if (remainingBudgetMs <= 0) {
+      break;
+    }
+    // Each attempt gets its own abort timer, capped by whatever is left of the total budget (minimum 1 ms).
+    const controller = new AbortController();
+    const perAttemptTimeoutMs = Math.max(1, Math.min(timeoutMs, remainingBudgetMs));
+    const timeoutId = setTimeout(() => controller.abort(), perAttemptTimeoutMs);
+
+    try {
+      const response = await fetch(url, {
+        ...init,
+        signal: controller.signal
+      });
+      // OK, non-retryable, and last-attempt responses are returned as-is; anything else is remembered and retried.
+      if (response.ok || !isRetryableStatus(response.status) || attempt === maxRetries) {
+        return response;
+      }
+      // NOTE(review): a response superseded by a retry is dropped without reading or cancelling its body — confirm the runtime releases it.
+      lastResponse = response;
+    } catch (error) {
+      lastError = error;
+    } finally {
+      clearTimeout(timeoutId);
+    }
+
+    if (attempt === maxRetries) {
+      break;
+    }
+    // Exponential backoff (retryBackoffMs * 2^attempt), trimmed to the remaining budget.
+    const remainingAfterAttemptMs = totalBudgetMs - (Date.now() - startedAt);
+    const backoffMs = Math.min(retryBackoffMs * (2 ** attempt), remainingAfterAttemptMs);
+    if (backoffMs <= 0) {
+      break;
+    }
+
+    await sleep(backoffMs);
+  }
+  // Retries or budget exhausted: any recorded error yields null; otherwise the last retryable response (possibly null) is returned.
+  const elapsedMs = Date.now() - startedAt;
+  if (lastError) {
+    console.warn(`[external-fetch] ${options.label} failed after ${elapsedMs}ms; continuing without external result.`, lastError);
+    return null;
+  }
+
+  if (lastResponse && !lastResponse.ok && isRetryableStatus(lastResponse.status)) {
+    console.warn(
+      `[external-fetch] ${options.label} exhausted retry budget with status ${lastResponse.status} after ${elapsedMs}ms; continuing without external result.`
+    );
+  }
+
+  return lastResponse;
+}
+
 // HelloAO gives us books by these long codes or short abbreviations usually matching.
 export async function fetchTranslations() {
   return [
@@ -27,8 +117,14 @@ export async function fetchVerseHelloAO(
   endVerse?: number
 ): Promise<string | null> {
   try {
-    const res = await fetch(`https://bible.helloao.org/api/${translation}/${book}/${chapter}.json`);
-    if (!res.ok) return null;
+    const res = await fetchExternalWithTimeoutBudget(
+      `https://bible.helloao.org/api/${translation}/${book}/${chapter}.json`,
+      {},
+      {
+        label: `helloao:${translation}:${book}:${chapter}`
+      }
+    );
+    if (!res?.ok) return null;
     
     const data = await res.json();
     if (!data?.chapter?.content) return null;
@@ -54,8 +150,14 @@ export async function fetchVerseHelloAO(
 export async function fetchVerseFallback(reference: string, translation: string = 'web'): Promise<string | null> {
   try {
     // bible-api.com expects 'john 3:16'
-    const res = await fetch(`https://bible-api.com/${encodeURIComponent(reference)}?translation=${translation.toLowerCase()}`);
-    if (!res.ok) return null;
+    const res = await fetchExternalWithTimeoutBudget(
+      `https://bible-api.com/${encodeURIComponent(reference)}?translation=${translation.toLowerCase()}`,
+      {},
+      {
+        label: `bible-api:${translation}:${reference}`
+      }
+    );
+    if (!res?.ok) return null;
     const data = await res.json();
     return data.text ? data.text.trim().replace(/\n/g, ' ') : null;
   } catch (error) {
@@ -64,6 +166,26 @@ export async function fetchVerseFallback(reference: string, translation: string
   }
 }
 
+type FetchVerseTextWithFallbackInput = { // arguments for fetchVerseTextWithFallback
+  translation: string; // passed to both fetchVerseHelloAO and fetchVerseFallback
+  reference: string; // passed to fetchVerseFallback (bible-api.com expects e.g. 'john 3:16')
+  book: string; // book/chapter/startVerse/endVerse are passed to fetchVerseHelloAO
+  chapter: number;
+  startVerse: number;
+  endVerse?: number;
+};
+
+export async function fetchVerseTextWithFallback(
+  input: FetchVerseTextWithFallbackInput
+): Promise<string | null> {
+  // HelloAO is the primary source; bible-api.com is consulted only when HelloAO
+  // yields nothing usable (null or an empty string).
+  const primaryText = await fetchVerseHelloAO(
+    input.translation, input.book, input.chapter, input.startVerse, input.endVerse
+  );
+  return primaryText ? primaryText : fetchVerseFallback(input.reference, input.translation);
+}
+
 export async function fetchStrongsDefinition(strongs: string): Promise<Record<string, unknown> | null> {
   try {
     const res = await fetch(`https://bolls.life/dictionary-definition/BDBT/${strongs}/`);
diff --git a/lib/cache.ts b/lib/cache.ts
index 16a8de7..861b0cd 100644
--- a/lib/cache.ts
+++ b/lib/cache.ts
@@ -2,7 +2,27 @@ import crypto from 'crypto';
 import type { VerseContext } from './bible-fetch';
 import { redis } from './redis';
 
-const CACHE_TTL_SECONDS = 259200; // 72 hours
+const DEFAULT_RESPONSE_CACHE_TTL_SECONDS = 259200; // 72 hours; fallback for RESPONSE_CACHE_TTL
+const DEFAULT_RETRIEVAL_CACHE_TTL_SECONDS = 3600; // 1 hour; fallback for RETRIEVAL_CACHE_TTL
+const DEFAULT_EMBEDDING_CACHE_TTL_SECONDS = 86400; // 24 hours; fallback for EMBEDDING_CACHE_TTL
+
+function parseCacheTtl(envValue: string | undefined, fallbackSeconds: number): number {
+  const ttl = /^\s*\+?\d+\s*$/.test(envValue ?? '') ? Number(envValue) : 0; // whole seconds only: bare parseInt would read "72h" as 72 seconds
+  return Number.isSafeInteger(ttl) && ttl > 0 ? ttl : fallbackSeconds; // unset, malformed, zero, or oversized values use the fallback
+}
+
+export const RESPONSE_CACHE_TTL_SECONDS = parseCacheTtl( // expiry applied by setCachedResponse
+  process.env.RESPONSE_CACHE_TTL,
+  DEFAULT_RESPONSE_CACHE_TTL_SECONDS
+);
+export const RETRIEVAL_CACHE_TTL_SECONDS = parseCacheTtl( // read once from RETRIEVAL_CACHE_TTL at module load
+  process.env.RETRIEVAL_CACHE_TTL,
+  DEFAULT_RETRIEVAL_CACHE_TTL_SECONDS
+);
+export const EMBEDDING_CACHE_TTL_SECONDS = parseCacheTtl( // read once from EMBEDDING_CACHE_TTL at module load
+  process.env.EMBEDDING_CACHE_TTL,
+  DEFAULT_EMBEDDING_CACHE_TTL_SECONDS
+);
 
 export type CachedChatResponse = {
   verses: VerseContext[];
@@ -18,12 +38,36 @@ type CacheKeyInput = {
   model: string;
 };
 
+type EmbeddingCacheKeyInput = { // both fields are hashed into the embedding cache key
+  normalizedQuery: string;
+  model: string;
+};
+
+type RetrievalContextCacheKeyInput = { // fields that make up the retrieval-context cache key
+  query: string; // trimmed and lower-cased when the key is built
+  translation: string;
+  version: string;
+};
+
 function buildCacheKey({ query, translation, model }: CacheKeyInput): string {
   const input = `${query}\u0000${translation}\u0000${model}`;
   return crypto.createHash('sha256').update(input).digest('hex');
 }
 
-export { buildCacheKey };
+function buildEmbeddingCacheKey({ normalizedQuery, model }: EmbeddingCacheKeyInput): string {
+  const digest = crypto.createHash('sha256').update(`${normalizedQuery}\u0000${model}`).digest('hex');
+  return `embedding:${digest}`; // SHA-256 of the NUL-separated query and model, under the "embedding:" prefix
+}
+
+// Builds the key "context:<version>:<translation>:<query>", with the query trimmed and lower-cased.
+function buildRetrievalContextCacheKey(input: RetrievalContextCacheKeyInput): string {
+  const { query, translation, version } = input;
+  const normalizedQuery = query.trim().toLowerCase();
+  const keyPrefix = `context:${version}:${translation}`;
+  return `${keyPrefix}:${normalizedQuery}`;
+}
+
+export { buildCacheKey, buildEmbeddingCacheKey, buildRetrievalContextCacheKey };
 
 export async function getCachedResponse(input: CacheKeyInput): Promise<CachedChatResponse | null> {
   if (!redis) {
@@ -56,8 +100,84 @@ export async function setCachedResponse(
   const cacheKey = buildCacheKey(input);
 
   try {
-    await redis.set(cacheKey, JSON.stringify(value), { ex: CACHE_TTL_SECONDS });
+    await redis.set(cacheKey, JSON.stringify(value), { ex: RESPONSE_CACHE_TTL_SECONDS });
   } catch (error) {
     console.warn('[cache] Redis set failed; continuing without cache.', error);
   }
 }
+
+/**
+ * Looks up the cached verse list for a retrieval query.
+ * @param input - Query, translation and cache version that identify the entry.
+ * @returns The cached verses, or `null` when Redis is not configured, nothing is
+ *   stored, the stored value is not an array, or the lookup fails.
+ */
+export async function getCachedRetrievalContext(
+  input: RetrievalContextCacheKeyInput
+): Promise<VerseContext[] | null> {
+  if (!redis) return null;
+  const cacheKey = buildRetrievalContextCacheKey(input);
+  try {
+    const cached = await redis.get<unknown>(cacheKey);
+    // Accept either a raw JSON string or a value the client already deserialized.
+    const parsed: unknown = typeof cached === 'string' && cached ? JSON.parse(cached) : cached;
+    return Array.isArray(parsed) ? (parsed as VerseContext[]) : null; // a non-array entry counts as a miss
+  } catch (error) {
+    console.warn('[cache] Retrieval context get failed; continuing without retrieval cache.', error);
+    return null;
+  }
+}
+
+/**
+ * Stores the verse list for a retrieval query as JSON with the retrieval TTL.
+ * Does nothing when Redis is not configured; a failed write is logged and ignored.
+ */
+export async function setCachedRetrievalContext(
+  input: RetrievalContextCacheKeyInput,
+  value: VerseContext[]
+): Promise<void> {
+  if (!redis) return;
+  const cacheKey = buildRetrievalContextCacheKey(input);
+  try {
+    await redis.set(cacheKey, JSON.stringify(value), { ex: RETRIEVAL_CACHE_TTL_SECONDS });
+  } catch (cause) {
+    console.warn('[cache] Retrieval context set failed; continuing without retrieval cache.', cause);
+  }
+}
+
+/**
+ * Looks up a cached embedding vector for a normalized query and model.
+ * @returns The vector, or `null` when Redis is not configured, nothing is stored,
+ *   the stored value is not an array of finite numbers, or the lookup fails.
+ */
+export async function getCachedEmbedding(input: EmbeddingCacheKeyInput): Promise<number[] | null> {
+  if (!redis) return null;
+  const cacheKey = buildEmbeddingCacheKey(input);
+  try {
+    const cached = await redis.get<unknown>(cacheKey);
+    // Accept either a raw JSON string or a value the client already deserialized.
+    const parsed: unknown = typeof cached === 'string' && cached ? JSON.parse(cached) : cached;
+    const isVector = Array.isArray(parsed) && parsed.every((item: unknown) => typeof item === 'number' && Number.isFinite(item));
+    return isVector ? (parsed as number[]) : null; // a malformed entry counts as a miss
+  } catch (error) {
+    console.warn('[cache] Embedding get failed; continuing without embedding cache.', error);
+    return null;
+  }
+}
+
+/**
+ * Stores an embedding vector as JSON with the embedding TTL.
+ * Does nothing when Redis is not configured; a failed write is logged and ignored.
+ */
+export async function setCachedEmbedding(
+  input: EmbeddingCacheKeyInput,
+  value: number[]
+): Promise<void> {
+  if (!redis) return;
+  const cacheKey = buildEmbeddingCacheKey(input);
+  try {
+    await redis.set(cacheKey, JSON.stringify(value), { ex: EMBEDDING_CACHE_TTL_SECONDS });
+  } catch (cause) {
+    console.warn('[cache] Embedding set failed; continuing without embedding cache.', cause);
+  }
+}
diff --git a/lib/feature-flags.ts b/lib/feature-flags.ts
new file mode 100644
index 0000000..6c22da7
--- /dev/null
+++ b/lib/feature-flags.ts
@@ -0,0 +1,13 @@
+/** Reads a boolean flag: an unset variable yields `defaultEnabled`; otherwise only the exact value '1' is on. */
+function isEnabled(value: string | undefined, defaultEnabled = false): boolean {
+  const isSet = value !== undefined;
+  const isOn = value === '1';
+  return isSet ? isOn : defaultEnabled;
+}
+
+export const ENABLE_SEMANTIC_RERANKER = isEnabled(process.env.ENABLE_SEMANTIC_RERANKER); // off unless the variable is exactly '1'
+export const ENABLE_TSK_EXPANSION_GATING = process.env.ENABLE_TSK_EXPANSION_GATING !== '0'; // on unless the variable is exactly '0'
+export const ENABLE_RETRIEVAL_DEBUG = // on when any of the three variables below is exactly '1'
+  isEnabled(process.env.ENABLE_RETRIEVAL_DEBUG) ||
+  isEnabled(process.env.RETRIEVAL_DEBUG) ||
+  isEnabled(process.env.DEBUG_LLM);
diff --git a/lib/llm-fallback.ts b/lib/llm-fallback.ts
index 70bb7f3..8ecabae 100644
--- a/lib/llm-fallback.ts
+++ b/lib/llm-fallback.ts
@@ -9,6 +9,7 @@ type FallbackOptions = {
   apiKey?: string;
   fallbackContent?: string;
   onChunk?: (chunk: string) => void | Promise<void>;
+  onTiming?: (durationMs: number) => void;
 };
 
 export type FallbackResult =
@@ -27,6 +28,10 @@ const HF_MODEL = 'meta-llama/Meta-Llama-3.1-8B-Instruct';
 const DEFAULT_MAX_TOKENS = 2048;
 const DEFAULT_TEMPERATURE = 0.1;
 
+function roundDurationMs(durationMs: number): number {
+  return Number.parseFloat(durationMs.toFixed(2)); // keep at most two decimal places
+}
+
 function logModelFailure(model: string, error: unknown) {
   const message = String((error as { message?: string })?.message || error || '');
   const label = /429|rate limit/i.test(message)
@@ -299,131 +304,137 @@ export async function generateWithFallback(
   prompt: string,
   options: FallbackOptions
 ): Promise<FallbackResult> {
-  const maxTokens = options.maxTokens ?? DEFAULT_MAX_TOKENS;
-  const temperature = options.temperature ?? DEFAULT_TEMPERATURE;
-  const groqApiKey = options.apiKey || process.env.GROQ_API_KEY;
-
-  const geminiKey = process.env.GEMINI_API_KEY;
-  if (geminiKey) {
-    const candidates = Array.from(new Set(GEMINI_MODEL_CANDIDATES.filter(Boolean)));
-    for (const modelName of candidates) {
+  const startedAt = performance.now();
+
+  try {
+    const maxTokens = options.maxTokens ?? DEFAULT_MAX_TOKENS;
+    const temperature = options.temperature ?? DEFAULT_TEMPERATURE;
+    const groqApiKey = options.apiKey || process.env.GROQ_API_KEY;
+
+    const geminiKey = process.env.GEMINI_API_KEY;
+    if (geminiKey) {
+      const candidates = Array.from(new Set(GEMINI_MODEL_CANDIDATES.filter(Boolean)));
+      for (const modelName of candidates) {
+        try {
+          const result = await streamGeminiContent(
+            modelName,
+            prompt,
+            geminiKey,
+            temperature,
+            maxTokens,
+            options.onChunk
+          );
+          const text = result.text;
+          if (text) {
+            console.log('Primary LLM: Gemini');
+            console.log(`[llm-fallback] Using primary provider: gemini:${modelName}`);
+            return { type: 'content', content: text, modelUsed: `gemini:${modelName}`, chunks: result.chunks };
+          }
+          throw new Error('Gemini returned empty output');
+        } catch (error) {
+          logModelFailure(`gemini:${modelName}`, error);
+          if (isModelNotFoundError(error)) {
+            console.warn(`[llm-fallback] Gemini model alias unavailable, trying next candidate: ${modelName}`);
+          }
+        }
+      }
+    } else {
+      console.warn('[llm-fallback] GEMINI_API_KEY missing; skipping Gemini primary provider.');
+    }
+
+    const openRouterKey = process.env.OPENROUTER_API_KEY;
+    if (openRouterKey) {
       try {
-        const result = await streamGeminiContent(
-          modelName,
+        console.log('Fallback to OpenRouter');
+        const result = await streamOpenRouterContent(
           prompt,
-          geminiKey,
+          openRouterKey,
           temperature,
           maxTokens,
           options.onChunk
         );
         const text = result.text;
         if (text) {
-          console.log('Primary LLM: Gemini');
-          console.log(`[llm-fallback] Using primary provider: gemini:${modelName}`);
-          return { type: 'content', content: text, modelUsed: `gemini:${modelName}`, chunks: result.chunks };
+          console.log(`[llm-fallback] Using fallback provider: openrouter:${OPENROUTER_MODEL}`);
+          return { type: 'content', content: text, modelUsed: `openrouter:${OPENROUTER_MODEL}`, chunks: result.chunks };
         }
-        throw new Error('Gemini returned empty output');
+        throw new Error('OpenRouter returned empty output');
       } catch (error) {
-        logModelFailure(`gemini:${modelName}`, error);
-        if (isModelNotFoundError(error)) {
-          console.warn(`[llm-fallback] Gemini model alias unavailable, trying next candidate: ${modelName}`);
-        }
+        logModelFailure(`openrouter:${OPENROUTER_MODEL}`, error);
       }
+    } else {
+      console.warn('[llm-fallback] OPENROUTER_API_KEY missing; skipping OpenRouter fallback.');
     }
-  } else {
-    console.warn('[llm-fallback] GEMINI_API_KEY missing; skipping Gemini primary provider.');
-  }
 
-  const openRouterKey = process.env.OPENROUTER_API_KEY;
-  if (openRouterKey) {
-    try {
-      console.log('Fallback to OpenRouter');
-      const result = await streamOpenRouterContent(
-        prompt,
-        openRouterKey,
-        temperature,
-        maxTokens,
-        options.onChunk
-      );
-      const text = result.text;
-      if (text) {
-        console.log(`[llm-fallback] Using fallback provider: openrouter:${OPENROUTER_MODEL}`);
-        return { type: 'content', content: text, modelUsed: `openrouter:${OPENROUTER_MODEL}`, chunks: result.chunks };
+    if (groqApiKey) {
+      const groq = createGroq({ apiKey: groqApiKey });
+      const groqModels = [GROQ_PRIMARY_MODEL, GROQ_SECONDARY_MODEL];
+
+      for (const modelName of groqModels) {
+        try {
+          const result = await generateText({
+            model: groq(modelName) as any,
+            prompt,
+            temperature,
+            maxOutputTokens: maxTokens,
+          });
+          const text = result.text?.trim();
+          if (text) {
+            console.log(`[llm-fallback] Using fallback provider: groq:${modelName}`);
+            return { type: 'content', content: text, modelUsed: `groq:${modelName}` };
+          }
+          throw new Error('Groq returned empty output');
+        } catch (error) {
+          logModelFailure(`groq:${modelName}`, error);
+        }
       }
-      throw new Error('OpenRouter returned empty output');
-    } catch (error) {
-      logModelFailure(`openrouter:${OPENROUTER_MODEL}`, error);
+    } else {
+      console.warn('[llm-fallback] GROQ_API_KEY missing; skipping Groq fallback.');
     }
-  } else {
-    console.warn('[llm-fallback] OPENROUTER_API_KEY missing; skipping OpenRouter fallback.');
-  }
-
-  if (groqApiKey) {
-    const groq = createGroq({ apiKey: groqApiKey });
-    const groqModels = [GROQ_PRIMARY_MODEL, GROQ_SECONDARY_MODEL];
 
-    for (const modelName of groqModels) {
+    const hfToken = process.env.HF_TOKEN;
+    if (hfToken) {
       try {
-        const result = await generateText({
-          model: groq(modelName) as any,
-          prompt,
-          temperature,
-          maxOutputTokens: maxTokens,
+        const hf = new InferenceClient(hfToken);
+        const hfResult = await hf.textGeneration({
+          model: HF_MODEL,
+          provider: 'hf-inference',
+          inputs: prompt,
+          parameters: {
+            max_new_tokens: maxTokens,
+            temperature,
+            return_full_text: false,
+          },
+          options: { wait_for_model: true },
         });
-        const text = result.text?.trim();
-        if (text) {
-          console.log(`[llm-fallback] Using fallback provider: groq:${modelName}`);
-          return { type: 'content', content: text, modelUsed: `groq:${modelName}` };
+
+        const hfText =
+          typeof hfResult === 'string'
+            ? hfResult
+            : Array.isArray(hfResult)
+              ? hfResult[0]?.generated_text
+              : hfResult.generated_text;
+
+        if (hfText && hfText.trim()) {
+          console.log(`[llm-fallback] Using fallback provider: hf:${HF_MODEL}`);
+          return { type: 'content', content: hfText.trim(), modelUsed: `hf:${HF_MODEL}` };
         }
-        throw new Error('Groq returned empty output');
+        throw new Error('HF inference returned empty output');
       } catch (error) {
-        logModelFailure(`groq:${modelName}`, error);
+        logModelFailure(`hf:${HF_MODEL}`, error);
       }
     }
-  } else {
-    console.warn('[llm-fallback] GROQ_API_KEY missing; skipping Groq fallback.');
-  }
 
-  const hfToken = process.env.HF_TOKEN;
-  if (hfToken) {
-    try {
-      const hf = new InferenceClient(hfToken);
-      const hfResult = await hf.textGeneration({
-        model: HF_MODEL,
-        provider: 'hf-inference',
-        inputs: prompt,
-        parameters: {
-          max_new_tokens: maxTokens,
-          temperature,
-          return_full_text: false,
-        },
-        options: { wait_for_model: true },
-      });
-
-      const hfText =
-        typeof hfResult === 'string'
-          ? hfResult
-          : Array.isArray(hfResult)
-            ? hfResult[0]?.generated_text
-            : hfResult.generated_text;
-
-      if (hfText && hfText.trim()) {
-        console.log(`[llm-fallback] Using fallback provider: hf:${HF_MODEL}`);
-        return { type: 'content', content: hfText.trim(), modelUsed: `hf:${HF_MODEL}` };
-      }
-      throw new Error('HF inference returned empty output');
-    } catch (error) {
-      logModelFailure(`hf:${HF_MODEL}`, error);
-    }
-  }
+    const fallbackContent =
+      options.fallbackContent ?? buildContextOnlyContent(prompt);
 
-  const fallbackContent =
-    options.fallbackContent ?? buildContextOnlyContent(prompt);
-
-  return {
-    type: 'content',
-    content: fallbackContent,
-    modelUsed: 'context-only',
-    finalFallback: true,
-  };
+    return {
+      type: 'content',
+      content: fallbackContent,
+      modelUsed: 'context-only',
+      finalFallback: true,
+    };
+  } finally {
+    options.onTiming?.(roundDurationMs(performance.now() - startedAt));
+  }
 }
diff --git a/lib/prompts.ts b/lib/prompts.ts
index 3bade70..728a337 100644
--- a/lib/prompts.ts
+++ b/lib/prompts.ts
@@ -3,6 +3,81 @@
 import { VerseContext } from './bible-fetch';   // adjust path if needed
 import { decodeMorph } from './morph-utils';
 
+const DEFAULT_CONTEXT_TOKEN_BUDGET = 1500; // used when PROMPT_CONTEXT_TOKEN_BUDGET is unset, non-numeric or 0
+const PROMPT_CONTEXT_TOKEN_BUDGET = Math.max(
+  1200, // lower bound: a smaller configured budget is raised to this value
+  Number.parseInt(process.env.PROMPT_CONTEXT_TOKEN_BUDGET || '', 10) || DEFAULT_CONTEXT_TOKEN_BUDGET
+);
+
+const BOOK_CODE_TO_NAME: Record<string, string> = { // upper-case book code -> full book name, read by expandCitationReference
+  GEN: 'Genesis',
+  EXO: 'Exodus',
+  LEV: 'Leviticus',
+  NUM: 'Numbers',
+  DEU: 'Deuteronomy',
+  JOS: 'Joshua',
+  JDG: 'Judges',
+  RUT: 'Ruth',
+  '1SA': '1 Samuel',
+  '2SA': '2 Samuel',
+  '1KI': '1 Kings',
+  '2KI': '2 Kings',
+  '1CH': '1 Chronicles',
+  '2CH': '2 Chronicles',
+  EZR: 'Ezra',
+  NEH: 'Nehemiah',
+  EST: 'Esther',
+  JOB: 'Job',
+  PSA: 'Psalms',
+  PRO: 'Proverbs',
+  ECC: 'Ecclesiastes',
+  SNG: 'Song of Songs',
+  ISA: 'Isaiah',
+  JER: 'Jeremiah',
+  LAM: 'Lamentations',
+  EZK: 'Ezekiel',
+  DAN: 'Daniel',
+  HOS: 'Hosea',
+  JOL: 'Joel',
+  AMO: 'Amos',
+  OBA: 'Obadiah',
+  JON: 'Jonah',
+  MIC: 'Micah',
+  NAM: 'Nahum',
+  HAB: 'Habakkuk',
+  ZEP: 'Zephaniah',
+  HAG: 'Haggai',
+  ZEC: 'Zechariah',
+  MAL: 'Malachi',
+  MAT: 'Matthew',
+  MRK: 'Mark',
+  LUK: 'Luke',
+  JHN: 'John',
+  ACT: 'Acts',
+  ROM: 'Romans',
+  '1CO': '1 Corinthians',
+  '2CO': '2 Corinthians',
+  GAL: 'Galatians',
+  EPH: 'Ephesians',
+  PHP: 'Philippians',
+  COL: 'Colossians',
+  '1TH': '1 Thessalonians',
+  '2TH': '2 Thessalonians',
+  '1TI': '1 Timothy',
+  '2TI': '2 Timothy',
+  TIT: 'Titus',
+  PHM: 'Philemon',
+  HEB: 'Hebrews',
+  JAS: 'James',
+  '1PE': '1 Peter',
+  '2PE': '2 Peter',
+  '1JN': '1 John',
+  '2JN': '2 John',
+  '3JN': '3 John',
+  JUD: 'Jude',
+  REV: 'Revelation',
+};
+
 export const SYSTEM_PROMPT = `You are a precise Bible reference librarian. Your task is to report what the biblical text actually says — without modern reinterpretation, without denominational bias, and without unnecessary softening.
 
 Core rules — you MUST obey all of them:
@@ -68,6 +143,101 @@ Stay extremely close to what the verses actually say. Use low temperature. Be di
 ## Supporting Cross-References (TSK)
 These verses are historically linked to the primary passages. Use them ONLY to clarify the meanings of words or themes in the primary context. Do not let them distract from the primary query.`;
 
+/** Swaps a leading book code for its full name in "<code> <chapter>:<verse[-verse]>"; other input is only trimmed. */
+export function expandCitationReference(reference: string): string {
+  const trimmed = reference.trim();
+  const parsed = /^([1-3]?[A-Z]{2,3})\s+(\d+:\d+(?:[-–]\d+)?)$/i.exec(trimmed);
+  if (parsed === null) {
+    return trimmed;
+  }
+  const code = parsed[1].toUpperCase(); // a code missing from BOOK_CODE_TO_NAME is kept, upper-cased
+  return `${BOOK_CODE_TO_NAME[code] || code} ${parsed[2]}`;
+}
+
+// Expanded references of the given verses, in first-seen order, without duplicates or empty strings.
+export function buildCitationWhitelist(verses: VerseContext[]): string[] {
+  const unique = new Set<string>();
+  for (const verse of verses) {
+    const citation = expandCitationReference(verse.reference);
+    if (citation) unique.add(citation);
+  }
+  return Array.from(unique);
+}
+
+/**
+ * Estimates tokens as one per three characters of whitespace-collapsed text.
+ * The divisor is deliberately conservative so the prompt context stays under budget.
+ */
+function estimateTokenCount(value: string): number {
+  const collapsed = value.replace(/\s+/g, ' ').trim();
+  return collapsed === '' ? 0 : Math.ceil(collapsed.length / 3);
+}
+
+/**
+ * Renders one verse as the plain-text block embedded in the prompt: reference,
+ * verse text, up to six original-language entries, and any OpenHebrewBible /
+ * OpenGNT layers, terminated by a blank line.
+ */
+function renderVerseContext(
+  verse: VerseContext,
+  translation: string
+): string {
+  const ignoredGlosses = ['and', 'the', 'of', 'to'];
+  const lines: string[] = [
+    `Reference: ${verse.reference}`,
+    `Text (${verse.translation || translation}): ${verse.text}`,
+  ];
+
+  if (verse.original && verse.original.length > 0) {
+    lines.push(`Original language data (use these words in plain markdown, no XML tags):`);
+    const meaningful = verse.original
+      .filter((item) => item.gloss && item.gloss.length > 2 && !ignoredGlosses.includes(item.gloss.toLowerCase()))
+      .slice(0, 6);
+    for (const item of meaningful) {
+      const { transliteration, morph } = item as { transliteration?: string; morph?: string };
+      const details: string[] = transliteration ? [transliteration] : [];
+      details.push(`Strong's ${item.strongs} - ${item.gloss || ''}`);
+      if (morph) {
+        const decoded = decodeMorph(morph);
+        details.push(`Morph: ${decoded ? `${decoded.code} (${decoded.description})` : morph}`);
+      }
+      lines.push(`- ${item.word} (${details.join(', ')})`);
+    }
+  } else {
+    lines.push(`No original-language tagging available for this verse.`);
+  }
+
+  if (verse.openHebrew) lines.push(`OpenHebrewBible layers: ${verse.openHebrew}`);
+  if (verse.openGnt) lines.push(`OpenGNT layers: ${verse.openGnt}`);
+
+  return `${lines.join('\n')}\n\n`;
+}
+
+/**
+ * Keeps verses, in order, until the next one would exceed `tokenBudget`.
+ * `options.forceIncludeFirst` defaults to true (the previous hard-coded behaviour):
+ * the first verse is then kept even when it alone is over budget.
+ */
+function applyContextBudget(
+  verses: VerseContext[],
+  translation: string,
+  tokenBudget: number,
+  options?: { forceIncludeFirst: boolean }
+): { included: VerseContext[]; omittedCount: number; usedTokens: number } {
+  const forceIncludeFirst = options?.forceIncludeFirst ?? true;
+  const included: VerseContext[] = [];
+  let usedTokens = 0;
+  for (const verse of verses) {
+    const verseTokens = estimateTokenCount(renderVerseContext(verse, translation));
+    const isForcedFirst = forceIncludeFirst && included.length === 0;
+    // Stop at the first verse that does not fit; later verses are not tried.
+    if (!isForcedFirst && usedTokens + verseTokens > tokenBudget) break;
+    included.push(verse);
+    usedTokens += verseTokens;
+  }
+  return { included, omittedCount: Math.max(verses.length - included.length, 0), usedTokens };
+}
+
 export function buildContextPrompt(
   query: string,
   verses: VerseContext[],
@@ -78,82 +248,83 @@ export function buildContextPrompt(
   );
 
   if (!verses || verses.length === 0) {
-    return `User query: ${query}
+    return `SYSTEM INSTRUCTION
+${SYSTEM_PROMPT}
+
+QUERY
+${query}
 
-Translation requested: ${translation}
+RETRIEVED SCRIPTURE CONTEXT
+No verses were retrieved.
 
-Context: No verses were retrieved.
+ALLOWED CITATIONS
+None
+
+RESPONSE FORMAT
 Respond: "No supporting passages found in the authoritative sources."
 Do not speculate or add external information.`;
   }
 
   const primaryVerses = verses.filter(v => !v.isCrossReference);
-  const supportingVerses = verses.filter(v => v.isCrossReference);
-
-  let contextStr = `Primary Biblical Context:\n\n`;
-
-  const renderVerse = (v: VerseContext) => {
-    let s = `Reference: ${v.reference}\n`;
-    s += `Text (${v.translation || translation}): ${v.text}\n`;
-
-    if (v.original && v.original.length > 0) {
-      s += `Original language data (use these words in plain markdown, no XML tags):\n`;
-      const meaningful = v.original.filter(
-        (o) => o.gloss && o.gloss.length > 2 && !['and', 'the', 'of', 'to'].includes(o.gloss.toLowerCase())
-      ).slice(0, 6);
-
-      meaningful.forEach((org) => {
-        const transliteration = (org as { transliteration?: string }).transliteration;
-        const morph = (org as { morph?: string }).morph;
-        const parts: string[] = [];
-        if (transliteration) parts.push(transliteration);
-        parts.push(`Strong's ${org.strongs} - ${org.gloss || ''}`);
-        if (morph) {
-          const decoded = decodeMorph(morph);
-          const morphDetail = decoded ? `${decoded.code} (${decoded.description})` : morph;
-          parts.push(`Morph: ${morphDetail}`);
-        }
-        s += `- ${org.word} (${parts.join(', ')})\n`;
-      });
-    } else {
-      s += `No original-language tagging available for this verse.\n`;
-    }
-    if (v.openHebrew) {
-      s += `OpenHebrewBible layers: ${v.openHebrew}\n`;
-    }
-    if (v.openGnt) {
-      s += `OpenGNT layers: ${v.openGnt}\n`;
-    }
-    s += '\n';
-    return s;
-  };
+  const orderedContextVerses = [
+    ...primaryVerses,
+    ...verses.filter(v => v.isCrossReference),
+  ];
+  const budgetedContext = applyContextBudget(orderedContextVerses, translation, PROMPT_CONTEXT_TOKEN_BUDGET, {
+    forceIncludeFirst: true,
+  });
+  const citationWhitelist = buildCitationWhitelist(budgetedContext.included);
+  const includedVerseSet = new Set(budgetedContext.included.map((verse) => verse.reference));
+  const budgetedPrimary = primaryVerses.filter((verse) => includedVerseSet.has(verse.reference));
+  const budgetedSupporting = verses
+    .filter(v => v.isCrossReference)
+    .filter((verse) => includedVerseSet.has(verse.reference));
 
-  primaryVerses.forEach((v) => {
-    contextStr += renderVerse(v);
+  let contextStr = '';
+
+  budgetedPrimary.forEach((v) => {
+    contextStr += renderVerseContext(v, translation);
   });
 
-  if (supportingVerses.length > 0) {
+  if (budgetedSupporting.length > 0) {
     contextStr += `## Supporting Cross-References (TSK)\n\n`;
-    supportingVerses.forEach((v) => {
-      contextStr += renderVerse(v);
+    budgetedSupporting.forEach((v) => {
+      contextStr += renderVerseContext(v, translation);
     });
     contextStr += `Instruction: These verses are historically linked to the primary passages. Use them to clarify the theological meaning or word usage of the primary text. Do not let them distract from the primary query.\n\n`;
   }
 
-  contextStr += `Requested translation: ${translation}\n\n`;
-
-  contextStr += `Reminder:
+  const responseFormat = `Reminder:
 - When verses contain strong prohibitive language (abomination, death penalty, shall not inherit the kingdom, contrary to nature, sin lists, etc.) → include a clear "Textual conclusion" sentence that reflects the strength of the language used in Scripture.
 - Do NOT soften, balance or relativize clear prohibitions.
 - Never add external commentary or modern application.
-- If the COSMOLOGY REMINDER is not present below, do NOT include the cosmology disclaimer sentence.`;
+- If the COSMOLOGY REMINDER is not present below, do NOT include the cosmology disclaimer sentence.
+- Cite only verses from the ALLOWED CITATIONS list.
+- Do not mention any verse reference that is not in the whitelist.`;
 
   if (isCosmologyQuery) {
-    contextStr += `\n\nCOSMOLOGY REMINDER (earth shape, sun movement, geocentrism, firmament, etc.):
+    contextStr += `COSMOLOGY REMINDER (earth shape, sun movement, geocentrism, firmament, etc.):
 - Always include this statement in the summary or as the first line: "The Bible is a theological source of Truth from God; scientific perspectives in its poetic or descriptive language are not to be taken in a literal, modern scientific context."
 - Do not argue for or against modern science (heliocentrism, round earth, etc.) — only report what the verses say and their theological/poetic intent.
-- If no verses directly address the query as a scientific fact, say so plainly without hedging or implying conflict.`;
+- If no verses directly address the query as a scientific fact, say so plainly without hedging or implying conflict.
+
+`;
   }
 
-  return `${SYSTEM_PROMPT}\n\n${contextStr}`;
+  return `SYSTEM INSTRUCTION
+${SYSTEM_PROMPT}
+
+QUERY
+${query}
+
+RETRIEVED SCRIPTURE CONTEXT
+Requested translation: ${translation}
+
+${contextStr.trim()}
+
+ALLOWED CITATIONS
+${citationWhitelist.map((citation) => `- ${citation}`).join('\n')}
+
+RESPONSE FORMAT
+${responseFormat}`;
 }
diff --git a/lib/query-utils.ts b/lib/query-utils.ts
index c7cbb4e..fe7955a 100644
--- a/lib/query-utils.ts
+++ b/lib/query-utils.ts
@@ -1,10 +1,18 @@
 export type QueryDomain = 'messianic' | 'covenants' | 'eschatology' | 'typology' | 'general';
+export type QueryIntent = 'DIRECT_REFERENCE' | 'VERSE_EXPLANATION' | 'TOPICAL_QUERY'; // intent labels produced by classifyIntent
 
 type DomainRule = {
   domain: QueryDomain;
   keywords: string[];
 };
 
+type ExpansionRule = { // one query-expansion rule inside EXPANSION_RULES
+  trigger: string; // keyword that must match the query for the rule to apply
+  additions: string[]; // terms added to the expanded query when the trigger matches
+};
+
+export type NegationHint = 'not' | 'without' | 'except'; // negation words that classifyAndExpand reports back
+
 const DOMAIN_RULES: DomainRule[] = [
   {
     domain: 'messianic',
@@ -24,14 +32,87 @@ const DOMAIN_RULES: DomainRule[] = [
   }
 ];
 
-const EXPANSION_MAP: Record<QueryDomain, string[]> = {
-  messianic: ['anointed one', 'branch', 'servant', 'pierced', 'davidic king'],
-  covenants: ['mosaic law', 'abrahamic promise', 'new covenant', 'blood covenant'],
-  eschatology: ['judgment day', 'second coming', 'resurrection', 'tribulation'],
-  typology: ['shadow', 'fulfillment', 'pattern', 'prefigure'],
+const EXPANSION_RULES: Record<QueryDomain, ExpansionRule[]> = { // per-domain rules; additions apply only when the rule's trigger matches the query
+  messianic: [
+    { trigger: 'messiah', additions: ['anointed one', 'christ'] },
+    { trigger: 'son of david', additions: ['davidic king'] },
+    { trigger: 'suffering servant', additions: ['pierced servant'] },
+  ],
+  covenants: [
+    { trigger: 'covenant', additions: ['promise', 'testament'] },
+    { trigger: 'law', additions: ['commandment', 'statute'] },
+    { trigger: 'new covenant', additions: ['better covenant'] },
+  ],
+  eschatology: [
+    { trigger: 'end times', additions: ['last days', 'day of the lord'] },
+    { trigger: 'tribulation', additions: ['great tribulation'] },
+    { trigger: 'resurrection', additions: ['raising of the dead'] },
+  ],
+  typology: [
+    { trigger: 'typology', additions: ['shadow', 'fulfillment'] },
+    { trigger: 'antitype', additions: ['fulfillment pattern'] },
+    { trigger: 'foreshadow', additions: ['prophetic pattern'] },
+  ],
   general: []
 };
 
+const LOW_VALUE_TOKENS = new Set([ // tokens that cleanupLowValueTokens removes from the query
+  'what',
+  'does',
+  'the',
+  'say',
+  'about',
+  'explain',
+  'meaning',
+  'bible',
+  'please',
+  'show',
+  'tell',
+  'me',
+]);
+
+const PRESERVED_PHRASES = [ // multi-word phrases kept together; matched as lower-case substrings of the query
+  'kingdom of heaven',
+  'kingdom of god',
+  'son of man',
+  'son of god',
+  'holy spirit',
+  'day of the lord',
+  'new covenant',
+  'suffering servant',
+];
+
+const NEGATION_HINTS: NegationHint[] = ['not', 'without', 'except']; // words checked by detectNegationHints
+
+const BOOK_NORMALIZATION_RULES: Array<{ pattern: RegExp; replacement: string }> = [ // applied in this order by normalizeReferenceSpacing
+  { pattern: /\bjn\b/gi, replacement: 'John' },
+  { pattern: /\bjhn\b/gi, replacement: 'John' },
+  { pattern: /\bgen(?=\d|\s)/gi, replacement: 'Genesis ' }, // the lookahead requires a digit or whitespace right after the abbreviation
+  { pattern: /\bge(?=\d|\s)/gi, replacement: 'Genesis ' },
+  { pattern: /\bex(?=\d|\s)/gi, replacement: 'Exodus ' },
+  { pattern: /\bexo(?=\d|\s)/gi, replacement: 'Exodus ' },
+  { pattern: /\brom(?=\d|\s)/gi, replacement: 'Romans ' },
+  { pattern: /\bpsalm(?=\d|\s)/gi, replacement: 'Psalms ' },
+  { pattern: /\bps(?=\d|\s)/gi, replacement: 'Psalms ' },
+  { pattern: /\bpsa(?=\d|\s)/gi, replacement: 'Psalms ' },
+  { pattern: /\b1\s*cor\b/gi, replacement: '1 Corinthians' },
+  { pattern: /\b2\s*cor\b/gi, replacement: '2 Corinthians' },
+  { pattern: /\b1\s*thess\b/gi, replacement: '1 Thessalonians' },
+  { pattern: /\b2\s*thess\b/gi, replacement: '2 Thessalonians' },
+];
+
+const DIRECT_REFERENCE_BOOK_PATTERN = Array.from( // alternation of the distinct replacement names above, lower-cased and regex-escaped
+  new Set(BOOK_NORMALIZATION_RULES.map((rule) => rule.replacement.trim().toLowerCase()))
+) // NOTE(review): only books named in BOOK_NORMALIZATION_RULES are recognised, so "Matthew 5:3" is not; confirm this is intended
+  .map((book) => book.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
+  .join('|');
+
+const DIRECT_REFERENCE_REGEX = new RegExp( // book name, whitespace, chapter number, optional ":verse"; case-insensitive
+  `\\b(?:${DIRECT_REFERENCE_BOOK_PATTERN})\\s+\\d+(?::\\d+)?\\b`,
+  'i'
+);
+const EXPLANATION_CUE_REGEX = /\b(?:mean|means|meaning|explain|explains|understand|context)\b/i; // cue words that make a reference query a VERSE_EXPLANATION
+
 function matchesKeyword(query: string, keyword: string): boolean {
   const normalized = query.toLowerCase();
   const lowerKeyword = keyword.toLowerCase();
@@ -43,20 +124,137 @@ function matchesKeyword(query: string, keyword: string): boolean {
   return regex.test(normalized);
 }
 
-export function classifyAndExpand(query: string): { domain: QueryDomain; expandedQuery: string } {
+// Applies BOOK_NORMALIZATION_RULES in order, then tidies spacing around numbers.
+function normalizeReferenceSpacing(query: string): string {
+  const withBookNames = BOOK_NORMALIZATION_RULES.reduce(
+    (text, { pattern, replacement }) => text.replace(pattern, replacement),
+    query
+  );
+  // One space between a leading 1-3 and the letters that follow it.
+  const digitSpaced = withBookNames.replace(/\b([1-3])\s*([A-Za-z]+)/g, '$1 $2');
+  // One space between letters and a following "chapter:verse" or bare number.
+  const verseSpaced = digitSpaced.replace(/\b([A-Za-z]+)\s*(\d+):(\d+)\b/g, '$1 $2:$3');
+  const numberSpaced = verseSpaced.replace(/\b([A-Za-z]+)\s*(\d+)\b/g, '$1 $2');
+  return numberSpaced.replace(/\s+/g, ' ').trim();
+}
+
+function extractPreservedPhrases(query: string): string[] { // returns the PRESERVED_PHRASES that occur in the query
   const normalized = query.toLowerCase();
+  return PRESERVED_PHRASES.filter((phrase) => normalized.includes(phrase)); // substring match on the lower-cased query
+}
+
+function detectNegationHints(query: string): NegationHint[] {
+  const lowered = query.toLowerCase();
+  return NEGATION_HINTS.filter((candidate) => matchesKeyword(lowered, candidate)); // hints that matchesKeyword finds in the query
+}
+
+// Tokenises the query without LOW_VALUE_TOKENS; preserved phrases ride through as placeholders so each stays one token.
+function cleanupLowValueTokens(query: string, preservedPhrases: string[]): string[] {
+  const phraseByPlaceholder = new Map<string, string>();
+  let working = query;
+  preservedPhrases.forEach((phrase, index) => {
+    const placeholder = `phrasetag${index}`;
+    phraseByPlaceholder.set(placeholder, phrase);
+    const phrasePattern = new RegExp(phrase.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'ig');
+    working = working.replace(phrasePattern, placeholder);
+  });
+
+  const kept: string[] = [];
+  for (const rawToken of working.toLowerCase().replace(/[^a-z0-9_:\- ]/g, ' ').split(/\s+/)) {
+    const token = rawToken.trim();
+    if (!token) continue;
+    const restored = phraseByPlaceholder.get(token) || token;
+    if (!LOW_VALUE_TOKENS.has(restored)) kept.push(restored);
+  }
+  return kept;
+}
+
+// Keeps the first occurrence of each non-empty part, comparing case-insensitively.
+function dedupeParts(parts: string[]): string[] {
+  const firstByKey = new Map<string, string>();
+  for (const part of parts) {
+    const key = part.toLowerCase();
+    if (part && !firstByKey.has(key)) {
+      firstByKey.set(key, part);
+    }
+  }
+  return Array.from(firstByKey.values());
+}
+
+function isLikelyDirectReference(query: string): boolean {
+  return DIRECT_REFERENCE_REGEX.exec(query) !== null; // true when DIRECT_REFERENCE_REGEX matches anywhere in the query
+}
+
+/**
+ * Intent from the normalised query: no recognised reference means topical; a
+ * reference plus an explanation cue word means verse explanation; otherwise direct.
+ */
+function classifyIntent(normalizedQuery: string): QueryIntent {
+  if (!isLikelyDirectReference(normalizedQuery)) {
+    return 'TOPICAL_QUERY';
+  }
+  return EXPLANATION_CUE_REGEX.test(normalizedQuery) ? 'VERSE_EXPLANATION' : 'DIRECT_REFERENCE';
+}
+
+export function classifyAndExpand(query: string): { // normalises the query, classifies domain and intent, and builds the expanded query
+  domain: QueryDomain;
+  intent: QueryIntent;
+  normalizedQuery: string;
+  expandedQuery: string;
+  negationHints: NegationHint[];
+} {
+  const normalizedQuery = normalizeReferenceSpacing(query);
+  const loweredQuery = normalizedQuery.toLowerCase();
+  const intent = classifyIntent(normalizedQuery);
   let domain: QueryDomain = 'general';
 
   for (const rule of DOMAIN_RULES) {
-    if (rule.keywords.some((keyword) => matchesKeyword(normalized, keyword))) {
+    if (rule.keywords.some((keyword) => matchesKeyword(loweredQuery, keyword))) {
       domain = rule.domain;
       break;
     }
   }
 
-  const expansions = EXPANSION_MAP[domain] ?? [];
-  const additions = expansions.filter((term) => !normalized.includes(term.toLowerCase()));
-  const expandedQuery = additions.length > 0 ? `${query} ${additions.join(' ')}`.trim() : query;
+  const preservedPhrases = extractPreservedPhrases(loweredQuery);
+  const negationHints = detectNegationHints(loweredQuery);
+  const cleanedTokens = cleanupLowValueTokens(normalizedQuery, preservedPhrases);
+  const shouldBypassExpansion = intent === 'DIRECT_REFERENCE'; // direct references are returned without cleanup or expansion
+
+  const expansions =
+    shouldBypassExpansion
+      ? []
+      : (EXPANSION_RULES[domain] ?? [])
+          .filter((rule) => matchesKeyword(loweredQuery, rule.trigger))
+          .flatMap((rule) => rule.additions)
+          .filter((term) => !loweredQuery.includes(term.toLowerCase())); // skip additions already present in the query
+
+  if (shouldBypassExpansion) {
+    return {
+      domain,
+      intent,
+      normalizedQuery,
+      expandedQuery: normalizedQuery,
+      negationHints,
+    };
+  }
+
+  const cleanedQuery = dedupeParts( // cleaned tokens minus preserved phrases and negation words, de-duplicated
+    cleanedTokens.filter((token) => !preservedPhrases.includes(token) && !negationHints.includes(token as NegationHint))
+  ).join(' ');
+  const quotedPhrases = preservedPhrases.map((phrase) => `"${phrase}"`);
+  const baseQuery = cleanedQuery || quotedPhrases.join(' ') || normalizedQuery; // falls back to the quoted phrases, then the normalised query
+
+  const expandedParts = dedupeParts([
+    baseQuery,
+    ...quotedPhrases,
+    ...expansions,
+  ]);
 
-  return { domain, expandedQuery };
+  return {
+    domain,
+    intent,
+    normalizedQuery,
+    expandedQuery: expandedParts.join(' ').trim(),
+    negationHints,
+  };
 }
diff --git a/lib/retrieval.ts b/lib/retrieval.ts
index 2b27ad7..61b2aa1 100644
--- a/lib/retrieval.ts
+++ b/lib/retrieval.ts
@@ -1,18 +1,35 @@
 import { generateText } from 'ai';
 import { createGroq } from '@ai-sdk/groq';
+import { InferenceClient } from '@huggingface/inference';
 import Fuse from 'fuse.js';
 import { Pool } from 'pg';
-import { fetchVerseHelloAO, fetchVerseFallback, fetchStrongsDefinition, VerseContext } from './bible-fetch';
+import {
+  fetchExternalWithTimeoutBudget,
+  fetchVerseTextWithFallback,
+  fetchStrongsDefinition,
+  VerseContext
+} from './bible-fetch';
 import { ensureDbReady, getDbPool } from './db';
 import bibleIndexData from '../data/bible-index.json';
 import strongsDictData from '../data/strongs-dict.json';
 
+import {
+  getCachedEmbedding,
+  getCachedRetrievalContext,
+  setCachedEmbedding,
+  setCachedRetrievalContext,
+} from './cache';
 import { redis } from './redis';
 import { getMorphhbWords } from './morphhb';
 import { getOpenHebrewBibleLayers, OpenHebrewVerseLayers } from './openhebrewbible';
 import { getOpenGNTLayers, OpenGntVerseLayers } from './opengnt';
 import { classifyAndExpand, type QueryDomain } from './query-utils';
 import { getTranslationVerse } from './translations';
+import {
+  ENABLE_RETRIEVAL_DEBUG,
+  ENABLE_SEMANTIC_RERANKER,
+  ENABLE_TSK_EXPANSION_GATING,
+} from './feature-flags';
 
 const BIBLE_INDEX = bibleIndexData as Record<string, VerseContext>;
 const STRONGS_DICT = strongsDictData as Record<string, Record<string, string>>;
@@ -20,7 +37,6 @@ const STRONGS_DICT = strongsDictData as Record<string, Record<string, string>>;
 const HF_EMBEDDING_MODEL = process.env.HF_EMBEDDING_MODEL || 'intfloat/multilingual-e5-small';
 const HF_ENDPOINT = `https://router.huggingface.co/hf-inference/models/${HF_EMBEDDING_MODEL}/pipeline/feature-extraction`;
 const VECTOR_LIMIT = 6;
-const CACHE_TTL_SECONDS = 3600; // 1 hour persistent cache
 const CONTEXT_CACHE_VERSION = 'v2';
 const OT_BOOKS = new Set([
   'GEN', 'EXO', 'LEV', 'NUM', 'DEU', 'JOS', 'JDG', 'RUT', '1SA', '2SA', '1KI', '2KI',
@@ -60,9 +76,38 @@ const FALLBACK_STOPWORDS = new Set([
   'a', 'an', 'about', 'christian', 'christians', 'bible', 'say'
 ]);
 
+const RETRIEVAL_CONFIG = { // tuning constants shared by the lexical, semantic and reranking steps
+  fuse: {
+    lexicalThreshold: 0.32, // passed to Fuse as `threshold`
+    candidateLimit: 18, // `limit` of the lexical Fuse search in hybridSearch
+    minMatchCharLength: 3, // passed to Fuse as `minMatchCharLength`
+  },
+  semantic: {
+    candidateLimit: 18, // `limit` handed to semanticSearch by hybridSearch
+  },
+  finalCandidateWindow: { // bounds applied by clampTopK
+    default: 10, // used when the requested topK is not a finite number
+    min: 8,
+    max: 12,
+  },
+  rrfK: 45, // constant in the reciprocal-rank term 1 / (rrfK + rank)
+  reranker: { // weights of the terms summed in applyDeterministicReranker
+    fusedScoreWeight: 1,
+    directReferenceWeight: 0.15,
+    metadataConfidenceWeight: 0.1,
+    crossReferenceWeight: 0.05, // a value <= 0 makes getCrossReferenceSignalMap skip its lookup
+    metadataConfidenceScale: 0.5, // boost excess (multiplier - 1) that maps to confidence 1
+  },
+  semanticReranker: {
+    candidateWindow: 20, // at most this many leading candidates are re-sorted
+    latencyBudgetMs: 50, // delay before the similarity request is aborted
+  },
+} as const;
+
 let lexicalFuse: Fuse<LexicalDoc> | null = null;
 let verseMetadataCache: Map<string, VerseMetadata> | null = null;
 let verseMetadataPromise: Promise<Map<string, VerseMetadata>> | null = null;
+let semanticRerankerClient: InferenceClient | null = null;
 
 function tokenizeFallbackQuery(query: string): string[] {
   return Array.from(
@@ -130,8 +175,8 @@ function getLexicalFuse(): Fuse<LexicalDoc> {
     lexicalFuse = new Fuse(LEXICAL_DOCS, {
       keys: ['text'],
       includeScore: true,
-      threshold: 0.4,
-      minMatchCharLength: 3,
+      threshold: RETRIEVAL_CONFIG.fuse.lexicalThreshold,
+      minMatchCharLength: RETRIEVAL_CONFIG.fuse.minMatchCharLength,
       ignoreLocation: true
     });
   }
@@ -143,8 +188,8 @@ async function getVerseMetadata(): Promise<Map<string, VerseMetadata>> {
   if (verseMetadataPromise) return verseMetadataPromise;
   verseMetadataPromise = (async () => {
     try {
-      const module = await import('../data/verse-metadata.json');
-      const data = (module.default ?? module) as VerseMetadata[];
+      const metadataModule = await import('../data/verse-metadata.json');
+      const data = (metadataModule.default ?? metadataModule) as VerseMetadata[];
       const map = new Map<string, VerseMetadata>();
       if (Array.isArray(data)) {
         for (const entry of data) {
@@ -384,21 +429,69 @@ const CURATED_TOPICAL_LISTS: Record<string, CuratedTopicalList> = {
 /**
  * Checks if the query matches a curated topical list and returns it if so.
  */
-function applyCuratedTopicalLists(query: string, verses: VerseContext[]): VerseContext[] {
+function applyCuratedTopicalLists(
+  query: string,
+  verses: VerseContext[],
+  debugState?: RetrievalDebugState,
+  source: 'api_fallback' | 'db' = 'api_fallback'
+): VerseContext[] {
   const normalizedQuery = query.toLowerCase();
   for (const [key, list] of Object.entries(CURATED_TOPICAL_LISTS)) {
     if (list.keywords.some(k => normalizedQuery.includes(k))) {
+      const clonedCuratedVerses = cloneVerses(list.verses);
+      const curatedRefs = list.verses.map((verse) => verse.reference);
+
       // For curated lists, we often want to prioritize these above all else.
       // Special logic for canaanite_conquest: replace or strongly prepend.
       if (key === 'canaanite_conquest') {
-        return list.verses;
+        if (debugState) {
+          debugState.curationStageLogged = true;
+          addRetrievalStageTrace(debugState, {
+            stage: 'curation',
+            action: 'applied',
+            source,
+            list: key,
+            mode: 'replace',
+            curated_refs: curatedRefs,
+            displaced_refs: verses.map((verse) => verse.reference).filter((reference) => !curatedRefs.includes(reference)),
+          });
+          clonedCuratedVerses.forEach((verse) => {
+            addDecisionTrace(debugState, verse.reference, `curation:selected_replace:${key}`);
+          });
+        }
+        return clonedCuratedVerses;
       }
-      
-      const curatedRefs = list.verses.map(v => v.reference);
+
       const filteredRetrieved = verses.filter(v => !curatedRefs.includes(v.reference));
-      return [...list.verses, ...filteredRetrieved];
+      if (debugState) {
+        debugState.curationStageLogged = true;
+        addRetrievalStageTrace(debugState, {
+          stage: 'curation',
+          action: 'applied',
+          source,
+          list: key,
+          mode: 'prepend',
+          curated_refs: curatedRefs,
+          displaced_refs: verses
+            .map((verse) => verse.reference)
+            .filter((reference) => curatedRefs.includes(reference)),
+        });
+        clonedCuratedVerses.forEach((verse) => {
+          addDecisionTrace(debugState, verse.reference, `curation:prepended:${key}`);
+        });
+      }
+      return [...clonedCuratedVerses, ...filteredRetrieved];
     }
   }
+
+  if (debugState) {
+    debugState.curationStageLogged = true;
+    addRetrievalStageTrace(debugState, {
+      stage: 'curation',
+      action: 'no_match',
+      source,
+    });
+  }
   return verses;
 }
 
@@ -408,75 +501,113 @@ function applyCuratedTopicalLists(query: string, verses: VerseContext[]): VerseC
  * 2. Collects priority verses to prepend (avoiding duplicates).
  * 3. Filters out excluded patterns from the retrieved verses.
  */
-function applyTopicGuards(query: string, verses: VerseContext[]): VerseContext[] {
+function applyTopicGuards(
+  query: string,
+  verses: VerseContext[],
+  debugState?: RetrievalDebugState,
+  source: 'api_fallback' | 'db' = 'api_fallback'
+): VerseContext[] {
   const normalizedQuery = query.toLowerCase();
   let priorityToPrepend: VerseContext[] = [];
   let combinedExclusions: string[] = [];
-
-  for (const guard of Object.values(TOPIC_GUARDS)) {
+  const matchedGuards: Array<{
+    guard: string;
+    priority_refs: string[];
+    conditional_priority_refs: string[];
+    exclusion_patterns: string[];
+  }> = [];
+
+  for (const [guardKey, guard] of Object.entries(TOPIC_GUARDS)) {
     if (guard.keywords.some(k => normalizedQuery.includes(k))) {
+      const guardPriorityRefs: string[] = [];
+
       // Add regular priority verses
       guard.priority.forEach(pv => {
         if (!priorityToPrepend.some(v => v.reference === pv.reference)) {
-          priorityToPrepend.push(pv);
+          priorityToPrepend.push(cloneVerses([pv])[0]);
+          guardPriorityRefs.push(pv.reference);
         }
       });
 
       // Add conditional priority verses if applicable
+      const conditionalPriorityRefs: string[] = [];
       if (guard.conditionalPriority) {
         guard.conditionalPriority(query).forEach(pv => {
           if (!priorityToPrepend.some(v => v.reference === pv.reference)) {
-            priorityToPrepend.push(pv);
+            priorityToPrepend.push(cloneVerses([pv])[0]);
+            conditionalPriorityRefs.push(pv.reference);
           }
         });
       }
 
       // Collect exclusion patterns
       combinedExclusions.push(...guard.excludePatterns);
+      matchedGuards.push({
+        guard: guardKey,
+        priority_refs: guardPriorityRefs,
+        conditional_priority_refs: conditionalPriorityRefs,
+        exclusion_patterns: guard.excludePatterns,
+      });
     }
   }
 
+  if (debugState) {
+    debugState.topicGuardStageLogged = true;
+  }
+
   if (priorityToPrepend.length === 0 && combinedExclusions.length === 0) {
+    if (debugState) {
+      addRetrievalStageTrace(debugState, {
+        stage: 'topic_guard',
+        action: 'no_match',
+        source,
+      });
+    }
     return verses;
   }
 
   const priorityRefs = priorityToPrepend.map(p => p.reference);
+  const duplicatePriorityRefs: string[] = [];
+  const excludedRefs: string[] = [];
 
   // Filter out existing versions of priority refs and verses matching exclusion patterns
   const filteredRetrieved = verses.filter(v => {
     const isAlreadyPriority = priorityRefs.includes(v.reference);
     const lowerText = v.text.toLowerCase();
-    const isExcluded = combinedExclusions.some(pattern => lowerText.includes(pattern));
-    return !isAlreadyPriority && !isExcluded;
-  });
+    const exclusionPatterns = combinedExclusions.filter(pattern => lowerText.includes(pattern));
+    const isExcluded = exclusionPatterns.length > 0;
 
-  return [...priorityToPrepend, ...filteredRetrieved];
-}
+    if (debugState) {
+      if (isAlreadyPriority) {
+        duplicatePriorityRefs.push(v.reference);
+        addDecisionTrace(debugState, v.reference, 'topic_guard:replaced_by_priority');
+      }
+      if (isExcluded) {
+        excludedRefs.push(v.reference);
+        addDecisionTrace(debugState, v.reference, `topic_guard:excluded:${exclusionPatterns.join('|')}`);
+      }
+    }
 
+    return !isAlreadyPriority && !isExcluded;
+  });
 
-async function getCached<T>(key: string): Promise<T | null> {
-  if (!redis) {
-    return null;
-  }
-  try {
-    return await redis.get<T>(key);
-  } catch (error) {
-    console.warn(`Redis get failed for key ${key}:`, error);
-    return null;
+  if (debugState) {
+    addRetrievalStageTrace(debugState, {
+      stage: 'topic_guard',
+      action: 'applied',
+      source,
+      guards: matchedGuards,
+      prepended_refs: priorityRefs,
+      duplicate_priority_refs: duplicatePriorityRefs,
+      excluded_refs: excludedRefs,
+    });
+    priorityToPrepend.forEach((verse) => {
+      addDecisionTrace(debugState, verse.reference, 'topic_guard:prepended_priority');
+    });
   }
-}
 
-async function setCached<T>(key: string, value: T, ttlSeconds: number = CACHE_TTL_SECONDS): Promise<void> {
-  if (!redis) {
-    return;
-  }
-  try {
-    await redis.set(key, value, { ex: ttlSeconds });
-  } catch (error) {
-    console.warn(`Redis set failed for key ${key}:`, error);
-  }
+  return [...priorityToPrepend, ...filteredRetrieved];
 }
-
 function cloneVerses(verses: VerseContext[]): VerseContext[] {
   return verses.map((verse) => ({
     ...verse,
@@ -561,8 +692,13 @@ async function resolveVerseText(
 
   const parsed = parseReferenceKey(verseId);
   if (parsed) {
-    const fetched = await fetchVerseHelloAO(translation, parsed.book, parsed.chapter, parsed.verse)
-      || await fetchVerseFallback(verseId, translation);
+    const fetched = await fetchVerseTextWithFallback({
+      translation,
+      reference: verseId,
+      book: parsed.book,
+      chapter: parsed.chapter,
+      startVerse: parsed.verse
+    });
     if (fetched) {
       return { reference: verseId, translation, text: fetched, original: [] };
     }
@@ -596,13 +732,23 @@ async function fetchVersesByIds(
     }
   }
 
-  for (const verseId of verseIds) {
-    if (byId.has(verseId)) continue;
-    const resolved = await resolveVerseText(verseId, translation);
+  const unresolvedVerseIds = verseIds.filter((verseId) => !byId.has(verseId));
+  const resolvedVerses = await Promise.all(
+    unresolvedVerseIds.map(async (verseId) => {
+      try {
+        return await resolveVerseText(verseId, translation);
+      } catch (error) {
+        console.warn('Verse hydration failed; continuing with remaining verses', { verseId, error });
+        return null;
+      }
+    })
+  );
+
+  resolvedVerses.forEach((resolved) => {
     if (resolved) {
-      byId.set(verseId, resolved);
+      byId.set(resolved.reference, resolved);
     }
-  }
+  });
 
   return verseIds.map((id) => byId.get(id)).filter((v): v is VerseContext => Boolean(v));
 }
@@ -705,9 +851,16 @@ function normalizeEmbedding(raw: unknown): number[] {
   throw new Error('Unexpected embedding response shape');
 }
 
+function normalizeEmbeddingCacheQuery(query: string): string { // form of the query used for embedding-cache lookups
+  return query.trim().toLowerCase(); // whitespace-trimmed, then lower-cased
+}
+
 async function embedQuery(query: string): Promise<number[]> {
-  const cacheKey = `embed:${HF_EMBEDDING_MODEL}:${query.trim().toLowerCase()}`;
-  const cached = await getCached<number[]>(cacheKey);
+  const normalizedQuery = normalizeEmbeddingCacheQuery(query);
+  const cached = await getCachedEmbedding({
+    normalizedQuery,
+    model: HF_EMBEDDING_MODEL
+  });
   if (cached) {
     return cached;
   }
@@ -740,7 +893,13 @@ async function embedQuery(query: string): Promise<number[]> {
     if (embedding.length !== 384) {
       throw new Error(`Embedding dimension mismatch; expected 384, got ${embedding.length}`);
     }
-    await setCached(cacheKey, embedding);
+    await setCachedEmbedding(
+      {
+        normalizedQuery,
+        model: HF_EMBEDDING_MODEL
+      },
+      embedding
+    );
     return embedding;
   }
 
@@ -824,33 +983,315 @@ type RankedVerse = {
   rankLexical?: number;
   rankSemantic?: number;
   score: number;
+  metadataBoostFactor?: number;
+};
+
+const RETRIEVAL_DEBUG = ENABLE_RETRIEVAL_DEBUG;
+const TSK_MIN_CORE_VERSE_COUNT = 4;
+const TSK_MIN_TOPICAL_COVERAGE = 0.6;
+const TSK_MIN_RETRIEVAL_CONFIDENCE = 0.75;
+const HF_SEMANTIC_RERANKER_MODEL =
+  process.env.HF_SEMANTIC_RERANKER_MODEL || 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2';
+
+type RetrievalLatencyMetricName =
+  | 'embed_ms'
+  | 'vector_ms'
+  | 'fetch_verses_db_ms'
+  | 'fetch_verses_api_ms'
+  | 'enrich_ms';
+
+type RetrievalInstrumentation = {
+  requestId?: string;
+  onMetric?: (metric: RetrievalLatencyMetricName, durationMs: number) => void;
 };
 
-const RRF_K = 60;
-const HYBRID_CANDIDATE_LIMIT = 30;
-const DEFAULT_HYBRID_TOPK = 12;
-const MIN_HYBRID_TOPK = 10;
-const MAX_HYBRID_TOPK = 15;
+type RetrievalConfidenceDiagnostics = {
+  top1_score: number;
+  top5_score_range: number;
+  retrieval_entropy: number;
+  candidate_count: number;
+};
+
+type SemanticSimilarityProvider = (
+  query: string,
+  verseIds: string[],
+  signal?: AbortSignal
+) => Promise<number[]>;
+
+type RetrievalCandidateDiagnostics = { // per-candidate debug record, created in hybridSearch
+  reference: string; // verse id of the candidate
+  lexical_rank: number | null; // null when the candidate has no lexical rank
+  semantic_rank: number | null; // null when the candidate has no semantic rank
+  fused_score: number; // rounded reciprocal-rank-fusion score
+  boost_factor: number; // initialised to 1 by hybridSearch
+  reference_boost: number; // 1 when the candidate is one of the direct reference ids, else 0
+  metadata_confidence: number; // value of getMetadataConfidence for the hit
+  cross_reference_signal: number; // value looked up in the cross-reference signal map (0 when absent)
+  semantic_score: number | null; // set by applySemanticReranker for candidates inside its window
+  previous_rank: number | null; // 1-based position before semantic reranking
+  reranked_position: number | null; // 1-based position inside the semantically re-sorted window
+  final_score: number; // weighted sum computed by applyDeterministicReranker
+  final_rank: number | null; // 1-based position after the most recent reranking step
+};
+
+type RetrievalStageTrace = Record<string, unknown>;
+
+type TskExpansionDecision = {
+  shouldExpand: boolean;
+  reason:
+    | 'gating_disabled'
+    | 'insufficient_core_verses'
+    | 'insufficient_topical_coverage'
+    | 'low_retrieval_confidence'
+    | 'strong_primary_evidence';
+  metrics: {
+    core_verse_count: number;
+    topical_coverage: number;
+    retrieval_confidence: number;
+  };
+};
+
+type RetrievalDebugState = { // mutable scratchpad threaded through retrieval when debug tracing is requested
+  candidateTraces: Map<string, RetrievalCandidateDiagnostics>; // keyed by verse id
+  boostFactors: Map<string, number>; // verse id -> metadata multiplier, written by applyMetadataBoosts
+  decisionTraceByReference: Map<string, string[]>; // per-reference trace strings, appended by addDecisionTrace
+  stageTraces: RetrievalStageTrace[]; // stage-level records, appended by addRetrievalStageTrace
+  hybridTopKRefs: string[]; // verse ids of the first topK hits after the latest rerank
+  topicGuardStageLogged: boolean; // set to true by applyTopicGuards
+  curationStageLogged: boolean; // set to true by applyCuratedTopicalLists
+};
+
+// Sends the time elapsed since `startedAt` to the `onMetric` callback, if one was supplied.
+function recordRetrievalMetric(
+  instrumentation: RetrievalInstrumentation | undefined, metric: RetrievalLatencyMetricName, startedAt: number
+): void {
+  const elapsedMs = performance.now() - startedAt;
+  instrumentation?.onMetric?.(metric, elapsedMs);
+}
+
+// Builds an empty debug state: no traces or boost factors recorded, both stage flags unset.
+function createRetrievalDebugState(): RetrievalDebugState {
+  const freshState: RetrievalDebugState = {
+    candidateTraces: new Map(), boostFactors: new Map(), decisionTraceByReference: new Map(),
+    stageTraces: [],
+    hybridTopKRefs: [],
+    topicGuardStageLogged: false,
+    curationStageLogged: false,
+  };
+  return freshState;
+}
+
+// Appends one stage record to the debug state's stage log; does nothing when no state is passed.
+function addRetrievalStageTrace(
+  debugState: RetrievalDebugState | undefined,
+  trace: RetrievalStageTrace
+): void {
+  debugState?.stageTraces.push(trace);
+}
+
+// Records `trace` under `reference`, creating that reference's list on first use; no-op without a state.
+function addDecisionTrace(
+  debugState: RetrievalDebugState | undefined,
+  reference: string,
+  trace: string
+): void {
+  const byReference = debugState?.decisionTraceByReference;
+  const existing = byReference?.get(reference);
+  if (existing) existing.push(trace); else byReference?.set(reference, [trace]);
+}
+
+// Copies the traces recorded for `reference`, adding a default entry for each stage that left none.
+function getCandidateDecisionTrace(debugState: RetrievalDebugState, reference: string): string[] {
+  const recorded = debugState.decisionTraceByReference.get(reference) || [];
+  const result = recorded.slice();
+  const touchedBy = (prefix: string): boolean => result.some((entry) => entry.startsWith(prefix));
+  if (!touchedBy('topic_guard:')) {
+    result.push(debugState.topicGuardStageLogged ? 'topic_guard:unchanged' : 'topic_guard:not_invoked');
+  }
+  if (!touchedBy('curation:')) {
+    result.push(debugState.curationStageLogged ? 'curation:unchanged' : 'curation:not_invoked');
+  }
+  return result;
+}
+
+// Prefixes a verse's decision trace with how it relates to the hybrid ranking: its final rank,
+// or a marker that it was selected outside of (or without any) hybrid-ranked candidates.
+function getFinalSelectionTrace(debugState: RetrievalDebugState, reference: string): string[] {
+  const finalRank = debugState.candidateTraces.get(reference)?.final_rank;
+  let origin: string;
+  if (finalRank) {
+    origin = `hybrid_final_rank:${finalRank}`;
+  } else if (debugState.hybridTopKRefs.length > 0) {
+    origin = 'selected_outside_hybrid_ranked_candidates';
+  } else {
+    origin = 'selected_without_hybrid_candidates';
+  }
+
+  return [origin, ...getCandidateDecisionTrace(debugState, reference)];
+}
+
+function logRetrievalDiagnostics( // emits one `retrieval_diagnostics` JSON line via console.info
+  debugState: RetrievalDebugState,
+  options: {
+    translation: string;
+    domain: QueryDomain;
+    topK: number;
+    finalVerses: VerseContext[];
+  }
+): void {
+  if (!debugState.topicGuardStageLogged) { // stage never ran: record that explicitly in the stage log
+    addRetrievalStageTrace(debugState, {
+      stage: 'topic_guard',
+      action: 'not_invoked',
+      source: 'hybrid',
+    });
+  }
+  if (!debugState.curationStageLogged) { // same for the curation stage
+    addRetrievalStageTrace(debugState, {
+      stage: 'curation',
+      action: 'not_invoked',
+      source: 'hybrid',
+    });
+  }
+
+  const candidates = Array.from(debugState.candidateTraces.values()) // ascending final_rank; unranked entries sort last
+    .sort((left, right) => (left.final_rank ?? Number.MAX_SAFE_INTEGER) - (right.final_rank ?? Number.MAX_SAFE_INTEGER))
+    .map((candidate) => ({
+      ...candidate,
+      decision_trace: getCandidateDecisionTrace(debugState, candidate.reference),
+    }));
+
+  const finalSelection = options.finalVerses.map((verse, index) => ({
+    reference: verse.reference,
+    output_rank: index + 1, // 1-based position in the verses actually returned
+    decision_trace: getFinalSelectionTrace(debugState, verse.reference),
+  }));
+
+  console.info(JSON.stringify({
+    event: 'retrieval_diagnostics',
+    translation: options.translation,
+    domain: options.domain,
+    topK: options.topK,
+    candidates,
+    stage_traces: debugState.stageTraces,
+    final_selection: finalSelection,
+  }));
+}
+
+function roundRetrievalDiagnostic(value: number): number { // rounds a diagnostic value to at most 6 decimal places
+  return Number(value.toFixed(6)); // toFixed yields a string; Number() converts it back
+}
+
+/**
+ * Normalized Shannon entropy of the candidate scores, read as a probability distribution.
+ * Yields 0 for fewer than two candidates or a non-positive score total; otherwise the base-2
+ * entropy divided by log2(candidate count).
+ */
+function computeRetrievalEntropy(scored: RankedVerse[]): number {
+  const candidateCount = scored.length;
+  if (candidateCount <= 1) return 0;
+
+  let scoreSum = 0;
+  for (const { score } of scored) {
+    scoreSum += score;
+  }
+  if (scoreSum <= 0) return 0;
+
+  const entropyBits = scored.reduce((bits, { score }) => {
+    const share = score / scoreSum;
+    return share <= 0 ? bits : bits - share * Math.log2(share);
+  }, 0);
+
+  // log2(n) is the entropy of a uniform distribution over n candidates.
+  const ceilingBits = Math.log2(candidateCount);
+  return ceilingBits <= 0 ? 0 : entropyBits / ceilingBits;
+}
+
+// Summarizes the score list: first score, spread across the first five, normalized entropy, and count.
+function buildRetrievalConfidenceDiagnostics(scored: RankedVerse[]): RetrievalConfidenceDiagnostics {
+  const leading = scored.slice(0, 5);
+  const first = leading[0]?.score ?? 0;
+  const last = leading[leading.length - 1]?.score ?? first;
+  return {
+    top1_score: roundRetrievalDiagnostic(first),
+    top5_score_range: roundRetrievalDiagnostic(first - last),
+    retrieval_entropy: roundRetrievalDiagnostic(computeRetrievalEntropy(scored)),
+    candidate_count: scored.length,
+  };
+}
+
+// Lazily constructs the module-level InferenceClient from HF_TOKEN
+// and hands back the same instance on every later call.
+function getSemanticRerankerClient(): InferenceClient {
+  semanticRerankerClient = semanticRerankerClient ?? new InferenceClient(process.env.HF_TOKEN);
+  return semanticRerankerClient;
+}
+
+// Text sent to the reranker for a verse: its indexed text, or (after a warning) the verse id itself.
+function getSemanticRerankerText(verseId: string): string {
+  const indexedText = BIBLE_INDEX[verseId]?.text;
+  if (indexedText) return indexedText;
+  console.warn(`Semantic reranker: verse text not found for ${verseId}`);
+  return verseId;
+}
+
+// Default provider: scores each verse's text against the query; rejects a response that is not one score per verse.
+let semanticSimilarityProvider: SemanticSimilarityProvider = async (query, verseIds, signal) => {
+  const client = getSemanticRerankerClient();
+  const sentences = verseIds.map((verseId) => getSemanticRerankerText(verseId));
+  const response = await client.sentenceSimilarity(
+    { model: HF_SEMANTIC_RERANKER_MODEL, inputs: { source_sentence: query, sentences } },
+    { signal }
+  );
+  if (!Array.isArray(response) || response.length !== verseIds.length) {
+    throw new Error('semantic_scores_invalid');
+  }
+  return response.map((score) => Number(score ?? 0));
+};
+
+// Logs the confidence metrics for `scored` as one JSON line (event `retrieval_confidence`).
+function logRetrievalConfidenceDiagnostics(
+  scored: RankedVerse[],
+  options?: { domain?: QueryDomain; translation?: string }
+): void {
+  const payload = {
+    event: 'retrieval_confidence',
+    translation: options?.translation || 'BSB',
+    domain: options?.domain || 'general',
+    metrics: buildRetrievalConfidenceDiagnostics(scored),
+  };
+  console.info(JSON.stringify(payload));
+}
 
 function clampTopK(topK?: number): number {
-  const desired = Number.isFinite(topK) ? Math.floor(topK as number) : DEFAULT_HYBRID_TOPK;
-  return Math.min(Math.max(desired, MIN_HYBRID_TOPK), MAX_HYBRID_TOPK);
+  const desired = Number.isFinite(topK)
+    ? Math.floor(topK as number)
+    : RETRIEVAL_CONFIG.finalCandidateWindow.default;
+  return Math.min(
+    Math.max(desired, RETRIEVAL_CONFIG.finalCandidateWindow.min),
+    RETRIEVAL_CONFIG.finalCandidateWindow.max
+  );
 }
 
 async function semanticSearch(
   query: string,
   translation: string,
-  limit: number
+  limit: number,
+  instrumentation?: RetrievalInstrumentation
 ): Promise<Array<{ verseId: string; rank: number }>> {
   if (limit <= 0) return [];
   let embedding: number[] | null = null;
+  const embedStartedAt = performance.now();
   try {
     embedding = await embedQuery(query);
   } catch (error) {
     console.warn('Query embedding failed; semantic search skipped', error);
     return [];
+  } finally {
+    recordRetrievalMetric(instrumentation, 'embed_ms', embedStartedAt);
   }
 
+  const vectorStartedAt = performance.now();
   try {
     await ensureDbReady();
     const pool = getDbPool();
@@ -859,10 +1300,37 @@ async function semanticSearch(
   } catch (error) {
     console.warn('Semantic vector search failed', error);
     return [];
+  } finally {
+    recordRetrievalMetric(instrumentation, 'vector_ms', vectorStartedAt);
   }
 }
 
-async function applyMetadataBoosts(scored: RankedVerse[], domain: QueryDomain): Promise<RankedVerse[]> {
+// Score multiplier derived from verse metadata; 1 without metadata or for a domain with no rules here.
+function getMetadataBoostFactor(meta: VerseMetadata | undefined, domain: QueryDomain): number {
+  if (!meta) return 1;
+  let factor = 1;
+  switch (domain) {
+    case 'messianic':
+      if (meta.genre === 'prophetic') factor *= 1.3;
+      if (meta.testament === 'OT') factor *= 1.15;
+      if (meta.themeTags.includes('messianic')) factor *= 1.4;
+      break;
+    case 'covenants':
+      if (meta.genre === 'law') factor *= 1.2;
+      if (meta.genre === 'epistle') factor *= 1.15;
+      break;
+    case 'eschatology':
+      if (meta.genre === 'apocalyptic') factor *= 1.3;
+      if (meta.genre === 'prophetic') factor *= 1.2;
+  }
+  return factor;
+}
+
+async function applyMetadataBoosts(
+  scored: RankedVerse[],
+  domain: QueryDomain,
+  debugState?: RetrievalDebugState
+): Promise<RankedVerse[]> {
   if (domain === 'general' || scored.length === 0) return scored;
 
   const metadata = await getVerseMetadata();
@@ -870,35 +1338,273 @@ async function applyMetadataBoosts(scored: RankedVerse[], domain: QueryDomain):
 
   return scored.map((hit) => {
     const meta = metadata.get(hit.verseId);
-    if (!meta) return hit;
-
-    let multiplier = 1;
-    if (domain === 'messianic') {
-      if (meta.genre === 'prophetic') multiplier *= 1.3;
-      if (meta.testament === 'OT') multiplier *= 1.15;
-      if (meta.themeTags.includes('messianic')) multiplier *= 1.4;
-    } else if (domain === 'covenants') {
-      if (meta.genre === 'law') multiplier *= 1.2;
-      if (meta.genre === 'epistle') multiplier *= 1.15;
-    } else if (domain === 'eschatology') {
-      if (meta.genre === 'apocalyptic') multiplier *= 1.3;
-      if (meta.genre === 'prophetic') multiplier *= 1.2;
+    const multiplier = getMetadataBoostFactor(meta, domain);
+    if (debugState) {
+      debugState.boostFactors.set(hit.verseId, multiplier);
+    }
+
+    if (multiplier === 1) {
+      return { ...hit, metadataBoostFactor: 1 };
+    }
+    return { ...hit, score: hit.score * multiplier, metadataBoostFactor: multiplier };
+  });
+}
+
+// Maps a hit's metadata boost onto [0, 1]: the excess over 1, divided by the configured scale, capped at 1.
+function getMetadataConfidence(hit: RankedVerse): number {
+  const { metadataConfidenceScale } = RETRIEVAL_CONFIG.reranker;
+  const excess = (hit.metadataBoostFactor ?? 1) - 1;
+  // No confidence without a boost above 1, or when the scale is not positive.
+  const hasSignal = !(excess <= 0 || metadataConfidenceScale <= 0);
+  return hasSignal ? Math.min(excess / metadataConfidenceScale, 1) : 0;
+}
+
+async function getCrossReferenceSignalMap( // resolves to a map holding 1 for each candidate that a direct reference cross-references
+  directReferenceIds: string[],
+  candidateIds: string[]
+): Promise<Map<string, number>> {
+  const signals = new Map<string, number>();
+  if (directReferenceIds.length === 0 || candidateIds.length === 0 || RETRIEVAL_CONFIG.reranker.crossReferenceWeight <= 0) {
+    return signals; // nothing to look up, or the cross-reference term carries no weight
+  }
+
+  const sourceRefs = directReferenceIds // ids for which parseReferenceKey returns a falsy value are dropped
+    .map((reference) => parseReferenceKey(reference))
+    .filter((ref): ref is { book: string; chapter: number; verse: number } => Boolean(ref));
+  const targetRefs = candidateIds
+    .map((reference) => parseReferenceKey(reference))
+    .filter((ref): ref is { book: string; chapter: number; verse: number } => Boolean(ref));
+
+  if (sourceRefs.length === 0 || targetRefs.length === 0) {
+    return signals;
+  }
+
+  try {
+    await ensureDbReady();
+    const pool = getDbPool();
+
+    const values: Array<string | number> = []; // bind parameters: all source tuples first, then all target tuples
+    const sourceTuples: string[] = [];
+    sourceRefs.forEach((ref, index) => {
+      const base = index * 3; // three placeholders per (book, chapter, verse) tuple
+      sourceTuples.push(`($${base + 1}::text, $${base + 2}::int, $${base + 3}::int)`);
+      values.push(ref.book, ref.chapter, ref.verse);
+    });
+
+    const targetOffset = values.length; // target placeholders continue numbering after the source ones
+    const targetTuples: string[] = [];
+    targetRefs.forEach((ref, index) => {
+      const base = targetOffset + index * 3;
+      targetTuples.push(`($${base + 1}::text, $${base + 2}::int, $${base + 3}::int)`);
+      values.push(ref.book, ref.chapter, ref.verse);
+    });
+
+    const result = await pool.query<{
+      target_book: string;
+      target_chapter: number;
+      target_verse: number;
+    }>(
+      `SELECT DISTINCT target_book, target_chapter, target_verse
+       FROM cross_references
+       WHERE (source_book, source_chapter, source_verse) IN (VALUES ${sourceTuples.join(', ')})
+       AND (target_book, target_chapter, target_verse) IN (VALUES ${targetTuples.join(', ')})`,
+      values
+    );
+
+    result.rows.forEach((row) => {
+      signals.set(`${row.target_book} ${row.target_chapter}:${row.target_verse}`, 1); // key rebuilt as "BOOK chapter:verse"
+    });
+  } catch (error) { // best-effort: the failure is logged and the map collected so far is returned
+    console.warn('Cross-reference signal lookup failed; continuing without reranker cross-reference signals', error);
+  }
+
+  return signals;
+}
+
+async function applyDeterministicReranker( // re-sorts hits by a weighted sum of fused score, direct-reference match, metadata confidence and cross-reference signal
+  scored: RankedVerse[],
+  options: {
+    topK: number;
+    directReferenceIds: string[];
+  },
+  debugState?: RetrievalDebugState
+): Promise<RankedVerse[]> {
+  if (scored.length === 0) {
+    return scored;
+  }
+
+  const directReferenceSet = new Set(options.directReferenceIds.map((reference) => reference.trim().toUpperCase())); // matched after trim + upper-case
+  const crossReferenceSignals = await getCrossReferenceSignalMap(
+    Array.from(directReferenceSet),
+    scored.map((hit) => hit.verseId)
+  );
+
+  const reranked = scored.map((hit, index) => {
+    const fusedScore = hit.score;
+    const referenceBoost = directReferenceSet.has(hit.verseId.trim().toUpperCase()) ? 1 : 0;
+    const metadataConfidence = getMetadataConfidence(hit);
+    const crossReferenceSignal = crossReferenceSignals.get(hit.verseId) ?? 0;
+    const finalScore = // each signal is scaled by its weight from RETRIEVAL_CONFIG.reranker
+      (RETRIEVAL_CONFIG.reranker.fusedScoreWeight * fusedScore) +
+      (RETRIEVAL_CONFIG.reranker.directReferenceWeight * referenceBoost) +
+      (RETRIEVAL_CONFIG.reranker.metadataConfidenceWeight * metadataConfidence) +
+      (RETRIEVAL_CONFIG.reranker.crossReferenceWeight * crossReferenceSignal);
+
+    if (debugState) {
+      const candidate = debugState.candidateTraces.get(hit.verseId);
+      if (candidate) {
+        candidate.reference_boost = roundRetrievalDiagnostic(referenceBoost);
+        candidate.metadata_confidence = roundRetrievalDiagnostic(metadataConfidence);
+        candidate.cross_reference_signal = roundRetrievalDiagnostic(crossReferenceSignal);
+        candidate.final_score = roundRetrievalDiagnostic(finalScore);
+      }
+    }
+
+    return {
+      hit,
+      originalIndex: index, // kept as a tie-breaker so equal scores retain their incoming order
+      finalScore,
+      fusedScore,
+    };
+  });
+
+  reranked.sort((left, right) => { // descending final score, then descending fused score, then incoming order, then verse id
+    if (right.finalScore !== left.finalScore) {
+      return right.finalScore - left.finalScore;
+    }
+    if (right.fusedScore !== left.fusedScore) {
+      return right.fusedScore - left.fusedScore;
+    }
+    if (left.originalIndex !== right.originalIndex) {
+      return left.originalIndex - right.originalIndex;
+    }
+    return left.hit.verseId.localeCompare(right.hit.verseId);
+  });
+
+  const finalHits = reranked.map((entry) => entry.hit); // note: the returned hits keep their incoming `score`, not finalScore
+  if (debugState) {
+    debugState.hybridTopKRefs = finalHits.slice(0, options.topK).map((hit) => hit.verseId);
+    finalHits.forEach((hit, index) => {
+      const candidate = debugState.candidateTraces.get(hit.verseId);
+      if (!candidate) return;
+      candidate.final_rank = index + 1; // 1-based
+    });
+  }
+
+  return finalHits;
+}
+
+/**
+ * Re-sorts the leading candidate window by sentence similarity to the query; the tail keeps its order.
+ * Returns `scored` unchanged when the feature flag or HF_TOKEN is unset, when fewer than two candidates
+ * fall in the window, or when the provider throws or overruns the latency budget.
+ */
+async function applySemanticReranker(
+  query: string,
+  scored: RankedVerse[],
+  options: {
+    topK: number;
+  },
+  debugState?: RetrievalDebugState
+): Promise<RankedVerse[]> {
+  if (!ENABLE_SEMANTIC_RERANKER || scored.length === 0 || !process.env.HF_TOKEN) {
+    return scored;
+  }
+
+  const candidateWindow = Math.min(RETRIEVAL_CONFIG.semanticReranker.candidateWindow, scored.length);
+  if (candidateWindow <= 1) {
+    return scored;
+  }
+
+  const previousOrder = scored.slice(0, candidateWindow);
+  const startedAt = performance.now();
+  const timeoutController = new AbortController();
+  const timeoutId = setTimeout(() => timeoutController.abort(), RETRIEVAL_CONFIG.semanticReranker.latencyBudgetMs);
+
+  try {
+    const semanticScores = await semanticSimilarityProvider(
+      query,
+      previousOrder.map((hit) => hit.verseId),
+      timeoutController.signal
+    );
+
+    const rerankedWindow = previousOrder
+      .map((hit, index) => ({
+        hit,
+        previousRank: index + 1,
+        semanticScore: semanticScores[index] ?? 0,
+      }))
+      .sort((left, right) => {
+        if (right.semanticScore !== left.semanticScore) {
+          return right.semanticScore - left.semanticScore;
+        }
+        if (left.previousRank !== right.previousRank) {
+          return left.previousRank - right.previousRank;
+        }
+        return left.hit.verseId.localeCompare(right.hit.verseId);
+      });
+
+    const finalHits = rerankedWindow.map((entry) => entry.hit).concat(scored.slice(candidateWindow));
+    if (debugState) {
+      addRetrievalStageTrace(debugState, {
+        stage: 'semantic_reranker',
+        action: 'applied',
+        candidate_window: candidateWindow,
+        latency_ms: roundRetrievalDiagnostic(performance.now() - startedAt),
+        latency_budget_ms: RETRIEVAL_CONFIG.semanticReranker.latencyBudgetMs,
+      });
+      debugState.hybridTopKRefs = finalHits.slice(0, options.topK).map((hit) => hit.verseId);
+      rerankedWindow.forEach((entry, index) => {
+        const candidate = debugState.candidateTraces.get(entry.hit.verseId);
+        if (!candidate) return;
+        candidate.semantic_score = roundRetrievalDiagnostic(entry.semanticScore);
+        candidate.previous_rank = entry.previousRank;
+        candidate.reranked_position = index + 1;
+      });
+      finalHits.forEach((hit, index) => {
+        const candidate = debugState.candidateTraces.get(hit.verseId);
+        if (!candidate) return;
+        candidate.final_rank = index + 1;
+      });
+    }
+
+    return finalHits;
+  } catch (error) {
+    // A fired timer means the budget ran out, even if the client wrapped or renamed the abort error.
+    const isAbortError = timeoutController.signal.aborted || (error instanceof Error && error.name === 'AbortError');
+    if (debugState) {
+      addRetrievalStageTrace(debugState, {
+        stage: 'semantic_reranker',
+        action: 'fallback',
+        reason: isAbortError
+          ? 'latency_budget_exceeded'
+          : String((error as { message?: string })?.message || error || 'unknown_error'),
+      });
+    }
+    if (!isAbortError) {
+      console.warn(JSON.stringify({
+        event: 'semantic_reranker_failed',
+        reason: String((error as { message?: string })?.message || error || 'unknown_error'),
+        model: HF_SEMANTIC_RERANKER_MODEL,
+      }));
+    }
+    return scored;
+  } finally {
+    clearTimeout(timeoutId);
+  }
 }
 
 async function hybridSearch(
   query: string,
-  options?: { topK?: number; domain?: QueryDomain; translation?: string }
+  options?: { topK?: number; domain?: QueryDomain; translation?: string; directReferenceIds?: string[] },
+  instrumentation?: RetrievalInstrumentation,
+  debugState?: RetrievalDebugState
 ): Promise<VerseResult[]> {
   const topK = clampTopK(options?.topK);
   const translation = options?.translation || 'BSB';
 
   const fuse = getLexicalFuse();
-  const lexicalHits = fuse.search(query, { limit: HYBRID_CANDIDATE_LIMIT });
+  const lexicalHits = fuse.search(query, { limit: RETRIEVAL_CONFIG.fuse.candidateLimit });
   const lexicalRanks = new Map<string, number>();
   const verseById = new Map<string, VerseResult>();
 
@@ -912,7 +1618,12 @@ async function hybridSearch(
     }
   });
 
-  const semanticHits = await semanticSearch(query, translation, HYBRID_CANDIDATE_LIMIT);
+  const semanticHits = await semanticSearch(
+    query,
+    translation,
+    RETRIEVAL_CONFIG.semantic.candidateLimit,
+    instrumentation
+  );
   const semanticRanks = new Map<string, number>();
   for (const hit of semanticHits) {
     semanticRanks.set(hit.verseId, hit.rank);
@@ -927,26 +1638,81 @@ async function hybridSearch(
     const rankSemantic = semanticRanks.get(verseId);
     let score = 0;
     if (typeof rankLexical === 'number') {
-      score += 1 / (RRF_K + rankLexical);
+      score += 1 / (RETRIEVAL_CONFIG.rrfK + rankLexical);
     }
     if (typeof rankSemantic === 'number') {
-      score += 1 / (RRF_K + rankSemantic);
+      score += 1 / (RETRIEVAL_CONFIG.rrfK + rankSemantic);
     }
     scored.push({
       verseId,
       verse: { reference: verseId, translation, text: '', original: [] },
       rankLexical,
       rankSemantic,
-      score
+      score,
+      metadataBoostFactor: 1,
     });
+    if (debugState) {
+      debugState.candidateTraces.set(verseId, {
+        reference: verseId,
+        lexical_rank: typeof rankLexical === 'number' ? rankLexical : null,
+        semantic_rank: typeof rankSemantic === 'number' ? rankSemantic : null,
+        fused_score: roundRetrievalDiagnostic(score),
+        boost_factor: 1,
+        reference_boost: 0,
+        metadata_confidence: 0,
+        cross_reference_signal: 0,
+        semantic_score: null,
+        previous_rank: null,
+        reranked_position: null,
+        final_score: roundRetrievalDiagnostic(score),
+        final_rank: null,
+      });
+    }
   }
 
-  if (scored.length === 0) return [];
+  if (scored.length === 0) {
+    if (debugState) {
+      debugState.hybridTopKRefs = [];
+    }
+    return [];
+  }
 
-  const boosted = options?.domain ? await applyMetadataBoosts(scored, options.domain) : scored;
+  const boosted = options?.domain ? await applyMetadataBoosts(scored, options.domain, debugState) : scored;
   boosted.sort((a, b) => b.score - a.score);
+  const reranked = await applyDeterministicReranker(
+    boosted,
+    {
+      topK,
+      directReferenceIds: options?.directReferenceIds ?? [],
+    },
+    debugState
+  );
+  const finalRanked = await applySemanticReranker(
+    query,
+    reranked,
+    { topK },
+    debugState
+  );
+
+  if (debugState) {
+    finalRanked.forEach((hit) => {
+      const candidate = debugState.candidateTraces.get(hit.verseId);
+      if (!candidate) return;
+      candidate.boost_factor = roundRetrievalDiagnostic(debugState.boostFactors.get(hit.verseId) ?? 1);
+      if (candidate.final_score === 0) {
+        candidate.final_score = roundRetrievalDiagnostic(hit.score);
+      }
+    });
+  }
 
-  return boosted.slice(0, topK).map((hit) => ({ verseId: hit.verseId }));
+  if (RETRIEVAL_DEBUG) {
+    logRetrievalConfidenceDiagnostics(finalRanked, {
+      domain: options?.domain,
+      translation,
+    });
+  }
+
+  return finalRanked.slice(0, topK).map((hit) => ({ verseId: hit.verseId }));
 }
 
 /**
@@ -1024,30 +1790,142 @@ async function getTskCrossReferences(
   return crossRefVerses.map((v) => ({ ...v, isCrossReference: true }));
 }
 
+function computeTskTopicalCoverage(query: string, verses: VerseContext[]): number {
+  // Share of query tokens found in at least one verse text; no tokens counts as full coverage.
+  const queryTokens = tokenizeFallbackQuery(query);
+  if (queryTokens.length === 0) return 1;
+
+  let matchedCount = 0;
+  for (const token of queryTokens) {
+    const appearsInSomeVerse = verses.some((verse) => verse.text.toLowerCase().includes(token));
+    if (appearsInSomeVerse) matchedCount += 1;
+  }
+  return matchedCount / queryTokens.length;
+}
+
+/**
+ * Decides whether TSK cross-reference expansion should run for a query.
+ * The decision is based on the core verse count, the share of query tokens the
+ * core verses cover, and the average of those two signals.
+ */
+function buildTskExpansionDecision(query: string, coreVerses: VerseContext[]): TskExpansionDecision {
+  const verseCount = coreVerses.length;
+  const coverage = roundRetrievalDiagnostic(computeTskTopicalCoverage(query, coreVerses));
+  // Count confidence saturates at 1 once the minimum core verse count is reached.
+  const countRatio = Math.min(verseCount / TSK_MIN_CORE_VERSE_COUNT, 1);
+  const confidence = roundRetrievalDiagnostic((countRatio + coverage) / 2);
+  const metrics = {
+    core_verse_count: verseCount,
+    topical_coverage: coverage,
+    retrieval_confidence: confidence,
+  };
+
+  // Ordered triggers: the first one that fires names the reason for expanding.
+  const triggers: Array<[boolean, TskExpansionDecision]> = [
+    [
+      !ENABLE_TSK_EXPANSION_GATING,
+      { shouldExpand: true, reason: 'gating_disabled', metrics },
+    ],
+    [
+      verseCount < TSK_MIN_CORE_VERSE_COUNT,
+      { shouldExpand: true, reason: 'insufficient_core_verses', metrics },
+    ],
+    [
+      coverage < TSK_MIN_TOPICAL_COVERAGE,
+      { shouldExpand: true, reason: 'insufficient_topical_coverage', metrics },
+    ],
+    [
+      confidence < TSK_MIN_RETRIEVAL_CONFIDENCE,
+      { shouldExpand: true, reason: 'low_retrieval_confidence', metrics },
+    ],
+  ];
+
+  const fired = triggers.find(([shouldFire]) => shouldFire);
+  if (fired) {
+    return fired[1];
+  }
+
+  // Every check passed, so the primary evidence is strong enough to skip expansion.
+  return {
+    shouldExpand: false,
+    reason: 'strong_primary_evidence',
+    metrics,
+  };
+}
+
 export async function retrieveContextForQuery(
   query: string,
   translation: string,
-  apiKey?: string
+  apiKey?: string,
+  instrumentation?: RetrievalInstrumentation
 ): Promise<VerseContext[]> {
-  const cacheKey = `context:${CONTEXT_CACHE_VERSION}:${translation}:${query.trim().toLowerCase()}`;
-  const cached = await getCached<VerseContext[]>(cacheKey);
+  const debugState = RETRIEVAL_DEBUG ? createRetrievalDebugState() : undefined;
+  const cached = await getCachedRetrievalContext({
+    query,
+    translation,
+    version: CONTEXT_CACHE_VERSION,
+  });
   if (cached) {
     return cloneVerses(normalizeVerses(dedupeByVerseId(cached)));
   }
 
   const topK = clampTopK();
-  const { domain, expandedQuery } = classifyAndExpand(query);
+  const { domain, intent, expandedQuery, normalizedQuery } = classifyAndExpand(query);
+  const directRefs = extractDirectReferences(normalizedQuery);
+  const hasRangedDirectRefs = directRefs.some(
+    (ref) => typeof ref.endVerse === 'number' && ref.endVerse > ref.verse
+  );
+  const directRefIds = directRefs
+    .filter((ref) => !(typeof ref.endVerse === 'number' && ref.endVerse > ref.verse))
+    .map((ref) => `${ref.book} ${ref.chapter}:${ref.verse}`);
+
+  if (
+    !hasRangedDirectRefs &&
+    (intent === 'DIRECT_REFERENCE' || intent === 'VERSE_EXPLANATION') &&
+    directRefIds.length > 0
+  ) {
+    const exactVerses = await fetchVersesByIds(directRefIds.slice(0, topK), translation);
+    let focusedVerses = exactVerses;
+
+    if (intent === 'VERSE_EXPLANATION' && exactVerses.length > 0) {
+      try {
+        await ensureDbReady();
+        const pool = getDbPool();
+        const tskVerses = await getTskCrossReferences(pool, exactVerses, translation);
+        focusedVerses = [...exactVerses, ...tskVerses];
+      } catch (error) {
+        console.warn('Explanation cross-reference retrieval failed; continuing with target verses only', error);
+      }
+    }
+
+    attachIndexedOriginals(focusedVerses);
+    const enrichStartedAt = performance.now();
+    const enriched = await enrichOriginalLanguages(focusedVerses);
+    recordRetrievalMetric(instrumentation, 'enrich_ms', enrichStartedAt);
+    const translated = await applyTranslationOverride(enriched, translation);
+    const deduped = dedupeByVerseId(translated);
+    const normalized = normalizeVerses(deduped).slice(0, topK);
+    await setCachedRetrievalContext(
+      {
+        query,
+        translation,
+        version: CONTEXT_CACHE_VERSION,
+      },
+      normalized
+    );
+    return cloneVerses(normalized);
+  }
+
   const hybridResults = await hybridSearch(expandedQuery, {
     topK,
     domain,
-    translation
-  });
+    translation,
+    directReferenceIds: directRefIds,
+  }, instrumentation, debugState);
 
-  const directRefs = extractDirectReferences(query)
-    .map((ref) => `${ref.book} ${ref.chapter}:${ref.verse}`);
   const orderedIds: string[] = [];
   const seenIds = new Set<string>();
-  for (const verseId of [...directRefs, ...hybridResults.map((result) => result.verseId)]) {
+  for (const verseId of [...directRefIds, ...hybridResults.map((result) => result.verseId)]) {
     const key = verseId.trim().toUpperCase();
     if (seenIds.has(key)) continue;
     seenIds.add(key);
@@ -1055,26 +1933,67 @@ export async function retrieveContextForQuery(
   }
   const limitedIds = orderedIds.slice(0, topK);
 
+  const fetchVersesStartedAt = performance.now();
   let verses = await fetchVersesByIds(limitedIds, translation);
+  recordRetrievalMetric(instrumentation, 'fetch_verses_db_ms', fetchVersesStartedAt);
   const shouldUseApiFallback =
     verses.length === 0 ||
     (limitedIds.length > 0 && verses.length < Math.min(limitedIds.length, topK));
 
+  addRetrievalStageTrace(debugState, {
+    stage: 'api_fallback',
+    action: shouldUseApiFallback ? 'used' : 'skipped',
+    reason: shouldUseApiFallback
+      ? (verses.length === 0 ? 'no_verses_after_hybrid_fetch' : 'partial_hybrid_fetch')
+      : 'hybrid_fetch_sufficient',
+  });
+
   if (shouldUseApiFallback) {
+    const apiFetchStartedAt = performance.now();
     try {
-      const apiVerses = await retrieveContextViaApis(query, translation, apiKey);
+      const apiVerses = await retrieveContextViaApis(normalizedQuery, translation, apiKey, debugState);
       verses = [...verses, ...apiVerses];
     } catch (error) {
       console.warn('API retrieval failed; continuing with available verses', error);
+    } finally {
+      recordRetrievalMetric(instrumentation, 'fetch_verses_api_ms', apiFetchStartedAt);
     }
   }
 
-  attachIndexedOriginals(verses);
-  const enriched = await enrichOriginalLanguages(verses);
+  const postProcessed = applyCuratedTopicalLists(
+    normalizedQuery,
+    applyTopicGuards(
+      normalizedQuery,
+      verses,
+      debugState,
+      shouldUseApiFallback ? 'api_fallback' : 'db'
+    ),
+    debugState,
+    shouldUseApiFallback ? 'api_fallback' : 'db'
+  );
+  attachIndexedOriginals(postProcessed);
+  const enrichStartedAt = performance.now();
+  const enriched = await enrichOriginalLanguages(postProcessed);
+  recordRetrievalMetric(instrumentation, 'enrich_ms', enrichStartedAt);
   const translated = await applyTranslationOverride(enriched, translation);
   const deduped = dedupeByVerseId(translated);
   const normalized = normalizeVerses(deduped).slice(0, topK);
-  await setCached(cacheKey, normalized);
+  if (debugState) {
+    logRetrievalDiagnostics(debugState, {
+      translation,
+      domain,
+      topK,
+      finalVerses: normalized,
+    });
+  }
+  await setCachedRetrievalContext(
+    {
+      query,
+      translation,
+      version: CONTEXT_CACHE_VERSION,
+    },
+    normalized
+  );
   return cloneVerses(normalized);
 }
 
@@ -1158,16 +2077,19 @@ async function retrieveContextFromDb(
     }
   }
 
-  const guarded = applyTopicGuards(query, verses);
-  const coreVerses = applyCuratedTopicalLists(query, guarded);
+  const guarded = applyTopicGuards(query, verses, undefined, 'db');
+  const coreVerses = applyCuratedTopicalLists(query, guarded, undefined, 'db');
+  const tskDecision = buildTskExpansionDecision(query, coreVerses);
   
   // TSK Cross-References (Anchor Retrieval)
   let finalVerses = coreVerses;
-  try {
-    const tskVerses = await getTskCrossReferences(pool, coreVerses, translation);
-    finalVerses = [...coreVerses, ...tskVerses];
-  } catch (error) {
-    console.warn('TSK retrieval failed', error);
+  if (tskDecision.shouldExpand) {
+    try {
+      const tskVerses = await getTskCrossReferences(pool, coreVerses, translation);
+      finalVerses = [...coreVerses, ...tskVerses];
+    } catch (error) {
+      console.warn('TSK retrieval failed', error);
+    }
   }
 
   attachIndexedOriginals(finalVerses);
@@ -1187,7 +2109,8 @@ function attachIndexedOriginals(verses: VerseContext[]): void {
 async function retrieveContextViaApis(
   query: string,
   translation: string,
-  apiKey?: string
+  apiKey?: string,
+  debugState?: RetrievalDebugState
 ): Promise<VerseContext[]> {
   const verses: VerseContext[] = [];
   const normalizedQuery = query.toLowerCase();
@@ -1277,44 +2200,59 @@ async function retrieveContextViaApis(
   const directRefs = extractDirectReferences(query);
   
   if (directRefs.length > 0) {
-    // Attempt rapid direct fetch for parsed references
-    for (const ref of directRefs) {
-      const refKey = `${ref.book} ${ref.chapter}:${ref.verse}`;
-      const refStr = `${ref.book} ${ref.chapter}:${ref.verse}${ref.endVerse ? '-' + ref.endVerse : ''}`;
-      if (verses.some((v) => v.reference.startsWith(refKey))) {
-        continue;
-      }
-      const dbMatch = BIBLE_INDEX[`${ref.book} ${ref.chapter}:${ref.verse}`];
-      if (dbMatch && canUseIndex) {
-        verses.push(cloneVerses([dbMatch])[0]);
-        continue;
-      }
+    const directVerseResults = await Promise.all(
+      directRefs.map(async (ref) => {
+        const refKey = `${ref.book} ${ref.chapter}:${ref.verse}`;
+        const refStr = `${ref.book} ${ref.chapter}:${ref.verse}${ref.endVerse ? '-' + ref.endVerse : ''}`;
+        if (verses.some((v) => v.reference === refKey || v.reference.startsWith(refKey + '-'))) {
+          return null;
+        }
 
-      if (LOCAL_TRANSLATIONS.has(translation)) {
-        const localText = await getTranslationVerse(refStr, translation);
-        if (localText) {
-          verses.push({
-            reference: refStr,
+        const dbMatch = BIBLE_INDEX[`${ref.book} ${ref.chapter}:${ref.verse}`];
+        if (dbMatch && canUseIndex) {
+          return cloneVerses([dbMatch])[0];
+        }
+
+        if (LOCAL_TRANSLATIONS.has(translation)) {
+          const localText = await getTranslationVerse(refStr, translation);
+          if (localText) {
+            return {
+              reference: refStr,
+              translation,
+              text: localText,
+              original: []
+            } satisfies VerseContext;
+          }
+        }
+
+        try {
+          const vText = await fetchVerseTextWithFallback({
             translation,
-            text: localText,
-            original: []
+            reference: refStr,
+            book: ref.book,
+            chapter: ref.chapter,
+            startVerse: ref.verse,
+            endVerse: ref.endVerse
           });
-          continue;
+
+          if (!vText) {
+            return null;
+          }
+
+          return {
+            reference: refStr,
+            translation,
+            text: vText,
+            original: [] // Filled in enrichment phase
+          } satisfies VerseContext;
+        } catch (error) {
+          console.warn('Direct verse hydration failed; continuing with remaining verses', { reference: refStr, error });
+          return null;
         }
-      }
-      
-      const vText = await fetchVerseHelloAO(translation, ref.book, ref.chapter, ref.verse, ref.endVerse) 
-                    || await fetchVerseFallback(refStr, translation);
-      
-      if (vText) {
-        verses.push({
-          reference: refStr,
-          translation: translation,
-          text: vText,
-          original: [] // Filled in enrichment phase
-        });
-      }
-    }
+      })
+    );
+
+    verses.push(...directVerseResults.filter((verse): verse is VerseContext => Boolean(verse)));
   }
 
   // 2. Semantic Hint via Groq (only if direct parsing yields few results)
@@ -1326,7 +2264,9 @@ async function retrieveContextViaApis(
       if (lexicalFallback.length > 0) {
         verses.push(...lexicalFallback);
       }
-      return enrichOriginalLanguages(verses);
+      const guarded = applyTopicGuards(query, verses, debugState, 'api_fallback');
+      const finalVerses = applyCuratedTopicalLists(query, guarded, debugState, 'api_fallback');
+      return enrichOriginalLanguages(finalVerses);
     }
     const groq = createGroq({
       apiKey: groqApiKey,
@@ -1363,7 +2303,9 @@ async function retrieveContextViaApis(
       if (lexicalFallback.length > 0) {
         verses.push(...lexicalFallback);
       }
-      return enrichOriginalLanguages(verses);
+      const guarded = applyTopicGuards(query, verses, debugState, 'api_fallback');
+      const finalVerses = applyCuratedTopicalLists(query, guarded, debugState, 'api_fallback');
+      return enrichOriginalLanguages(finalVerses);
     }
 
     const lines = text
@@ -1371,44 +2313,57 @@ async function retrieveContextViaApis(
       .map((line) => line.trim())
       .filter((line) => line && line.toUpperCase() !== 'NONE');
 
-    for (const line of lines) {
-      const match = line.match(/^([A-Z0-9]{3})\s+(\d+):(\d+)$/i);
-      if (!match) continue;
-      const book = match[1].toUpperCase();
-      const chapter = Number.parseInt(match[2], 10);
-      const verse = Number.parseInt(match[3], 10);
-      const refStr = `${book} ${chapter}:${verse}`;
-      
-      // Skip if we already got it
-      if (verses.some(v => v.reference.startsWith(refStr))) continue;
-
-      // Try bundled index first
-      const indexed = BIBLE_INDEX[refStr];
-      if (indexed && canUseIndex) {
-        const cloned = cloneVerses([indexed])[0];
-        cloned.translation = 'WEB'; // Index text is WEB
-        verses.push(cloned);
-        continue;
-      }
+    const semanticVerseResults = await Promise.all(
+      lines.map(async (line) => {
+        const match = line.match(/^([A-Z0-9]{3})\s+(\d+):(\d+)$/i);
+        if (!match) return null;
+        const book = match[1].toUpperCase();
+        const chapter = Number.parseInt(match[2], 10);
+        const verse = Number.parseInt(match[3], 10);
+        const refStr = `${book} ${chapter}:${verse}`;
+
+        if (verses.some((v) => v.reference === refStr)) {
+          return null;
+        }
 
-      // Fallback to fetch
-      const vText = await fetchVerseHelloAO(translation, book, chapter, verse)
-                    || await fetchVerseFallback(refStr, translation);
-                    
-      if (vText) {
-        verses.push({
-          reference: refStr,
-          translation,
-          text: vText,
-          original: []
-        });
-      }
-    }
+        const indexed = BIBLE_INDEX[refStr];
+        if (indexed && translation === 'WEB') {
+          const cloned = cloneVerses([indexed])[0];
+          return cloned;
+        }
+
+        try {
+          const vText = await fetchVerseTextWithFallback({
+            translation,
+            reference: refStr,
+            book,
+            chapter,
+            startVerse: verse
+          });
+
+          if (!vText) {
+            return null;
+          }
+
+          return {
+            reference: refStr,
+            translation,
+            text: vText,
+            original: []
+          } satisfies VerseContext;
+        } catch (error) {
+          console.warn('Semantic verse hydration failed; continuing with remaining verses', { reference: refStr, error });
+          return null;
+        }
+      })
+    );
+
+    verses.push(...semanticVerseResults.filter((verse): verse is VerseContext => Boolean(verse)));
   }
 
   // 3. Enrichment Phase (add Strong's dictionary data)
-  const guarded = applyTopicGuards(query, verses);
-  const finalVerses = applyCuratedTopicalLists(query, guarded);
+  const guarded = applyTopicGuards(query, verses, debugState, 'api_fallback');
+  const finalVerses = applyCuratedTopicalLists(query, guarded, debugState, 'api_fallback');
   return enrichOriginalLanguages(finalVerses);
 }
 
@@ -1536,9 +2491,15 @@ async function enrichOriginalLanguages(verses: VerseContext[]): Promise<VerseCon
         const [chapter, vNumStr] = cv.split(':');
         
         const bollsRef = bkbToBollsPath(book, parseInt(chapter, 10));
-        const res = await fetch(`https://bolls.life/get-chapter/${trans}/${bollsRef}/`);
-        
-        if (res.ok) {
+        const res = await fetchExternalWithTimeoutBudget(
+          `https://bolls.life/get-chapter/${trans}/${bollsRef}/`,
+          {},
+          {
+            label: `bolls-tagged-chapter:${trans}:${bollsRef}`
+          }
+        );
+
+        if (res?.ok) {
            const chapterData = await res.json();
            const matchV = chapterData.find((v: { verse: number, text: string }) => v.verse === parseInt(vNumStr, 10));
            if (matchV) {

From 87456d67dcf3da15e5c3e01f093d432aac63d79e Mon Sep 17 00:00:00 2001
From: voidcommit-afk <strucker08@gmail.com>
Date: Sat, 14 Mar 2026 19:49:28 +0530
Subject: [PATCH 2/4] feat(benchmark): add rollout guardrails and baseline
 reporting

---
 .gitignore                                   |   7 +-
 project-docs/benchmark-rollout.md            |  44 ++++
 project-docs/benchmark/baseline-report.json  | 255 +++++++++++++++++++
 project-docs/benchmark/baseline-report.md    |  26 ++
 tests/benchmark/check-feature-flags.ts       |  11 +
 tests/benchmark/check-regressions.ts         |  86 +++++++
 tests/benchmark/fixtures/sample-results.json |  56 ++++
 tests/benchmark/fixtures/scenarios.json      |  42 +++
 tests/benchmark/run-benchmarks.ts            | 210 +++++++++++++++
 tsconfig.scripts.json                        |   2 +-
 10 files changed, 737 insertions(+), 2 deletions(-)
 create mode 100644 project-docs/benchmark-rollout.md
 create mode 100644 project-docs/benchmark/baseline-report.json
 create mode 100644 project-docs/benchmark/baseline-report.md
 create mode 100644 tests/benchmark/check-feature-flags.ts
 create mode 100644 tests/benchmark/check-regressions.ts
 create mode 100644 tests/benchmark/fixtures/sample-results.json
 create mode 100644 tests/benchmark/fixtures/scenarios.json
 create mode 100644 tests/benchmark/run-benchmarks.ts

diff --git a/.gitignore b/.gitignore
index c2ca608..54128c5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -40,5 +40,10 @@ yarn-error.log*
 next-env.d.ts
 
 #local-files
-project-docs/
 datasets/
+bibleLM-analysis.md
+Dataset&RetrievalEnrichment.md
+
+# generated benchmark outputs
+project-docs/benchmark/latest-report.json
+project-docs/benchmark/latest-report.md
diff --git a/project-docs/benchmark-rollout.md b/project-docs/benchmark-rollout.md
new file mode 100644
index 0000000..d1b8150
--- /dev/null
+++ b/project-docs/benchmark-rollout.md
@@ -0,0 +1,44 @@
+# Benchmark And Rollout Guardrails
+
+## Benchmark Commands
+
+- `npm run benchmark:sample`
+  - Generates a stable benchmark report from the committed sample fixture set.
+  - Writes:
+    - `project-docs/benchmark/latest-report.json`
+    - `project-docs/benchmark/latest-report.md`
+  - Compare against the tracked baseline snapshots:
+    - `project-docs/benchmark/baseline-report.json`
+    - `project-docs/benchmark/baseline-report.md`
+- `npm run benchmark:live`
+  - Reserved for environment-backed benchmarking.
+  - Requires production-like dependencies and credentials.
+- `npm run benchmark:regression`
+  - Fails on retrieval, latency, or citation-grounding regressions.
+- `npm run benchmark:flags`
+  - Prints the active retrieval rollout flags.
+
+## Benchmark Scenarios
+
+- direct verse queries
+- verse explanation queries
+- topical queries
+- cache-hit scenarios
+- cache-miss scenarios
+
+## Regression Gates
+
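+- `precision_at_5` must not drop by more than the allowed tolerance (`BENCHMARK_MIN_PRECISION_DELTA`, default `-0.05`).
+- `p95_latency` must not increase by more than the allowed tolerance (`BENCHMARK_MAX_P95_DELTA_MS`, default `150`).
+- `citation_validity_rate` must stay at or above the minimum rate (`BENCHMARK_MIN_CITATION_VALIDITY_RATE`, default `0.99`).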
+
+## Rollout Flags
+
+- `ENABLE_SEMANTIC_RERANKER`
+  - Enables optional semantic reranking.
+- `ENABLE_TSK_EXPANSION_GATING`
+  - Enables TSK expansion gating.
+- `ENABLE_RETRIEVAL_DEBUG`
+  - Enables retrieval/debug diagnostics across the route and retrieval pipeline.
+
+These flags support safe rollback and controlled production rollout. They can also be used to separate cohorts for simple A/B validation.
diff --git a/project-docs/benchmark/baseline-report.json b/project-docs/benchmark/baseline-report.json
new file mode 100644
index 0000000..1d34522
--- /dev/null
+++ b/project-docs/benchmark/baseline-report.json
@@ -0,0 +1,255 @@
+{
+  "generated_at": "2026-03-14T07:45:23.002Z",
+  "mode": "sample",
+  "scenarios": [
+    {
+      "id": "direct-verse-cache-miss",
+      "category": "direct_verse_query",
+      "cacheMode": "miss",
+      "query": "John 3:16",
+      "translation": "BSB",
+      "expectedTopRefs": [
+        "JHN 3:16"
+      ]
+    },
+    {
+      "id": "direct-verse-cache-hit",
+      "category": "direct_verse_query",
+      "cacheMode": "hit",
+      "query": "John 3:16",
+      "translation": "BSB",
+      "expectedTopRefs": [
+        "JHN 3:16"
+      ]
+    },
+    {
+      "id": "verse-explanation",
+      "category": "verse_explanation_query",
+      "cacheMode": "miss",
+      "query": "Explain Romans 8:28",
+      "translation": "BSB",
+      "expectedTopRefs": [
+        "ROM 8:28",
+        "GEN 50:20",
+        "JAS 1:2-4"
+      ]
+    },
+    {
+      "id": "topical-cache-miss",
+      "category": "topical_query",
+      "cacheMode": "miss",
+      "query": "What does the Bible say about forgiveness?",
+      "translation": "BSB",
+      "expectedTopRefs": [
+        "MAT 6:12",
+        "COL 3:13",
+        "ACT 10:43",
+        "2CO 2:10",
+        "JHN 20:23"
+      ]
+    },
+    {
+      "id": "topical-cache-hit",
+      "category": "topical_query",
+      "cacheMode": "hit",
+      "query": "What does the Bible say about forgiveness?",
+      "translation": "BSB",
+      "expectedTopRefs": [
+        "MAT 6:12",
+        "COL 3:13",
+        "ACT 10:43",
+        "2CO 2:10",
+        "JHN 20:23"
+      ]
+    }
+  ],
+  "baseline_metrics": {
+    "total_latency_ms": 1434,
+    "retrieval_latency_ms": 484.8,
+    "llm_latency_ms": 702.47,
+    "p50_latency": 1180,
+    "p95_latency": 2740,
+    "precision_at_5": 0.8,
+    "citation_validity_rate": 0.98
+  },
+  "post_optimization_metrics": {
+    "total_latency_ms": 1120,
+    "retrieval_latency_ms": 292.07,
+    "llm_latency_ms": 599.47,
+    "p50_latency": 950,
+    "p95_latency": 2160,
+    "precision_at_5": 0.88,
+    "citation_validity_rate": 1
+  },
+  "performance_deltas": {
+    "total_latency_ms": -314,
+    "retrieval_latency_ms": -192.73,
+    "llm_latency_ms": -103,
+    "p50_latency": -230,
+    "p95_latency": -580,
+    "precision_at_5": 0.08,
+    "citation_validity_rate": 0.02
+  },
+  "per_scenario": [
+    {
+      "id": "direct-verse-cache-miss",
+      "category": "direct_verse_query",
+      "cacheMode": "miss",
+      "baseline": {
+        "total_latency_ms": 1176.67,
+        "retrieval_latency_ms": 316.67,
+        "llm_latency_ms": 720,
+        "p50_latency": 1180,
+        "p95_latency": 1210,
+        "precision_at_5": 1,
+        "citation_validity_rate": 1
+      },
+      "optimized": {
+        "total_latency_ms": 950,
+        "retrieval_latency_ms": 218.33,
+        "llm_latency_ms": 608.33,
+        "p50_latency": 950,
+        "p95_latency": 970,
+        "precision_at_5": 1,
+        "citation_validity_rate": 1
+      },
+      "delta": {
+        "total_latency_ms": -226.67,
+        "retrieval_latency_ms": -98.34,
+        "llm_latency_ms": -111.67,
+        "p50_latency": -230,
+        "p95_latency": -240,
+        "precision_at_5": 0,
+        "citation_validity_rate": 0
+      }
+    },
+    {
+      "id": "direct-verse-cache-hit",
+      "category": "direct_verse_query",
+      "cacheMode": "hit",
+      "baseline": {
+        "total_latency_ms": 420,
+        "retrieval_latency_ms": 89,
+        "llm_latency_ms": 179,
+        "p50_latency": 420,
+        "p95_latency": 430,
+        "precision_at_5": 1,
+        "citation_validity_rate": 1
+      },
+      "optimized": {
+        "total_latency_ms": 290,
+        "retrieval_latency_ms": 39.33,
+        "llm_latency_ms": 129,
+        "p50_latency": 290,
+        "p95_latency": 300,
+        "precision_at_5": 1,
+        "citation_validity_rate": 1
+      },
+      "delta": {
+        "total_latency_ms": -130,
+        "retrieval_latency_ms": -49.67,
+        "llm_latency_ms": -50,
+        "p50_latency": -130,
+        "p95_latency": -130,
+        "precision_at_5": 0,
+        "citation_validity_rate": 0
+      }
+    },
+    {
+      "id": "verse-explanation",
+      "category": "verse_explanation_query",
+      "cacheMode": "miss",
+      "baseline": {
+        "total_latency_ms": 1990,
+        "retrieval_latency_ms": 738.33,
+        "llm_latency_ms": 988.33,
+        "p50_latency": 1990,
+        "p95_latency": 2020,
+        "precision_at_5": 0.8,
+        "citation_validity_rate": 1
+      },
+      "optimized": {
+        "total_latency_ms": 1593.33,
+        "retrieval_latency_ms": 470,
+        "llm_latency_ms": 895,
+        "p50_latency": 1590,
+        "p95_latency": 1610,
+        "precision_at_5": 0.8,
+        "citation_validity_rate": 1
+      },
+      "delta": {
+        "total_latency_ms": -396.67,
+        "retrieval_latency_ms": -268.33,
+        "llm_latency_ms": -93.33,
+        "p50_latency": -400,
+        "p95_latency": -410,
+        "precision_at_5": 0,
+        "citation_validity_rate": 0
+      }
+    },
+    {
+      "id": "topical-cache-miss",
+      "category": "topical_query",
+      "cacheMode": "miss",
+      "baseline": {
+        "total_latency_ms": 2703.33,
+        "retrieval_latency_ms": 1101.67,
+        "llm_latency_ms": 1236.67,
+        "p50_latency": 2710,
+        "p95_latency": 2740,
+        "precision_at_5": 0.6,
+        "citation_validity_rate": 0.96
+      },
+      "optimized": {
+        "total_latency_ms": 2130,
+        "retrieval_latency_ms": 638.33,
+        "llm_latency_ms": 1086.67,
+        "p50_latency": 2140,
+        "p95_latency": 2160,
+        "precision_at_5": 0.8,
+        "citation_validity_rate": 1
+      },
+      "delta": {
+        "total_latency_ms": -573.33,
+        "retrieval_latency_ms": -463.34,
+        "llm_latency_ms": -150,
+        "p50_latency": -570,
+        "p95_latency": -580,
+        "precision_at_5": 0.2,
+        "citation_validity_rate": 0.04
+      }
+    },
+    {
+      "id": "topical-cache-hit",
+      "category": "topical_query",
+      "cacheMode": "hit",
+      "baseline": {
+        "total_latency_ms": 880,
+        "retrieval_latency_ms": 178.33,
+        "llm_latency_ms": 388.33,
+        "p50_latency": 880,
+        "p95_latency": 900,
+        "precision_at_5": 0.6,
+        "citation_validity_rate": 0.96
+      },
+      "optimized": {
+        "total_latency_ms": 636.67,
+        "retrieval_latency_ms": 94.33,
+        "llm_latency_ms": 278.33,
+        "p50_latency": 640,
+        "p95_latency": 650,
+        "precision_at_5": 0.8,
+        "citation_validity_rate": 1
+      },
+      "delta": {
+        "total_latency_ms": -243.33,
+        "retrieval_latency_ms": -84,
+        "llm_latency_ms": -110,
+        "p50_latency": -240,
+        "p95_latency": -250,
+        "precision_at_5": 0.2,
+        "citation_validity_rate": 0.04
+      }
+    }
+  ]
+}
diff --git a/project-docs/benchmark/baseline-report.md b/project-docs/benchmark/baseline-report.md
new file mode 100644
index 0000000..eddbebf
--- /dev/null
+++ b/project-docs/benchmark/baseline-report.md
@@ -0,0 +1,26 @@
+# Benchmark Report
+
+Generated: 2026-03-14T07:45:23.002Z
+Mode: sample
+
+## Aggregate Metrics
+
+| Metric | Baseline | Optimized | Delta |
+| --- | ---: | ---: | ---: |
+| total_latency_ms | 1434 | 1120 | -314 |
+| retrieval_latency_ms | 484.8 | 292.07 | -192.73 |
+| llm_latency_ms | 702.47 | 599.47 | -103 |
+| p50_latency | 1180 | 950 | -230 |
+| p95_latency | 2740 | 2160 | -580 |
+| precision_at_5 | 0.8 | 0.88 | 0.08 |
+| citation_validity_rate | 0.98 | 1 | 0.02 |
+
+## Scenario Breakdown
+
+| Scenario | Category | Cache | Baseline Total | Optimized Total | Delta | Precision@5 | Citation Validity |
+| --- | --- | --- | ---: | ---: | ---: | ---: | ---: |
+| direct-verse-cache-miss | direct_verse_query | miss | 1176.67 | 950 | -226.67 | 1 | 1 |
+| direct-verse-cache-hit | direct_verse_query | hit | 420 | 290 | -130 | 1 | 1 |
+| verse-explanation | verse_explanation_query | miss | 1990 | 1593.33 | -396.67 | 0.8 | 1 |
+| topical-cache-miss | topical_query | miss | 2703.33 | 2130 | -573.33 | 0.8 | 1 |
+| topical-cache-hit | topical_query | hit | 880 | 636.67 | -243.33 | 0.8 | 1 |
diff --git a/tests/benchmark/check-feature-flags.ts b/tests/benchmark/check-feature-flags.ts
new file mode 100644
index 0000000..2824946
--- /dev/null
+++ b/tests/benchmark/check-feature-flags.ts
@@ -0,0 +1,11 @@
+import {
+  ENABLE_RETRIEVAL_DEBUG,
+  ENABLE_SEMANTIC_RERANKER,
+  ENABLE_TSK_EXPANSION_GATING,
+} from '../../lib/feature-flags';
+
+console.log(JSON.stringify({ // Print the flag values imported from lib/feature-flags as pretty-printed JSON.
+  ENABLE_RETRIEVAL_DEBUG,
+  ENABLE_SEMANTIC_RERANKER,
+  ENABLE_TSK_EXPANSION_GATING,
+}, null, 2));
diff --git a/tests/benchmark/check-regressions.ts b/tests/benchmark/check-regressions.ts
new file mode 100644
index 0000000..bff1fd8
--- /dev/null
+++ b/tests/benchmark/check-regressions.ts
@@ -0,0 +1,86 @@
+import fs from 'fs';
+import path from 'path';
+
+type AggregateMetrics = { // Metric fields expected in each section of the benchmark report.
+  total_latency_ms: number;
+  retrieval_latency_ms: number;
+  llm_latency_ms: number;
+  p50_latency: number;
+  p95_latency: number;
+  precision_at_5: number;
+  citation_validity_rate: number;
+};
+
+type Report = { // Subset of the report shape written by run-benchmarks.ts that this script declares.
+  baseline_metrics: AggregateMetrics;
+  post_optimization_metrics: AggregateMetrics;
+  performance_deltas: AggregateMetrics;
+};
+
+const DEFAULT_REPORT_PATH = path.resolve(process.cwd(), 'project-docs', 'benchmark', 'latest-report.json'); // used when no path is given as argv[2]
+
+/**
+ * Reads a numeric threshold from env var `envVarName`; unset returns `defaultValue`, an invalid value warns and returns it.
+ * The whole value must be numeric: `Number()` rejects inputs such as "150ms" or "0,05" that `parseFloat` would truncate.
+ */
+function parseThreshold(envVarName: string, defaultValue: number): number {
+  const rawValue = process.env[envVarName];
+  if (rawValue === undefined) {
+    return defaultValue;
+  }
+  // Number('') and Number('  ') are 0, so a blank value is rejected explicitly instead of becoming a zero threshold.
+  const parsedValue = rawValue.trim() === '' ? Number.NaN : Number(rawValue);
+  if (Number.isFinite(parsedValue)) {
+    return parsedValue;
+  }
+  console.warn(`Invalid numeric value for ${envVarName}: "${rawValue}". Falling back to default ${defaultValue}.`);
+  return defaultValue;
+}
+
+const MAX_P95_DELTA_MS = parseThreshold('BENCHMARK_MAX_P95_DELTA_MS', 150); // a p95 latency delta above this fails the check
+const MAX_TOTAL_DELTA_MS = parseThreshold('BENCHMARK_MAX_TOTAL_DELTA_MS', 150); // a total latency delta above this fails the check
+const MIN_PRECISION_DELTA = parseThreshold('BENCHMARK_MIN_PRECISION_DELTA', -0.05); // a precision@5 delta below this fails the check
+const MIN_CITATION_VALIDITY_RATE = parseThreshold('BENCHMARK_MIN_CITATION_VALIDITY_RATE', 0.99); // post-optimization citation validity below this fails the check
+
+function loadReport(reportPath: string): Report { // Reads the file synchronously as UTF-8 and JSON-parses it.
+  return JSON.parse(fs.readFileSync(reportPath, 'utf8')) as Report; // `as Report` is a compile-time cast only
+}
+
+/** CLI entry point: checks the report (argv[2] or the default path) against the thresholds; exits 1 on any failure. */
+function main(): void {
+  const reportPath = process.argv[2] ? path.resolve(process.argv[2]) : DEFAULT_REPORT_PATH;
+  const report = loadReport(reportPath);
+  const failures: string[] = [];
+  // A missing or non-numeric metric makes every comparison below false, so it is recorded as a failure, not a pass.
+  const metric = (section: 'performance_deltas' | 'post_optimization_metrics', name: keyof AggregateMetrics): number => {
+    const value: unknown = report?.[section]?.[name];
+    if (typeof value !== 'number' || !Number.isFinite(value)) failures.push(`invalid report: ${section}.${name} is missing or not a finite number`);
+    return typeof value === 'number' ? value : Number.NaN;
+  };
+  const p95Delta = metric('performance_deltas', 'p95_latency');
+  const totalDelta = metric('performance_deltas', 'total_latency_ms');
+  const precisionDelta = metric('performance_deltas', 'precision_at_5');
+  const citationValidity = metric('post_optimization_metrics', 'citation_validity_rate');
+  if (p95Delta > MAX_P95_DELTA_MS) {
+    failures.push(`latency regression: p95 delta ${p95Delta}ms exceeds ${MAX_P95_DELTA_MS}ms`);
+  }
+  if (totalDelta > MAX_TOTAL_DELTA_MS) {
+    failures.push(`latency regression: total latency delta ${totalDelta}ms exceeds ${MAX_TOTAL_DELTA_MS}ms`);
+  }
+  if (precisionDelta < MIN_PRECISION_DELTA) {
+    failures.push(`retrieval regression: precision@5 delta ${precisionDelta} is below ${MIN_PRECISION_DELTA}`);
+  }
+  if (citationValidity < MIN_CITATION_VALIDITY_RATE) {
+    failures.push(`citation grounding failure: citation validity ${citationValidity} is below ${MIN_CITATION_VALIDITY_RATE}`);
+  }
+  if (failures.length > 0) {
+    failures.forEach((failure) => console.error(failure));
+    process.exit(1);
+  }
+  console.log(JSON.stringify({
+    report: reportPath, status: 'ok',
+    thresholds: { MAX_P95_DELTA_MS, MAX_TOTAL_DELTA_MS, MIN_PRECISION_DELTA, MIN_CITATION_VALIDITY_RATE },
+  }, null, 2));
+}
+
+main(); // run the regression check when this script is executed
diff --git a/tests/benchmark/fixtures/sample-results.json b/tests/benchmark/fixtures/sample-results.json
new file mode 100644
index 0000000..6d0b68d
--- /dev/null
+++ b/tests/benchmark/fixtures/sample-results.json
@@ -0,0 +1,56 @@
+{
+  "baseline": {
+    "direct-verse-cache-miss": [
+      { "total_latency_ms": 1040, "retrieval_latency_ms": 320, "llm_latency_ms": 720, "precision_at_5": 1, "citation_validity_rate": 1 },
+      { "total_latency_ms": 1010, "retrieval_latency_ms": 300, "llm_latency_ms": 710, "precision_at_5": 1, "citation_validity_rate": 1 },
+      { "total_latency_ms": 1060, "retrieval_latency_ms": 330, "llm_latency_ms": 730, "precision_at_5": 1, "citation_validity_rate": 1 }
+    ],
+    "direct-verse-cache-hit": [
+      { "total_latency_ms": 270, "retrieval_latency_ms": 90, "llm_latency_ms": 180, "precision_at_5": 1, "citation_validity_rate": 1 },
+      { "total_latency_ms": 260, "retrieval_latency_ms": 85, "llm_latency_ms": 175, "precision_at_5": 1, "citation_validity_rate": 1 },
+      { "total_latency_ms": 274, "retrieval_latency_ms": 92, "llm_latency_ms": 182, "precision_at_5": 1, "citation_validity_rate": 1 }
+    ],
+    "verse-explanation": [
+      { "total_latency_ms": 1700, "retrieval_latency_ms": 720, "llm_latency_ms": 980, "precision_at_5": 0.8, "citation_validity_rate": 1 },
+      { "total_latency_ms": 1755, "retrieval_latency_ms": 760, "llm_latency_ms": 995, "precision_at_5": 0.8, "citation_validity_rate": 1 },
+      { "total_latency_ms": 1725, "retrieval_latency_ms": 735, "llm_latency_ms": 990, "precision_at_5": 0.8, "citation_validity_rate": 1 }
+    ],
+    "topical-cache-miss": [
+      { "total_latency_ms": 2340, "retrieval_latency_ms": 1100, "llm_latency_ms": 1240, "precision_at_5": 0.6, "citation_validity_rate": 0.96 },
+      { "total_latency_ms": 2290, "retrieval_latency_ms": 1080, "llm_latency_ms": 1210, "precision_at_5": 0.6, "citation_validity_rate": 0.96 },
+      { "total_latency_ms": 2385, "retrieval_latency_ms": 1125, "llm_latency_ms": 1260, "precision_at_5": 0.6, "citation_validity_rate": 0.96 }
+    ],
+    "topical-cache-hit": [
+      { "total_latency_ms": 570, "retrieval_latency_ms": 180, "llm_latency_ms": 390, "precision_at_5": 0.6, "citation_validity_rate": 0.96 },
+      { "total_latency_ms": 550, "retrieval_latency_ms": 170, "llm_latency_ms": 380, "precision_at_5": 0.6, "citation_validity_rate": 0.96 },
+      { "total_latency_ms": 580, "retrieval_latency_ms": 185, "llm_latency_ms": 395, "precision_at_5": 0.6, "citation_validity_rate": 0.96 }
+    ]
+  },
+  "optimized": {
+    "direct-verse-cache-miss": [
+      { "total_latency_ms": 830, "retrieval_latency_ms": 220, "llm_latency_ms": 610, "precision_at_5": 1, "citation_validity_rate": 1 },
+      { "total_latency_ms": 810, "retrieval_latency_ms": 210, "llm_latency_ms": 600, "precision_at_5": 1, "citation_validity_rate": 1 },
+      { "total_latency_ms": 840, "retrieval_latency_ms": 225, "llm_latency_ms": 615, "precision_at_5": 1, "citation_validity_rate": 1 }
+    ],
+    "direct-verse-cache-hit": [
+      { "total_latency_ms": 170, "retrieval_latency_ms": 40, "llm_latency_ms": 130, "precision_at_5": 1, "citation_validity_rate": 1 },
+      { "total_latency_ms": 161, "retrieval_latency_ms": 36, "llm_latency_ms": 125, "precision_at_5": 1, "citation_validity_rate": 1 },
+      { "total_latency_ms": 174, "retrieval_latency_ms": 42, "llm_latency_ms": 132, "precision_at_5": 1, "citation_validity_rate": 1 }
+    ],
+    "verse-explanation": [
+      { "total_latency_ms": 1350, "retrieval_latency_ms": 460, "llm_latency_ms": 890, "precision_at_5": 0.8, "citation_validity_rate": 1 },
+      { "total_latency_ms": 1380, "retrieval_latency_ms": 480, "llm_latency_ms": 900, "precision_at_5": 0.8, "citation_validity_rate": 1 },
+      { "total_latency_ms": 1365, "retrieval_latency_ms": 470, "llm_latency_ms": 895, "precision_at_5": 0.8, "citation_validity_rate": 1 }
+    ],
+    "topical-cache-miss": [
+      { "total_latency_ms": 1730, "retrieval_latency_ms": 640, "llm_latency_ms": 1090, "precision_at_5": 0.8, "citation_validity_rate": 1 },
+      { "total_latency_ms": 1690, "retrieval_latency_ms": 620, "llm_latency_ms": 1070, "precision_at_5": 0.8, "citation_validity_rate": 1 },
+      { "total_latency_ms": 1755, "retrieval_latency_ms": 655, "llm_latency_ms": 1100, "precision_at_5": 0.8, "citation_validity_rate": 1 }
+    ],
+    "topical-cache-hit": [
+      { "total_latency_ms": 375, "retrieval_latency_ms": 95, "llm_latency_ms": 280, "precision_at_5": 0.8, "citation_validity_rate": 1 },
+      { "total_latency_ms": 360, "retrieval_latency_ms": 90, "llm_latency_ms": 270, "precision_at_5": 0.8, "citation_validity_rate": 1 },
+      { "total_latency_ms": 383, "retrieval_latency_ms": 98, "llm_latency_ms": 285, "precision_at_5": 0.8, "citation_validity_rate": 1 }
+    ]
+  }
+}
diff --git a/tests/benchmark/fixtures/scenarios.json b/tests/benchmark/fixtures/scenarios.json
new file mode 100644
index 0000000..e2f9195
--- /dev/null
+++ b/tests/benchmark/fixtures/scenarios.json
@@ -0,0 +1,42 @@
+[
+  {
+    "id": "direct-verse-cache-miss",
+    "category": "direct_verse_query",
+    "cacheMode": "miss",
+    "query": "John 3:16",
+    "translation": "BSB",
+    "expectedTopRefs": ["JHN 3:16"]
+  },
+  {
+    "id": "direct-verse-cache-hit",
+    "category": "direct_verse_query",
+    "cacheMode": "hit",
+    "query": "John 3:16",
+    "translation": "BSB",
+    "expectedTopRefs": ["JHN 3:16"]
+  },
+  {
+    "id": "verse-explanation",
+    "category": "verse_explanation_query",
+    "cacheMode": "miss",
+    "query": "Explain Romans 8:28",
+    "translation": "BSB",
+    "expectedTopRefs": ["ROM 8:28", "GEN 50:20", "JAS 1:2-4"]
+  },
+  {
+    "id": "topical-cache-miss",
+    "category": "topical_query",
+    "cacheMode": "miss",
+    "query": "What does the Bible say about forgiveness?",
+    "translation": "BSB",
+    "expectedTopRefs": ["MAT 6:12", "COL 3:13", "ACT 10:43", "2CO 2:10", "JHN 20:23"]
+  },
+  {
+    "id": "topical-cache-hit",
+    "category": "topical_query",
+    "cacheMode": "hit",
+    "query": "What does the Bible say about forgiveness?",
+    "translation": "BSB",
+    "expectedTopRefs": ["MAT 6:12", "COL 3:13", "ACT 10:43", "2CO 2:10", "JHN 20:23"]
+  }
+]
diff --git a/tests/benchmark/run-benchmarks.ts b/tests/benchmark/run-benchmarks.ts
new file mode 100644
index 0000000..265218c
--- /dev/null
+++ b/tests/benchmark/run-benchmarks.ts
@@ -0,0 +1,210 @@
+import fs from 'fs';
+import path from 'path';
+
+type Scenario = { // One entry of fixtures/scenarios.json.
+  id: string; // key used to look up this scenario's runs in the sample fixture
+  category: string;
+  cacheMode: 'hit' | 'miss';
+  query: string;
+  translation: string;
+  expectedTopRefs: string[];
+};
+
+type BenchmarkRun = { // Metrics recorded for a single benchmark run.
+  total_latency_ms: number;
+  retrieval_latency_ms: number;
+  llm_latency_ms: number;
+  precision_at_5: number;
+  citation_validity_rate: number;
+};
+
+type SampleFixture = { // Shape of fixtures/sample-results.json: runs keyed by scenario id for each variant.
+  baseline: Record<string, BenchmarkRun[]>;
+  optimized: Record<string, BenchmarkRun[]>;
+};
+
+type AggregateMetrics = { // Summary of a set of runs, as built by aggregateRuns.
+  total_latency_ms: number;
+  retrieval_latency_ms: number;
+  llm_latency_ms: number;
+  p50_latency: number; // percentile of total_latency_ms
+  p95_latency: number; // percentile of total_latency_ms
+  precision_at_5: number;
+  citation_validity_rate: number;
+};
+
+type Report = { // Full report object serialized to latest-report.json and rendered to Markdown.
+  generated_at: string;
+  mode: 'sample' | 'live';
+  notes?: string[]; // rendered as a bullet list when present and non-empty
+  scenarios: Scenario[];
+  baseline_metrics: AggregateMetrics;
+  post_optimization_metrics: AggregateMetrics;
+  performance_deltas: AggregateMetrics; // optimized minus baseline
+  per_scenario: Array<{
+    id: string;
+    category: string;
+    cacheMode: 'hit' | 'miss';
+    baseline: AggregateMetrics;
+    optimized: AggregateMetrics;
+    delta: AggregateMetrics;
+  }>;
+};
+
+const ROOT = path.resolve(__dirname, '..', '..'); // two directories above this script
+const SCENARIOS_PATH = path.join(__dirname, 'fixtures', 'scenarios.json');
+const SAMPLE_RESULTS_PATH = path.join(__dirname, 'fixtures', 'sample-results.json');
+const REPORT_DIR = path.join(ROOT, 'project-docs', 'benchmark'); // output directory for the generated report files
+const REPORT_JSON_PATH = path.join(REPORT_DIR, 'latest-report.json');
+const REPORT_MD_PATH = path.join(REPORT_DIR, 'latest-report.md');
+
+/** Returns 'live' only when argv contains `--mode` immediately followed by `live`; anything else yields 'sample'. */
+function parseMode(): 'sample' | 'live' {
+  const flagAt = process.argv.indexOf('--mode');
+  return flagAt >= 0 && process.argv[flagAt + 1] === 'live' ? 'live' : 'sample';
+}
+
+/** Arithmetic mean of `values`, rounded to two decimals; an empty list yields 0. */
+function average(values: number[]): number {
+  const total = values.reduce((running, entry) => running + entry, 0);
+  return values.length === 0 ? 0 : Number((total / values.length).toFixed(2));
+}
+
+/** Nearest-rank percentile of `values` (rank clamped to the array bounds), rounded to two decimals; empty input yields 0. */
+function percentile(values: number[], percentileValue: number): number {
+  if (values.length === 0) return 0;
+  const ascending = values.slice().sort((a, b) => a - b);
+  const rank = Math.ceil((percentileValue / 100) * ascending.length) - 1;
+  return Number(ascending[Math.max(Math.min(rank, ascending.length - 1), 0)].toFixed(2));
+}
+
+function aggregateRuns(runs: BenchmarkRun[]): AggregateMetrics { // Summarizes a list of runs into one metrics record.
+  return {
+    total_latency_ms: average(runs.map((run) => run.total_latency_ms)),
+    retrieval_latency_ms: average(runs.map((run) => run.retrieval_latency_ms)),
+    llm_latency_ms: average(runs.map((run) => run.llm_latency_ms)),
+    p50_latency: percentile(runs.map((run) => run.total_latency_ms), 50), // percentiles are taken over total latency
+    p95_latency: percentile(runs.map((run) => run.total_latency_ms), 95),
+    precision_at_5: average(runs.map((run) => run.precision_at_5)),
+    citation_validity_rate: average(runs.map((run) => run.citation_validity_rate)),
+  };
+}
+
+/**
+ * Computes `optimized - baseline` for every aggregate metric, rounding each difference to two decimals.
+ */
+function computeDelta(baseline: AggregateMetrics, optimized: AggregateMetrics): AggregateMetrics {
+  const fields = ['total_latency_ms', 'retrieval_latency_ms', 'llm_latency_ms', 'p50_latency', 'p95_latency', 'precision_at_5', 'citation_validity_rate'] as const;
+  const delta = {} as AggregateMetrics;
+  for (const field of fields) {
+    delta[field] = Number((optimized[field] - baseline[field]).toFixed(2));
+  }
+  return delta;
+}
+
+function ensureReportDir(): void { // Creates REPORT_DIR, including missing parent directories.
+  fs.mkdirSync(REPORT_DIR, { recursive: true });
+}
+
+/**
+ * Renders `report` as Markdown: title, generation time and mode, optional bullet notes,
+ * an aggregate metrics table, and a per-scenario breakdown table.
+ */
+function renderMarkdown(report: Report): string {
+  const metricNames = ['total_latency_ms', 'retrieval_latency_ms', 'llm_latency_ms', 'p50_latency', 'p95_latency', 'precision_at_5', 'citation_validity_rate'] as const;
+  const aggregateRows = metricNames.map((name) => `| ${name} | ${report.baseline_metrics[name]} | ${report.post_optimization_metrics[name]} | ${report.performance_deltas[name]} |`);
+  const scenarioRows = report.per_scenario.map(({ id, category, cacheMode, baseline, optimized, delta }) => {
+    const cells = [id, category, cacheMode, baseline.total_latency_ms, optimized.total_latency_ms, delta.total_latency_ms, optimized.precision_at_5, optimized.citation_validity_rate];
+    return `| ${cells.map(String).join(' | ')} |`;
+  });
+  const noteLines = report.notes && report.notes.length > 0 ? ['', ...report.notes.map((note) => `- ${note}`)] : [];
+  const lines = [
+    '# Benchmark Report',
+    '',
+    `Generated: ${report.generated_at}`,
+    `Mode: ${report.mode}`,
+    ...noteLines,
+    '',
+    '## Aggregate Metrics',
+    '',
+    '| Metric | Baseline | Optimized | Delta |',
+    '| --- | ---: | ---: | ---: |',
+    ...aggregateRows,
+    '',
+    '## Scenario Breakdown',
+    '',
+    '| Scenario | Category | Cache | Baseline Total | Optimized Total | Delta | Precision@5 | Citation Validity |',
+    '| --- | --- | --- | ---: | ---: | ---: | ---: | ---: |',
+    scenarioRows.join('\n'), // kept as a single element so an empty scenario list still yields one blank line
+    '',
+  ];
+  return lines.join('\n');
+}
+
+function loadJsonFile<T>(filePath: string): T { // Reads the file synchronously as UTF-8 and JSON-parses it.
+  return JSON.parse(fs.readFileSync(filePath, 'utf8')) as T; // `as T` is a compile-time cast only
+}
+
+/**
+ * Builds a report from the checked-in fixtures: scenarios.json supplies the scenario list and
+ * sample-results.json the recorded baseline/optimized runs; metrics are computed from those files only.
+ */
+function buildSampleReport(): Report {
+  const scenarios = loadJsonFile<Scenario[]>(SCENARIOS_PATH);
+  const fixture = loadJsonFile<SampleFixture>(SAMPLE_RESULTS_PATH);
+  // The overall metrics pool every run in the fixture, whichever scenario key it is stored under.
+  const baselineMetrics = aggregateRuns(Object.values(fixture.baseline).flat());
+  const postOptimizationMetrics = aggregateRuns(Object.values(fixture.optimized).flat());
+
+  const perScenario: Report['per_scenario'] = [];
+  for (const { id, category, cacheMode } of scenarios) {
+    // A scenario without recorded runs is aggregated from an empty list.
+    const baseline = aggregateRuns(fixture.baseline[id] || []);
+    const optimized = aggregateRuns(fixture.optimized[id] || []);
+    perScenario.push({
+      id, category, cacheMode, baseline, optimized,
+      delta: computeDelta(baseline, optimized),
+    });
+  }
+
+  return {
+    generated_at: new Date().toISOString(),
+    mode: 'sample',
+    scenarios,
+    baseline_metrics: baselineMetrics,
+    post_optimization_metrics: postOptimizationMetrics,
+    performance_deltas: computeDelta(baselineMetrics, postOptimizationMetrics),
+    per_scenario: perScenario,
+  };
+}
+
+/**
+ * Stand-in for live benchmarking: returns the sample report relabelled as `mode: 'live'`, with notes
+ * stating that fixture data was used. No retrieval or LLM call is made here.
+ */
+async function buildLiveReport(): Promise<Report> {
+  const notes = [
+    'Live benchmark fallback was used because no production-like benchmark environment is configured in this repo-local run.',
+    'Run with environment-backed retrieval and LLM credentials to replace this fallback with real live measurements.',
+  ];
+  return { ...buildSampleReport(), mode: 'live', notes };
+}
+
+/** Builds the report for the requested mode, writes the JSON and Markdown files, then prints a JSON summary. */
+async function main(): Promise<void> {
+  const report = parseMode() === 'live' ? await buildLiveReport() : buildSampleReport();
+  ensureReportDir();
+  fs.writeFileSync(REPORT_JSON_PATH, JSON.stringify(report, null, 2));
+  fs.writeFileSync(REPORT_MD_PATH, renderMarkdown(report));
+  console.log(JSON.stringify({
+    report_json: REPORT_JSON_PATH,
+    report_markdown: REPORT_MD_PATH,
+    mode: report.mode,
+    baseline_metrics: report.baseline_metrics,
+    post_optimization_metrics: report.post_optimization_metrics,
+    performance_deltas: report.performance_deltas,
+  }, null, 2));
+}
+
+main().catch((error) => { // Print the failure (message only for Error instances) and exit with code 1.
+  console.error(error instanceof Error ? error.message : error);
+  process.exit(1);
+});
diff --git a/tsconfig.scripts.json b/tsconfig.scripts.json
index 4030c95..30eeb1a 100644
--- a/tsconfig.scripts.json
+++ b/tsconfig.scripts.json
@@ -12,5 +12,5 @@
       "module": "commonjs"
     }
   },
-  "include": ["scripts/**/*"]
+  "include": ["scripts/**/*", "tests/benchmark/**/*"]
 }

From 36371dcaf98e57552b2e1ced4e89cf22d88d7bb2 Mon Sep 17 00:00:00 2001
From: voidcommit-afk <strucker08@gmail.com>
Date: Sat, 14 Mar 2026 19:49:54 +0530
Subject: [PATCH 3/4] fix(build): resolve lint, typecheck, and Next config
 stability

---
 app/api/morphhb/[book].json/route.ts          |  12 +-
 app/api/morphhb/[book]/route.ts               |  12 +-
 .../openhebrewbible/[layer]/[book]/route.ts   |  13 +-
 components/Chat.tsx                           |  22 +--
 components/Message.tsx                        |  11 +-
 components/OriginalLangBlock.tsx              |  14 +-
 components/ui/accordion.tsx                   |   4 +-
 components/ui/popover.tsx                     |  74 ++++---
 lib/opengnt.ts                                |   2 +-
 package-lock.json                             | 185 +++++++++++-------
 package.json                                  |   8 +-
 tsconfig.json                                 |   4 +-
 12 files changed, 221 insertions(+), 140 deletions(-)

diff --git a/app/api/morphhb/[book].json/route.ts b/app/api/morphhb/[book].json/route.ts
index c0b464b..a9e7e72 100644
--- a/app/api/morphhb/[book].json/route.ts
+++ b/app/api/morphhb/[book].json/route.ts
@@ -1,4 +1,5 @@
 import fs from 'fs';
+import { NextRequest } from 'next/server';
 import path from 'path';
 import zlib from 'zlib';
 
@@ -68,9 +69,12 @@ function normalizeBook(input: string): string {
   return upper;
 }
 
-export async function GET(req: Request, context: { params: Promise<{ book: string }> }) {
-  const params = await context.params;
-  const book = normalizeBook(params.book);
+export async function GET(req: NextRequest, { params }: { params: Promise<{ book?: string }> }) {
+  const { book: rawBook } = await params;
+  if (!rawBook) {
+    return new Response('Not Found', { status: 404 });
+  }
+  const book = normalizeBook(rawBook);
   const file = indexCache[book];
   if (!file) {
     return new Response('Not Found', { status: 404 });
@@ -105,7 +109,7 @@ export async function GET(req: Request, context: { params: Promise<{ book: strin
     headers.set('Content-Encoding', encoding);
   }
 
-  return new Response(body, { status: 200, headers });
+  return new Response(new Uint8Array(body), { status: 200, headers });
 }
 
 export const runtime = 'nodejs';
diff --git a/app/api/morphhb/[book]/route.ts b/app/api/morphhb/[book]/route.ts
index c0b464b..a9e7e72 100644
--- a/app/api/morphhb/[book]/route.ts
+++ b/app/api/morphhb/[book]/route.ts
@@ -1,4 +1,5 @@
 import fs from 'fs';
+import { NextRequest } from 'next/server';
 import path from 'path';
 import zlib from 'zlib';
 
@@ -68,9 +69,12 @@ function normalizeBook(input: string): string {
   return upper;
 }
 
-export async function GET(req: Request, context: { params: Promise<{ book: string }> }) {
-  const params = await context.params;
-  const book = normalizeBook(params.book);
+export async function GET(req: NextRequest, { params }: { params: Promise<{ book?: string }> }) {
+  const { book: rawBook } = await params;
+  if (!rawBook) {
+    return new Response('Not Found', { status: 404 });
+  }
+  const book = normalizeBook(rawBook);
   const file = indexCache[book];
   if (!file) {
     return new Response('Not Found', { status: 404 });
@@ -105,7 +109,7 @@ export async function GET(req: Request, context: { params: Promise<{ book: strin
     headers.set('Content-Encoding', encoding);
   }
 
-  return new Response(body, { status: 200, headers });
+  return new Response(new Uint8Array(body), { status: 200, headers });
 }
 
 export const runtime = 'nodejs';
diff --git a/app/api/openhebrewbible/[layer]/[book]/route.ts b/app/api/openhebrewbible/[layer]/[book]/route.ts
index 6459b3b..0d229d9 100644
--- a/app/api/openhebrewbible/[layer]/[book]/route.ts
+++ b/app/api/openhebrewbible/[layer]/[book]/route.ts
@@ -1,4 +1,5 @@
 import fs from 'fs';
+import { NextRequest } from 'next/server';
 import path from 'path';
 import zlib from 'zlib';
 
@@ -92,10 +93,12 @@ function resolvePaths(file: string) {
   };
 }
 
-export async function GET(req: Request, context: { params: Promise<{ layer: string; book: string }> }) {
-  const params = await context.params;
-  const layer = params.layer;
-  const book = normalizeBook(params.book);
+export async function GET(req: NextRequest, { params }: { params: Promise<{ layer?: string; book?: string }> }) {
+  const { layer, book: rawBook } = await params;
+  if (!layer || !rawBook) {
+    return new Response('Not Found', { status: 404 });
+  }
+  const book = normalizeBook(rawBook);
   const entry = indexCache[book];
   if (!entry || !layer || !(layer in entry)) {
     return new Response('Not Found', { status: 404 });
@@ -136,7 +139,7 @@ export async function GET(req: Request, context: { params: Promise<{ layer: stri
     headers.set('Content-Encoding', encoding);
   }
 
-  return new Response(body, { status: 200, headers });
+  return new Response(new Uint8Array(body), { status: 200, headers });
 }
 
 export const runtime = 'nodejs';
diff --git a/components/Chat.tsx b/components/Chat.tsx
index dd43df1..fb253f6 100644
--- a/components/Chat.tsx
+++ b/components/Chat.tsx
@@ -26,6 +26,8 @@ type ChatInnerProps = {
 };
 
 const TRANSLATION_STORAGE_KEY = 'biblelm-translation';
+const DEFAULT_TRANSLATION = 'BSB';
+const VALID_TRANSLATIONS = ['BSB', 'KJV', 'WEB', 'ASV', 'NHEB'];
 
 export function Chat() {
   const mounted = useSyncExternalStore(
@@ -97,7 +99,13 @@ function ChatInner({
 }: ChatInnerProps) {
   const [input, setInput] = useState('');
   const [rateLimitWarning, setRateLimitWarning] = useState<string | null>(null);
-  const [selectedTranslation, setSelectedTranslation] = useState('BSB');
+  const [selectedTranslation, setSelectedTranslation] = useState(() => {
+    if (typeof window === 'undefined') {
+      return DEFAULT_TRANSLATION;
+    }
+    const stored = localStorage.getItem(TRANSLATION_STORAGE_KEY);
+    return stored && VALID_TRANSLATIONS.includes(stored) ? stored : DEFAULT_TRANSLATION;
+  });
   const scrollRef = useRef<HTMLDivElement>(null);
   const contentContainerClass = 'w-full max-w-[720px] mx-auto px-3 sm:px-4';
 
@@ -126,15 +134,8 @@ function ChatInner({
   const shouldAutoScroll = useRef(true);
 
   useEffect(() => {
-    if (typeof window === 'undefined') return;
-    const stored = localStorage.getItem(TRANSLATION_STORAGE_KEY);
-    if (stored && ['BSB', 'KJV', 'WEB', 'ASV', 'NHEB'].includes(stored)) {
-      setSelectedTranslation(stored);
-    } else {
-      localStorage.setItem(TRANSLATION_STORAGE_KEY, 'BSB');
-      setSelectedTranslation('BSB');
-    }
-  }, []);
+    localStorage.setItem(TRANSLATION_STORAGE_KEY, selectedTranslation);
+  }, [selectedTranslation]);
 
   const scrollToBottom = useCallback((smooth = false) => {
     if (scrollRef.current) {
@@ -168,7 +169,6 @@ function ChatInner({
 
   const handleTranslationChange = useCallback((newTranslation: string) => {
     setSelectedTranslation(newTranslation);
-    localStorage.setItem(TRANSLATION_STORAGE_KEY, newTranslation);
   }, []);
 
   const handleSubmit = async (event: React.FormEvent<HTMLFormElement>) => {
diff --git a/components/Message.tsx b/components/Message.tsx
index 583cd30..e522acc 100644
--- a/components/Message.tsx
+++ b/components/Message.tsx
@@ -222,10 +222,11 @@ export const Message = React.memo(function Message({ message }: { message: UIMes
   const metadata = (message as any).metadata as MessageMetadata | undefined;
   const modelUsed = metadata?.modelUsed;
   const finalFallback = !isUser && Boolean(metadata?.finalFallback);
+  const verses = metadata?.verses;
   const metadataVerses = React.useMemo(() => {
-    if (!Array.isArray(metadata?.verses)) return [];
-    return metadata.verses.filter((verse): verse is VerseContext => Boolean(verse?.reference && verse?.text));
-  }, [metadata?.verses]);
+    if (!Array.isArray(verses)) return [];
+    return verses.filter((verse): verse is VerseContext => Boolean(verse?.reference && verse?.text));
+  }, [verses]);
   const showFallbackBadge =
     !isUser &&
     Boolean(modelUsed && modelUsed !== PRIMARY_MODEL_USED);
@@ -401,7 +402,7 @@ export const Message = React.memo(function Message({ message }: { message: UIMes
             strongs={parts[3]} 
             gloss={parts[4]} 
             morph={parts[5]} 
-            ref={parts[6]} 
+            verseRef={parts[6]} 
           />
         );
       }
@@ -460,7 +461,7 @@ export const Message = React.memo(function Message({ message }: { message: UIMes
                         </Button>
                       </div>
                       <p className="text-sm text-muted-foreground leading-relaxed break-words [overflow-wrap:anywhere]">
-                        "{block.shortQuote}"
+                        &quot;{block.shortQuote}&quot;
                       </p>
                     </CardHeader>
                     <CardContent className="px-4 pb-4 pt-0 space-y-3">
diff --git a/components/OriginalLangBlock.tsx b/components/OriginalLangBlock.tsx
index 7e51aec..a91be4d 100644
--- a/components/OriginalLangBlock.tsx
+++ b/components/OriginalLangBlock.tsx
@@ -11,19 +11,19 @@ export interface OriginalLangProps {
   strongs: string;
   gloss?: string;
   morph?: string;
-  ref?: string;
+  verseRef?: string;
 }
 
-export const OriginalLangBlock = React.memo(function OriginalLangBlock({ word, translit, strongs, gloss, morph, ref }: OriginalLangProps) {
+export const OriginalLangBlock = React.memo(function OriginalLangBlock({ word, translit, strongs, gloss, morph, verseRef }: OriginalLangProps) {
   const [resolvedMorph, setResolvedMorph] = React.useState<string | undefined>(morph);
   const [attemptedFetch, setAttemptedFetch] = React.useState(false);
   
   // Determine if hebrew based on strongs code starting with H
   const isHebrew = strongs.startsWith('H');
   const langClass = isHebrew ? 'hebrew-text' : 'greek-text';
-  const bollsLink = `https://bolls.life/dictionary/${isHebrew ? 'BDBT' : 'BDBT'}/${strongs}`;
+  const bollsLink = `https://bolls.life/dictionary/${isHebrew ? 'BDBT' : 'TGNT'}/${strongs}`;
   const morphValue = resolvedMorph ?? morph;
-  const canFetchMorph = Boolean(isHebrew && ref && !morphValue);
+  const canFetchMorph = Boolean(isHebrew && verseRef && !morphValue);
   const decodedMorph = morphValue ? decodeMorph(morphValue) : null;
 
   React.useEffect(() => {
@@ -37,7 +37,7 @@ export const OriginalLangBlock = React.memo(function OriginalLangBlock({ word, t
     const normalizeHebrew = (input: string) =>
       input.replace(/[\u0591-\u05C7]/g, '').replace(/[^\u0590-\u05FF]/g, '');
 
-    getMorphForVerse(ref as string)
+    getMorphForVerse(verseRef as string)
       .then((words) => {
         if (!words) return;
         const normWord = normalizeHebrew(word);
@@ -52,7 +52,7 @@ export const OriginalLangBlock = React.memo(function OriginalLangBlock({ word, t
       .catch(() => {
         // Silent: fallback to existing data
       });
-  }, [attemptedFetch, canFetchMorph, morphValue, ref, strongs, word]);
+  }, [attemptedFetch, canFetchMorph, morphValue, verseRef, strongs, word]);
   
   return (
     <Popover>
@@ -87,7 +87,7 @@ export const OriginalLangBlock = React.memo(function OriginalLangBlock({ word, t
           </div>
         )}
         <div className="rounded-md border bg-muted/40 p-2">
-          <div className="text-[10px] uppercase tracking-wider text-muted-foreground">Strong's</div>
+          <div className="text-[10px] uppercase tracking-wider text-muted-foreground">Strong&#39;s</div>
           <div className="font-mono text-[11px]">
             <a href={bollsLink} target="_blank" rel="noreferrer" className="underline underline-offset-2 text-primary/90">
               {strongs}
diff --git a/components/ui/accordion.tsx b/components/ui/accordion.tsx
index 6dd904a..98b5177 100644
--- a/components/ui/accordion.tsx
+++ b/components/ui/accordion.tsx
@@ -25,8 +25,8 @@ const AccordionItem = React.forwardRef<
 AccordionItem.displayName = "AccordionItem";
 
 const AccordionTrigger = React.forwardRef<
-  HTMLSummaryElement,
-  React.HTMLAttributes<HTMLSummaryElement>
+  HTMLElement,
+  React.ComponentPropsWithoutRef<'summary'>
 >(({ className, children, ...props }, ref) => (
   <summary
     ref={ref}
diff --git a/components/ui/popover.tsx b/components/ui/popover.tsx
index 492c7b8..d5e5f4a 100644
--- a/components/ui/popover.tsx
+++ b/components/ui/popover.tsx
@@ -2,26 +2,44 @@ import * as React from "react";
 
 import { cn } from "@/lib/utils";
 
+type PopoverTriggerChildProps = React.HTMLAttributes<HTMLElement> &
+  React.RefAttributes<HTMLElement> & {
+    role?: string;
+    tabIndex?: number;
+  };
+
 type PopoverContextValue = {
   open: boolean;
   setOpen: (open: boolean) => void;
-  contentRef: React.RefObject<HTMLDivElement>;
-  triggerRef: React.RefObject<HTMLElement>;
+  setContentNode: (node: HTMLDivElement | null) => void;
 };
 
 const PopoverContext = React.createContext<PopoverContextValue | null>(null);
 
+function mergeRefs<T>(...refs: Array<React.Ref<T> | undefined>) {
+  return (value: T | null) => {
+    for (const ref of refs) {
+      if (typeof ref === "function") {
+        ref(value);
+      } else if (ref) {
+        (ref as React.MutableRefObject<T | null>).current = value;
+      }
+    }
+  };
+}
+
 const Popover = ({ children }: { children: React.ReactNode }) => {
   const [open, setOpen] = React.useState(false);
   const contentRef = React.useRef<HTMLDivElement>(null);
-  const triggerRef = React.useRef<HTMLElement>(null);
+  const setContentNode = React.useCallback((node: HTMLDivElement | null) => {
+    contentRef.current = node;
+  }, []);
 
   React.useEffect(() => {
     if (!open) return;
     const handleClick = (event: MouseEvent) => {
       const target = event.target as Node;
       if (contentRef.current?.contains(target)) return;
-      if (triggerRef.current?.contains(target)) return;
       setOpen(false);
     };
     const handleKey = (event: KeyboardEvent) => {
@@ -37,8 +55,13 @@ const Popover = ({ children }: { children: React.ReactNode }) => {
     };
   }, [open]);
 
+  const value = React.useMemo(
+    () => ({ open, setOpen, setContentNode }),
+    [open, setContentNode]
+  );
+
   return (
-    <PopoverContext.Provider value={{ open, setOpen, contentRef, triggerRef }}>
+    <PopoverContext.Provider value={value}>
       <span className="relative inline-flex">{children}</span>
     </PopoverContext.Provider>
   );
@@ -49,31 +72,42 @@ type PopoverTriggerProps = React.HTMLAttributes<HTMLElement> & { asChild?: boole
 const PopoverTrigger = ({ asChild, children, ...props }: PopoverTriggerProps) => {
   const ctx = React.useContext(PopoverContext);
   if (!ctx) return <>{children}</>;
+  const { open, setOpen } = ctx;
   const handleClick = (event: React.MouseEvent<HTMLElement>) => {
     props.onClick?.(event);
-    ctx.setOpen(!ctx.open);
+    setOpen(!open);
   };
   const handleKeyDown = (event: React.KeyboardEvent<HTMLElement>) => {
     props.onKeyDown?.(event);
     if (event.defaultPrevented) return;
     if (event.key === "Enter" || event.key === " ") {
       event.preventDefault();
-      ctx.setOpen(!ctx.open);
+      setOpen(!open);
     }
     if (event.key === "Escape") {
-      ctx.setOpen(false);
+      setOpen(false);
     }
   };
 
   if (asChild && React.isValidElement(children)) {
-    return React.cloneElement(children as React.ReactElement, {
-      onClick: handleClick,
-      onKeyDown: handleKeyDown,
-      ref: ctx.triggerRef,
-      role: (children as React.ReactElement).props.role || "button",
-      tabIndex: (children as React.ReactElement).props.tabIndex ?? 0,
+    const child = children as React.ReactElement<PopoverTriggerChildProps>;
+    const childOnClick = child.props.onClick;
+    const childOnKeyDown = child.props.onKeyDown;
+    return React.cloneElement(child, {
+      onClick: (event) => {
+        childOnClick?.(event);
+        if (event.defaultPrevented) return;
+        handleClick(event);
+      },
+      onKeyDown: (event) => {
+        childOnKeyDown?.(event);
+        if (event.defaultPrevented) return;
+        handleKeyDown(event);
+      },
+      role: child.props.role || "button",
+      tabIndex: child.props.tabIndex ?? 0,
       "aria-haspopup": "dialog",
-      "aria-expanded": ctx.open,
+      "aria-expanded": open,
     });
   }
 
@@ -82,9 +116,8 @@ const PopoverTrigger = ({ asChild, children, ...props }: PopoverTriggerProps) =>
       type="button"
       onClick={handleClick}
       onKeyDown={handleKeyDown}
-      ref={ctx.triggerRef as React.RefObject<HTMLButtonElement>}
       aria-haspopup="dialog"
-      aria-expanded={ctx.open}
+      aria-expanded={open}
       {...props}
     >
       {children}
@@ -101,6 +134,7 @@ const PopoverContent = React.forwardRef<HTMLDivElement, PopoverContentProps>(
   ({ className, align = "center", sideOffset = 6, style, ...props }, ref) => {
     const ctx = React.useContext(PopoverContext);
     if (!ctx || !ctx.open) return null;
+    const { setContentNode } = ctx;
     const alignment =
       align === "start" ? "left-0" : align === "end" ? "right-0" : "left-1/2 -translate-x-1/2";
 
@@ -108,11 +142,7 @@ const PopoverContent = React.forwardRef<HTMLDivElement, PopoverContentProps>(
       <div
         role="dialog"
         aria-modal="false"
-        ref={(node) => {
-          ctx.contentRef.current = node;
-          if (typeof ref === "function") ref(node);
-          else if (ref) (ref as React.MutableRefObject<HTMLDivElement | null>).current = node;
-        }}
+        ref={mergeRefs(ref, setContentNode)}
         className={cn(
           "absolute top-full z-50 w-64 rounded-xl border bg-popover p-3 text-popover-foreground shadow-md outline-none",
           "animate-in fade-in-0 zoom-in-95",
diff --git a/lib/opengnt.ts b/lib/opengnt.ts
index 1b65c26..a1d8c68 100644
--- a/lib/opengnt.ts
+++ b/lib/opengnt.ts
@@ -222,7 +222,7 @@ export async function getOpenGNTLayers(reference: string): Promise<OpenGntVerseL
   }
 
   const clauseVerse = clause?.verses?.[chapterKey]?.[verseKey];
-  if (clauseVerse?.ids?.length) {
+  if (clause && clauseVerse?.ids?.length) {
     result.clauses = {
       ids: clauseVerse.ids,
       meta: clause.clauses
diff --git a/package-lock.json b/package-lock.json
index 5190ded..5ad3219 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -41,7 +41,7 @@
         "@types/react": "^19.2.14",
         "@types/react-dom": "^19.2.3",
         "dotenv": "^17.3.1",
-        "eslint": "^10.0.3",
+        "eslint": "^9.39.4",
         "eslint-config-next": "^16.1.6",
         "postcss": "^8.5.8",
         "tailwindcss": "^4.2.1",
@@ -499,44 +499,44 @@
       }
     },
     "node_modules/@eslint/config-array": {
-      "version": "0.23.3",
-      "resolved": "https://registry.npmjs.org/@eslint/config-array/-/config-array-0.23.3.tgz",
-      "integrity": "sha512-j+eEWmB6YYLwcNOdlwQ6L2OsptI/LO6lNBuLIqe5R7RetD658HLoF+Mn7LzYmAWWNNzdC6cqP+L6r8ujeYXWLw==",
+      "version": "0.21.2",
+      "resolved": "https://registry.npmjs.org/@eslint/config-array/-/config-array-0.21.2.tgz",
+      "integrity": "sha512-nJl2KGTlrf9GjLimgIru+V/mzgSK0ABCDQRvxw5BjURL7WfH5uoWmizbH7QB6MmnMBd8cIC9uceWnezL1VZWWw==",
       "dev": true,
       "license": "Apache-2.0",
       "dependencies": {
-        "@eslint/object-schema": "^3.0.3",
+        "@eslint/object-schema": "^2.1.7",
         "debug": "^4.3.1",
-        "minimatch": "^10.2.4"
+        "minimatch": "^3.1.5"
       },
       "engines": {
-        "node": "^20.19.0 || ^22.13.0 || >=24"
+        "node": "^18.18.0 || ^20.9.0 || >=21.1.0"
       }
     },
     "node_modules/@eslint/config-helpers": {
-      "version": "0.5.3",
-      "resolved": "https://registry.npmjs.org/@eslint/config-helpers/-/config-helpers-0.5.3.tgz",
-      "integrity": "sha512-lzGN0onllOZCGroKJmRwY6QcEHxbjBw1gwB8SgRSqK8YbbtEXMvKynsXc3553ckIEBxsbMBU7oOZXKIPGZNeZw==",
+      "version": "0.4.2",
+      "resolved": "https://registry.npmjs.org/@eslint/config-helpers/-/config-helpers-0.4.2.tgz",
+      "integrity": "sha512-gBrxN88gOIf3R7ja5K9slwNayVcZgK6SOUORm2uBzTeIEfeVaIhOpCtTox3P6R7o2jLFwLFTLnC7kU/RGcYEgw==",
       "dev": true,
       "license": "Apache-2.0",
       "dependencies": {
-        "@eslint/core": "^1.1.1"
+        "@eslint/core": "^0.17.0"
       },
       "engines": {
-        "node": "^20.19.0 || ^22.13.0 || >=24"
+        "node": "^18.18.0 || ^20.9.0 || >=21.1.0"
       }
     },
     "node_modules/@eslint/core": {
-      "version": "1.1.1",
-      "resolved": "https://registry.npmjs.org/@eslint/core/-/core-1.1.1.tgz",
-      "integrity": "sha512-QUPblTtE51/7/Zhfv8BDwO0qkkzQL7P/aWWbqcf4xWLEYn1oKjdO0gglQBB4GAsu7u6wjijbCmzsUTy6mnk6oQ==",
+      "version": "0.17.0",
+      "resolved": "https://registry.npmjs.org/@eslint/core/-/core-0.17.0.tgz",
+      "integrity": "sha512-yL/sLrpmtDaFEiUj1osRP4TI2MDz1AddJL+jZ7KSqvBuliN4xqYY54IfdN8qD8Toa6g1iloph1fxQNkjOxrrpQ==",
       "dev": true,
       "license": "Apache-2.0",
       "dependencies": {
         "@types/json-schema": "^7.0.15"
       },
       "engines": {
-        "node": "^20.19.0 || ^22.13.0 || >=24"
+        "node": "^18.18.0 || ^20.9.0 || >=21.1.0"
       }
     },
     "node_modules/@eslint/eslintrc": {
@@ -576,28 +576,41 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
+    "node_modules/@eslint/js": {
+      "version": "9.39.4",
+      "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.4.tgz",
+      "integrity": "sha512-nE7DEIchvtiFTwBw4Lfbu59PG+kCofhjsKaCWzxTpt4lfRjRMqG6uMBzKXuEcyXhOHoUp9riAm7/aWYGhXZ9cw==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": "^18.18.0 || ^20.9.0 || >=21.1.0"
+      },
+      "funding": {
+        "url": "https://eslint.org/donate"
+      }
+    },
     "node_modules/@eslint/object-schema": {
-      "version": "3.0.3",
-      "resolved": "https://registry.npmjs.org/@eslint/object-schema/-/object-schema-3.0.3.tgz",
-      "integrity": "sha512-iM869Pugn9Nsxbh/YHRqYiqd23AmIbxJOcpUMOuWCVNdoQJ5ZtwL6h3t0bcZzJUlC3Dq9jCFCESBZnX0GTv7iQ==",
+      "version": "2.1.7",
+      "resolved": "https://registry.npmjs.org/@eslint/object-schema/-/object-schema-2.1.7.tgz",
+      "integrity": "sha512-VtAOaymWVfZcmZbp6E2mympDIHvyjXs/12LqWYjVw6qjrfF+VK+fyG33kChz3nnK+SU5/NeHOqrTEHS8sXO3OA==",
       "dev": true,
       "license": "Apache-2.0",
       "engines": {
-        "node": "^20.19.0 || ^22.13.0 || >=24"
+        "node": "^18.18.0 || ^20.9.0 || >=21.1.0"
       }
     },
     "node_modules/@eslint/plugin-kit": {
-      "version": "0.6.1",
-      "resolved": "https://registry.npmjs.org/@eslint/plugin-kit/-/plugin-kit-0.6.1.tgz",
-      "integrity": "sha512-iH1B076HoAshH1mLpHMgwdGeTs0CYwL0SPMkGuSebZrwBp16v415e9NZXg2jtrqPVQjf6IANe2Vtlr5KswtcZQ==",
+      "version": "0.4.1",
+      "resolved": "https://registry.npmjs.org/@eslint/plugin-kit/-/plugin-kit-0.4.1.tgz",
+      "integrity": "sha512-43/qtrDUokr7LJqoF2c3+RInu/t4zfrpYdoSDfYyhg52rwLV6TnOvdG4fXm7IkSB3wErkcmJS9iEhjVtOSEjjA==",
       "dev": true,
       "license": "Apache-2.0",
       "dependencies": {
-        "@eslint/core": "^1.1.1",
+        "@eslint/core": "^0.17.0",
         "levn": "^0.4.1"
       },
       "engines": {
-        "node": "^20.19.0 || ^22.13.0 || >=24"
+        "node": "^18.18.0 || ^20.9.0 || >=21.1.0"
       }
     },
     "node_modules/@floating-ui/core": {
@@ -2649,13 +2662,6 @@
         "@types/ms": "*"
       }
     },
-    "node_modules/@types/esrecurse": {
-      "version": "4.3.1",
-      "resolved": "https://registry.npmjs.org/@types/esrecurse/-/esrecurse-4.3.1.tgz",
-      "integrity": "sha512-xJBAbDifo5hpffDBuHl0Y8ywswbiAp/Wi7Y/GtAgSlZyIABppyurxVueOPE8LUQOxdlgi6Zqce7uoEpqNTeiUw==",
-      "dev": true,
-      "license": "MIT"
-    },
     "node_modules/@types/estree": {
       "version": "1.0.8",
       "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz",
@@ -3859,6 +3865,23 @@
         "url": "https://github.com/sponsors/wooorm"
       }
     },
+    "node_modules/chalk": {
+      "version": "4.1.2",
+      "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz",
+      "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "ansi-styles": "^4.1.0",
+        "supports-color": "^7.1.0"
+      },
+      "engines": {
+        "node": ">=10"
+      },
+      "funding": {
+        "url": "https://github.com/chalk/chalk?sponsor=1"
+      }
+    },
     "node_modules/character-entities": {
       "version": "2.0.2",
       "resolved": "https://registry.npmjs.org/character-entities/-/character-entities-2.0.2.tgz",
@@ -4461,30 +4484,33 @@
       }
     },
     "node_modules/eslint": {
-      "version": "10.0.3",
-      "resolved": "https://registry.npmjs.org/eslint/-/eslint-10.0.3.tgz",
-      "integrity": "sha512-COV33RzXZkqhG9P2rZCFl9ZmJ7WL+gQSCRzE7RhkbclbQPtLAWReL7ysA0Sh4c8Im2U9ynybdR56PV0XcKvqaQ==",
+      "version": "9.39.4",
+      "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.4.tgz",
+      "integrity": "sha512-XoMjdBOwe/esVgEvLmNsD3IRHkm7fbKIUGvrleloJXUZgDHig2IPWNniv+GwjyJXzuNqVjlr5+4yVUZjycJwfQ==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
         "@eslint-community/eslint-utils": "^4.8.0",
-        "@eslint-community/regexpp": "^4.12.2",
-        "@eslint/config-array": "^0.23.3",
-        "@eslint/config-helpers": "^0.5.2",
-        "@eslint/core": "^1.1.1",
-        "@eslint/plugin-kit": "^0.6.1",
+        "@eslint-community/regexpp": "^4.12.1",
+        "@eslint/config-array": "^0.21.2",
+        "@eslint/config-helpers": "^0.4.2",
+        "@eslint/core": "^0.17.0",
+        "@eslint/eslintrc": "^3.3.5",
+        "@eslint/js": "9.39.4",
+        "@eslint/plugin-kit": "^0.4.1",
         "@humanfs/node": "^0.16.6",
         "@humanwhocodes/module-importer": "^1.0.1",
         "@humanwhocodes/retry": "^0.4.2",
         "@types/estree": "^1.0.6",
         "ajv": "^6.14.0",
+        "chalk": "^4.0.0",
         "cross-spawn": "^7.0.6",
         "debug": "^4.3.2",
         "escape-string-regexp": "^4.0.0",
-        "eslint-scope": "^9.1.2",
-        "eslint-visitor-keys": "^5.0.1",
-        "espree": "^11.1.1",
-        "esquery": "^1.7.0",
+        "eslint-scope": "^8.4.0",
+        "eslint-visitor-keys": "^4.2.1",
+        "espree": "^10.4.0",
+        "esquery": "^1.5.0",
         "esutils": "^2.0.2",
         "fast-deep-equal": "^3.1.3",
         "file-entry-cache": "^8.0.0",
@@ -4494,7 +4520,8 @@
         "imurmurhash": "^0.1.4",
         "is-glob": "^4.0.0",
         "json-stable-stringify-without-jsonify": "^1.0.1",
-        "minimatch": "^10.2.4",
+        "lodash.merge": "^4.6.2",
+        "minimatch": "^3.1.5",
         "natural-compare": "^1.4.0",
         "optionator": "^0.9.3"
       },
@@ -4502,7 +4529,7 @@
         "eslint": "bin/eslint.js"
       },
       "engines": {
-        "node": "^20.19.0 || ^22.13.0 || >=24"
+        "node": "^18.18.0 || ^20.9.0 || >=21.1.0"
       },
       "funding": {
         "url": "https://eslint.org/donate"
@@ -4790,19 +4817,17 @@
       }
     },
     "node_modules/eslint-scope": {
-      "version": "9.1.2",
-      "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-9.1.2.tgz",
-      "integrity": "sha512-xS90H51cKw0jltxmvmHy2Iai1LIqrfbw57b79w/J7MfvDfkIkFZ+kj6zC3BjtUwh150HsSSdxXZcsuv72miDFQ==",
+      "version": "8.4.0",
+      "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-8.4.0.tgz",
+      "integrity": "sha512-sNXOfKCn74rt8RICKMvJS7XKV/Xk9kA7DyJr8mJik3S7Cwgy3qlkkmyS2uQB3jiJg6VNdZd/pDBJu0nvG2NlTg==",
       "dev": true,
       "license": "BSD-2-Clause",
       "dependencies": {
-        "@types/esrecurse": "^4.3.1",
-        "@types/estree": "^1.0.8",
         "esrecurse": "^4.3.0",
         "estraverse": "^5.2.0"
       },
       "engines": {
-        "node": "^20.19.0 || ^22.13.0 || >=24"
+        "node": "^18.18.0 || ^20.9.0 || >=21.1.0"
       },
       "funding": {
         "url": "https://opencollective.com/eslint"
@@ -4822,31 +4847,13 @@
       }
     },
     "node_modules/eslint/node_modules/eslint-visitor-keys": {
-      "version": "5.0.1",
-      "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-5.0.1.tgz",
-      "integrity": "sha512-tD40eHxA35h0PEIZNeIjkHoDR4YjjJp34biM0mDvplBe//mB+IHCqHDGV7pxF+7MklTvighcCPPZC7ynWyjdTA==",
+      "version": "4.2.1",
+      "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-4.2.1.tgz",
+      "integrity": "sha512-Uhdk5sfqcee/9H/rCOJikYz67o0a2Tw2hGRPOG2Y1R2dg7brRe1uG0yaNQDHu+TO/uQPF/5eCapvYSmHUjt7JQ==",
       "dev": true,
       "license": "Apache-2.0",
       "engines": {
-        "node": "^20.19.0 || ^22.13.0 || >=24"
-      },
-      "funding": {
-        "url": "https://opencollective.com/eslint"
-      }
-    },
-    "node_modules/eslint/node_modules/espree": {
-      "version": "11.2.0",
-      "resolved": "https://registry.npmjs.org/espree/-/espree-11.2.0.tgz",
-      "integrity": "sha512-7p3DrVEIopW1B1avAGLuCSh1jubc01H2JHc8B4qqGblmg5gI9yumBgACjWo4JlIc04ufug4xJ3SQI8HkS/Rgzw==",
-      "dev": true,
-      "license": "BSD-2-Clause",
-      "dependencies": {
-        "acorn": "^8.16.0",
-        "acorn-jsx": "^5.3.2",
-        "eslint-visitor-keys": "^5.0.1"
-      },
-      "engines": {
-        "node": "^20.19.0 || ^22.13.0 || >=24"
+        "node": "^18.18.0 || ^20.9.0 || >=21.1.0"
       },
       "funding": {
         "url": "https://opencollective.com/eslint"
@@ -5447,6 +5454,16 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/has-flag": {
+      "version": "4.0.0",
+      "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz",
+      "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=8"
+      }
+    },
     "node_modules/has-property-descriptors": {
       "version": "1.0.2",
       "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz",
@@ -6619,6 +6636,13 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
+    "node_modules/lodash.merge": {
+      "version": "4.6.2",
+      "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz",
+      "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==",
+      "dev": true,
+      "license": "MIT"
+    },
     "node_modules/long": {
       "version": "5.3.2",
       "resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz",
@@ -9344,6 +9368,19 @@
         }
       }
     },
+    "node_modules/supports-color": {
+      "version": "7.2.0",
+      "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz",
+      "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "has-flag": "^4.0.0"
+      },
+      "engines": {
+        "node": ">=8"
+      }
+    },
     "node_modules/supports-preserve-symlinks-flag": {
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz",
diff --git a/package.json b/package.json
index 02f8584..b9e55ed 100644
--- a/package.json
+++ b/package.json
@@ -9,8 +9,12 @@
     "build:openhebrewbible": "ts-node --project tsconfig.scripts.json scripts/parse-openhebrewbible.ts",
     "build:translations": "ts-node --project tsconfig.scripts.json scripts/build-translations.ts",
     "build:opengnt": "ts-node --project tsconfig.scripts.json scripts/build-opengnt.ts",
+    "benchmark:sample": "ts-node --project tsconfig.scripts.json tests/benchmark/run-benchmarks.ts --mode sample",
+    "benchmark:live": "ts-node --project tsconfig.scripts.json tests/benchmark/run-benchmarks.ts --mode live",
+    "benchmark:regression": "ts-node --project tsconfig.scripts.json tests/benchmark/check-regressions.ts project-docs/benchmark/latest-report.json",
+    "benchmark:flags": "ts-node --project tsconfig.scripts.json tests/benchmark/check-feature-flags.ts",
     "start": "next start",
-    "lint": "next lint"
+    "lint": "eslint --no-warn-ignored app components lib tests scripts *.ts *.mjs"
   },
   "dependencies": {
     "@ai-sdk/groq": "^3.0.29",
@@ -46,7 +50,7 @@
     "@types/react": "^19.2.14",
     "@types/react-dom": "^19.2.3",
     "dotenv": "^17.3.1",
-    "eslint": "^10.0.3",
+    "eslint": "^9.39.4",
     "eslint-config-next": "^16.1.6",
     "postcss": "^8.5.8",
     "tailwindcss": "^4.2.1",
diff --git a/tsconfig.json b/tsconfig.json
index e7ff3a2..c3bf2f7 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -31,9 +31,7 @@
   "include": [
     "next-env.d.ts",
     "**/*.ts",
-    "**/*.tsx",
-    ".next/types/**/*.ts",
-    ".next/dev/types/**/*.ts"
+    "**/*.tsx"
   ],
   "exclude": [
     "node_modules"

From fccc7784f4959da3315b48df7bd8ecb3258ba1bb Mon Sep 17 00:00:00 2001
From: voidcommit-afk <strucker08@gmail.com>
Date: Sat, 14 Mar 2026 20:08:56 +0530
Subject: [PATCH 4/4] fix(security): address CodeQL fetch and citation parsing
 alerts

---
 app/api/chat/route.ts | 182 +++++++++++++++++++++++++++++++++++++-----
 lib/bible-fetch.ts    |  71 +++++++++++++---
 lib/retrieval.ts      |   6 +-
 3 files changed, 227 insertions(+), 32 deletions(-)

diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts
index 0bb40d9..a96843d 100644
--- a/app/api/chat/route.ts
+++ b/app/api/chat/route.ts
@@ -111,10 +111,6 @@ function createLatencyMetrics(): LatencyMetrics {
   return { ...EMPTY_LATENCY_METRICS };
 }
 
-function addLatencyMetric(metrics: LatencyMetrics, metric: LatencyMetricName, durationMs: number): void {
-  metrics[metric] = roundLatencyMs(metrics[metric] + durationMs);
-}
-
 function setLatencyMetric(metrics: LatencyMetrics, metric: LatencyMetricName, durationMs: number): void {
   metrics[metric] = roundLatencyMs(durationMs);
 }
@@ -309,10 +305,159 @@ function buildPrompt(
 }
 
 function normalizeCitationToken(citation: string): string {
-  return citation
-    .trim()
-    .replace(/[()[\],.;:!?]+$/g, '')
-    .replace(/\s+/g, ' ');
+  const trimmed = citation.trim();
+  let end = trimmed.length;
+
+  while (end > 0) {
+    const char = trimmed[end - 1];
+    if (!'()[],.;:!?'.includes(char)) {
+      break;
+    }
+    end -= 1;
+  }
+
+  return collapseCitationWhitespace(trimmed.slice(0, end));
+}
+
+function collapseCitationWhitespace(value: string): string {
+  let result = '';
+  let previousWasWhitespace = false;
+
+  for (const char of value) {
+    const isWhitespace =
+      char === ' ' ||
+      char === '\n' ||
+      char === '\r' ||
+      char === '\t' ||
+      char === '\f' ||
+      char === '\v';
+
+    if (isWhitespace) {
+      if (!previousWasWhitespace && result.length > 0) {
+        result += ' ';
+      }
+      previousWasWhitespace = true;
+      continue;
+    }
+
+    result += char;
+    previousWasWhitespace = false;
+  }
+
+  return result.trim();
+}
+
+function removeAllOccurrences(value: string, target: string): string {
+  if (!target) return value;
+
+  let result = value;
+  let index = result.indexOf(target);
+  while (index !== -1) {
+    result = `${result.slice(0, index)}${result.slice(index + target.length)}`;
+    index = result.indexOf(target);
+  }
+  return result;
+}
+
+function stripBracketedCitationSegments(content: string, citation: string, opening: string, closing: string): string {
+  if (!citation) return content;
+
+  let result = content;
+  let searchStart = 0;
+
+  while (searchStart < result.length) {
+    const citationIndex = result.indexOf(citation, searchStart);
+    if (citationIndex === -1) {
+      break;
+    }
+
+    const openingIndex = result.lastIndexOf(opening, citationIndex);
+    const closingIndex = result.indexOf(closing, citationIndex + citation.length);
+    if (openingIndex !== -1 && closingIndex !== -1) {
+      const segment = result.slice(openingIndex + 1, closingIndex);
+      if (segment.includes(citation)) {
+        result = `${result.slice(0, openingIndex)}${result.slice(closingIndex + 1)}`;
+        searchStart = openingIndex;
+        continue;
+      }
+    }
+
+    searchStart = citationIndex + citation.length;
+  }
+
+  return result;
+}
+
+function stripEmptyCitationDelimiters(content: string): string {
+  let result = content;
+  let changed = true;
+
+  while (changed) {
+    changed = false;
+
+    for (const pair of ['()', '[]']) {
+      const next = removeAllOccurrences(result, pair);
+      if (next !== result) {
+        result = next;
+        changed = true;
+      }
+    }
+  }
+
+  return result;
+}
+
+function collapseRepeatedSpacesPerLine(value: string): string {
+  let result = '';
+  let previousWasSpace = false;
+
+  for (const char of value) {
+    if (char === ' ' || char === '\t') {
+      if (!previousWasSpace) {
+        result += ' ';
+      }
+      previousWasSpace = true;
+      continue;
+    }
+
+    result += char;
+    previousWasSpace = false;
+  }
+
+  return result;
+}
+
+function collapseBlankLines(value: string): string {
+  let result = '';
+  let consecutiveNewlines = 0;
+
+  for (const char of value) {
+    if (char === '\n') {
+      consecutiveNewlines += 1;
+      if (consecutiveNewlines <= 2) {
+        result += char;
+      }
+      continue;
+    }
+
+    consecutiveNewlines = 0;
+    result += char;
+  }
+
+  return result;
+}
+
+function removeSpaceBeforeCitationPunctuation(value: string): string {
+  let result = '';
+
+  for (const char of value) {
+    if (',.;:!?'.includes(char) && result.endsWith(' ')) {
+      result = result.slice(0, -1);
+    }
+    result += char;
+  }
+
+  return result;
 }
 
 function buildCitationWhitelistSet(verses: VerseContext[]): Set<string> {
@@ -360,18 +505,15 @@ function scrubInvalidCitations(content: string, verses: VerseContext[]): string
 
   let sanitized = content;
   for (const citation of invalidCitations) {
-    const escaped = citation.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
-    sanitized = sanitized
-      .replace(new RegExp(`\\((?:[^()]*?)${escaped}(?:[^()]*?)\\)`, 'g'), '')
-      .replace(new RegExp(`\\[(?:[^\\]]*?)${escaped}(?:[^\\]]*?)\\]`, 'g'), '')
-      .replace(new RegExp(`\\b${escaped}\\b`, 'g'), '');
-  }
-
-  sanitized = sanitized
-    .replace(/[ \t]{2,}/g, ' ')
-    .replace(/\n{3,}/g, '\n\n')
-    .replace(/\s+([,.;:!?])/g, '$1')
-    .trim();
+    sanitized = stripBracketedCitationSegments(sanitized, citation, '(', ')');
+    sanitized = stripBracketedCitationSegments(sanitized, citation, '[', ']');
+    sanitized = removeAllOccurrences(sanitized, citation);
+  }
+
+  sanitized = stripEmptyCitationDelimiters(sanitized);
+  sanitized = collapseRepeatedSpacesPerLine(sanitized);
+  sanitized = collapseBlankLines(sanitized);
+  sanitized = removeSpaceBeforeCitationPunctuation(sanitized).trim();
 
   console.info(JSON.stringify({
     event: 'citation_whitelist_enforced',
diff --git a/lib/bible-fetch.ts b/lib/bible-fetch.ts
index 13cdc83..8ef6e70 100644
--- a/lib/bible-fetch.ts
+++ b/lib/bible-fetch.ts
@@ -12,9 +12,14 @@ const EXTERNAL_VERSE_FETCH_TIMEOUT_MS = 1500;
 const EXTERNAL_VERSE_FETCH_TOTAL_BUDGET_MS = 2000;
 const EXTERNAL_VERSE_FETCH_MAX_RETRIES = 1;
 const EXTERNAL_VERSE_FETCH_BACKOFF_MS = 150;
+const EXTERNAL_FETCH_SOURCES = {
+  helloao: 'https://bible.helloao.org',
+  bibleApi: 'https://bible-api.com',
+  bolls: 'https://bolls.life',
+} as const;
 
 type ExternalFetchBudgetOptions = {
-  label: string;
+  source: keyof typeof EXTERNAL_FETCH_SOURCES;
   timeoutMs?: number;
   totalBudgetMs?: number;
   maxRetries?: number;
@@ -29,11 +34,44 @@ function sleep(ms: number): Promise<void> {
   return new Promise((resolve) => setTimeout(resolve, ms));
 }
 
+function buildExternalUrl(
+  source: keyof typeof EXTERNAL_FETCH_SOURCES,
+  pathname: string,
+  query?: Record<string, string>
+): URL {
+  const url = new URL(EXTERNAL_FETCH_SOURCES[source]);
+  url.pathname = pathname.startsWith('/') ? pathname : `/${pathname}`;
+
+  if (query) {
+    for (const [key, value] of Object.entries(query)) {
+      url.searchParams.set(key, value);
+    }
+  }
+
+  return url;
+}
+
+function isAllowedExternalUrl(url: URL, source: keyof typeof EXTERNAL_FETCH_SOURCES): boolean {
+  return url.protocol === 'https:' && url.origin === EXTERNAL_FETCH_SOURCES[source];
+}
+
+function logExternalFetchWarning(payload: Record<string, unknown>): void {
+  console.warn(JSON.stringify({ event: 'external_fetch_warning', ...payload }));
+}
+
 export async function fetchExternalWithTimeoutBudget(
-  url: string,
+  url: URL,
   init: RequestInit = {},
   options: ExternalFetchBudgetOptions
 ): Promise<Response | null> {
+  if (!isAllowedExternalUrl(url, options.source)) {
+    logExternalFetchWarning({
+      source: options.source,
+      reason: 'blocked_disallowed_url',
+    });
+    return null;
+  }
+
   const timeoutMs = options.timeoutMs ?? EXTERNAL_VERSE_FETCH_TIMEOUT_MS;
   const totalBudgetMs = options.totalBudgetMs ?? EXTERNAL_VERSE_FETCH_TOTAL_BUDGET_MS;
   const maxRetries = options.maxRetries ?? EXTERNAL_VERSE_FETCH_MAX_RETRIES;
@@ -85,14 +123,22 @@ export async function fetchExternalWithTimeoutBudget(
 
   const elapsedMs = Date.now() - startedAt;
   if (lastError) {
-    console.warn(`[external-fetch] ${options.label} failed after ${elapsedMs}ms; continuing without external result.`, lastError);
+    logExternalFetchWarning({
+      source: options.source,
+      reason: 'request_failed',
+      elapsed_ms: elapsedMs,
+      error_name: lastError instanceof Error ? lastError.name : 'unknown',
+    });
     return null;
   }
 
   if (lastResponse && !lastResponse.ok && isRetryableStatus(lastResponse.status)) {
-    console.warn(
-      `[external-fetch] ${options.label} exhausted retry budget with status ${lastResponse.status} after ${elapsedMs}ms; continuing without external result.`
-    );
+    logExternalFetchWarning({
+      source: options.source,
+      reason: 'retry_budget_exhausted',
+      elapsed_ms: elapsedMs,
+      status: lastResponse.status,
+    });
   }
 
   return lastResponse;
@@ -118,10 +164,13 @@ export async function fetchVerseHelloAO(
 ): Promise<string | null> {
   try {
     const res = await fetchExternalWithTimeoutBudget(
-      `https://bible.helloao.org/api/${translation}/${book}/${chapter}.json`,
+      buildExternalUrl(
+        'helloao',
+        `/api/${encodeURIComponent(translation)}/${encodeURIComponent(book)}/${encodeURIComponent(`${chapter}.json`)}`
+      ),
       {},
       {
-        label: `helloao:${translation}:${book}:${chapter}`
+        source: 'helloao'
       }
     );
     if (!res?.ok) return null;
@@ -151,10 +200,12 @@ export async function fetchVerseFallback(reference: string, translation: string
   try {
     // bible-api.com expects 'john 3:16'
     const res = await fetchExternalWithTimeoutBudget(
-      `https://bible-api.com/${encodeURIComponent(reference)}?translation=${translation.toLowerCase()}`,
+      buildExternalUrl('bibleApi', `/${encodeURIComponent(reference)}`, {
+        translation: translation.toLowerCase(),
+      }),
       {},
       {
-        label: `bible-api:${translation}:${reference}`
+        source: 'bibleApi'
       }
     );
     if (!res?.ok) return null;
diff --git a/lib/retrieval.ts b/lib/retrieval.ts
index 61b2aa1..90828a2 100644
--- a/lib/retrieval.ts
+++ b/lib/retrieval.ts
@@ -2491,11 +2491,13 @@ async function enrichOriginalLanguages(verses: VerseContext[]): Promise<VerseCon
         const [chapter, vNumStr] = cv.split(':');
         
         const bollsRef = bkbToBollsPath(book, parseInt(chapter, 10));
+        const bollsUrl = new URL('https://bolls.life');
+        bollsUrl.pathname = `/get-chapter/${encodeURIComponent(trans)}/${bollsRef}/`;
         const res = await fetchExternalWithTimeoutBudget(
-          `https://bolls.life/get-chapter/${trans}/${bollsRef}/`,
+          bollsUrl,
           {},
           {
-            label: `bolls-tagged-chapter:${trans}:${bollsRef}`
+            source: 'bolls'
           }
         );