From 48d3d3778ec651620b02fe77491ce6160205cb5b Mon Sep 17 00:00:00 2001
From: dadukhankevin <danieljlosey@gmail.com>
Date: Wed, 11 Mar 2026 23:54:46 -0500
Subject: [PATCH 1/5] Surface raw HTML content through translation pair
 pipeline for allowHtmlPredictions

The allowHtmlPredictions toggle was incomplete: HTML was stripped at SQLite
index time but the raw content (already stored in s_raw_content/t_raw_content
columns) was never surfaced to the prompt builder. Now rawContent flows through
MinimalCellResult so buildFewShotExamplesText can use HTML-preserving examples
when the toggle is on.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../contextAware/contentIndexes/indexes/search.ts         | 8 ++++----
 .../contextAware/contentIndexes/indexes/sqliteIndex.ts    | 8 ++++++++
 src/providers/translationSuggestions/shared.ts            | 4 ++--
 types/index.d.ts                                          | 1 +
 4 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/activationHelpers/contextAware/contentIndexes/indexes/search.ts b/src/activationHelpers/contextAware/contentIndexes/indexes/search.ts
index ffdf88433..4b76d8792 100644
--- a/src/activationHelpers/contextAware/contentIndexes/indexes/search.ts
+++ b/src/activationHelpers/contextAware/contentIndexes/indexes/search.ts
@@ -265,8 +265,8 @@ export async function getTranslationPairsFromSourceCellQuery(
                 debug(`[getTranslationPairsFromSourceCellQuery] ✅ Adding direct result for ${cellId}`);
                 translationPairs.push({
                     cellId,
-                    sourceCell: { cellId, content: searchResult.sourceContent, uri: searchResult.uri || "", line: searchResult.line || 0 },
-                    targetCell: { cellId, content: searchResult.targetContent, uri: searchResult.uri || "", line: searchResult.line || 0 },
+                    sourceCell: { cellId, content: searchResult.sourceContent, rawContent: searchResult.rawSourceContent, uri: searchResult.uri || "", line: searchResult.line || 0 },
+                    targetCell: { cellId, content: searchResult.targetContent, rawContent: searchResult.rawTargetContent, uri: searchResult.uri || "", line: searchResult.line || 0 },
                 });
             } else {
                 debug(`[getTranslationPairsFromSourceCellQuery] ❌ Skipping ${cellId} - empty content after trim`);
@@ -278,8 +278,8 @@ export async function getTranslationPairsFromSourceCellQuery(
                 debug(`[getTranslationPairsFromSourceCellQuery] ✅ Adding fetched result for ${cellId}`);
                 translationPairs.push({
                     cellId,
-                    sourceCell: { cellId, content: translationPair.sourceContent, uri: translationPair.uri || "", line: translationPair.line || 0 },
-                    targetCell: { cellId, content: translationPair.targetContent, uri: translationPair.uri || "", line: translationPair.line || 0 },
+                    sourceCell: { cellId, content: translationPair.sourceContent, rawContent: translationPair.rawSourceContent, uri: translationPair.uri || "", line: translationPair.line || 0 },
+                    targetCell: { cellId, content: translationPair.targetContent, rawContent: translationPair.rawTargetContent, uri: translationPair.uri || "", line: translationPair.line || 0 },
                 });
             } else {
                 debug(`[getTranslationPairsFromSourceCellQuery] ❌ Skipping ${cellId} - no valid translation pair found`);
diff --git a/src/activationHelpers/contextAware/contentIndexes/indexes/sqliteIndex.ts b/src/activationHelpers/contextAware/contentIndexes/indexes/sqliteIndex.ts
index 400b7bee5..aed97c8f2 100644
--- a/src/activationHelpers/contextAware/contentIndexes/indexes/sqliteIndex.ts
+++ b/src/activationHelpers/contextAware/contentIndexes/indexes/sqliteIndex.ts
@@ -3038,6 +3038,8 @@ export class SQLiteIndexManager {
                         cellLabel: row.cell_label, // NO FALLBACK
                         sourceContent: returnRawContent && row.raw_source_content ? row.raw_source_content : row.source_content,
                         targetContent: returnRawContent && row.raw_target_content ? row.raw_target_content : row.target_content,
+                        rawSourceContent: row.raw_source_content || row.source_content,
+                        rawTargetContent: row.raw_target_content || row.target_content,
                         content: returnRawContent && row.raw_source_content ? row.raw_source_content : row.source_content,
                         uri: row.uri,
                         line: row.line,
@@ -3186,6 +3188,8 @@ export class SQLiteIndexManager {
                     cellLabel: row.cell_label, // NO FALLBACK
                     sourceContent: returnRawContent && row.raw_source_content ? row.raw_source_content : row.source_content,
                     targetContent: returnRawContent && rawTargetContent ? rawTargetContent : targetContent,
+                    rawSourceContent: row.raw_source_content || row.source_content,
+                    rawTargetContent: rawTargetContent || targetContent,
                     content: returnRawContent && row.raw_source_content ? row.raw_source_content : row.source_content,
                     uri: row.uri,
                     line: row.line,
@@ -3272,6 +3276,8 @@ export class SQLiteIndexManager {
                             cellLabel: row.cell_label || null,
                             sourceContent: returnRawContent && row.raw_source_content ? row.raw_source_content : row.source_content,
                             targetContent: returnRawContent && row.raw_target_content ? row.raw_target_content : row.target_content,
+                            rawSourceContent: row.raw_source_content || row.source_content,
+                            rawTargetContent: row.raw_target_content || row.target_content,
                             content: returnRawContent && row.raw_source_content ? row.raw_source_content : row.source_content,
                             uri: row.uri,
                             line: row.line,
@@ -3442,6 +3448,8 @@ export class SQLiteIndexManager {
                             cellLabel: row.cell_label, // NO FALLBACK - show raw value
                             sourceContent: returnRawContent && row.raw_source_content ? row.raw_source_content : row.source_content,
                             targetContent: returnRawContent && rawTargetContent ? rawTargetContent : targetContent,
+                            rawSourceContent: row.raw_source_content || row.source_content,
+                            rawTargetContent: rawTargetContent || targetContent,
                             content: returnRawContent && row.raw_source_content ? row.raw_source_content : row.source_content,
                             uri: row.uri,
                             line: row.line,
diff --git a/src/providers/translationSuggestions/shared.ts b/src/providers/translationSuggestions/shared.ts
index f43276e80..c2a121772 100644
--- a/src/providers/translationSuggestions/shared.ts
+++ b/src/providers/translationSuggestions/shared.ts
@@ -184,8 +184,8 @@ export function buildFewShotExamplesText(
   
   const examplesInner = pairs
     .map((pair) => {
-      const sourceRaw = pair.sourceCell?.content ?? "";
-      const targetRaw = pair.targetCell?.content ?? "";
+      const sourceRaw = allowHtml ? (pair.sourceCell?.rawContent || pair.sourceCell?.content || "") : (pair.sourceCell?.content ?? "");
+      const targetRaw = allowHtml ? (pair.targetCell?.rawContent || pair.targetCell?.content || "") : (pair.targetCell?.content ?? "");
       const target = allowHtml ? targetRaw.trim() : stripHtmlTags(targetRaw).trim();
       const source = allowHtml ? sourceRaw.trim() : stripHtmlTags(sourceRaw).trim();
       const targetInner = allowHtml ? wrapCdata(target) : xmlEscape(target);
diff --git a/types/index.d.ts b/types/index.d.ts
index 3b39faefc..d024c001f 100644
--- a/types/index.d.ts
+++ b/types/index.d.ts
@@ -517,6 +517,7 @@ type MiniSearchVerseResult = {
 type MinimalCellResult = {
     cellId?: string;
     content?: string;
+    rawContent?: string;
     uri?: string;
     line?: number;
     notebookId?: string;

From e7f8e5ff6efd8ea7d54fd0dd4f8c556cde943614 Mon Sep 17 00:00:00 2001
From: dadukhankevin <danieljlosey@gmail.com>
Date: Thu, 12 Mar 2026 17:38:13 -0500
Subject: [PATCH 2/5] Improve LLM instruction following and simplify system
 prompt

- Restructure system message: consolidate 12 appended instructions into
  focused paragraphs, reduce noise and redundancy
- Simplify HTML preservation: always instruct the model to preserve HTML
  from the source; the toggle only controls whether HTML is present in
  examples/context
- Fix temperature passthrough: always send configured temperature instead
  of silently dropping it for the default model or forcing 1 for gpt-5
- Remove redundant "Instructions" header from user message
- Add diagnostic logging for system message and few-shot example HTML

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../translationSuggestions/llmCompletion.ts   |  1 +
 .../translationSuggestions/shared.ts          | 63 ++++++++++---------
 src/utils/llmUtils.ts                         |  3 +-
 src/utils/metadataManager.ts                  |  2 +
 4 files changed, 38 insertions(+), 31 deletions(-)

diff --git a/src/providers/translationSuggestions/llmCompletion.ts b/src/providers/translationSuggestions/llmCompletion.ts
index 784dd9c67..4423925ac 100644
--- a/src/providers/translationSuggestions/llmCompletion.ts
+++ b/src/providers/translationSuggestions/llmCompletion.ts
@@ -216,6 +216,7 @@ export async function llmCompletion(
             // Build messages — buildMessages is the single source of truth for
             // system message construction. Pass the raw chatSystemMessage and let
             // buildMessages append instructions exactly once.
+            console.log(`[llmCompletion] System message from config (first 200 chars): "${chatSystemMessage?.substring(0, 200)}..."`);
             const messages = buildMessages(
                 targetLanguage,
                 chatSystemMessage,
diff --git a/src/providers/translationSuggestions/shared.ts b/src/providers/translationSuggestions/shared.ts
index c2a121772..c4d7c3533 100644
--- a/src/providers/translationSuggestions/shared.ts
+++ b/src/providers/translationSuggestions/shared.ts
@@ -180,14 +180,19 @@ export function buildFewShotExamplesText(
   allowHtml: boolean = false, 
   exampleFormat: string = "source-and-target"
 ): string {
-  console.debug(`[buildFewShotExamplesText] Building ${pairs.length} examples in '${exampleFormat}' format`);
-  
+  console.debug(`[buildFewShotExamplesText] Building ${pairs.length} examples in '${exampleFormat}' format, allowHtml=${allowHtml}`);
+
   const examplesInner = pairs
-    .map((pair) => {
+    .map((pair, idx) => {
       const sourceRaw = allowHtml ? (pair.sourceCell?.rawContent || pair.sourceCell?.content || "") : (pair.sourceCell?.content ?? "");
       const targetRaw = allowHtml ? (pair.targetCell?.rawContent || pair.targetCell?.content || "") : (pair.targetCell?.content ?? "");
       const target = allowHtml ? targetRaw.trim() : stripHtmlTags(targetRaw).trim();
       const source = allowHtml ? sourceRaw.trim() : stripHtmlTags(sourceRaw).trim();
+      if (allowHtml && idx < 3) {
+        const hasHtmlInTarget = /<[a-z][^>]*>/i.test(target);
+        const hasHtmlInSource = /<[a-z][^>]*>/i.test(source);
+        console.log(`[buildFewShotExamplesText] Example ${idx}: hasHtmlInSource=${hasHtmlInSource}, hasHtmlInTarget=${hasHtmlInTarget}, targetRawContent=${pair.targetCell?.rawContent ? 'present' : 'MISSING'}, target preview="${target.substring(0, 100)}"`);
+      }
       const targetInner = allowHtml ? wrapCdata(target) : xmlEscape(target);
       const sourceInner = allowHtml ? wrapCdata(source) : xmlEscape(source);
       
@@ -218,35 +223,37 @@ export function buildMessages(
   exampleFormat: string = "source-and-target",
   sourceLanguage: string | null = null
 ): ChatMessage[] {
-  let systemMessage = chatSystemMessage || `You are a helpful assistant`;
+  const sourceLangText = sourceLanguage ? `${sourceLanguage}` : "the source language";
+  const targetLangText = targetLanguage || "the target language";
 
-  if (exampleFormat === "target-only") {
-    systemMessage += `\n\nReference translations are provided in XML <target> tags. Use these as examples of the translation style and patterns you should follow.`;
-  } else {
-    systemMessage += `\n\nInput sections for examples and context are provided in XML. Only use values within <source> and <target> tags.`;
+  // Build a focused system message: critical output format first, then translation guidance
+  const parts: string[] = [];
+
+  // User's custom instructions (from metadata.json) come first
+  if (chatSystemMessage) {
+    parts.push(chatSystemMessage);
   }
-  // Preserve line breaks and specify output format
-  if (allowHtml) {
-    systemMessage += `\n\nYou may include inline HTML tags when appropriate (e.g., <span>, <i>, <b>) consistent with examples. Preserve original line breaks from <currentTask><source> by returning text with the same number of lines separated by newline characters.`;
+
+  // Translation direction and approach
+  parts.push(`Translate from ${sourceLangText} to ${targetLangText}. This may be an ultra-low resource language — follow the patterns, style, and vocabulary of the provided reference data closely. When in doubt, err on the side of literalness.`);
+
+  // HTML preservation — always instruct to preserve HTML based on source
+  parts.push(`If the source text contains HTML formatting (e.g., <span>, <i>, <b> tags), preserve that HTML structure in your translation. Match the formatting of the source.`);
+
+  // Line preservation
+  parts.push(`Preserve original line breaks from <currentTask><source> by returning text with the same number of lines.`);
+
+  // Output format
+  parts.push(`Wrap your final translation in <final_answer>...</final_answer> tags. Provide only the translation — no commentary, explanations, or metadata.`);
+
+  // Data format hint
+  if (exampleFormat === "target-only") {
+    parts.push(`Reference translations are provided in XML <target> tags. Use these as examples of the translation style and patterns to follow.`);
   } else {
-    systemMessage += `\n\nReturn plain text only (no XML/HTML). Preserve original line breaks from <currentTask><source> by returning text with the same number of lines separated by newline characters.`;
-  }
-  const sourceLangText = sourceLanguage ? `from ${sourceLanguage} ` : "from the source language ";
-  systemMessage += `\n\nAlways translate ${sourceLangText}to the target language ${targetLanguage || ""
-    }, relying strictly on reference data and context provided by the user. The language may be an ultra-low resource language, so it is critical to follow the patterns and style of the provided reference data closely.`;
-
-  systemMessage += `\n\n1. Analyze the provided reference data to understand the translation patterns and style.`;
-  systemMessage += `\n2. Complete the partial or complete translation of the line.`;
-  systemMessage += `\n3. Ensure your translation fits seamlessly with the existing partial translation.`;
-  systemMessage += `\n4. Provide only the completed translation without any additional commentary or metadata.`;
-  systemMessage += `\n5. Translate only into the target language ${targetLanguage || ""}.`;
-  systemMessage += `\n6. Pay careful attention to the provided reference data.`;
-  systemMessage += `\n7. If in doubt, err on the side of literalness.`;
-  if (allowHtml) {
-    systemMessage += `\n8. If the project has any styles, return HTML with the appropriate tags or classes as per the examples in the translation memory.`;
+    parts.push(`Examples and context are provided in XML with <source> and <target> tags.`);
   }
 
-  systemMessage += `\n\nWrap your final translation in <final_answer>...</final_answer> XML tags. Do not include any other XML tags in your response outside of these tags.`;
+  const systemMessage = parts.join("\n\n");
 
   const contextXml = `<context>\n${precedingContextPairs.filter(Boolean).join("\n")}\n</context>`;
   const currentTaskXml = allowHtml
@@ -254,8 +261,6 @@ export function buildMessages(
     : `<currentTask><source>${xmlEscape(currentCellSourceContent)}</source></currentTask>`;
 
   const userMessage = [
-    "## Instructions",
-    "Follow the translation patterns and style as shown.",
     "## Translation Memory (XML)",
     fewShotExamples,
     "## Current Context (XML)",
diff --git a/src/utils/llmUtils.ts b/src/utils/llmUtils.ts
index 85fddca98..b0553a15f 100644
--- a/src/utils/llmUtils.ts
+++ b/src/utils/llmUtils.ts
@@ -95,8 +95,7 @@ export async function callLLM(
                     const completion = await openai.chat.completions.create({
                         model,
                         messages: messages as ChatCompletionMessageParam[],
-                        // Let the server decide temperature for the default model.
-                        ...(model.toLowerCase() === "default" ? {} : (model.toLowerCase() === "gpt-5" ? { temperature: 1 } : { temperature: config.temperature })),
+                            temperature: config.temperature,
                     }, {
                         signal: abortController.signal
                     });
diff --git a/src/utils/metadataManager.ts b/src/utils/metadataManager.ts
index bdcc0930d..219d6fff6 100644
--- a/src/utils/metadataManager.ts
+++ b/src/utils/metadataManager.ts
@@ -402,8 +402,10 @@ export class MetadataManager {
         if (result.success && result.metadata) {
             const chatSystemMessage = (result.metadata as any).chatSystemMessage as string | undefined;
             if (chatSystemMessage) {
+                console.log(`[MetadataManager.getChatSystemMessage] Returning stored message (first 100 chars): "${chatSystemMessage.substring(0, 100)}..."`);
                 return chatSystemMessage;
             }
+            console.log(`[MetadataManager.getChatSystemMessage] No chatSystemMessage found in metadata.json, will try to generate`);
         }
 
         // Try to generate chatSystemMessage if it doesn't exist

From f287432ae284d880f215619a3c3a1bc70fb4abf2 Mon Sep 17 00:00:00 2001
From: dadukhankevin <danieljlosey@gmail.com>
Date: Fri, 13 Mar 2026 11:58:37 -0500
Subject: [PATCH 3/5] Preserve source HTML in LLM prompts

Keep raw source HTML in the current-task prompt when HTML predictions are enabled while still using sanitized text for example search. Add regression coverage to ensure the prompt preserves source markup and spacing boundaries.

Made-with: Cursor
---
 .../translationSuggestions/llmCompletion.ts   | 35 ++++++++++++++-----
 .../suite/codexCellEditorProvider.test.ts     | 26 +++++++++++---
 2 files changed, 48 insertions(+), 13 deletions(-)

diff --git a/src/providers/translationSuggestions/llmCompletion.ts b/src/providers/translationSuggestions/llmCompletion.ts
index 4423925ac..2032337ac 100644
--- a/src/providers/translationSuggestions/llmCompletion.ts
+++ b/src/providers/translationSuggestions/llmCompletion.ts
@@ -136,14 +136,18 @@ export async function llmCompletion(
             throw new Error(`No source content found for cell ${currentCellId}. The search index may be incomplete. Try running "Force Complete Rebuild" from the command palette.`);
         }
 
-        // Sanitize HTML content to extract plain text (handles transcription spans, etc.)
+        // Convert source HTML into search-friendly plain text while preserving word
+        // boundaries that would otherwise be lost when tags are stripped.
         const sanitizeHtmlContent = (html: string): string => {
             if (!html) return '';
             return html
                 .replace(/<sup[^>]*class=["']footnote-marker["'][^>]*>[\s\S]*?<\/sup>/gi, '')
                 .replace(/<sup[^>]*data-footnote[^>]*>[\s\S]*?<\/sup>/gi, '')
                 .replace(/<sup[^>]*>[\s\S]*?<\/sup>/gi, '')
+                .replace(/<br\s*\/?>/gi, ' ')
                 .replace(/<\/p>/gi, ' ')
+                .replace(/<\/div>/gi, ' ')
+                .replace(/<\/li>/gi, ' ')
                 .replace(/<[^>]*>/g, '')
                 .replace(/&nbsp;/g, ' ')
                 .replace(/&amp;/g, '&')
@@ -157,16 +161,32 @@ export async function llmCompletion(
                 .trim();
         };
 
-        const sourceContent = validSourceCells
-            .map((cell) => sanitizeHtmlContent(cell!.content || ""))
+        const preserveHtmlInPrompt = Boolean(completionConfig.allowHtmlPredictions);
+        const searchSourceContent = validSourceCells
+            .map((cell) => sanitizeHtmlContent(cell?.rawContent || cell?.content || ""))
             .join(" ");
 
+        const currentCellSourceContent = validSourceCells
+            .map((cell) => {
+                const rawSourceContent = cell?.rawContent || cell?.content || "";
+                if (!preserveHtmlInPrompt) {
+                    return sanitizeHtmlContent(rawSourceContent);
+                }
+
+                return rawSourceContent
+                    .replace(/<sup[^>]*class=["']footnote-marker["'][^>]*>[\s\S]*?<\/sup>/gi, "")
+                    .replace(/<sup[^>]*data-footnote[^>]*>[\s\S]*?<\/sup>/gi, "")
+                    .replace(/<sup[^>]*>[\s\S]*?<\/sup>/gi, "")
+                    .trim();
+            })
+            .join(preserveHtmlInPrompt ? "\n" : " ");
+
         // Get few-shot examples (existing behavior encapsulated)
         if (completionConfig.debugMode) {
-            console.debug(`[llmCompletion] Fetching few-shot examples with query: "${sourceContent}", cellId: ${currentCellId}, count: ${numberOfFewShotExamples}, onlyValidated: ${completionConfig.useOnlyValidatedExamples}`);
+            console.debug(`[llmCompletion] Fetching few-shot examples with query: "${searchSourceContent}", cellId: ${currentCellId}, count: ${numberOfFewShotExamples}, onlyValidated: ${completionConfig.useOnlyValidatedExamples}`);
         }
         const finalExamples = await fetchFewShotExamples(
-            sourceContent,
+            searchSourceContent,
             currentCellId,
             numberOfFewShotExamples,
             completionConfig.useOnlyValidatedExamples
@@ -203,12 +223,11 @@ export async function llmCompletion(
 
         try {
             const currentCellIdString = currentCellIds.join(", ");
-            const currentCellSourceContent = sourceContent;
 
             // Generate few-shot examples
             const fewShotExamples = buildFewShotExamplesText(
                 finalExamples, 
-                Boolean(completionConfig.allowHtmlPredictions), 
+                preserveHtmlInPrompt,
                 fewShotExampleFormat || "source-and-target"
             );
             console.log(`[llmCompletion] Built few-shot examples text (${fewShotExamples.length} chars, format: ${fewShotExampleFormat}):`, fewShotExamples.substring(0, 200) + '...');
@@ -223,7 +242,7 @@ export async function llmCompletion(
                 fewShotExamples,
                 precedingTranslationPairs,
                 currentCellSourceContent,
-                Boolean(completionConfig.allowHtmlPredictions),
+                preserveHtmlInPrompt,
                 fewShotExampleFormat || "source-and-target",
                 sourceLanguage
             );
diff --git a/src/test/suite/codexCellEditorProvider.test.ts b/src/test/suite/codexCellEditorProvider.test.ts
index 29a2a5fcf..7079d0481 100644
--- a/src/test/suite/codexCellEditorProvider.test.ts
+++ b/src/test/suite/codexCellEditorProvider.test.ts
@@ -3865,7 +3865,7 @@ suite("CodexCellEditorProvider Test Suite", () => {
             );
 
             const cellId = codexSubtitleContent.cells[0].metadata.id;
-            const sourceContent = "Test source content";
+            const sourceContent = "<p><span class='source-highlight'>Test source content</span><br /></p>";
 
             // Track onlyValidated parameter
             let capturedOnlyValidated: boolean | null = null;
@@ -3968,7 +3968,7 @@ suite("CodexCellEditorProvider Test Suite", () => {
             );
 
             const cellId = codexSubtitleContent.cells[0].metadata.id;
-            const sourceContent = "Test source content";
+            const sourceContent = "<p><span class='source-highlight'>Test source content</span><br /></p>";
 
             // Track onlyValidated parameter
             let capturedOnlyValidated: boolean | null = null;
@@ -3982,7 +3982,13 @@ suite("CodexCellEditorProvider Test Suite", () => {
                     return [];
                 }
                 if (command === "codex-editor-extension.getSourceCellByCellIdFromAllSourceCells") {
-                    return { cellId: args[0], content: sourceContent, versions: [], notebookId: "nb1" } as MinimalCellResult;
+                    return {
+                        cellId: args[0],
+                        content: sourceContent,
+                        rawContent: sourceContent,
+                        versions: [],
+                        notebookId: "nb1",
+                    } as MinimalCellResult;
                 }
                 return originalExecuteCommand.apply(vscode.commands, [command, ...args]);
             };
@@ -4207,7 +4213,7 @@ suite("CodexCellEditorProvider Test Suite", () => {
             );
 
             const cellId = codexSubtitleContent.cells[0].metadata.id;
-            const sourceContent = "Test source content";
+            const sourceContent = "<p><span class='source-highlight'>Test source content</span><br /></p>";
             const htmlExample = "<span class='highlight'>HTML content</span>";
 
             // Mock translation pairs with HTML content
@@ -4229,7 +4235,13 @@ suite("CodexCellEditorProvider Test Suite", () => {
                     return mockTranslationPairs;
                 }
                 if (command === "codex-editor-extension.getSourceCellByCellIdFromAllSourceCells") {
-                    return { cellId: args[0], content: sourceContent, versions: [], notebookId: "nb1" } as MinimalCellResult;
+                    return {
+                        cellId: args[0],
+                        content: sourceContent,
+                        rawContent: sourceContent,
+                        versions: [],
+                        notebookId: "nb1",
+                    } as MinimalCellResult;
                 }
                 return originalExecuteCommand.apply(vscode.commands, [command, ...args]);
             };
@@ -4303,6 +4315,10 @@ suite("CodexCellEditorProvider Test Suite", () => {
                 assert.ok(userMessage.content.includes("<span"), "User message should contain HTML tags when allowHtmlPredictions is enabled");
                 assert.ok(userMessage.content.includes("class='highlight'"), "User message should preserve HTML attributes");
                 assert.ok(userMessage.content.includes("HTML content"), "User message should contain HTML content");
+                assert.ok(
+                    userMessage.content.includes("<currentTask><source><![CDATA[<p><span class='source-highlight'>Test source content</span><br /></p>]]></source></currentTask>"),
+                    "Current task source should preserve source HTML when allowHtmlPredictions is enabled"
+                );
 
                 // Verify system message mentions HTML
                 const systemMessage = (capturedMessages as any[]).find((m: any) => m.role === "system");

From e6c5932bad457ee5cadb17d34f60846d3c709d8d Mon Sep 17 00:00:00 2001
From: Ben Scholtens <ben.scholtens@me.com>
Date: Tue, 17 Mar 2026 17:30:25 -0400
Subject: [PATCH 4/5] Remove debug logs and trim trailing whitespace

---
 .../translationSuggestions/llmCompletion.ts   |  3 +-
 .../translationSuggestions/shared.ts          | 32 +++++++++----------
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/src/providers/translationSuggestions/llmCompletion.ts b/src/providers/translationSuggestions/llmCompletion.ts
index 2032337ac..047608f0b 100644
--- a/src/providers/translationSuggestions/llmCompletion.ts
+++ b/src/providers/translationSuggestions/llmCompletion.ts
@@ -226,7 +226,7 @@ export async function llmCompletion(
 
             // Generate few-shot examples
             const fewShotExamples = buildFewShotExamplesText(
-                finalExamples, 
+                finalExamples,
                 preserveHtmlInPrompt,
                 fewShotExampleFormat || "source-and-target"
             );
@@ -235,7 +235,6 @@ export async function llmCompletion(
             // Build messages — buildMessages is the single source of truth for
             // system message construction. Pass the raw chatSystemMessage and let
             // buildMessages append instructions exactly once.
-            console.log(`[llmCompletion] System message from config (first 200 chars): "${chatSystemMessage?.substring(0, 200)}..."`);
             const messages = buildMessages(
                 targetLanguage,
                 chatSystemMessage,
diff --git a/src/providers/translationSuggestions/shared.ts b/src/providers/translationSuggestions/shared.ts
index c4d7c3533..0652528f9 100644
--- a/src/providers/translationSuggestions/shared.ts
+++ b/src/providers/translationSuggestions/shared.ts
@@ -14,7 +14,7 @@ export async function fetchFewShotExamples(
   // Use a higher multiplier since many candidates may be incomplete pairs
   const initialCandidateCount = Math.max(numberOfFewShotExamples * 10, 100);
   console.debug(`[fetchFewShotExamples] Starting search with query: "${sourceContent}" (length: ${sourceContent?.length || 0}), requesting ${initialCandidateCount} candidates, validated only: ${useOnlyValidatedExamples}`);
-  
+
   let similarSourceCells: TranslationPair[] = [];
   try {
     similarSourceCells = await vscode.commands.executeCommand(
@@ -52,7 +52,7 @@ export async function fetchFewShotExamples(
 
   // Instead of filtering, rank all valid complete pairs by relevance
   const currentTokens = tokenizeText({ method: "whitespace_and_punctuation", text: sourceContent });
-  
+
   const rankedPairs = (similarSourceCells || [])
     .filter((pair) => {
       // Basic validity filters only
@@ -62,7 +62,7 @@ export async function fetchFewShotExamples(
         }
         return false;
       }
-      
+
       // Must have both source and target content for complete pairs
       const pairSourceContent = pair.sourceCell?.content || "";
       const pairTargetContent = pair.targetCell?.content || "";
@@ -70,7 +70,7 @@ export async function fetchFewShotExamples(
         console.debug(`[fetchFewShotExamples] Filtering out pair ${pair.cellId} - incomplete pair (missing source or target)`);
         return false;
       }
-      
+
       return true;
     })
     .map((pair) => {
@@ -79,13 +79,13 @@ export async function fetchFewShotExamples(
       const pairSourceContentRaw = pair.sourceCell?.content || "";
       const pairSourceContentSanitized = sanitizeHtmlContent(pairSourceContentRaw);
       const pairTokens = tokenizeText({ method: "whitespace_and_punctuation", text: pairSourceContentSanitized });
-      
+
       // Calculate overlap ratio
       const overlapCount = currentTokens.filter(token => pairTokens.includes(token)).length;
       const overlapRatio = currentTokens.length > 0 ? overlapCount / currentTokens.length : 0;
-      
+
       console.debug(`[fetchFewShotExamples] Pair ${pair.cellId} - overlap: ${overlapCount}/${currentTokens.length} = ${(overlapRatio * 100).toFixed(1)}%`);
-      
+
       return {
         pair,
         overlapRatio,
@@ -99,23 +99,23 @@ export async function fetchFewShotExamples(
       }
       return b.overlapCount - a.overlapCount;
     });
-  
+
   console.debug(`[fetchFewShotExamples] Ranked ${rankedPairs.length} complete pairs by relevance`);
-  
+
   // Take the top N most relevant complete pairs
   const filteredSimilarSourceCells = rankedPairs
     .slice(0, numberOfFewShotExamples)
     .map(ranked => ranked.pair);
 
   console.debug(`[fetchFewShotExamples] Returning ${filteredSimilarSourceCells.length} top-ranked examples (requested: ${numberOfFewShotExamples})`);
-  
+
   if (filteredSimilarSourceCells.length === 0) {
     console.debug(`[fetchFewShotExamples] No complete translation pairs found. Source length: ${sourceContent?.length || 0}`);
     console.debug(`[fetchFewShotExamples] Database may contain only incomplete pairs (source-only or target-only).`);
   } else if (filteredSimilarSourceCells.length < numberOfFewShotExamples) {
     console.debug(`[fetchFewShotExamples] Found fewer examples than requested: ${filteredSimilarSourceCells.length}/${numberOfFewShotExamples}`);
   }
-  
+
   return filteredSimilarSourceCells;
 }
 
@@ -176,11 +176,11 @@ export async function getPrecedingTranslationPairs(
 }
 
 export function buildFewShotExamplesText(
-  pairs: TranslationPair[], 
-  allowHtml: boolean = false, 
+  pairs: TranslationPair[],
+  allowHtml: boolean = false,
   exampleFormat: string = "source-and-target"
 ): string {
-  console.debug(`[buildFewShotExamplesText] Building ${pairs.length} examples in '${exampleFormat}' format, allowHtml=${allowHtml}`);
+
 
   const examplesInner = pairs
     .map((pair, idx) => {
@@ -191,11 +191,11 @@ export function buildFewShotExamplesText(
       if (allowHtml && idx < 3) {
         const hasHtmlInTarget = /<[a-z][^>]*>/i.test(target);
         const hasHtmlInSource = /<[a-z][^>]*>/i.test(source);
-        console.log(`[buildFewShotExamplesText] Example ${idx}: hasHtmlInSource=${hasHtmlInSource}, hasHtmlInTarget=${hasHtmlInTarget}, targetRawContent=${pair.targetCell?.rawContent ? 'present' : 'MISSING'}, target preview="${target.substring(0, 100)}"`);
+
       }
       const targetInner = allowHtml ? wrapCdata(target) : xmlEscape(target);
       const sourceInner = allowHtml ? wrapCdata(source) : xmlEscape(source);
-      
+
       // Format examples based on the setting
       if (exampleFormat === "target-only") {
         return `<example><target>${targetInner}</target></example>`;

From e426f0c1a97e70734518f35caf785e5281b24a47 Mon Sep 17 00:00:00 2001
From: Ben Scholtens <ben.scholtens@me.com>
Date: Tue, 17 Mar 2026 23:07:38 -0400
Subject: [PATCH 5/5] Update CodexCellEditorProvider tests to enhance system
 message assertions

- Adjusted assertions to verify that the system message includes the target language "fr" or "French".
- Updated the format-instructions check to expect a mention of HTML/formatting handling (that guidance is now always included) instead of a plain-text notice when HTML is disabled.
---
 src/test/suite/codexCellEditorProvider.test.ts | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/test/suite/codexCellEditorProvider.test.ts b/src/test/suite/codexCellEditorProvider.test.ts
index 7079d0481..ad83f1831 100644
--- a/src/test/suite/codexCellEditorProvider.test.ts
+++ b/src/test/suite/codexCellEditorProvider.test.ts
@@ -3826,7 +3826,7 @@ suite("CodexCellEditorProvider Test Suite", () => {
                 assert.ok(userMessage, "Should have a user message");
 
                 // Verify system message contains expected content
-                assert.ok(systemMessage.content.includes("target language"), "System message should mention target language");
+                assert.ok(systemMessage.content.includes("fr") || systemMessage.content.includes("target language"), "System message should mention target language");
                 assert.ok(systemMessage.content.includes("fr") || systemMessage.content.includes("French"), "System message should include target language");
 
                 // Verify user message contains examples
@@ -4180,10 +4180,10 @@ suite("CodexCellEditorProvider Test Suite", () => {
                     "System message should contain translation instructions"
                 );
 
-                // Verify format instructions (plain text since allowHtmlPredictions is false)
+                // Verify format instructions (HTML preservation guidance is always included)
                 assert.ok(
-                    systemContent.includes("plain text") || systemContent.includes("no XML/HTML"),
-                    "System message should mention plain text format when HTML is disabled"
+                    systemContent.includes("HTML") || systemContent.includes("formatting"),
+                    "System message should mention HTML/formatting handling"
                 );
 
                 // Verify reference to examples/patterns