From 48d3d3778ec651620b02fe77491ce6160205cb5b Mon Sep 17 00:00:00 2001
From: dadukhankevin
Date: Wed, 11 Mar 2026 23:54:46 -0500
Subject: [PATCH 1/5] Surface raw HTML content through translation pair
pipeline for allowHtmlPredictions
The allowHtmlPredictions toggle was incomplete: HTML was stripped at SQLite
index time but the raw content (already stored in s_raw_content/t_raw_content
columns) was never surfaced to the prompt builder. Now rawContent flows through
MinimalCellResult so buildFewShotExamplesText can use HTML-preserving examples
when the toggle is on.
Co-Authored-By: Claude Opus 4.6
---
.../contextAware/contentIndexes/indexes/search.ts | 8 ++++----
.../contextAware/contentIndexes/indexes/sqliteIndex.ts | 8 ++++++++
src/providers/translationSuggestions/shared.ts | 4 ++--
types/index.d.ts | 1 +
4 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/src/activationHelpers/contextAware/contentIndexes/indexes/search.ts b/src/activationHelpers/contextAware/contentIndexes/indexes/search.ts
index ffdf88433..4b76d8792 100644
--- a/src/activationHelpers/contextAware/contentIndexes/indexes/search.ts
+++ b/src/activationHelpers/contextAware/contentIndexes/indexes/search.ts
@@ -265,8 +265,8 @@ export async function getTranslationPairsFromSourceCellQuery(
debug(`[getTranslationPairsFromSourceCellQuery] ✅ Adding direct result for ${cellId}`);
translationPairs.push({
cellId,
- sourceCell: { cellId, content: searchResult.sourceContent, uri: searchResult.uri || "", line: searchResult.line || 0 },
- targetCell: { cellId, content: searchResult.targetContent, uri: searchResult.uri || "", line: searchResult.line || 0 },
+ sourceCell: { cellId, content: searchResult.sourceContent, rawContent: searchResult.rawSourceContent, uri: searchResult.uri || "", line: searchResult.line || 0 },
+ targetCell: { cellId, content: searchResult.targetContent, rawContent: searchResult.rawTargetContent, uri: searchResult.uri || "", line: searchResult.line || 0 },
});
} else {
debug(`[getTranslationPairsFromSourceCellQuery] ❌ Skipping ${cellId} - empty content after trim`);
@@ -278,8 +278,8 @@ export async function getTranslationPairsFromSourceCellQuery(
debug(`[getTranslationPairsFromSourceCellQuery] ✅ Adding fetched result for ${cellId}`);
translationPairs.push({
cellId,
- sourceCell: { cellId, content: translationPair.sourceContent, uri: translationPair.uri || "", line: translationPair.line || 0 },
- targetCell: { cellId, content: translationPair.targetContent, uri: translationPair.uri || "", line: translationPair.line || 0 },
+ sourceCell: { cellId, content: translationPair.sourceContent, rawContent: translationPair.rawSourceContent, uri: translationPair.uri || "", line: translationPair.line || 0 },
+ targetCell: { cellId, content: translationPair.targetContent, rawContent: translationPair.rawTargetContent, uri: translationPair.uri || "", line: translationPair.line || 0 },
});
} else {
debug(`[getTranslationPairsFromSourceCellQuery] ❌ Skipping ${cellId} - no valid translation pair found`);
diff --git a/src/activationHelpers/contextAware/contentIndexes/indexes/sqliteIndex.ts b/src/activationHelpers/contextAware/contentIndexes/indexes/sqliteIndex.ts
index 400b7bee5..aed97c8f2 100644
--- a/src/activationHelpers/contextAware/contentIndexes/indexes/sqliteIndex.ts
+++ b/src/activationHelpers/contextAware/contentIndexes/indexes/sqliteIndex.ts
@@ -3038,6 +3038,8 @@ export class SQLiteIndexManager {
cellLabel: row.cell_label, // NO FALLBACK
sourceContent: returnRawContent && row.raw_source_content ? row.raw_source_content : row.source_content,
targetContent: returnRawContent && row.raw_target_content ? row.raw_target_content : row.target_content,
+ rawSourceContent: row.raw_source_content || row.source_content,
+ rawTargetContent: row.raw_target_content || row.target_content,
content: returnRawContent && row.raw_source_content ? row.raw_source_content : row.source_content,
uri: row.uri,
line: row.line,
@@ -3186,6 +3188,8 @@ export class SQLiteIndexManager {
cellLabel: row.cell_label, // NO FALLBACK
sourceContent: returnRawContent && row.raw_source_content ? row.raw_source_content : row.source_content,
targetContent: returnRawContent && rawTargetContent ? rawTargetContent : targetContent,
+ rawSourceContent: row.raw_source_content || row.source_content,
+ rawTargetContent: rawTargetContent || targetContent,
content: returnRawContent && row.raw_source_content ? row.raw_source_content : row.source_content,
uri: row.uri,
line: row.line,
@@ -3272,6 +3276,8 @@ export class SQLiteIndexManager {
cellLabel: row.cell_label || null,
sourceContent: returnRawContent && row.raw_source_content ? row.raw_source_content : row.source_content,
targetContent: returnRawContent && row.raw_target_content ? row.raw_target_content : row.target_content,
+ rawSourceContent: row.raw_source_content || row.source_content,
+ rawTargetContent: row.raw_target_content || row.target_content,
content: returnRawContent && row.raw_source_content ? row.raw_source_content : row.source_content,
uri: row.uri,
line: row.line,
@@ -3442,6 +3448,8 @@ export class SQLiteIndexManager {
cellLabel: row.cell_label, // NO FALLBACK - show raw value
sourceContent: returnRawContent && row.raw_source_content ? row.raw_source_content : row.source_content,
targetContent: returnRawContent && rawTargetContent ? rawTargetContent : targetContent,
+ rawSourceContent: row.raw_source_content || row.source_content,
+ rawTargetContent: rawTargetContent || targetContent,
content: returnRawContent && row.raw_source_content ? row.raw_source_content : row.source_content,
uri: row.uri,
line: row.line,
diff --git a/src/providers/translationSuggestions/shared.ts b/src/providers/translationSuggestions/shared.ts
index f43276e80..c2a121772 100644
--- a/src/providers/translationSuggestions/shared.ts
+++ b/src/providers/translationSuggestions/shared.ts
@@ -184,8 +184,8 @@ export function buildFewShotExamplesText(
const examplesInner = pairs
.map((pair) => {
- const sourceRaw = pair.sourceCell?.content ?? "";
- const targetRaw = pair.targetCell?.content ?? "";
+ const sourceRaw = allowHtml ? (pair.sourceCell?.rawContent || pair.sourceCell?.content || "") : (pair.sourceCell?.content ?? "");
+ const targetRaw = allowHtml ? (pair.targetCell?.rawContent || pair.targetCell?.content || "") : (pair.targetCell?.content ?? "");
const target = allowHtml ? targetRaw.trim() : stripHtmlTags(targetRaw).trim();
const source = allowHtml ? sourceRaw.trim() : stripHtmlTags(sourceRaw).trim();
const targetInner = allowHtml ? wrapCdata(target) : xmlEscape(target);
diff --git a/types/index.d.ts b/types/index.d.ts
index 3b39faefc..d024c001f 100644
--- a/types/index.d.ts
+++ b/types/index.d.ts
@@ -517,6 +517,7 @@ type MiniSearchVerseResult = {
type MinimalCellResult = {
cellId?: string;
content?: string;
+ rawContent?: string;
uri?: string;
line?: number;
notebookId?: string;
From e7f8e5ff6efd8ea7d54fd0dd4f8c556cde943614 Mon Sep 17 00:00:00 2001
From: dadukhankevin
Date: Thu, 12 Mar 2026 17:38:13 -0500
Subject: [PATCH 2/5] Improve LLM instruction following and simplify system
prompt
- Restructure system message: consolidate 12 appended instructions into
focused paragraphs, reduce noise and redundancy
- Simplify HTML preservation: always instruct model to preserve HTML from
source, toggle only controls whether HTML is present in examples/context
- Fix temperature passthrough: always send the configured temperature instead
of silently dropping it for the default model (this also removes the gpt-5
special case that forced temperature to 1)
- Remove redundant "Instructions" header from user message
- Add diagnostic logging for system message and few-shot example HTML
Co-Authored-By: Claude Opus 4.6
---
.../translationSuggestions/llmCompletion.ts | 1 +
.../translationSuggestions/shared.ts | 63 ++++++++++---------
src/utils/llmUtils.ts | 3 +-
src/utils/metadataManager.ts | 2 +
4 files changed, 38 insertions(+), 31 deletions(-)
diff --git a/src/providers/translationSuggestions/llmCompletion.ts b/src/providers/translationSuggestions/llmCompletion.ts
index 784dd9c67..4423925ac 100644
--- a/src/providers/translationSuggestions/llmCompletion.ts
+++ b/src/providers/translationSuggestions/llmCompletion.ts
@@ -216,6 +216,7 @@ export async function llmCompletion(
// Build messages — buildMessages is the single source of truth for
// system message construction. Pass the raw chatSystemMessage and let
// buildMessages append instructions exactly once.
+ console.log(`[llmCompletion] System message from config (first 200 chars): "${chatSystemMessage?.substring(0, 200)}..."`);
const messages = buildMessages(
targetLanguage,
chatSystemMessage,
diff --git a/src/providers/translationSuggestions/shared.ts b/src/providers/translationSuggestions/shared.ts
index c2a121772..c4d7c3533 100644
--- a/src/providers/translationSuggestions/shared.ts
+++ b/src/providers/translationSuggestions/shared.ts
@@ -180,14 +180,19 @@ export function buildFewShotExamplesText(
allowHtml: boolean = false,
exampleFormat: string = "source-and-target"
): string {
- console.debug(`[buildFewShotExamplesText] Building ${pairs.length} examples in '${exampleFormat}' format`);
-
+ console.debug(`[buildFewShotExamplesText] Building ${pairs.length} examples in '${exampleFormat}' format, allowHtml=${allowHtml}`);
+
const examplesInner = pairs
- .map((pair) => {
+ .map((pair, idx) => {
const sourceRaw = allowHtml ? (pair.sourceCell?.rawContent || pair.sourceCell?.content || "") : (pair.sourceCell?.content ?? "");
const targetRaw = allowHtml ? (pair.targetCell?.rawContent || pair.targetCell?.content || "") : (pair.targetCell?.content ?? "");
const target = allowHtml ? targetRaw.trim() : stripHtmlTags(targetRaw).trim();
const source = allowHtml ? sourceRaw.trim() : stripHtmlTags(sourceRaw).trim();
+ if (allowHtml && idx < 3) {
+ const hasHtmlInTarget = /<[a-z][^>]*>/i.test(target);
+ const hasHtmlInSource = /<[a-z][^>]*>/i.test(source);
+ console.log(`[buildFewShotExamplesText] Example ${idx}: hasHtmlInSource=${hasHtmlInSource}, hasHtmlInTarget=${hasHtmlInTarget}, targetRawContent=${pair.targetCell?.rawContent ? 'present' : 'MISSING'}, target preview="${target.substring(0, 100)}"`);
+ }
const targetInner = allowHtml ? wrapCdata(target) : xmlEscape(target);
const sourceInner = allowHtml ? wrapCdata(source) : xmlEscape(source);
@@ -218,35 +223,37 @@ export function buildMessages(
exampleFormat: string = "source-and-target",
sourceLanguage: string | null = null
): ChatMessage[] {
- let systemMessage = chatSystemMessage || `You are a helpful assistant`;
+ const sourceLangText = sourceLanguage ? `${sourceLanguage}` : "the source language";
+ const targetLangText = targetLanguage || "the target language";
- if (exampleFormat === "target-only") {
- systemMessage += `\n\nReference translations are provided in XML tags. Use these as examples of the translation style and patterns you should follow.`;
- } else {
- systemMessage += `\n\nInput sections for examples and context are provided in XML. Only use values within and tags.`;
+ // Build a focused system message: the user's custom instructions first, then translation guidance, output format, and a data-format hint
+ const parts: string[] = [];
+
+ // User's custom instructions (from metadata.json) come first
+ if (chatSystemMessage) {
+ parts.push(chatSystemMessage);
}
- // Preserve line breaks and specify output format
- if (allowHtml) {
- systemMessage += `\n\nYou may include inline HTML tags when appropriate (e.g., , , ) consistent with examples. Preserve original line breaks from by returning text with the same number of lines separated by newline characters.`;
+
+ // Translation direction and approach
+ parts.push(`Translate from ${sourceLangText} to ${targetLangText}. This may be an ultra-low resource language — follow the patterns, style, and vocabulary of the provided reference data closely. When in doubt, err on the side of literalness.`);
+
+ // HTML preservation — always instruct to preserve HTML based on source
+ parts.push(`If the source text contains HTML formatting (e.g., , , tags), preserve that HTML structure in your translation. Match the formatting of the source.`);
+
+ // Line preservation
+ parts.push(`Preserve original line breaks from by returning text with the same number of lines.`);
+
+ // Output format
+ parts.push(`Wrap your final translation in ... tags. Provide only the translation — no commentary, explanations, or metadata.`);
+
+ // Data format hint
+ if (exampleFormat === "target-only") {
+ parts.push(`Reference translations are provided in XML tags. Use these as examples of the translation style and patterns to follow.`);
} else {
- systemMessage += `\n\nReturn plain text only (no XML/HTML). Preserve original line breaks from by returning text with the same number of lines separated by newline characters.`;
- }
- const sourceLangText = sourceLanguage ? `from ${sourceLanguage} ` : "from the source language ";
- systemMessage += `\n\nAlways translate ${sourceLangText}to the target language ${targetLanguage || ""
- }, relying strictly on reference data and context provided by the user. The language may be an ultra-low resource language, so it is critical to follow the patterns and style of the provided reference data closely.`;
-
- systemMessage += `\n\n1. Analyze the provided reference data to understand the translation patterns and style.`;
- systemMessage += `\n2. Complete the partial or complete translation of the line.`;
- systemMessage += `\n3. Ensure your translation fits seamlessly with the existing partial translation.`;
- systemMessage += `\n4. Provide only the completed translation without any additional commentary or metadata.`;
- systemMessage += `\n5. Translate only into the target language ${targetLanguage || ""}.`;
- systemMessage += `\n6. Pay careful attention to the provided reference data.`;
- systemMessage += `\n7. If in doubt, err on the side of literalness.`;
- if (allowHtml) {
- systemMessage += `\n8. If the project has any styles, return HTML with the appropriate tags or classes as per the examples in the translation memory.`;
+ parts.push(`Examples and context are provided in XML with and tags.`);
}
- systemMessage += `\n\nWrap your final translation in ... XML tags. Do not include any other XML tags in your response outside of these tags.`;
+ const systemMessage = parts.join("\n\n");
const contextXml = `\n${precedingContextPairs.filter(Boolean).join("\n")}\n`;
const currentTaskXml = allowHtml
@@ -254,8 +261,6 @@ export function buildMessages(
: `${xmlEscape(currentCellSourceContent)}`;
const userMessage = [
- "## Instructions",
- "Follow the translation patterns and style as shown.",
"## Translation Memory (XML)",
fewShotExamples,
"## Current Context (XML)",
diff --git a/src/utils/llmUtils.ts b/src/utils/llmUtils.ts
index 85fddca98..b0553a15f 100644
--- a/src/utils/llmUtils.ts
+++ b/src/utils/llmUtils.ts
@@ -95,8 +95,7 @@ export async function callLLM(
const completion = await openai.chat.completions.create({
model,
messages: messages as ChatCompletionMessageParam[],
- // Let the server decide temperature for the default model.
- ...(model.toLowerCase() === "default" ? {} : (model.toLowerCase() === "gpt-5" ? { temperature: 1 } : { temperature: config.temperature })),
+ temperature: config.temperature,
}, {
signal: abortController.signal
});
diff --git a/src/utils/metadataManager.ts b/src/utils/metadataManager.ts
index bdcc0930d..219d6fff6 100644
--- a/src/utils/metadataManager.ts
+++ b/src/utils/metadataManager.ts
@@ -402,8 +402,10 @@ export class MetadataManager {
if (result.success && result.metadata) {
const chatSystemMessage = (result.metadata as any).chatSystemMessage as string | undefined;
if (chatSystemMessage) {
+ console.log(`[MetadataManager.getChatSystemMessage] Returning stored message (first 100 chars): "${chatSystemMessage.substring(0, 100)}..."`);
return chatSystemMessage;
}
+ console.log(`[MetadataManager.getChatSystemMessage] No chatSystemMessage found in metadata.json, will try to generate`);
}
// Try to generate chatSystemMessage if it doesn't exist
From f287432ae284d880f215619a3c3a1bc70fb4abf2 Mon Sep 17 00:00:00 2001
From: dadukhankevin
Date: Fri, 13 Mar 2026 11:58:37 -0500
Subject: [PATCH 3/5] Preserve source HTML in LLM prompts
Keep raw source HTML in the current-task prompt when HTML predictions are enabled while still using sanitized text for example search. Add regression coverage to ensure the prompt preserves source markup and spacing boundaries.
Made-with: Cursor
---
.../translationSuggestions/llmCompletion.ts | 35 ++++++++++++++-----
.../suite/codexCellEditorProvider.test.ts | 26 +++++++++++---
2 files changed, 48 insertions(+), 13 deletions(-)
diff --git a/src/providers/translationSuggestions/llmCompletion.ts b/src/providers/translationSuggestions/llmCompletion.ts
index 4423925ac..2032337ac 100644
--- a/src/providers/translationSuggestions/llmCompletion.ts
+++ b/src/providers/translationSuggestions/llmCompletion.ts
@@ -136,14 +136,18 @@ export async function llmCompletion(
throw new Error(`No source content found for cell ${currentCellId}. The search index may be incomplete. Try running "Force Complete Rebuild" from the command palette.`);
}
- // Sanitize HTML content to extract plain text (handles transcription spans, etc.)
+ // Convert source HTML into search-friendly plain text while preserving word
+ // boundaries that would otherwise be lost when tags are stripped.
const sanitizeHtmlContent = (html: string): string => {
if (!html) return '';
return html
.replace(/]*class=["']footnote-marker["'][^>]*>[\s\S]*?<\/sup>/gi, '')
.replace(/]*data-footnote[^>]*>[\s\S]*?<\/sup>/gi, '')
.replace(/]*>[\s\S]*?<\/sup>/gi, '')
+ .replace(/
/gi, ' ')
.replace(/<\/p>/gi, ' ')
+ .replace(/<\/div>/gi, ' ')
+ .replace(/<\/li>/gi, ' ')
.replace(/<[^>]*>/g, '')
.replace(/ /g, ' ')
.replace(/&/g, '&')
@@ -157,16 +161,32 @@ export async function llmCompletion(
.trim();
};
- const sourceContent = validSourceCells
- .map((cell) => sanitizeHtmlContent(cell!.content || ""))
+ const preserveHtmlInPrompt = Boolean(completionConfig.allowHtmlPredictions);
+ const searchSourceContent = validSourceCells
+ .map((cell) => sanitizeHtmlContent(cell?.rawContent || cell?.content || ""))
.join(" ");
+ const currentCellSourceContent = validSourceCells
+ .map((cell) => {
+ const rawSourceContent = cell?.rawContent || cell?.content || "";
+ if (!preserveHtmlInPrompt) {
+ return sanitizeHtmlContent(rawSourceContent);
+ }
+
+ return rawSourceContent
+ .replace(/]*class=["']footnote-marker["'][^>]*>[\s\S]*?<\/sup>/gi, "")
+ .replace(/]*data-footnote[^>]*>[\s\S]*?<\/sup>/gi, "")
+ .replace(/]*>[\s\S]*?<\/sup>/gi, "")
+ .trim();
+ })
+ .join(preserveHtmlInPrompt ? "\n" : " ");
+
// Get few-shot examples (existing behavior encapsulated)
if (completionConfig.debugMode) {
- console.debug(`[llmCompletion] Fetching few-shot examples with query: "${sourceContent}", cellId: ${currentCellId}, count: ${numberOfFewShotExamples}, onlyValidated: ${completionConfig.useOnlyValidatedExamples}`);
+ console.debug(`[llmCompletion] Fetching few-shot examples with query: "${searchSourceContent}", cellId: ${currentCellId}, count: ${numberOfFewShotExamples}, onlyValidated: ${completionConfig.useOnlyValidatedExamples}`);
}
const finalExamples = await fetchFewShotExamples(
- sourceContent,
+ searchSourceContent,
currentCellId,
numberOfFewShotExamples,
completionConfig.useOnlyValidatedExamples
@@ -203,12 +223,11 @@ export async function llmCompletion(
try {
const currentCellIdString = currentCellIds.join(", ");
- const currentCellSourceContent = sourceContent;
// Generate few-shot examples
const fewShotExamples = buildFewShotExamplesText(
finalExamples,
- Boolean(completionConfig.allowHtmlPredictions),
+ preserveHtmlInPrompt,
fewShotExampleFormat || "source-and-target"
);
console.log(`[llmCompletion] Built few-shot examples text (${fewShotExamples.length} chars, format: ${fewShotExampleFormat}):`, fewShotExamples.substring(0, 200) + '...');
@@ -223,7 +242,7 @@ export async function llmCompletion(
fewShotExamples,
precedingTranslationPairs,
currentCellSourceContent,
- Boolean(completionConfig.allowHtmlPredictions),
+ preserveHtmlInPrompt,
fewShotExampleFormat || "source-and-target",
sourceLanguage
);
diff --git a/src/test/suite/codexCellEditorProvider.test.ts b/src/test/suite/codexCellEditorProvider.test.ts
index 29a2a5fcf..7079d0481 100644
--- a/src/test/suite/codexCellEditorProvider.test.ts
+++ b/src/test/suite/codexCellEditorProvider.test.ts
@@ -3865,7 +3865,7 @@ suite("CodexCellEditorProvider Test Suite", () => {
);
const cellId = codexSubtitleContent.cells[0].metadata.id;
- const sourceContent = "Test source content";
+ const sourceContent = "Test source content
";
// Track onlyValidated parameter
let capturedOnlyValidated: boolean | null = null;
@@ -3968,7 +3968,7 @@ suite("CodexCellEditorProvider Test Suite", () => {
);
const cellId = codexSubtitleContent.cells[0].metadata.id;
- const sourceContent = "Test source content";
+ const sourceContent = "Test source content
";
// Track onlyValidated parameter
let capturedOnlyValidated: boolean | null = null;
@@ -3982,7 +3982,13 @@ suite("CodexCellEditorProvider Test Suite", () => {
return [];
}
if (command === "codex-editor-extension.getSourceCellByCellIdFromAllSourceCells") {
- return { cellId: args[0], content: sourceContent, versions: [], notebookId: "nb1" } as MinimalCellResult;
+ return {
+ cellId: args[0],
+ content: sourceContent,
+ rawContent: sourceContent,
+ versions: [],
+ notebookId: "nb1",
+ } as MinimalCellResult;
}
return originalExecuteCommand.apply(vscode.commands, [command, ...args]);
};
@@ -4207,7 +4213,7 @@ suite("CodexCellEditorProvider Test Suite", () => {
);
const cellId = codexSubtitleContent.cells[0].metadata.id;
- const sourceContent = "Test source content";
+ const sourceContent = "Test source content
";
const htmlExample = "HTML content";
// Mock translation pairs with HTML content
@@ -4229,7 +4235,13 @@ suite("CodexCellEditorProvider Test Suite", () => {
return mockTranslationPairs;
}
if (command === "codex-editor-extension.getSourceCellByCellIdFromAllSourceCells") {
- return { cellId: args[0], content: sourceContent, versions: [], notebookId: "nb1" } as MinimalCellResult;
+ return {
+ cellId: args[0],
+ content: sourceContent,
+ rawContent: sourceContent,
+ versions: [],
+ notebookId: "nb1",
+ } as MinimalCellResult;
}
return originalExecuteCommand.apply(vscode.commands, [command, ...args]);
};
@@ -4303,6 +4315,10 @@ suite("CodexCellEditorProvider Test Suite", () => {
assert.ok(userMessage.content.includes("Test source content
]]>"),
+ "Current task source should preserve source HTML when allowHtmlPredictions is enabled"
+ );
// Verify system message mentions HTML
const systemMessage = (capturedMessages as any[]).find((m: any) => m.role === "system");
From e6c5932bad457ee5cadb17d34f60846d3c709d8d Mon Sep 17 00:00:00 2001
From: Ben Scholtens
Date: Tue, 17 Mar 2026 17:30:25 -0400
Subject: [PATCH 4/5] remove logs
---
.../translationSuggestions/llmCompletion.ts | 3 +-
.../translationSuggestions/shared.ts | 32 +++++++++----------
2 files changed, 17 insertions(+), 18 deletions(-)
diff --git a/src/providers/translationSuggestions/llmCompletion.ts b/src/providers/translationSuggestions/llmCompletion.ts
index 2032337ac..047608f0b 100644
--- a/src/providers/translationSuggestions/llmCompletion.ts
+++ b/src/providers/translationSuggestions/llmCompletion.ts
@@ -226,7 +226,7 @@ export async function llmCompletion(
// Generate few-shot examples
const fewShotExamples = buildFewShotExamplesText(
- finalExamples,
+ finalExamples,
preserveHtmlInPrompt,
fewShotExampleFormat || "source-and-target"
);
@@ -235,7 +235,6 @@ export async function llmCompletion(
// Build messages — buildMessages is the single source of truth for
// system message construction. Pass the raw chatSystemMessage and let
// buildMessages append instructions exactly once.
- console.log(`[llmCompletion] System message from config (first 200 chars): "${chatSystemMessage?.substring(0, 200)}..."`);
const messages = buildMessages(
targetLanguage,
chatSystemMessage,
diff --git a/src/providers/translationSuggestions/shared.ts b/src/providers/translationSuggestions/shared.ts
index c4d7c3533..0652528f9 100644
--- a/src/providers/translationSuggestions/shared.ts
+++ b/src/providers/translationSuggestions/shared.ts
@@ -14,7 +14,7 @@ export async function fetchFewShotExamples(
// Use a higher multiplier since many candidates may be incomplete pairs
const initialCandidateCount = Math.max(numberOfFewShotExamples * 10, 100);
console.debug(`[fetchFewShotExamples] Starting search with query: "${sourceContent}" (length: ${sourceContent?.length || 0}), requesting ${initialCandidateCount} candidates, validated only: ${useOnlyValidatedExamples}`);
-
+
let similarSourceCells: TranslationPair[] = [];
try {
similarSourceCells = await vscode.commands.executeCommand(
@@ -52,7 +52,7 @@ export async function fetchFewShotExamples(
// Instead of filtering, rank all valid complete pairs by relevance
const currentTokens = tokenizeText({ method: "whitespace_and_punctuation", text: sourceContent });
-
+
const rankedPairs = (similarSourceCells || [])
.filter((pair) => {
// Basic validity filters only
@@ -62,7 +62,7 @@ export async function fetchFewShotExamples(
}
return false;
}
-
+
// Must have both source and target content for complete pairs
const pairSourceContent = pair.sourceCell?.content || "";
const pairTargetContent = pair.targetCell?.content || "";
@@ -70,7 +70,7 @@ export async function fetchFewShotExamples(
console.debug(`[fetchFewShotExamples] Filtering out pair ${pair.cellId} - incomplete pair (missing source or target)`);
return false;
}
-
+
return true;
})
.map((pair) => {
@@ -79,13 +79,13 @@ export async function fetchFewShotExamples(
const pairSourceContentRaw = pair.sourceCell?.content || "";
const pairSourceContentSanitized = sanitizeHtmlContent(pairSourceContentRaw);
const pairTokens = tokenizeText({ method: "whitespace_and_punctuation", text: pairSourceContentSanitized });
-
+
// Calculate overlap ratio
const overlapCount = currentTokens.filter(token => pairTokens.includes(token)).length;
const overlapRatio = currentTokens.length > 0 ? overlapCount / currentTokens.length : 0;
-
+
console.debug(`[fetchFewShotExamples] Pair ${pair.cellId} - overlap: ${overlapCount}/${currentTokens.length} = ${(overlapRatio * 100).toFixed(1)}%`);
-
+
return {
pair,
overlapRatio,
@@ -99,23 +99,23 @@ export async function fetchFewShotExamples(
}
return b.overlapCount - a.overlapCount;
});
-
+
console.debug(`[fetchFewShotExamples] Ranked ${rankedPairs.length} complete pairs by relevance`);
-
+
// Take the top N most relevant complete pairs
const filteredSimilarSourceCells = rankedPairs
.slice(0, numberOfFewShotExamples)
.map(ranked => ranked.pair);
console.debug(`[fetchFewShotExamples] Returning ${filteredSimilarSourceCells.length} top-ranked examples (requested: ${numberOfFewShotExamples})`);
-
+
if (filteredSimilarSourceCells.length === 0) {
console.debug(`[fetchFewShotExamples] No complete translation pairs found. Source length: ${sourceContent?.length || 0}`);
console.debug(`[fetchFewShotExamples] Database may contain only incomplete pairs (source-only or target-only).`);
} else if (filteredSimilarSourceCells.length < numberOfFewShotExamples) {
console.debug(`[fetchFewShotExamples] Found fewer examples than requested: ${filteredSimilarSourceCells.length}/${numberOfFewShotExamples}`);
}
-
+
return filteredSimilarSourceCells;
}
@@ -176,11 +176,11 @@ export async function getPrecedingTranslationPairs(
}
export function buildFewShotExamplesText(
- pairs: TranslationPair[],
- allowHtml: boolean = false,
+ pairs: TranslationPair[],
+ allowHtml: boolean = false,
exampleFormat: string = "source-and-target"
): string {
- console.debug(`[buildFewShotExamplesText] Building ${pairs.length} examples in '${exampleFormat}' format, allowHtml=${allowHtml}`);
+
const examplesInner = pairs
.map((pair, idx) => {
@@ -191,11 +191,11 @@ export function buildFewShotExamplesText(
if (allowHtml && idx < 3) {
const hasHtmlInTarget = /<[a-z][^>]*>/i.test(target);
const hasHtmlInSource = /<[a-z][^>]*>/i.test(source);
- console.log(`[buildFewShotExamplesText] Example ${idx}: hasHtmlInSource=${hasHtmlInSource}, hasHtmlInTarget=${hasHtmlInTarget}, targetRawContent=${pair.targetCell?.rawContent ? 'present' : 'MISSING'}, target preview="${target.substring(0, 100)}"`);
+
}
const targetInner = allowHtml ? wrapCdata(target) : xmlEscape(target);
const sourceInner = allowHtml ? wrapCdata(source) : xmlEscape(source);
-
+
// Format examples based on the setting
if (exampleFormat === "target-only") {
return `${targetInner}`;
From e426f0c1a97e70734518f35caf785e5281b24a47 Mon Sep 17 00:00:00 2001
From: Ben Scholtens
Date: Tue, 17 Mar 2026 23:07:38 -0400
Subject: [PATCH 5/5] Update CodexCellEditorProvider tests to enhance system
message assertions
- Adjusted the system message assertions: the "target language" check now also accepts the language code "fr", alongside the existing check that the message includes "fr" or "French".
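- Updated the format-instructions check: HTML preservation guidance is now always included in the system message, so the test asserts that it mentions HTML/formatting handling instead of plain-text output, even when HTML predictions are disabled.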
---
src/test/suite/codexCellEditorProvider.test.ts | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/test/suite/codexCellEditorProvider.test.ts b/src/test/suite/codexCellEditorProvider.test.ts
index 7079d0481..ad83f1831 100644
--- a/src/test/suite/codexCellEditorProvider.test.ts
+++ b/src/test/suite/codexCellEditorProvider.test.ts
@@ -3826,7 +3826,7 @@ suite("CodexCellEditorProvider Test Suite", () => {
assert.ok(userMessage, "Should have a user message");
// Verify system message contains expected content
- assert.ok(systemMessage.content.includes("target language"), "System message should mention target language");
+ assert.ok(systemMessage.content.includes("fr") || systemMessage.content.includes("target language"), "System message should mention target language");
assert.ok(systemMessage.content.includes("fr") || systemMessage.content.includes("French"), "System message should include target language");
// Verify user message contains examples
@@ -4180,10 +4180,10 @@ suite("CodexCellEditorProvider Test Suite", () => {
"System message should contain translation instructions"
);
- // Verify format instructions (plain text since allowHtmlPredictions is false)
+ // Verify format instructions (HTML preservation guidance is always included)
assert.ok(
- systemContent.includes("plain text") || systemContent.includes("no XML/HTML"),
- "System message should mention plain text format when HTML is disabled"
+ systemContent.includes("HTML") || systemContent.includes("formatting"),
+ "System message should mention HTML/formatting handling"
);
// Verify reference to examples/patterns