diff --git a/sharedUtils/index.ts b/sharedUtils/index.ts index 6de95786e..b1adbea8f 100644 --- a/sharedUtils/index.ts +++ b/sharedUtils/index.ts @@ -286,5 +286,16 @@ export const computeProgressPercents = ( }; }; +/** + * Derives the target .codex file path from a .source file path. + * Normalizes path separators so the replacement works on both Windows and POSIX. + */ +export const deriveTargetPathFromSource = (sourcePath: string): string => { + const normalized = sourcePath.replace(/\\/g, "/"); + return normalized + .replace(/\.source$/, ".codex") + .replace(/\/\.project\/sourceTexts\//, "/files/target/"); +}; + // Re-export corpus utilities export * from "./corpusUtils"; diff --git a/src/providers/NewSourceUploader/NewSourceUploaderProvider.ts b/src/providers/NewSourceUploader/NewSourceUploaderProvider.ts index 71d79f90a..bb15d71e9 100644 --- a/src/providers/NewSourceUploader/NewSourceUploaderProvider.ts +++ b/src/providers/NewSourceUploader/NewSourceUploaderProvider.ts @@ -24,6 +24,7 @@ import { createStandardizedFilename, extractUsfmCodeFromFilename, getBookDisplay import { formatJsonForNotebookFile } from "../../utils/notebookFileFormattingUtils"; import { CodexContentSerializer } from "../../serializer"; import { getCorpusMarkerForBook } from "../../../sharedUtils/corpusUtils"; +import { deriveTargetPathFromSource } from "../../../sharedUtils"; import { getNotebookMetadataManager } from "../../utils/notebookMetadataManager"; import { SyncManager } from "../../projectManager/syncManager"; import { processNewlyImportedFiles } from "../../projectManager/utils/migrationUtils"; @@ -119,12 +120,17 @@ export class NewSourceUploaderProvider implements vscode.CustomTextEditorProvide webviewPanel.webview.onDidReceiveMessage(async (message: any) => { try { if (message.command === "webviewReady") { - // Webview is ready, send current project inventory const inventory = await this.fetchProjectInventory(); + // Extract initial intent from URI query params (e.g. ?intent=source or ?intent=target) + const uriQuery = document.uri.query || ""; + const intentMatch = uriQuery.match(/intent=(source|target)/); + const initialIntent = intentMatch ? intentMatch[1] : undefined; + webviewPanel.webview.postMessage({ command: "projectInventory", inventory: inventory, + initialIntent, }); } else if (message.command === "metadata.check") { // Handle metadata check request @@ -675,14 +681,10 @@ export class NewSourceUploaderProvider implements vscode.CustomTextEditorProvide }); } } else if (message.command === "fetchTargetFile") { - // Fetch target file content for translation imports const { sourceFilePath } = message; try { - const targetFilePath = sourceFilePath - .replace(/\.source$/, ".codex") - .replace(/\/\.project\/sourceTexts\//, "/files/target/"); - + const targetFilePath = deriveTargetPathFromSource(sourceFilePath); const targetUri = vscode.Uri.file(targetFilePath); const targetContent = await vscode.workspace.fs.readFile(targetUri); const targetNotebook = JSON.parse(new TextDecoder().decode(targetContent)); @@ -1634,41 +1636,27 @@ export class NewSourceUploaderProvider implements vscode.CustomTextEditorProvide token: vscode.CancellationToken ): Promise { try { - // The aligned content is already provided by the plugin's custom alignment algorithm - // We just need to merge it into the existing target notebook - - // Load the existing target notebook const targetFileUri = vscode.Uri.file(message.targetFilePath); const existingContent = await vscode.workspace.fs.readFile(targetFileUri); const existingNotebook = JSON.parse(new TextDecoder().decode(existingContent)); - // Create a map of existing cells for quick lookup - const existingCellsMap = new Map(); - existingNotebook.cells.forEach((cell: any) => { - if (cell.metadata?.id) { - existingCellsMap.set(cell.metadata.id, cell); - } - }); + // Build a map of aligned updates keyed by the TARGET cell's ID (not the imported content's ID) + const updatesMap = new Map(); + const paratextCells: Array<{ cell: any; parentId?: string }> = []; - // Track statistics let insertedCount = 0; let skippedCount = 0; let paratextCount = 0; let childCellCount = 0; - // Process aligned cells and update the notebook - const processedCells = new Map(); - const processedSourceCells = new Set(); - for (const alignedCell of message.alignedContent) { if (alignedCell.isParatext) { - // Add paratext cells const paratextId = alignedCell.importedContent.id; const importedData = alignedCell.importedContent.data; const paratextData = typeof importedData === "object" && importedData !== null ? importedData : {}; const paratextCell = { - kind: 1, // vscode.NotebookCellKind.Code + kind: 1, languageId: "html", value: alignedCell.importedContent.content, metadata: { @@ -1682,35 +1670,54 @@ export class NewSourceUploaderProvider implements vscode.CustomTextEditorProvide parentId: alignedCell.importedContent.parentId, }, }; - processedCells.set(paratextId, paratextCell); + paratextCells.push({ + cell: paratextCell, + parentId: alignedCell.importedContent.parentId, + }); paratextCount++; } else if (alignedCell.notebookCell) { - const targetId = alignedCell.importedContent.id; - const existingCell = existingCellsMap.get(targetId); + const targetId = + alignedCell.notebookCell?.metadata?.id ?? alignedCell.importedContent.id; + + const existingCell = existingNotebook.cells.find( + (c: any) => c.metadata?.id === targetId + ); + + // Never overwrite milestone cells — they are structural markers + const isMilestone = + existingCell?.metadata?.type === CodexCellTypes.MILESTONE || + alignedCell.notebookCell?.metadata?.type === CodexCellTypes.MILESTONE; + if (isMilestone) { + skippedCount++; + continue; + } + const existingValue = existingCell?.value ?? alignedCell.notebookCell.value ?? ""; if (existingValue && existingValue.trim() !== "") { - // Keep existing content if cell already has content - processedCells.set(targetId, existingCell || alignedCell.notebookCell); + updatesMap.set(targetId, { + alignedCell, + updatedCell: existingCell || alignedCell.notebookCell, + }); skippedCount++; } else { - // Update empty cell with new content const updatedCell = { - kind: 1, // vscode.NotebookCellKind.Code + kind: 1, languageId: "html", value: alignedCell.importedContent.content, metadata: { - ...alignedCell.notebookCell.metadata, + ...(existingCell?.metadata ?? alignedCell.notebookCell.metadata), type: CodexCellTypes.TEXT, id: targetId, data: { - ...alignedCell.notebookCell.metadata.data, + ...(existingCell?.metadata?.data ?? + alignedCell.notebookCell.metadata?.data), startTime: alignedCell.importedContent.startTime, endTime: alignedCell.importedContent.endTime, }, }, }; - processedCells.set(targetId, updatedCell); + updatesMap.set(targetId, { alignedCell, updatedCell }); if (alignedCell.isAdditionalOverlap) { childCellCount++; @@ -1721,53 +1728,64 @@ export class NewSourceUploaderProvider implements vscode.CustomTextEditorProvide } } - // Build the final cell array, preserving the temporal order from alignedContent + // Preserve original notebook cell order: iterate existing cells, apply updates in-place const newCells: any[] = []; - const usedExistingCellIds = new Set(); + const usedCellIds = new Set(); - // Process cells in the order they appear in alignedContent (temporal order) - for (const alignedCell of message.alignedContent) { - if (alignedCell.isParatext) { - // Add paratext cell - const paratextId = alignedCell.importedContent.id; - const paratextCell = processedCells.get(paratextId); - if (paratextCell) { - newCells.push(paratextCell); + for (const cell of existingNotebook.cells) { + const cellId = cell.metadata?.id; + if (cellId && updatesMap.has(cellId)) { + newCells.push(updatesMap.get(cellId)!.updatedCell); + usedCellIds.add(cellId); + } else { + newCells.push(cell); + if (cellId) { + usedCellIds.add(cellId); } - } else if (alignedCell.notebookCell) { - const targetId = alignedCell.importedContent.id; - const processedCell = processedCells.get(targetId); + } - if (processedCell) { - newCells.push(processedCell); - usedExistingCellIds.add(targetId); + // Insert paratext cells that reference this cell as their parent + if (cellId) { + const childParatexts = paratextCells.filter((p) => p.parentId === cellId); + for (const pt of childParatexts) { + newCells.push(pt.cell); } } } - // Add any existing cells that weren't in the aligned content (shouldn't happen normally) - for (const cell of existingNotebook.cells) { - const cellId = cell.metadata?.id; - if (!cellId || usedExistingCellIds.has(cellId)) { - continue; + // Append paratext cells without a parent (or whose parent wasn't found) + for (const pt of paratextCells) { + const alreadyInserted = + pt.parentId && newCells.some((c) => c.metadata?.id === pt.cell.metadata?.id); + if (!alreadyInserted) { + newCells.push(pt.cell); } - console.warn(`Cell ${cellId} was not in aligned content, appending at end`); - newCells.push(cell); } - // Update the notebook const updatedNotebook = { ...existingNotebook, cells: newCells, + metadata: { + ...existingNotebook.metadata, + importerType: message.importerType || existingNotebook.metadata?.importerType, + importTimestamp: new Date().toISOString(), + importContext: { + ...(existingNotebook.metadata?.importContext ?? {}), + lastTranslationImport: { + importerType: message.importerType, + timestamp: new Date().toISOString(), + sourceFilePath: message.sourceFilePath, + stats: { insertedCount, skippedCount, paratextCount, childCellCount }, + }, + }, + }, }; - // Write the updated notebook back to disk await vscode.workspace.fs.writeFile( targetFileUri, Buffer.from(formatJsonForNotebookFile(updatedNotebook)) ); - // Show success message with statistics vscode.window.showInformationMessage( `Translation imported: ${insertedCount} translations, ${paratextCount} paratext cells, ${childCellCount} child cells, ${skippedCount} skipped.` ); diff --git a/src/providers/navigationWebview/navigationWebviewProvider.ts b/src/providers/navigationWebview/navigationWebviewProvider.ts index 91e8ef6e4..f83df4049 100644 --- a/src/providers/navigationWebview/navigationWebviewProvider.ts +++ b/src/providers/navigationWebview/navigationWebviewProvider.ts @@ -366,7 +366,8 @@ export class NavigationWebviewProvider extends BaseWebviewProvider { } case "openSourceUpload": { try { - await vscode.commands.executeCommand("codex-project-manager.openSourceUpload"); + const intent = message.intent as string | undefined; + await vscode.commands.executeCommand("codex-project-manager.openSourceUpload", intent); } catch (error) { console.error("Error opening source upload:", error); vscode.window.showErrorMessage(`Failed to open source upload: ${error}`); diff --git a/src/providers/registerProviders.ts b/src/providers/registerProviders.ts index 1207caf7b..55a102994 100644 --- a/src/providers/registerProviders.ts +++ b/src/providers/registerProviders.ts @@ -37,10 +37,11 @@ export function registerProviders(context: vscode.ExtensionContext) { ); disposables.push( - vscode.commands.registerCommand("codex-project-manager.openSourceUpload", () => { + vscode.commands.registerCommand("codex-project-manager.openSourceUpload", (intent?: string) => { const workspaceFolder = getWorkSpaceFolder(); if (workspaceFolder) { - const uri = vscode.Uri.parse(`newSourceUploaderProvider-scheme:New Source Upload`); + const query = intent ? `?intent=${intent}` : ""; + const uri = vscode.Uri.parse(`newSourceUploaderProvider-scheme:New Source Upload${query}`); vscode.commands.executeCommand( "vscode.openWith", uri, diff --git a/webviews/codex-webviews/src/NavigationView/index.tsx b/webviews/codex-webviews/src/NavigationView/index.tsx index 14ccf6d10..6199a99ed 100644 --- a/webviews/codex-webviews/src/NavigationView/index.tsx +++ b/webviews/codex-webviews/src/NavigationView/index.tsx @@ -426,9 +426,17 @@ function NavigationView() { })); }; - const handleAddFiles = () => { + const handleAddSourceFile = () => { vscode.postMessage({ command: "openSourceUpload", + intent: "source", + }); + }; + + const handleImportTargetFile = () => { + vscode.postMessage({ + command: "openSourceUpload", + intent: "target", }); }; @@ -981,28 +989,32 @@ function NavigationView() {
- {/* Action Buttons - Side by Side */} -
- - -
- + + +
{/* Corpus Marker Modal */} diff --git a/webviews/codex-webviews/src/NewSourceUploader/NewSourceUploader.tsx b/webviews/codex-webviews/src/NewSourceUploader/NewSourceUploader.tsx index 49e3ef502..46dfb226c 100644 --- a/webviews/codex-webviews/src/NewSourceUploader/NewSourceUploader.tsx +++ b/webviews/codex-webviews/src/NewSourceUploader/NewSourceUploader.tsx @@ -27,12 +27,12 @@ import { TargetFileResponseMessage, TargetFileErrorMessage, } from "./types/wizard"; -import { IntentSelection } from "./components/IntentSelection"; import { SourceFileSelection } from "./components/SourceFileSelection"; import { EmptySourceState } from "./components/EmptySourceState"; import { PluginSelection } from "./components/PluginSelection"; import { ImportProgressView } from "./components/ImportProgressView"; import { SystemMessageStep } from "../StartupFlow/components/SystemMessageStep"; +import { deriveTargetPathFromSource } from "../../../../sharedUtils"; import { createDownloadHelper } from "./utils/downloadHelper"; import { notifyImportEnded } from "./utils/importProgress"; import "./App.css"; @@ -44,8 +44,8 @@ const vscode: { postMessage: (message: any) => void } = (window as any).vscodeAp const NewSourceUploader: React.FC = () => { // Wizard state const [wizardState, setWizardState] = useState({ - currentStep: "intent-selection", - selectedIntent: null, + currentStep: "source-import", + selectedIntent: "source", selectedSourceForTarget: undefined, selectedSourceDetails: undefined, selectedPlugin: undefined, @@ -73,6 +73,7 @@ const NewSourceUploader: React.FC = () => { reject: (error: Error) => void; importedContent: ImportedContent[]; customAligner?: CellAligner; + sourceFilePath: string; } > >(new Map()); @@ -85,7 +86,6 @@ const NewSourceUploader: React.FC = () => { customAligner?: CellAligner ): Promise => { return new Promise((resolve, reject) => { - // Store the request with a unique key const requestKey = `${sourceFilePath}-${Date.now()}`; setAlignmentRequests( (prev) => @@ -95,18 +95,17 @@ const NewSourceUploader: React.FC = () => { reject, importedContent, customAligner, + sourceFilePath, }) ) ); - // Request target file content from provider const message: FetchTargetFileMessage = { command: "fetchTargetFile", sourceFilePath, }; vscode.postMessage(message); - // Set up timeout to avoid hanging requests setTimeout(() => { setAlignmentRequests((prev) => { const newMap = new Map(prev); @@ -116,7 +115,7 @@ const NewSourceUploader: React.FC = () => { } return newMap; }); - }, 30000); // 30 second timeout + }, 30000); }); }, [] @@ -155,8 +154,7 @@ const NewSourceUploader: React.FC = () => { setImportComplete(false); setWizardState((prev) => ({ ...prev, - currentStep: "intent-selection", - selectedIntent: null, + currentStep: prev.selectedIntent === "target" ? "target-selection" : "source-import", selectedSourceForTarget: undefined, selectedSourceDetails: undefined, selectedPlugin: undefined, @@ -199,19 +197,17 @@ const NewSourceUploader: React.FC = () => { const response = message as TargetFileResponseMessage; console.log("Received target file content:", response); - // Find and complete pending alignment requests for this source file setAlignmentRequests((prev) => { const newMap = new Map(prev); const completedRequests: string[] = []; for (const [requestKey, request] of newMap.entries()) { - if (requestKey.startsWith(response.sourceFilePath)) { - // Run the alignment algorithm + if (request.sourceFilePath === response.sourceFilePath) { const aligner = request.customAligner || defaultCellAligner; aligner( response.targetCells, - [], // Source cells not currently used + [], request.importedContent ) .then((alignedCells) => { @@ -225,7 +221,6 @@ const NewSourceUploader: React.FC = () => { } } - // Remove completed requests completedRequests.forEach((key) => newMap.delete(key)); return newMap; }); @@ -287,13 +282,38 @@ const NewSourceUploader: React.FC = () => { translationPairs: [], }; - console.log("Received project inventory:", inventory); + const initialIntent: ImportIntent | undefined = message.initialIntent; - setWizardState((prev) => ({ - ...prev, - projectInventory: inventory, - isLoadingInventory: false, - })); + console.log("Received project inventory:", inventory, "initialIntent:", initialIntent); + + setWizardState((prev) => { + const base = { + ...prev, + projectInventory: inventory, + isLoadingInventory: false, + }; + + // Auto-navigate based on initial intent from provider + if (initialIntent === "source") { + return { + ...base, + selectedIntent: "source" as ImportIntent, + currentStep: "source-import" as WizardStep, + }; + } + if (initialIntent === "target") { + const hasSourceFiles = inventory.sourceFiles.length > 0; + return { + ...base, + selectedIntent: "target" as ImportIntent, + currentStep: hasSourceFiles + ? ("target-selection" as WizardStep) + : ("target-selection" as WizardStep), + }; + } + + return base; + }); } }; @@ -411,10 +431,7 @@ const NewSourceUploader: React.FC = () => { return; } - // Derive target file path from source file path - const targetFilePath = sourceFilePath - .replace(/\.source$/, ".codex") - .replace(/\/\.project\/sourceTexts\//, "/files/target/"); + const targetFilePath = deriveTargetPathFromSource(sourceFilePath); // Send translation to provider for writing const message: WriteTranslationMessage = { @@ -438,8 +455,7 @@ const NewSourceUploader: React.FC = () => { // Reset wizard setWizardState((prev) => ({ ...prev, - currentStep: "intent-selection", - selectedIntent: null, + currentStep: prev.selectedIntent === "target" ? "target-selection" : "source-import", selectedSourceForTarget: undefined, selectedSourceDetails: undefined, selectedPlugin: undefined, @@ -450,27 +466,23 @@ const NewSourceUploader: React.FC = () => { ); const handleCancel = useCallback(() => { - // Note: VS Code webviews don't support window.confirm() due to sandboxing - // Skip confirmation dialog - user action is explicit enough setWizardState((prev) => ({ ...prev, currentStep: prev.selectedIntent === "target" && prev.selectedSourceForTarget ? "target-selection" - : "intent-selection", + : prev.selectedIntent === "target" + ? "target-selection" + : "source-import", selectedPlugin: undefined, })); setIsDirty(false); }, [isDirty]); const handleCancelImport = useCallback(() => { - // Reset entire wizard state to beginning - // Note: VS Code webviews don't support window.confirm() due to sandboxing - // The "Cancel Import" button text makes the action clear enough setWizardState((prev) => ({ ...prev, - currentStep: "intent-selection", - selectedIntent: null, + currentStep: prev.selectedIntent === "target" ? "target-selection" : "source-import", selectedSourceForTarget: undefined, selectedSourceDetails: undefined, selectedPlugin: undefined, @@ -508,9 +520,6 @@ const NewSourceUploader: React.FC = () => { const handleBack = useCallback(() => { setWizardState((prev) => { switch (prev.currentStep) { - case "source-import": - case "target-selection": - return { ...prev, currentStep: "intent-selection", selectedIntent: null }; case "target-import": return { ...prev, @@ -620,17 +629,6 @@ const NewSourceUploader: React.FC = () => { // Render wizard steps switch (wizardState.currentStep) { - case "intent-selection": - return ( - - ); - case "source-import": return ( { intent="source" existingSourceCount={wizardState.projectInventory.sourceFiles.length} onSelectPlugin={handleSelectPlugin} - onBack={handleBack} /> ); @@ -648,7 +645,6 @@ const NewSourceUploader: React.FC = () => { return ( handleSelectIntent("source")} - onBack={handleBack} /> ); } @@ -656,7 +652,6 @@ const NewSourceUploader: React.FC = () => { ); @@ -685,8 +680,7 @@ const NewSourceUploader: React.FC = () => { setImportComplete(false); setWizardState((prev) => ({ ...prev, - currentStep: "intent-selection", - selectedIntent: null, + currentStep: prev.selectedIntent === "target" ? "target-selection" : "source-import", selectedSourceForTarget: undefined, selectedSourceDetails: undefined, selectedPlugin: undefined, diff --git a/webviews/codex-webviews/src/NewSourceUploader/components/AlignmentPreview.tsx b/webviews/codex-webviews/src/NewSourceUploader/components/AlignmentPreview.tsx index 19af534d3..a7b31e227 100644 --- a/webviews/codex-webviews/src/NewSourceUploader/components/AlignmentPreview.tsx +++ b/webviews/codex-webviews/src/NewSourceUploader/components/AlignmentPreview.tsx @@ -36,12 +36,13 @@ import { sequentialCellAligner, defaultCellAligner, } from "../types/plugin"; +import type { CustomNotebookCellData } from "types"; export interface AlignmentPreviewProps { alignedCells: AlignedCell[]; importedContent: ImportedContent[]; - targetCells: any[]; - sourceCells: any[]; + targetCells: CustomNotebookCellData[]; + sourceCells: CustomNotebookCellData[]; selectedSourceName?: string; onConfirm: (alignedCells: AlignedCell[]) => void; onCancel: () => void; diff --git a/webviews/codex-webviews/src/NewSourceUploader/components/EmptySourceState.tsx b/webviews/codex-webviews/src/NewSourceUploader/components/EmptySourceState.tsx index 31eaee127..63d53e1cf 100644 --- a/webviews/codex-webviews/src/NewSourceUploader/components/EmptySourceState.tsx +++ b/webviews/codex-webviews/src/NewSourceUploader/components/EmptySourceState.tsx @@ -1,23 +1,15 @@ import React from "react"; import { Card, CardContent } from "../../components/ui/card"; import { Button } from "../../components/ui/button"; -import { FileInput, ArrowLeft, AlertCircle } from "lucide-react"; +import { FileInput, AlertCircle } from "lucide-react"; interface EmptySourceStateProps { onImportSources: () => void; - onBack: () => void; } -export const EmptySourceState: React.FC = ({ onImportSources, onBack }) => { +export const EmptySourceState: React.FC = ({ onImportSources }) => { return (
- {/* Header */} -
- -
diff --git a/webviews/codex-webviews/src/NewSourceUploader/components/PluginSelection.tsx b/webviews/codex-webviews/src/NewSourceUploader/components/PluginSelection.tsx index 02a204494..1a1713394 100644 --- a/webviews/codex-webviews/src/NewSourceUploader/components/PluginSelection.tsx +++ b/webviews/codex-webviews/src/NewSourceUploader/components/PluginSelection.tsx @@ -27,7 +27,7 @@ interface PluginSelectionProps { selectedSource?: ExistingFile; existingSourceCount: number; onSelectPlugin: (pluginId: string) => void; - onBack: () => void; + onBack?: () => void; } const PluginCard: React.FC<{ @@ -121,8 +121,8 @@ export const PluginSelection: React.FC = ({ const [searchQuery, setSearchQuery] = useState(""); const isTargetImport = intent === "target"; - const essentialPlugins = useMemo(() => getEssentialImporters(), []); - const specializedPlugins = useMemo(() => getSpecializedImporters(), []); + const essentialPlugins = useMemo(() => getEssentialImporters(isTargetImport), [isTargetImport]); + const specializedPlugins = useMemo(() => getSpecializedImporters(isTargetImport), [isTargetImport]); const filteredSpecializedPlugins = useMemo(() => { return searchQuery ? searchPlugins(searchQuery, specializedPlugins) : specializedPlugins; @@ -131,12 +131,14 @@ export const PluginSelection: React.FC = ({ return (
{/* Header */} -
- -
+ {onBack && ( +
+ +
+ )} {/* Title Section */}
diff --git a/webviews/codex-webviews/src/NewSourceUploader/components/SourceFileSelection.tsx b/webviews/codex-webviews/src/NewSourceUploader/components/SourceFileSelection.tsx index 9ebe189b7..1f24d1dfb 100644 --- a/webviews/codex-webviews/src/NewSourceUploader/components/SourceFileSelection.tsx +++ b/webviews/codex-webviews/src/NewSourceUploader/components/SourceFileSelection.tsx @@ -26,7 +26,7 @@ import { cn } from "../../lib/utils"; interface SourceFileSelectionProps { sourceFiles: BasicFileInfo[]; onSelectSource: (source: BasicFileInfo) => void; - onBack: () => void; + onBack?: () => void; } export const SourceFileSelection: React.FC = ({ @@ -75,12 +75,14 @@ export const SourceFileSelection: React.FC = ({ return (
{/* Header */} -
- -
+ {onBack && ( +
+ +
+ )}

Select Source File

diff --git a/webviews/codex-webviews/src/NewSourceUploader/importers/common/translationHelper.ts b/webviews/codex-webviews/src/NewSourceUploader/importers/common/translationHelper.ts index 03f6eff8b..c703bb462 100644 --- a/webviews/codex-webviews/src/NewSourceUploader/importers/common/translationHelper.ts +++ b/webviews/codex-webviews/src/NewSourceUploader/importers/common/translationHelper.ts @@ -1,26 +1,32 @@ import { NotebookPair } from "../../types/common"; import { ImportedContent, ImporterComponentProps } from "../../types/plugin"; +import { CodexCellTypes } from "types/enums"; /** - * Helper function to convert notebook cells to ImportedContent format for translation imports + * Helper function to convert notebook cells to ImportedContent format for translation imports. + * Skips milestone cells — they are structural markers (chapter numbers, etc.) that the editor + * hides from view, so they should not be treated as translatable content. */ export function notebookToImportedContent(notebook: NotebookPair): ImportedContent[] { - return notebook.source.cells.map((cell, index) => { - const md = cell.metadata || {}; - const data = md.data || {}; - return { - id: cell.id || md.id || `cell-${index}`, - content: cell.content, - edits: md.edits, - // Surface commonly used fields for aligners - startTime: data.startTime ?? md.startTime, - endTime: data.endTime ?? md.endTime, - format: data.format ?? md.format, - originalText: data.originalText ?? md.originalText, - // Spread remaining metadata for flexibility - ...md, - }; - }); + return notebook.source.cells + .filter((cell) => { + const cellType = cell.metadata?.type; + return cellType !== CodexCellTypes.MILESTONE; + }) + .map((cell, index) => { + const md = cell.metadata || {}; + const data = md.data || {}; + return { + id: cell.id || md.id || `cell-${index}`, + content: cell.content, + edits: md.edits, + startTime: data.startTime ?? md.startTime, + endTime: data.endTime ?? md.endTime, + format: data.format ?? md.format, + originalText: data.originalText ?? md.originalText, + ...md, + }; + }); } /** diff --git a/webviews/codex-webviews/src/NewSourceUploader/importers/registry.tsx b/webviews/codex-webviews/src/NewSourceUploader/importers/registry.tsx index db97cfec0..b9a45894e 100644 --- a/webviews/codex-webviews/src/NewSourceUploader/importers/registry.tsx +++ b/webviews/codex-webviews/src/NewSourceUploader/importers/registry.tsx @@ -67,24 +67,28 @@ export const importerPlugins: ImporterPlugin[] = [ ...markdownImporterPlugin, name: "Markdown", description: "GitHub-style markdown files with round-trip export support", + supportsTargetImport: true, tags: ["Essential", "Documentation", "GitHub", "Round-trip"], }, { ...subtitlesImporterPlugin, name: "Subtitles", description: "Video captions with timestamps", + supportsTargetImport: true, tags: ["Essential", "Media", "Video"], }, { ...tmsImporterPlugin, name: "TMS Files", description: "Translation memory and localization files (TMX/XLIFF) with round-trip export support", + supportsTargetImport: true, tags: ["Essential", "Translation", "Localization", "Round-trip"], }, { ...docxImporterPlugin, name: "Word Documents", description: "Microsoft Word files with round-trip export support", + supportsTargetImport: true, tags: ["Essential", "Documents", "Microsoft", "Round-trip"], }, { @@ -113,30 +117,35 @@ export const importerPlugins: ImporterPlugin[] = [ ...usfmExperimentalImporterPlugin, name: "USFM New", description: "USFM files with round-trip export support (headers in chapter 1, verse-only target imports)", + supportsTargetImport: true, tags: ["Specialized", "Bible", "USFM", "Round-trip"], }, { ...paratextImporterPlugin, name: "Paratext Projects", description: "Translation projects with settings", + supportsTargetImport: true, tags: ["Specialized", "Bible", "Paratext"], }, { ...ebibleDownloadImporterPlugin, name: "eBible Download", description: "Download directly from eBible.org", + supportsTargetImport: true, tags: ["Specialized", "Bible", "Download"], }, { ...maculaBibleImporterPlugin, name: "Macula Bible", description: "Hebrew and Greek with annotations", + supportsTargetImport: true, tags: ["Specialized", "Bible", "Original Languages"], }, { ...obsImporterPlugin, name: "Bible Stories", description: "Open Bible Stories format with round-trip export support", + supportsTargetImport: true, tags: ["Specialized", "Bible", "Stories", "Round-trip"], }, { @@ -155,6 +164,7 @@ export const importerPlugins: ImporterPlugin[] = [ ...spreadsheetImporterPlugin, name: "Bible Spreadsheet with Audio data", description: "CSV and TSV files with audio URLs", + supportsTargetImport: true, tags: ["Specialized", "Bible", "Spreadsheet", "CSV", "TSV", "Round-trip"], }, ]; @@ -199,15 +209,23 @@ export const getSupportedExtensions = (): string[] => { /** * Get Essential importers (general-purpose, broad appeal) */ -export const getEssentialImporters = (): ImporterPlugin[] => { - return importerPlugins.filter((plugin) => plugin.tags?.includes("Essential")); +export const getEssentialImporters = (targetOnly?: boolean): ImporterPlugin[] => { + return importerPlugins.filter( + (plugin) => + plugin.tags?.includes("Essential") && + (!targetOnly || plugin.supportsTargetImport) + ); }; /** * Get Specialized importers (domain-specific tools) */ -export const getSpecializedImporters = (): ImporterPlugin[] => { - return importerPlugins.filter((plugin) => plugin.tags?.includes("Specialized")); +export const getSpecializedImporters = (targetOnly?: boolean): ImporterPlugin[] => { + return importerPlugins.filter( + (plugin) => + plugin.tags?.includes("Specialized") && + (!targetOnly || plugin.supportsTargetImport) + ); }; /** diff --git a/webviews/codex-webviews/src/NewSourceUploader/importers/usfm/experimental/usfmCellAligner.ts b/webviews/codex-webviews/src/NewSourceUploader/importers/usfm/experimental/usfmCellAligner.ts index b025815c8..eb81b0815 100644 --- a/webviews/codex-webviews/src/NewSourceUploader/importers/usfm/experimental/usfmCellAligner.ts +++ b/webviews/codex-webviews/src/NewSourceUploader/importers/usfm/experimental/usfmCellAligner.ts @@ -3,206 +3,189 @@ * Matches verses by verse number and chapter (cell ID format: "BOOK CHAPTER:VERSE") * rather than sequentially, ensuring verses end up in the correct cells even if * the target file is shorter or not well-structured. + * + * Preserves original target cell ordering: iterates target cells in their existing + * order and looks up matching imported content, keeping preface/header cells in place. */ import { CellAligner, AlignedCell, ImportedContent } from '../../../types/plugin'; +import type { CustomNotebookCellData } from 'types'; + +type MatchResult = { + importedItem: ImportedContent; + alignmentMethod: AlignedCell['alignmentMethod']; + confidence: number; +}; + +const VERSE_PATTERN = /^([A-Z0-9]{2,})\s+(\d+):(\d+[a-z]?)$/i; /** - * USFM cell aligner that matches verses by their cell ID (book chapter:verse) - * Falls back to exact ID matching for non-verse content + * Build lookup indexes from imported content for multi-strategy matching. */ -export const usfmCellAligner: CellAligner = async ( - targetCells: any[], - sourceCells: any[], - importedContent: ImportedContent[] -): Promise => { - const alignedCells: AlignedCell[] = []; +const buildImportedIndexes = (importedContent: ImportedContent[]) => { + const byId = new Map(); + const byLabel = new Map(); + const byVerseRef = new Map(); + const byVerseRefWithBook = new Map(); + + const appendToMap = (map: Map, key: string, item: ImportedContent) => { + const list = map.get(key) || []; + list.push(item); + map.set(key, list); + }; + + for (const item of importedContent) { + if (!item.content.trim()) continue; + + const id = String(item.id ?? '').trim(); + if (id) { + appendToMap(byId, id, item); + appendToMap(byId, id.toUpperCase(), item); + } + + const cellLabel = item.cellLabel || (item as Record).metadata?.cellLabel; + if (cellLabel) { + const label = String(cellLabel).trim(); + appendToMap(byLabel, label, item); + appendToMap(byLabel, label.toUpperCase(), item); + } - // Create maps for efficient lookup - // Map by exact cell ID (for verses: "BOOK CHAPTER:VERSE", for others: "BOOK CHAPTER:MARKER:INDEX") - const targetCellsById = new Map(); - - // Map by verse reference (for verse matching: "CHAPTER:VERSE") - const targetVersesByRef = new Map(); - - // Map by cell label (for fallback matching) - const targetCellsByLabel = new Map(); - - // Build lookup maps from target cells - targetCells.forEach((cell) => { - const cellId = cell.metadata?.id || cell.id; - if (cellId) { - const normalizedId = String(cellId).trim().toUpperCase(); - targetCellsById.set(normalizedId, cell); - // Also store original case for exact matching - targetCellsById.set(String(cellId).trim(), cell); - - // Extract verse reference if it's a verse cell - // Verse cell IDs are in format: "BOOK CHAPTER:VERSE" (e.g., "GEN 1:1", "1PE 1:1") - // Match pattern: book code (2+ chars), space(s), chapter number, colon, verse number - const verseMatch = String(cellId).match(/^([A-Z0-9]{2,})\s+(\d+):(\d+[a-z]?)$/i); + if (id) { + const verseMatch = id.match(VERSE_PATTERN); if (verseMatch) { const [, bookCode, chapter, verse] = verseMatch; - const normalizedBookCode = bookCode.toUpperCase(); - // Create verse reference with book code: "BOOK CHAPTER:VERSE" for more precise matching - const verseRefWithBook = `${normalizedBookCode} ${chapter}:${verse}`; - // Also create reference without book: "CHAPTER:VERSE" for fallback matching - const verseRef = `${chapter}:${verse}`; - targetVersesByRef.set(verseRefWithBook, cell); - // Only set verseRef if not already set (prefer book-specific match) - if (!targetVersesByRef.has(verseRef)) { - targetVersesByRef.set(verseRef, cell); - } + appendToMap(byVerseRefWithBook, `${bookCode.toUpperCase()} ${chapter}:${verse}`, item); + appendToMap(byVerseRef, `${chapter}:${verse}`, item); } } + } - // Also index by cellLabel for fallback - const cellLabel = cell.metadata?.cellLabel; - if (cellLabel) { - const normalizedLabel = String(cellLabel).trim().toUpperCase(); - targetCellsByLabel.set(normalizedLabel, cell); - // Also store original case - targetCellsByLabel.set(String(cellLabel).trim(), cell); + return { byId, byLabel, byVerseRef, byVerseRefWithBook }; +}; + +/** + * Try to match a target cell against the imported content using multiple strategies. + * Consumes the first match from the relevant list to avoid double-matching. + */ +const findMatchForTargetCell = ( + cell: CustomNotebookCellData, + indexes: ReturnType, + usedImported: Set, +): MatchResult | null => { + const { byId, byLabel, byVerseRef, byVerseRefWithBook } = indexes; + + const takeFirst = (map: Map, key: string): ImportedContent | null => { + const list = map.get(key); + if (!list) return null; + while (list.length > 0) { + const candidate = list.shift()!; + if (!usedImported.has(candidate)) return candidate; } - }); + return null; + }; - let exactMatches = 0; - let verseMatches = 0; - let labelMatches = 0; - let unmatched = 0; + const cellId = String(cell.metadata?.id || (cell as any).id || '').trim(); + const cellLabel = String(cell.metadata?.cellLabel || '').trim(); + + // Strategy 1: cellLabel matching (most reliable for verse matching) + if (cellLabel) { + const item = takeFirst(byLabel, cellLabel) || takeFirst(byLabel, cellLabel.toUpperCase()); + if (item) return { importedItem: item, alignmentMethod: 'custom', confidence: 0.95 }; + } - // Track which target cells have been matched - const matchedTargetCells = new Set(); + // Strategy 2: exact ID matching + if (cellId) { + const item = takeFirst(byId, cellId) || takeFirst(byId, cellId.toUpperCase()); + if (item) return { importedItem: item, alignmentMethod: 'exact-id', confidence: 1.0 }; + } - // Process each imported content item - // Only match verses to existing target cells - don't create new cells - for (const importedItem of importedContent) { - if (!importedItem.content.trim()) { - continue; // Skip empty content + // Strategy 3: verse reference matching + if (cellId) { + const verseMatch = cellId.match(VERSE_PATTERN); + if (verseMatch) { + const [, bookCode, chapter, verse] = verseMatch; + const refWithBook = `${bookCode.toUpperCase()} ${chapter}:${verse}`; + const item = takeFirst(byVerseRefWithBook, refWithBook) + || takeFirst(byVerseRef, `${chapter}:${verse}`); + if (item) return { importedItem: item, alignmentMethod: 'custom', confidence: 0.9 }; } + } - const importedId = importedItem.id; - let matchedCell: any | null = null; - let alignmentMethod: AlignedCell['alignmentMethod'] = 'custom'; - let confidence = 0.0; + return null; +}; - // Strategy 1: PRIORITIZE cellLabel matching (most reliable for verse matching) - // Check both importedItem.cellLabel and importedItem.metadata?.cellLabel - const cellLabel = importedItem.cellLabel || (importedItem as any).metadata?.cellLabel; - if (cellLabel) { - const labelStr = String(cellLabel).trim(); - const normalizedLabel = labelStr.toUpperCase(); - - if (targetCellsByLabel.has(labelStr)) { - matchedCell = targetCellsByLabel.get(labelStr); - alignmentMethod = 'custom'; - confidence = 0.95; // High confidence for label matching - labelMatches++; - } else if (targetCellsByLabel.has(normalizedLabel)) { - matchedCell = targetCellsByLabel.get(normalizedLabel); - alignmentMethod = 'custom'; - confidence = 0.95; // High confidence for label matching - labelMatches++; - } - } +/** + * USFM cell aligner that matches verses by their cell ID (book chapter:verse). + * Iterates target cells in their original order to preserve notebook structure. + */ +export const usfmCellAligner: CellAligner = async ( + targetCells: CustomNotebookCellData[], + _sourceCells: CustomNotebookCellData[], + importedContent: ImportedContent[] +): Promise => { + const alignedCells: AlignedCell[] = []; + const usedImported = new Set(); + const indexes = buildImportedIndexes(importedContent); - // Strategy 2: Try exact ID match (fallback) - // Try both original case and uppercase - if (!matchedCell && importedId) { - const normalizedId = String(importedId).trim().toUpperCase(); - const originalId = String(importedId).trim(); - - if (targetCellsById.has(originalId)) { - matchedCell = targetCellsById.get(originalId); - alignmentMethod = 'exact-id'; - confidence = 1.0; - exactMatches++; - } else if (targetCellsById.has(normalizedId)) { - matchedCell = targetCellsById.get(normalizedId); - alignmentMethod = 'exact-id'; - confidence = 1.0; - exactMatches++; - } - } + let labelMatches = 0; + let exactMatches = 0; + let verseMatches = 0; - // Strategy 3: Try verse reference matching (for verses) - last resort - // First try with book code for precise matching, then fallback to chapter:verse - if (!matchedCell && importedId) { - // Match pattern: book code (2+ chars), space(s), chapter number, colon, verse number - const verseMatch = String(importedId).match(/^([A-Z0-9]{2,})\s+(\d+):(\d+[a-z]?)$/i); - if (verseMatch) { - const [, bookCode, chapter, verse] = verseMatch; - const normalizedBookCode = bookCode.toUpperCase(); - // Try matching with normalized book code first (more precise) - const verseRefWithBook = `${normalizedBookCode} ${chapter}:${verse}`; - if (targetVersesByRef.has(verseRefWithBook)) { - matchedCell = targetVersesByRef.get(verseRefWithBook); - alignmentMethod = 'custom'; - confidence = 0.9; // High confidence for book-specific verse matching - verseMatches++; - } else { - // Fallback to chapter:verse matching (in case book codes differ slightly) - const verseRef = `${chapter}:${verse}`; - if (targetVersesByRef.has(verseRef)) { - matchedCell = targetVersesByRef.get(verseRef); - alignmentMethod = 'custom'; - confidence = 0.85; // Medium-high confidence for verse matching - verseMatches++; - } - } - } - } + // Iterate target cells in their existing order + for (const targetCell of targetCells) { + const targetId = String(targetCell.metadata?.id || (targetCell as any).id || '').trim(); + const match = findMatchForTargetCell(targetCell, indexes, usedImported); + + if (match) { + usedImported.add(match.importedItem); + + if (match.alignmentMethod === 'exact-id') exactMatches++; + else if (match.confidence >= 0.95) labelMatches++; + else verseMatches++; - // Only add aligned cell if we found a match - // Skip unmatched verses - don't create new cells for them - if (matchedCell) { - matchedTargetCells.add(matchedCell); alignedCells.push({ - notebookCell: matchedCell, - importedContent: importedItem, - alignmentMethod, - confidence, + notebookCell: targetCell, + importedContent: { + ...match.importedItem, + id: targetId || match.importedItem.id, + }, + alignmentMethod: match.alignmentMethod, + confidence: match.confidence, }); } else { - // No match found - skip this verse (don't create new cells) - // Log for debugging but don't add to alignedCells - console.warn(`[USFM Aligner] No match found for verse: ${importedId || 'unknown'}`); - unmatched++; - } - } - - // IMPORTANT: Preserve all existing target cells that weren't matched - // This ensures preface cells (chapter 0), headers, and other non-verse cells are kept - for (const targetCell of targetCells) { - if (!matchedTargetCells.has(targetCell)) { - // This cell wasn't matched - preserve it with its original content + // No match — preserve existing cell content in its original position alignedCells.push({ notebookCell: targetCell, importedContent: { - id: (targetCell.metadata?.id || targetCell.id) || '', - content: targetCell.value || targetCell.content || '', + id: targetId, + content: targetCell.value || '', cellLabel: targetCell.metadata?.cellLabel, metadata: targetCell.metadata || {}, }, - alignmentMethod: 'custom', // Preserved existing cell + alignmentMethod: 'custom', confidence: 1.0, }); } } - const preservedCount = targetCells.length - matchedTargetCells.size; + const unmatched = importedContent.filter( + (item) => item.content.trim() && !usedImported.has(item) + ).length; + const preservedCount = targetCells.length - usedImported.size; + console.log( - `USFM aligner: ${labelMatches} label matches, ${exactMatches} exact matches, ${verseMatches} verse matches, ` + - `${unmatched} unmatched imported verses (skipped), ${preservedCount} existing cells preserved` + `USFM aligner: ${labelMatches} label, ${exactMatches} exact-id, ${verseMatches} verse matches, ` + + `${unmatched} unmatched imported (skipped), ${preservedCount} existing cells preserved` ); - // Debug: Log sample target cell labels and imported labels for troubleshooting if (unmatched > 0 || labelMatches === 0) { - const sampleTargetLabels = Array.from(targetCellsByLabel.keys()).slice(0, 10); - const sampleImportedLabels = importedContent.slice(0, 10).map(item => - item.cellLabel || (item as any).metadata?.cellLabel || item.id + const sampleTargetLabels = targetCells.slice(0, 10).map( + (c) => c.metadata?.cellLabel || c.metadata?.id || '(no id)' + ); + const sampleImportedLabels = importedContent.slice(0, 10).map( + (item) => item.cellLabel || (item as Record).metadata?.cellLabel || item.id ); - console.log(`[USFM Aligner] Sample target cell labels:`, sampleTargetLabels); + console.log(`[USFM Aligner] Sample target labels:`, sampleTargetLabels); console.log(`[USFM Aligner] Sample imported labels/IDs:`, sampleImportedLabels); } diff --git a/webviews/codex-webviews/src/NewSourceUploader/types/plugin.ts b/webviews/codex-webviews/src/NewSourceUploader/types/plugin.ts index 105e936f4..2130bdc4d 100644 --- a/webviews/codex-webviews/src/NewSourceUploader/types/plugin.ts +++ b/webviews/codex-webviews/src/NewSourceUploader/types/plugin.ts @@ -2,6 +2,20 @@ import { NotebookPair, ProcessedNotebook } from './common'; import type { CustomNotebookCellData } from 'types'; import { WizardContext } from './wizard'; +/** + * Generate a stable, deterministic ID for paratext cells based on content. + * Re-importing the same content will produce the same ID, preventing duplicates. + */ +export const stableParatextId = (content: string, index: number): string => { + let hash = 0; + const str = content.trim(); + for (let i = 0; i < str.length; i++) { + hash = ((hash << 5) - hash + str.charCodeAt(i)) | 0; + } + const hex = (hash >>> 0).toString(16).padStart(8, '0'); + return `paratext-${hex}-${index}`; +}; + /** * Information about existing source files in the project */ @@ -34,7 +48,7 @@ export interface ImportedContent { * Aligned cell for translation import */ export interface AlignedCell { - notebookCell: any | null; // Target cell from existing notebook + notebookCell: CustomNotebookCellData | null; importedContent: ImportedContent; isParatext?: boolean; isAdditionalOverlap?: boolean; @@ -80,22 +94,27 @@ export const sequentialCellAligner: CellAligner = async ( targetCells.forEach((targetCell, targetIndex) => { const existingContent = targetCell.value || ""; const hasContent = existingContent.trim() !== ""; + const targetId = targetCell.metadata?.id || `target-${targetIndex}`; + const isMilestone = targetCell.metadata?.type === "milestone"; - if (!hasContent) { + // Never write imported content into milestone cells — they are structural markers + if (!hasContent && !isMilestone) { const importedItem = nextImportedItem(); if (importedItem) { alignedCells.push({ notebookCell: targetCell, - importedContent: importedItem, + importedContent: { + ...importedItem, + id: targetId, + }, alignmentMethod: 'sequential', - confidence: 0.8 // Medium confidence for sequential insertion + confidence: 0.8, }); insertedCount++; return; } } - const targetId = targetCell.metadata?.id || `target-${targetIndex}`; alignedCells.push({ notebookCell: targetCell, importedContent: { @@ -115,17 +134,17 @@ export const sequentialCellAligner: CellAligner = async ( for (let i = importIndex; i < importedContent.length; i++) { const importedItem = importedContent[i]; if (!importedItem.content.trim()) { - continue; // Skip empty content + continue; } alignedCells.push({ notebookCell: null, importedContent: { ...importedItem, - id: `paratext-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`, + id: stableParatextId(importedItem.content, paratextCount), }, isParatext: true, alignmentMethod: 'sequential', - confidence: 0.3 // Low confidence for paratext + confidence: 0.3, }); paratextCount++; } @@ -140,8 +159,8 @@ export const sequentialCellAligner: CellAligner = async ( * This is used when plugins don't define their own custom alignment algorithm */ export const defaultCellAligner: CellAligner = async ( - targetCells: any[], - sourceCells: any[], + targetCells: CustomNotebookCellData[], + sourceCells: CustomNotebookCellData[], importedContent: ImportedContent[] ): Promise => { const alignedCells: AlignedCell[] = []; @@ -181,7 +200,7 @@ export const defaultCellAligner: CellAligner = async ( notebookCell: targetCell, importedContent: { id: targetId, - content: targetCell.value || targetCell.content || "", + content: targetCell.value || "", edits: targetCell.metadata?.edits, cellLabel: targetCell.metadata?.cellLabel, metadata: targetCell.metadata || {}, @@ -194,6 +213,7 @@ export const defaultCellAligner: CellAligner = async ( } }); + let paratextIdx = 0; importedContent.forEach((importedItem, index) => { if (!importedItem.content.trim() || usedImportedIndexes.has(index)) { return; @@ -203,11 +223,11 @@ export const defaultCellAligner: CellAligner = async ( notebookCell: null, importedContent: { ...importedItem, - id: `paratext-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`, + id: stableParatextId(importedItem.content, paratextIdx++), }, isParatext: true, alignmentMethod: 'exact-id', - confidence: 0.0 // No confidence for unmatched content + confidence: 0.0, }); }); @@ -354,6 +374,12 @@ export interface ImporterPlugin { */ enabled?: boolean; + /** + * Whether this plugin supports target/translation imports. + * When false or undefined, the plugin will be hidden in the target import wizard. + */ + supportsTargetImport?: boolean; + /** * Optional: Tags for categorizing plugins */