Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions src/extension.ts
Original file line number Diff line number Diff line change
Expand Up @@ -821,12 +821,16 @@ export async function activate(context: vscode.ExtensionContext) {
context.subscriptions.push(
vscode.commands.registerCommand("codex-editor-extension.generateTranscriptions", async () => {
const countInput = await vscode.window.showInputBox({
prompt: "How many cells to transcribe?",
placeHolder: "e.g., 5",
validateInput: (val) => (val && !isNaN(Number(val)) && Number(val) >= 1 ? undefined : "Enter a positive number"),
prompt: "How many cells to transcribe? (0 or blank = all untranscribed cells)",
placeHolder: "0 for all, or a specific number",
value: "0",
validateInput: (val) => {
if (!val || val.trim() === "") return undefined;
return !isNaN(Number(val)) && Number(val) >= 0 ? undefined : "Enter 0 for all, or a positive number";
},
});
if (!countInput) return;
const count = Math.max(1, Math.floor(Number(countInput)));
if (countInput === undefined) return; // user cancelled
const count = Math.max(0, Math.floor(Number(countInput || 0)));

const provider = GlobalProvider.getInstance().getProvider("codex-cell-editor") as CodexCellEditorProvider | undefined;
if (!provider) {
Expand All @@ -835,7 +839,10 @@ export async function activate(context: vscode.ExtensionContext) {
}

provider.postMessageToWebviews({ type: "startBatchTranscription", content: { count } } as any);
vscode.window.showInformationMessage(`Starting transcription for up to ${count} cells...`);
const label = count > 0 ? `up to ${count}` : "all untranscribed";
vscode.window.showInformationMessage(
`Starting transcription for ${label} cells... First request may take ~45s to warm up.`
);
})
);

Expand Down
12 changes: 10 additions & 2 deletions src/providers/NewSourceUploader/NewSourceUploaderProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1653,7 +1653,13 @@ export class NewSourceUploaderProvider implements vscode.CustomTextEditorProvide
processedCells.set(paratextId, paratextCell);
paratextCount++;
} else if (alignedCell.notebookCell) {
const targetId = alignedCell.importedContent.id;
// Use the matched target cell's ID (from the aligner) to preserve
// source-to-target linkage. The imported content may have a different
// UUID generated during re-parsing, but the existing target cell's ID
// is what matches the source notebook.
const targetId = alignedCell.notebookCell.metadata?.id
|| alignedCell.notebookCell.id
|| alignedCell.importedContent.id;
const existingCell = existingCellsMap.get(targetId);
const existingValue = existingCell?.value ?? alignedCell.notebookCell.value ?? "";

Expand Down Expand Up @@ -1703,7 +1709,9 @@ export class NewSourceUploaderProvider implements vscode.CustomTextEditorProvide
newCells.push(paratextCell);
}
} else if (alignedCell.notebookCell) {
const targetId = alignedCell.importedContent.id;
const targetId = alignedCell.notebookCell.metadata?.id
|| alignedCell.notebookCell.id
|| alignedCell.importedContent.id;
const processedCell = processedCells.get(targetId);

if (processedCell) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -394,10 +394,11 @@ const messageHandlers: Record<string, (ctx: MessageHandlerContext) => Promise<vo
console.error(`[getAsrConfig] This will cause transcription to fail. Please check authentication status.`);
}

debug(`[getAsrConfig] Sending config: endpoint=${endpoint}, hasToken=${!!authToken}`);
const language = vscode.workspace.getConfiguration("codex-editor-extension").get<string>("asrLanguage", "eng");
debug(`[getAsrConfig] Sending config: endpoint=${endpoint}, hasToken=${!!authToken}, language=${language}`);
safePostMessageToPanel(webviewPanel, {
type: "asrConfig",
content: { endpoint, authToken }
content: { endpoint, authToken, language }
});
} catch (error) {
console.error("Error sending ASR config:", error);
Expand All @@ -417,6 +418,19 @@ const messageHandlers: Record<string, (ctx: MessageHandlerContext) => Promise<vo
}
},

requestBatchTranscription: async ({ event, webviewPanel, provider }) => {
    // The requested cell count is read from the incoming event; a missing
    // count falls back to 0, which the user-facing label below describes as
    // "all untranscribed".
    const requestedCount = (event as any).content?.count ?? 0;

    // Relay the request to the webview panel as a startBatchTranscription message.
    const batchMessage = {
        type: "startBatchTranscription",
        content: { count: requestedCount },
    } as any;
    safePostMessageToPanel(webviewPanel, batchMessage);

    // Tell the user what was started and warn about the slow first request.
    let scopeLabel: string;
    if (requestedCount > 0) {
        scopeLabel = `up to ${requestedCount}`;
    } else {
        scopeLabel = "all untranscribed";
    }
    vscode.window.showInformationMessage(
        `Starting transcription for ${scopeLabel} cells... First request may take ~45s to warm up.`
    );
},

updateCellAfterTranscription: async ({ event, document, webviewPanel, provider }) => {
const typedEvent = event as Extract<EditorPostMessages, { command: "updateCellAfterTranscription"; }>;
const { cellId, transcribedText, language } = typedEvent.content;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ interface AudioWaveformWithTranscriptionProps {
isTranscribing: boolean;
transcriptionProgress: number;
onTranscribe: () => void;
onTranscribeAll?: () => void;
onInsertTranscription: () => void;
disabled?: boolean;
onRequestRemove?: () => void;
Expand All @@ -50,6 +51,7 @@ const AudioWaveformWithTranscription: React.FC<AudioWaveformWithTranscriptionPro
isTranscribing,
transcriptionProgress,
onTranscribe,
onTranscribeAll,
onInsertTranscription,
disabled = false,
onRequestRemove,
Expand Down Expand Up @@ -341,16 +343,30 @@ const AudioWaveformWithTranscription: React.FC<AudioWaveformWithTranscriptionPro
{/* Action buttons at bottom */}
<div className="flex flex-wrap items-center justify-center gap-2 px-2">
{!transcription && !isTranscribing && (
<Button
onClick={onTranscribe}
disabled={disabled || (!audioUrl && !audioBlob)}
variant="outline"
className="h-8 px-2 text-xs text-[var(--vscode-button-background)] border-[var(--vscode-button-background)]/20 hover:bg-[var(--vscode-button-background)]/10"
title="Transcribe Audio"
>
<MessageCircle className="h-3 w-3" />
<span className="ml-1">Transcribe</span>
</Button>
<>
<Button
onClick={onTranscribe}
disabled={disabled || (!audioUrl && !audioBlob)}
variant="outline"
className="h-8 px-2 text-xs text-[var(--vscode-button-background)] border-[var(--vscode-button-background)]/20 hover:bg-[var(--vscode-button-background)]/10"
title="Transcribe this cell"
>
<MessageCircle className="h-3 w-3" />
<span className="ml-1">Transcribe</span>
</Button>
{onTranscribeAll && (
<Button
onClick={onTranscribeAll}
disabled={disabled}
variant="outline"
className="h-8 px-2 text-xs text-[var(--vscode-button-background)] border-[var(--vscode-button-background)]/20 hover:bg-[var(--vscode-button-background)]/10"
title="Transcribe all untranscribed cells (recommended — avoids repeated cold starts)"
>
<MessageCircle className="h-3 w-3" />
<span className="ml-1">Transcribe All</span>
</Button>
)}
</>
)}
<Button
variant="outline"
Expand Down
15 changes: 8 additions & 7 deletions webviews/codex-webviews/src/CodexCellEditor/CodexCellEditor.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,11 @@ const CodexCellEditor: React.FC = () => {
const targetCount = Math.max(0, message.content.count | 0);
const specificCellId: string | undefined = (message as any)?.content?.cellId;
let completed = 0;
// Create a single client for the entire batch to reuse the warmed Modal container
const client = new WhisperTranscriptionClient(
wsEndpoint,
asrConfig.authToken
);
for (const unit of translationUnits) {
if (targetCount > 0 && completed >= targetCount) break;
const cellId = unit.cellMarkers[0];
Expand Down Expand Up @@ -508,11 +513,7 @@ const CodexCellEditor: React.FC = () => {
// Transcribe
debug(
"batchTranscription",
`Creating client for cell ${cellId}: endpoint=${wsEndpoint}, hasToken=${!!asrConfig.authToken}`
);
const client = new WhisperTranscriptionClient(
wsEndpoint,
asrConfig.authToken
`Transcribing cell ${cellId}: endpoint=${wsEndpoint}, hasToken=${!!asrConfig.authToken}`
);
try {
// Mark cell as transcribing for UI feedback
Expand All @@ -521,15 +522,15 @@ const CodexCellEditor: React.FC = () => {
next.add(cellId);
return next;
});
const result = await client.transcribe(blob);
const result = await client.transcribe(blob, 60000, asrConfig.language);
const text = (result.text || "").trim();
if (text) {
vscode.postMessage({
command: "updateCellAfterTranscription",
content: {
cellId,
transcribedText: text,
language: "unknown",
language: asrConfig.language || "unknown",
},
} as unknown as EditorPostMessages);

Expand Down
15 changes: 12 additions & 3 deletions webviews/codex-webviews/src/CodexCellEditor/TextCellEditor.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -1350,6 +1350,14 @@ const CellEditor: React.FC<CellEditorProps> = ({
}
});

/**
 * Asks the extension host to start a batch transcription by posting a
 * "requestBatchTranscription" message with a count of 0.
 */
const handleTranscribeAll = (): void => {
    // A count of 0 is the sentinel for "all untranscribed cells".
    const allUntranscribed = 0;
    window.vscodeApi.postMessage({
        command: "requestBatchTranscription",
        content: { count: allUntranscribed },
    });
};

const handleTranscribeAudio = async () => {
// Check connectivity first
if (!navigator.onLine) {
Expand Down Expand Up @@ -1397,8 +1405,8 @@ const CellEditor: React.FC<CellEditorProps> = ({
setTranscriptionStatus(`Error: ${error}`);
};

// Perform transcription
const result = await client.transcribe(audioBlob);
// Perform transcription with language hint
const result = await client.transcribe(audioBlob, 60000, asrConfig?.language);

// Success - save transcription but don't automatically insert
const transcribedText = result.text.trim();
Expand All @@ -1417,7 +1425,7 @@ const CellEditor: React.FC<CellEditorProps> = ({
content: {
cellId: cellMarkers[0],
transcribedText: transcribedText,
language: "unknown",
language: asrConfig?.language || "unknown",
},
};
window.vscodeApi.postMessage(messageContent);
Expand Down Expand Up @@ -2894,6 +2902,7 @@ const CellEditor: React.FC<CellEditorProps> = ({
isTranscribing={isTranscribing}
transcriptionProgress={transcriptionProgress}
onTranscribe={handleTranscribeAudio}
onTranscribeAll={handleTranscribeAll}
onInsertTranscription={handleInsertTranscription}
onRequestRemove={() => setConfirmingDiscard(true)}
onShowHistory={() => setShowAudioHistory(true)}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ export class WhisperTranscriptionClient {

async transcribe(
audioBlob: Blob,
timeoutMs: number = 60000
timeoutMs: number = 60000,
language?: string
): Promise<{ text: string; }> {
try {
// Create FormData with audio file
Expand All @@ -25,6 +26,9 @@ export class WhisperTranscriptionClient {
if (this.authToken) {
url.searchParams.set("token", this.authToken);
}
if (language) {
url.searchParams.set("lang", language);
}

// Prepare headers
const headers: HeadersInit = {};
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import { describe, it, expect } from 'vitest';
import { usfmCellAligner } from './usfmCellAligner';

/**
 * Test-local stand-in for the ID resolution performed in handleWriteTranslation
 * (the fix for issue #429).
 *
 * Candidates are tried in priority order and the first truthy one wins:
 *   1. the matched notebook cell's `metadata.id`
 *   2. the matched notebook cell's own `id`
 *   3. the imported content's `id`
 *
 * @param alignedCell - One aligner result: the matched notebook cell (may be
 *   null/undefined) plus the imported content it was paired with.
 * @returns The first truthy candidate; the imported content's id if neither
 *   notebook-cell candidate is truthy.
 */
function resolveTargetId(alignedCell: { notebookCell: any; importedContent: { id: string } }) {
    const matchedCell = alignedCell.notebookCell;

    const metadataId = matchedCell?.metadata?.id;
    if (metadataId) {
        return metadataId;
    }

    const cellId = matchedCell?.id;
    if (cellId) {
        return cellId;
    }

    // Nothing usable on the matched cell: fall back to the imported content's id.
    return alignedCell.importedContent.id;
}

describe('usfmCellAligner', () => {
    // Fixture factory: an existing target cell whose top-level id and
    // metadata.id carry the same value.
    const makeTargetCell = (id: string, cellLabel: string, value = '') => ({
        id,
        value,
        metadata: { id, cellLabel },
    });

    // Fixture factory: an imported-content entry with its own id. In these
    // tests the imported ids always differ from the target cell ids.
    const makeImported = (id: string, content: string, cellLabel: string) => ({
        id,
        content,
        cellLabel,
    });

    it('matches imported content to target cells by cellLabel', async () => {
        // Existing target cells, standing in for what is already in the .codex file.
        const existingTargets = [
            makeTargetCell('uuid-target-1', 'GEN 1:1'),
            makeTargetCell('uuid-target-2', 'GEN 1:2'),
            makeTargetCell('uuid-target-3', 'GEN 1:3'),
        ];

        // Same labels, new UUIDs: simulates re-parsing the same file.
        const reparsedContent = [
            makeImported('uuid-new-1', 'In the beginning...', 'GEN 1:1'),
            makeImported('uuid-new-2', 'And the earth was...', 'GEN 1:2'),
            makeImported('uuid-new-3', 'And God said...', 'GEN 1:3'),
        ];

        const alignment = await usfmCellAligner(existingTargets, [], reparsedContent);

        // Every imported verse should have found a partner.
        const withContent = alignment.filter(
            (entry) => entry.importedContent.content.trim() !== ''
        );
        expect(withContent).toHaveLength(3);

        // The paired notebook cell must be the EXISTING target cell...
        withContent.forEach((entry) => {
            expect(entry.notebookCell).toBeDefined();
            expect(entry.notebookCell.metadata.id).toMatch(/^uuid-target-/);
        });

        // ...while the imported content keeps its new UUID.
        withContent.forEach((entry) => {
            expect(entry.importedContent.id).toMatch(/^uuid-new-/);
        });
    });

    it('resolves target ID from matched notebook cell, not imported content (issue #429 fix)', async () => {
        const existingTargets = [
            makeTargetCell('uuid-target-1', 'GEN 1:1'),
            makeTargetCell('uuid-target-2', 'GEN 1:2'),
        ];

        const reparsedContent = [
            makeImported('uuid-new-1', 'In the beginning...', 'GEN 1:1'),
            makeImported('uuid-new-2', 'And the earth was...', 'GEN 1:2'),
        ];

        const alignment = await usfmCellAligner(existingTargets, [], reparsedContent);
        const withContent = alignment.filter(
            (entry) => entry.importedContent.content.trim() !== ''
        );

        // Apply the same ID resolution as handleWriteTranslation: the result
        // must be the existing target UUID, never the imported one.
        withContent.forEach((entry, index) => {
            const resolved = resolveTargetId(entry);
            expect(resolved).toBe(`uuid-target-${index + 1}`);
            expect(resolved).not.toBe(`uuid-new-${index + 1}`);
        });
    });

    it('preserves unmatched target cells (paratext, headers)', async () => {
        const existingTargets = [
            makeTargetCell('uuid-header', 'GEN 0:\\id:0', 'Genesis'),
            makeTargetCell('uuid-target-1', 'GEN 1:1'),
        ];

        // Only verse content is imported (versesOnly mode), so the header has no match.
        const reparsedContent = [
            makeImported('uuid-new-1', 'In the beginning...', 'GEN 1:1'),
        ];

        const alignment = await usfmCellAligner(existingTargets, [], reparsedContent);

        // Expect one matched verse plus one preserved header.
        expect(alignment).toHaveLength(2);

        // The header must still be findable under its original ID.
        const preservedHeader = alignment.find((entry) => {
            const effectiveId = entry.notebookCell?.metadata?.id || entry.importedContent.id;
            return effectiveId === 'uuid-header';
        });
        expect(preservedHeader).toBeDefined();
    });

    it('handles case-insensitive cellLabel matching', async () => {
        // Lower-case label on the target, upper-case label on the import.
        const existingTargets = [makeTargetCell('uuid-target-1', 'gen 1:1')];
        const reparsedContent = [
            makeImported('uuid-new-1', 'In the beginning...', 'GEN 1:1'),
        ];

        const alignment = await usfmCellAligner(existingTargets, [], reparsedContent);
        const withVerseText = alignment.filter(
            (entry) => entry.importedContent.content === 'In the beginning...'
        );

        expect(withVerseText).toHaveLength(1);
        expect(withVerseText[0].notebookCell.metadata.id).toBe('uuid-target-1');
    });
});