diff --git a/src/test/suite/vttRoundtrip.integration.test.ts b/src/test/suite/vttRoundtrip.integration.test.ts index 6208c113a..95056a349 100644 --- a/src/test/suite/vttRoundtrip.integration.test.ts +++ b/src/test/suite/vttRoundtrip.integration.test.ts @@ -105,7 +105,7 @@ suite("VTT round-trip integration (mock VTT fixtures)", function () { }); assert.ok(alignedTextCell, "Expected at least one aligned text cell"); - const alignedStart = alignedTextCell!.notebookCell.metadata.data.startTime as number; + const alignedStart = alignedTextCell!.notebookCell!.metadata!.data!.startTime as number; const expectedContent = importedByStart.get(alignedStart); assert.ok(expectedContent, "Expected imported content for aligned text cell"); assert.strictEqual( diff --git a/webviews/codex-webviews/src/NewSourceUploader/importers/subtitles/SubtitlesImporterForm.tsx b/webviews/codex-webviews/src/NewSourceUploader/importers/subtitles/SubtitlesImporterForm.tsx index 083e846cc..70ceee2ee 100644 --- a/webviews/codex-webviews/src/NewSourceUploader/importers/subtitles/SubtitlesImporterForm.tsx +++ b/webviews/codex-webviews/src/NewSourceUploader/importers/subtitles/SubtitlesImporterForm.tsx @@ -24,7 +24,7 @@ import { AlertCircle, } from "lucide-react"; import { Badge } from "../../../components/ui/badge"; -import { subtitlesImporter } from "./index"; +import { subtitlesImporter, validateSubtitleTimestamps } from "./index"; import { subtitlesImporterPlugin } from "./index.tsx"; import { handleImportCompletion, notebookToImportedContent } from "../common/translationHelper"; import { notifyImportStarted, notifyImportEnded } from "../../utils/importProgress"; @@ -45,6 +45,7 @@ export const SubtitlesImporterForm: React.FC = (props) = const [result, setResult] = useState(null); const [alignedCells, setAlignedCells] = useState(null); const [previewContent, setPreviewContent] = useState(""); + const [fileWarnings, setFileWarnings] = useState([]); const [subtitleStats, setSubtitleStats] = useState<{ 
totalCues: number; duration: string; @@ -64,6 +65,7 @@ export const SubtitlesImporterForm: React.FC = (props) = setResult(null); setAlignedCells(null); setSubtitleStats(null); + setFileWarnings([]); // Show preview and analyze file try { @@ -103,6 +105,9 @@ export const SubtitlesImporterForm: React.FC = (props) = duration, format, }); + + const timestampWarnings = validateSubtitleTimestamps(text); + setFileWarnings(timestampWarnings); } catch (err) { console.warn("Could not preview file:", err); } @@ -581,6 +586,17 @@ export const SubtitlesImporterForm: React.FC = (props) = )} + {fileWarnings.length > 0 && ( + + + + {fileWarnings.map((warning, index) => ( +

{warning}

+ ))} +
+
/**
 * Behavioural checks for `validateSubtitleTimestamps`.
 *
 * Every fixture is raw subtitle text built by joining cue lines with "\n".
 * A cue is flagged when its start time lies before the end time of the cue
 * that precedes it, and all flagged cues are reported in one warning string.
 */
describe('validateSubtitleTimestamps', () => {
    // Builds the raw file text from individual lines, then runs the validator on it.
    const warningsFor = (...rows: string[]): string[] =>
        validateSubtitleTimestamps(rows.join('\n'));

    it('returns no warnings for a well-ordered VTT', () => {
        const found = warningsFor(
            'WEBVTT',
            '',
            '1',
            '00:00:01.000 --> 00:00:03.000',
            'First cue',
            '',
            '2',
            '00:00:04.000 --> 00:00:06.000',
            'Second cue',
            '',
            '3',
            '00:00:07.000 --> 00:00:09.000',
            'Third cue'
        );

        expect(found).toEqual([]);
    });

    it('returns no warnings for a well-ordered SRT', () => {
        // SRT uses a comma before the milliseconds instead of a dot.
        const found = warningsFor(
            '1',
            '00:00:01,000 --> 00:00:03,000',
            'First cue',
            '',
            '2',
            '00:00:04,000 --> 00:00:06,000',
            'Second cue'
        );

        expect(found).toEqual([]);
    });

    it('catches small overlaps from multi-speaker cues (<5s)', () => {
        // Two cues sharing the same timing: the second starts 4s before the first ends.
        const found = warningsFor(
            'WEBVTT',
            '',
            '1',
            '00:00:10.000 --> 00:00:14.000',
            'Speaker A',
            '',
            '1',
            '00:00:10.000 --> 00:00:14.000',
            'Speaker B (same timestamp, overlap of 4s)'
        );

        expect(found.length).toBe(1);
        expect(found[0]).toMatch(/non-sequential timestamps/);
        expect(found[0]).toMatch(/4 seconds/);
    });

    it('warns when timestamps jump backwards significantly (corrupted hour)', () => {
        const found = warningsFor(
            'WEBVTT',
            '',
            '1',
            '01:00:23.625 --> 01:00:28.458',
            'Cue with wrong hour',
            '',
            '2',
            '00:00:52.886 --> 00:00:54.763',
            'Cue with correct hour (jumps back ~3573s)'
        );

        expect(found.length).toBe(1);
        expect(found[0]).toMatch(/non-sequential timestamps/);
        expect(found[0]).toMatch(/1 hour/);
    });

    it('counts multiple out-of-order cues', () => {
        // Cues 2 and 4 each start before the end of the cue preceding them.
        const found = warningsFor(
            'WEBVTT',
            '',
            '1',
            '01:00:23.000 --> 01:00:28.000',
            'Wrong hour',
            '',
            '2',
            '00:00:30.000 --> 00:00:35.000',
            'Correct (jump back)',
            '',
            '3',
            '01:00:40.000 --> 01:00:45.000',
            'Wrong hour again',
            '',
            '4',
            '00:00:50.000 --> 00:00:55.000',
            'Correct (jump back again)'
        );

        expect(found.length).toBe(1);
        expect(found[0]).toMatch(/2 subtitle cue/);
    });

    it('reports minutes for moderate jumps', () => {
        const found = warningsFor(
            'WEBVTT',
            '',
            '1',
            '00:05:00.000 --> 00:05:30.000',
            'Later cue first',
            '',
            '2',
            '00:01:00.000 --> 00:01:05.000',
            'Earlier cue second (jumps back ~270s)'
        );

        expect(found.length).toBe(1);
        expect(found[0]).toMatch(/\d+ minutes/);
    });

    it('reports seconds for small jumps', () => {
        const found = warningsFor(
            'WEBVTT',
            '',
            '1',
            '00:00:30.000 --> 00:00:40.000',
            'Later cue first',
            '',
            '2',
            '00:00:10.000 --> 00:00:15.000',
            'Earlier cue second (jumps back ~30s)'
        );

        expect(found.length).toBe(1);
        expect(found[0]).toMatch(/\d+ seconds/);
    });
});
/**
 * Scans raw subtitle text for non-sequential timestamps.
 *
 * Every `start --> end` timing line found in `content` is visited in file
 * order. A cue is flagged when its start time is earlier than the END time of
 * the cue before it, so overlapping cues (e.g. two speakers sharing the same
 * timing) are reported as well as genuine backwards jumps.
 *
 * Accepted timestamp shapes:
 *  - `hh:mm:ss.ttt` (VTT) and `hh:mm:ss,ttt` (SRT)
 *  - `mm:ss.ttt`, the WebVTT form that omits the hour component
 *  - hour components with more than two digits
 *
 * @param content Raw text of the subtitle file.
 * @returns An empty array when every cue starts at or after the previous
 *          cue's end; otherwise a single-element array whose message gives
 *          the number of flagged cues and the largest backwards jump.
 */
export const validateSubtitleTimestamps = (content: string): string[] => {
    // The hour group is optional on both sides of the arrow. When present it
    // may have two or more digits, so long recordings are not truncated.
    const timestampRegex =
        /(?:(\d{2,}):)?(\d{2}):(\d{2})[.,](\d{3})\s*-->\s*(?:(\d{2,}):)?(\d{2}):(\d{2})[.,](\d{3})/g;

    // Times are kept as whole milliseconds so that comparisons and the unit
    // thresholds below are exact rather than subject to floating-point error.
    const toMillis = (
        hours: string | undefined,
        minutes: string | undefined,
        seconds: string | undefined,
        millis: string | undefined
    ): number =>
        ((Number(hours ?? 0) * 60 + Number(minutes ?? 0)) * 60 + Number(seconds ?? 0)) * 1000 +
        Number(millis ?? 0);

    let previousEndMs: number | undefined;
    let outOfOrderCount = 0;
    let maxJumpBackMs = 0;

    let match: RegExpExecArray | null;
    while ((match = timestampRegex.exec(content)) !== null) {
        const startMs = toMillis(match[1], match[2], match[3], match[4]);
        const endMs = toMillis(match[5], match[6], match[7], match[8]);

        if (previousEndMs !== undefined && startMs < previousEndMs) {
            outOfOrderCount++;
            // Track the running maximum instead of collecting every jump and
            // spreading the list into Math.max, which can exceed engine
            // argument limits on very large files.
            maxJumpBackMs = Math.max(maxJumpBackMs, previousEndMs - startMs);
        }
        previousEndMs = endMs;
    }

    if (outOfOrderCount === 0) {
        return [];
    }

    const maxJump = maxJumpBackMs / 1000;
    // Jumps of 50 minutes or more are reported in hours: a wrong hour digit
    // typically shows up as a jump of slightly under a whole hour.
    const formattedMaxJump =
        maxJump >= 3000
            ? `${Math.round(maxJump / 3600)} hour(s)`
            : maxJump >= 120
                ? `${Math.round(maxJump / 60)} minutes`
                : `${Math.round(maxJump)} seconds`;

    return [
        `Found ${outOfOrderCount} subtitle cue(s) with non-sequential timestamps ` +
            `(jumping backwards by up to ${formattedMaxJump}). ` +
            `This typically indicates corrupted timing data (e.g., incorrect hour values). ` +
            `The imported content may not be in the correct order.`,
    ];
};