Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/test/suite/vttRoundtrip.integration.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ suite("VTT round-trip integration (mock VTT fixtures)", function () {
});
assert.ok(alignedTextCell, "Expected at least one aligned text cell");

const alignedStart = alignedTextCell!.notebookCell.metadata.data.startTime as number;
const alignedStart = alignedTextCell!.notebookCell!.metadata!.data!.startTime as number;
const expectedContent = importedByStart.get(alignedStart);
assert.ok(expectedContent, "Expected imported content for aligned text cell");
assert.strictEqual(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import {
AlertCircle,
} from "lucide-react";
import { Badge } from "../../../components/ui/badge";
import { subtitlesImporter } from "./index";
import { subtitlesImporter, validateSubtitleTimestamps } from "./index";
import { subtitlesImporterPlugin } from "./index.tsx";
import { handleImportCompletion, notebookToImportedContent } from "../common/translationHelper";
import { notifyImportStarted, notifyImportEnded } from "../../utils/importProgress";
Expand All @@ -45,6 +45,7 @@ export const SubtitlesImporterForm: React.FC<ImporterComponentProps> = (props) =
const [result, setResult] = useState<NotebookPair | null>(null);
const [alignedCells, setAlignedCells] = useState<AlignedCell[] | null>(null);
const [previewContent, setPreviewContent] = useState<string>("");
const [fileWarnings, setFileWarnings] = useState<string[]>([]);
const [subtitleStats, setSubtitleStats] = useState<{
totalCues: number;
duration: string;
Expand All @@ -64,6 +65,7 @@ export const SubtitlesImporterForm: React.FC<ImporterComponentProps> = (props) =
setResult(null);
setAlignedCells(null);
setSubtitleStats(null);
setFileWarnings([]);

// Show preview and analyze file
try {
Expand Down Expand Up @@ -103,6 +105,9 @@ export const SubtitlesImporterForm: React.FC<ImporterComponentProps> = (props) =
duration,
format,
});

const timestampWarnings = validateSubtitleTimestamps(text);
setFileWarnings(timestampWarnings);
} catch (err) {
console.warn("Could not preview file:", err);
}
Expand Down Expand Up @@ -581,6 +586,17 @@ export const SubtitlesImporterForm: React.FC<ImporterComponentProps> = (props) =
</Card>
)}

{fileWarnings.length > 0 && (
<Alert className="border-yellow-500 bg-yellow-50 dark:bg-yellow-950">
<AlertCircle className="h-4 w-4 !text-yellow-600 dark:!text-yellow-400" />
<AlertDescription className="text-yellow-800 dark:text-yellow-200">
{fileWarnings.map((warning, index) => (
<p key={index}>{warning}</p>
))}
</AlertDescription>
</Alert>
)}

{previewContent && (
<Card>
<CardHeader>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { describe, it, expect } from 'vitest';
import { subtitlesImporter } from './index';
import { subtitlesImporter, validateSubtitleTimestamps } from './index';

// Minimal File-like shim for tests
class MockFile {
Expand Down Expand Up @@ -41,4 +41,138 @@ describe('subtitlesImporter.parseFile', () => {
});
});

/**
 * Behavioural tests for `validateSubtitleTimestamps`.
 *
 * Each case feeds a small in-memory subtitle document to the validator and
 * checks either that no warning is produced (well-ordered input) or that the
 * single summary warning mentions the expected cue count / jump magnitude.
 */
describe('validateSubtitleTimestamps', () => {
    /** Builds a subtitle document from individual lines, one per argument. */
    const toDocument = (...lines: string[]): string => lines.join('\n');

    /**
     * Runs the validator, asserts that exactly one warning was returned and
     * hands that warning back for further matching.
     */
    const expectSingleWarning = (document: string) => {
        const result = validateSubtitleTimestamps(document);
        expect(result.length).toBe(1);
        return result[0];
    };

    it('returns no warnings for a well-ordered VTT', () => {
        const document = toDocument(
            'WEBVTT',
            '',
            '1',
            '00:00:01.000 --> 00:00:03.000',
            'First cue',
            '',
            '2',
            '00:00:04.000 --> 00:00:06.000',
            'Second cue',
            '',
            '3',
            '00:00:07.000 --> 00:00:09.000',
            'Third cue',
        );

        expect(validateSubtitleTimestamps(document)).toEqual([]);
    });

    it('returns no warnings for a well-ordered SRT', () => {
        const document = toDocument(
            '1',
            '00:00:01,000 --> 00:00:03,000',
            'First cue',
            '',
            '2',
            '00:00:04,000 --> 00:00:06,000',
            'Second cue',
        );

        expect(validateSubtitleTimestamps(document)).toEqual([]);
    });

    it('catches small overlaps from multi-speaker cues (<5s)', () => {
        // Two cues share the same time range, so the second starts 4s before
        // the first one ends.
        const warning = expectSingleWarning(
            toDocument(
                'WEBVTT',
                '',
                '1',
                '00:00:10.000 --> 00:00:14.000',
                'Speaker A',
                '',
                '1',
                '00:00:10.000 --> 00:00:14.000',
                'Speaker B (same timestamp, overlap of 4s)',
            ),
        );

        expect(warning).toMatch(/non-sequential timestamps/);
        expect(warning).toMatch(/4 seconds/);
    });

    it('warns when timestamps jump backwards significantly (corrupted hour)', () => {
        const warning = expectSingleWarning(
            toDocument(
                'WEBVTT',
                '',
                '1',
                '01:00:23.625 --> 01:00:28.458',
                'Cue with wrong hour',
                '',
                '2',
                '00:00:52.886 --> 00:00:54.763',
                'Cue with correct hour (jumps back ~3573s)',
            ),
        );

        expect(warning).toMatch(/non-sequential timestamps/);
        expect(warning).toMatch(/1 hour/);
    });

    it('counts multiple out-of-order cues', () => {
        // Cues 2 and 4 both start before the preceding cue has ended.
        const warning = expectSingleWarning(
            toDocument(
                'WEBVTT',
                '',
                '1',
                '01:00:23.000 --> 01:00:28.000',
                'Wrong hour',
                '',
                '2',
                '00:00:30.000 --> 00:00:35.000',
                'Correct (jump back)',
                '',
                '3',
                '01:00:40.000 --> 01:00:45.000',
                'Wrong hour again',
                '',
                '4',
                '00:00:50.000 --> 00:00:55.000',
                'Correct (jump back again)',
            ),
        );

        expect(warning).toMatch(/2 subtitle cue/);
    });

    it('reports minutes for moderate jumps', () => {
        const warning = expectSingleWarning(
            toDocument(
                'WEBVTT',
                '',
                '1',
                '00:05:00.000 --> 00:05:30.000',
                'Later cue first',
                '',
                '2',
                '00:01:00.000 --> 00:01:05.000',
                'Earlier cue second (jumps back ~270s)',
            ),
        );

        expect(warning).toMatch(/\d+ minutes/);
    });

    it('reports seconds for small jumps', () => {
        const warning = expectSingleWarning(
            toDocument(
                'WEBVTT',
                '',
                '1',
                '00:00:30.000 --> 00:00:40.000',
                'Later cue first',
                '',
                '2',
                '00:00:10.000 --> 00:00:15.000',
                'Earlier cue second (jumps back ~30s)',
            ),
        );

        expect(warning).toMatch(/\d+ seconds/);
    });
});
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,63 @@ import { createSubtitleCellMetadata } from './cellMetadata';

const SUPPORTED_EXTENSIONS = ['vtt', 'srt', 'ass', 'sub'];

/**
 * Scans raw subtitle text for cues whose start time precedes the end time of
 * the cue immediately before it (overlapping or backwards-jumping timing).
 *
 * Timing lines of the form `start --> end` are recognised in both SRT (`,`
 * millisecond separator) and WebVTT (`.` separator) notation. The hours field
 * is optional, so WebVTT's short `mm:ss.ttt` form is checked as well, and
 * hour values wider than two digits are read in full.
 *
 * @param content - Raw text of the subtitle file.
 * @returns An empty array when every cue starts at or after the previous
 *   cue's end; otherwise a one-element array holding a summary warning that
 *   states how many cues are out of order and the largest backwards jump.
 */
export const validateSubtitleTimestamps = (content: string): string[] => {
    // Created per call: a /g regex carries `lastIndex` state between `exec` calls.
    const timingLineRegex =
        /(?:(\d{2,4}):)?(\d{2}):(\d{2})[.,](\d{3})\s*-->\s*(?:(\d{2,4}):)?(\d{2}):(\d{2})[.,](\d{3})/g;

    // Converts captured timestamp fields to integer milliseconds so that
    // comparisons and differences are exact. A missing field counts as zero
    // (this is how an omitted hours component is handled).
    const toMillis = (
        hours: string | undefined,
        minutes: string | undefined,
        seconds: string | undefined,
        millis: string | undefined
    ): number =>
        (Number.parseInt(hours ?? '0', 10) * 3600 +
            Number.parseInt(minutes ?? '0', 10) * 60 +
            Number.parseInt(seconds ?? '0', 10)) *
            1000 +
        Number.parseInt(millis ?? '0', 10);

    let previousEndMs: number | null = null;
    let outOfOrderCount = 0;
    let maxJumpMs = 0;

    let match: RegExpExecArray | null;
    while ((match = timingLineRegex.exec(content)) !== null) {
        const startMs = toMillis(match[1], match[2], match[3], match[4]);
        const endMs = toMillis(match[5], match[6], match[7], match[8]);

        if (previousEndMs !== null && startMs < previousEndMs) {
            outOfOrderCount++;
            // Track the maximum incrementally instead of collecting every jump
            // and spreading the array into Math.max.
            maxJumpMs = Math.max(maxJumpMs, previousEndMs - startMs);
        }
        previousEndMs = endMs;
    }

    if (outOfOrderCount === 0) {
        return [];
    }

    const maxJumpSeconds = maxJumpMs / 1000;
    let formattedMaxJump: string;
    if (maxJumpSeconds >= 3000) {
        // From 50 minutes upwards the jump is reported in (rounded) hours.
        formattedMaxJump = `${Math.round(maxJumpSeconds / 3600)} hour(s)`;
    } else if (maxJumpSeconds >= 120) {
        formattedMaxJump = `${Math.round(maxJumpSeconds / 60)} minutes`;
    } else if (Math.round(maxJumpSeconds) === 0) {
        // Avoid the misleading "0 seconds" for overlaps below half a second.
        formattedMaxJump = 'less than 1 second';
    } else {
        formattedMaxJump = `${Math.round(maxJumpSeconds)} seconds`;
    }

    return [
        `Found ${outOfOrderCount} subtitle cue(s) with non-sequential timestamps ` +
            `(jumping backwards by up to ${formattedMaxJump}). ` +
            `This typically indicates corrupted timing data (e.g., incorrect hour values). ` +
            `The imported content may not be in the correct order.`,
    ];
};

/**
* Parses SRT content and converts it to VTT-like structure
*/
Expand Down Expand Up @@ -117,6 +174,10 @@ const validateFile = async (file: File): Promise<FileValidationResult> => {
warnings.push('No timestamp patterns found - this may not be a subtitle file');
}

// Check for non-sequential timestamps
const timestampWarnings = validateSubtitleTimestamps(content);
warnings.push(...timestampWarnings);

} catch (error) {
errors.push('Could not read file content');
}
Expand Down Expand Up @@ -187,6 +248,8 @@ export const parseFile = async (
throw new Error('No subtitle cues found in the file');
}

const timestampWarnings = validateSubtitleTimestamps(content);

onProgress?.(createProgress('Creating Cells', 'Creating notebook cells...', 70));

// Create notebook cells using ProcessedCell format with cellMetadata
Expand Down Expand Up @@ -273,6 +336,7 @@ export const parseFile = async (
return {
success: true,
notebookPair: notebookPairWithMilestones,
warnings: timestampWarnings.length > 0 ? timestampWarnings : undefined,
metadata: {
segmentCount: sourceNotebook.cells.length,
format,
Expand Down