diff --git a/packages/core/src/content/transcript/providers/youtube.ts b/packages/core/src/content/transcript/providers/youtube.ts index fa7e7d4..45c61d4 100644 --- a/packages/core/src/content/transcript/providers/youtube.ts +++ b/packages/core/src/content/transcript/providers/youtube.ts @@ -77,6 +77,36 @@ export const fetchTranscript = async ( ) } + // Handle explicit apify mode before HTML check — Apify doesn't need HTML. + // Fixes: when HTML fetch fails, explicit --youtube apify was skipped entirely. + if (mode === 'apify') { + if (!options.apifyApiToken) { + throw new Error('Missing APIFY_API_TOKEN for --youtube apify') + } + pushHint('YouTube: fetching transcript (Apify)') + attemptedProviders.push('apify') + const apifyTranscript = await fetchTranscriptWithApify( + options.fetch, + options.apifyApiToken, + url + ) + if (apifyTranscript) { + return { + text: normalizeTranscriptText(apifyTranscript), + source: 'apify', + metadata: { provider: 'apify' }, + attemptedProviders, + } + } + attemptedProviders.push('unavailable') + return { + text: null, + source: 'unavailable', + metadata: { provider: 'youtube', reason: 'no_transcript_available' }, + attemptedProviders, + } + } + if (!html) { return { text: null, source: null, attemptedProviders } } @@ -249,14 +279,7 @@ export const fetchTranscript = async ( } } - // Explicit apify mode: allow forcing it, but require a token. - if (mode === 'apify') { - if (!options.apifyApiToken) { - throw new Error('Missing APIFY_API_TOKEN for --youtube apify') - } - const apifyResult = await tryApify('YouTube: fetching transcript (Apify)') - if (apifyResult) return apifyResult - } + // Note: explicit apify mode is handled before the HTML check (above). // Auto mode: if yt-dlp cannot run (no binary/credentials), fall back to Apify last-last. if (mode === 'auto' && !canRunYtDlp) { diff --git a/tests/transcript.youtube-provider.test.ts b/tests/transcript.youtube-provider.test.ts index 6df915f..96f3f07 100644 --- a/tests/transcript.youtube-provider.test.ts +++ b/tests/transcript.youtube-provider.test.ts @@ -67,6 +67,68 @@ describe('YouTube transcript provider module', () => { ).toEqual({ text: null, source: null, attemptedProviders: [] }) }) + it('uses apify mode even when HTML is null (fixes #51)', async () => { + apify.fetchTranscriptWithApify.mockResolvedValue('Hello from apify') + + const result = await fetchTranscript( + { + url: 'https://www.youtube.com/watch?v=abcdefghijk', + html: null, + resourceKey: null, + }, + { + ...baseOptions, + apifyApiToken: 'TOKEN', + youtubeTranscriptMode: 'apify', + } + ) + + expect(result.text).toBe('Hello from apify') + expect(result.source).toBe('apify') + expect(result.attemptedProviders).toEqual(['apify']) + expect(api.extractYoutubeiTranscriptConfig).not.toHaveBeenCalled() + expect(captions.fetchTranscriptFromCaptionTracks).not.toHaveBeenCalled() + expect(ytdlp.fetchTranscriptWithYtDlp).not.toHaveBeenCalled() + }) + + it('returns unavailable when apify mode fails with null HTML', async () => { + apify.fetchTranscriptWithApify.mockResolvedValue(null) + + const result = await fetchTranscript( + { + url: 'https://www.youtube.com/watch?v=abcdefghijk', + html: null, + resourceKey: null, + }, + { + ...baseOptions, + apifyApiToken: 'TOKEN', + youtubeTranscriptMode: 'apify', + } + ) + + expect(result.text).toBeNull() + expect(result.source).toBe('unavailable') + expect(result.attemptedProviders).toEqual(['apify', 'unavailable']) + }) + + it('throws when apify mode used without token and HTML is null', async () => { + await expect( + fetchTranscript( + { + url: 'https://www.youtube.com/watch?v=abcdefghijk', + html: null, + resourceKey: null, + }, + { + ...baseOptions, + apifyApiToken: null, + youtubeTranscriptMode: 'apify', + } + ) + ).rejects.toThrow(/Missing APIFY_API_TOKEN/i) + }) + it('uses apify-only mode and skips web + yt-dlp', async () => { apify.fetchTranscriptWithApify.mockResolvedValue('Hello from apify')