From e20f7c53ab8976e7cfd6cfd7d630a193ca56e775 Mon Sep 17 00:00:00 2001 From: entropyy0 Date: Tue, 3 Feb 2026 12:41:00 +1100 Subject: [PATCH] fix: handle --youtube apify when HTML fetch fails When using --youtube apify, the Apify provider was never attempted if the initial HTML fetch failed or returned empty content. The early `if (!html) return` exit (line 80 of youtube.ts before this change) short-circuited the entire transcript flow before Apify was reached, even though Apify doesn't need HTML at all. Move explicit apify mode handling before the HTML check so it runs regardless of HTML availability. Closes #51 --- .../content/transcript/providers/youtube.ts | 39 +++++++++--- tests/transcript.youtube-provider.test.ts | 62 +++++++++++++++++++ 2 files changed, 93 insertions(+), 8 deletions(-) diff --git a/packages/core/src/content/transcript/providers/youtube.ts b/packages/core/src/content/transcript/providers/youtube.ts index fa7e7d43..45c61d41 100644 --- a/packages/core/src/content/transcript/providers/youtube.ts +++ b/packages/core/src/content/transcript/providers/youtube.ts @@ -77,6 +77,36 @@ export const fetchTranscript = async ( ) } + // Handle explicit apify mode before HTML check — Apify doesn't need HTML. + // Fixes: when HTML fetch fails, explicit --youtube apify was skipped entirely. 
+ if (mode === 'apify') { + if (!options.apifyApiToken) { + throw new Error('Missing APIFY_API_TOKEN for --youtube apify') + } + pushHint('YouTube: fetching transcript (Apify)') + attemptedProviders.push('apify') + const apifyTranscript = await fetchTranscriptWithApify( + options.fetch, + options.apifyApiToken, + url + ) + if (apifyTranscript) { + return { + text: normalizeTranscriptText(apifyTranscript), + source: 'apify', + metadata: { provider: 'apify' }, + attemptedProviders, + } + } + attemptedProviders.push('unavailable') + return { + text: null, + source: 'unavailable', + metadata: { provider: 'youtube', reason: 'no_transcript_available' }, + attemptedProviders, + } + } + if (!html) { return { text: null, source: null, attemptedProviders } } @@ -249,14 +279,7 @@ export const fetchTranscript = async ( } } - // Explicit apify mode: allow forcing it, but require a token. - if (mode === 'apify') { - if (!options.apifyApiToken) { - throw new Error('Missing APIFY_API_TOKEN for --youtube apify') - } - const apifyResult = await tryApify('YouTube: fetching transcript (Apify)') - if (apifyResult) return apifyResult - } + // Note: explicit apify mode is handled before the HTML check (above). // Auto mode: if yt-dlp cannot run (no binary/credentials), fall back to Apify last-last. 
if (mode === 'auto' && !canRunYtDlp) { diff --git a/tests/transcript.youtube-provider.test.ts b/tests/transcript.youtube-provider.test.ts index 6df915f1..96f3f07e 100644 --- a/tests/transcript.youtube-provider.test.ts +++ b/tests/transcript.youtube-provider.test.ts @@ -67,6 +67,68 @@ describe('YouTube transcript provider module', () => { ).toEqual({ text: null, source: null, attemptedProviders: [] }) }) + it('uses apify mode even when HTML is null (fixes #51)', async () => { + apify.fetchTranscriptWithApify.mockResolvedValue('Hello from apify') + + const result = await fetchTranscript( + { + url: 'https://www.youtube.com/watch?v=abcdefghijk', + html: null, + resourceKey: null, + }, + { + ...baseOptions, + apifyApiToken: 'TOKEN', + youtubeTranscriptMode: 'apify', + } + ) + + expect(result.text).toBe('Hello from apify') + expect(result.source).toBe('apify') + expect(result.attemptedProviders).toEqual(['apify']) + expect(api.extractYoutubeiTranscriptConfig).not.toHaveBeenCalled() + expect(captions.fetchTranscriptFromCaptionTracks).not.toHaveBeenCalled() + expect(ytdlp.fetchTranscriptWithYtDlp).not.toHaveBeenCalled() + }) + + it('returns unavailable when apify mode fails with null HTML', async () => { + apify.fetchTranscriptWithApify.mockResolvedValue(null) + + const result = await fetchTranscript( + { + url: 'https://www.youtube.com/watch?v=abcdefghijk', + html: null, + resourceKey: null, + }, + { + ...baseOptions, + apifyApiToken: 'TOKEN', + youtubeTranscriptMode: 'apify', + } + ) + + expect(result.text).toBeNull() + expect(result.source).toBe('unavailable') + expect(result.attemptedProviders).toEqual(['apify', 'unavailable']) + }) + + it('throws when apify mode used without token and HTML is null', async () => { + await expect( + fetchTranscript( + { + url: 'https://www.youtube.com/watch?v=abcdefghijk', + html: null, + resourceKey: null, + }, + { + ...baseOptions, + apifyApiToken: null, + youtubeTranscriptMode: 'apify', + } + ) + ).rejects.toThrow(/Missing 
APIFY_API_TOKEN/i) + }) + it('uses apify-only mode and skips web + yt-dlp', async () => { apify.fetchTranscriptWithApify.mockResolvedValue('Hello from apify')