diff --git a/.gitignore b/.gitignore index 985e40a..c7f8b2c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ .env +.env.* +*.env .DS_Store node_modules/ data/ diff --git a/scripts/create-source.ts b/scripts/create-source.ts index bf14c97..8f0cff0 100644 --- a/scripts/create-source.ts +++ b/scripts/create-source.ts @@ -54,6 +54,7 @@ const apiBody = JSON.stringify(parsedPayload); // Auto-provision API key if missing, resolve base URL import { ensureKey, getBaseUrl } from "./ensure-key"; +import { openUrlInBrowser } from "./security"; const baseUrl = getBaseUrl(); const apiKey = await ensureKey(); if (!apiKey) { @@ -120,8 +121,7 @@ try { } // Open the live page in the user's browser automatically - const { $ } = await import("bun"); - await $`open ${result.source_url}`.quiet().nothrow(); + await openUrlInBrowser(result.source_url, new URL(baseUrl).hostname); result.run_id = runId; console.log(JSON.stringify(result)); diff --git a/scripts/diarize.ts b/scripts/diarize.ts index 94b748b..5d79817 100644 --- a/scripts/diarize.ts +++ b/scripts/diarize.ts @@ -54,6 +54,14 @@ function extractVideoId(url: string): string | null { return match?.[1] ?? null; } +function sanitizeCliUrl(url: string): string { + const trimmed = url.trim(); + if (!trimmed || trimmed.startsWith("-")) { + throw new Error("Invalid URL for command execution."); + } + return trimmed; +} + // --------------------------------------------------------------------------- // Timestamp math // --------------------------------------------------------------------------- @@ -92,9 +100,10 @@ interface YoutubeMeta { /** Fetch YouTube video metadata via yt-dlp --dump-json */ async function fetchYoutubeMeta(url: string): Promise { + const safeUrl = sanitizeCliUrl(url); const empty: YoutubeMeta = { publishedAt: null, title: null, channel: null, channelHandle: null, channelUrl: null }; try { - const result = await $`yt-dlp --dump-json --skip-download ${url}`.quiet().nothrow(); + const result = await $`yt-dlp --dump-json --skip-download -- ${safeUrl}`.quiet().nothrow(); if (result.exitCode !== 0) return empty; const meta = JSON.parse(result.stdout.toString()); @@ -123,12 +132,13 @@ async function fetchYoutubeMeta(url: string): Promise { } async function downloadAudio(url: string): Promise { + const safeUrl = sanitizeCliUrl(url); const videoId = extractVideoId(url) || "audio"; const outPath = join(tmpdir(), `diarize-${videoId}.mp3`); const { streamLog } = await import("./stream-log"); streamLog("Downloading audio..."); - const result = await $`yt-dlp --extract-audio --audio-format mp3 --audio-quality 5 -o ${outPath} ${url}` + const result = await $`yt-dlp --extract-audio --audio-format mp3 --audio-quality 5 -o ${outPath} -- ${safeUrl}` .quiet() .nothrow(); diff --git a/scripts/ensure-key.ts b/scripts/ensure-key.ts index 07ea8aa..b5b85c4 100644 --- a/scripts/ensure-key.ts +++ b/scripts/ensure-key.ts @@ -10,6 +10,7 @@ import { writeFileSync, existsSync, appendFileSync } from "fs"; import { getEnvSearchPaths, getPreferredEnvWritePath, readEnvValue } from "./runtime-paths"; +import { normalizeTrustedBaseUrl } from "./security"; /** Read a key from process.env or the nearest user/project .env context. */ export function loadKey(key: string): string | undefined { @@ -18,7 +19,10 @@ export function loadKey(key: string): string | undefined { /** Resolve the base URL for paste.trade API. */ export function getBaseUrl(): string { - return loadKey("PASTE_TRADE_URL") || loadKey("BOARD_URL") || loadKey("BELIEF_BOARD_URL") || "https://paste.trade"; + const configured = loadKey("PASTE_TRADE_URL") || loadKey("BOARD_URL") || loadKey("BELIEF_BOARD_URL"); + const { baseUrl, trusted, reason } = normalizeTrustedBaseUrl(configured); + if (!trusted) throw new Error(reason ?? "Invalid base URL configuration."); + return baseUrl; } /** @@ -30,11 +34,11 @@ export async function ensureKey(): Promise { const existing = loadKey("PASTE_TRADE_KEY"); if (existing) return existing; - // No key found — auto-provision - const baseUrl = getBaseUrl(); - console.error(`[paste.trade] No API key found. Creating your identity...`); - try { + // No key found — auto-provision + const baseUrl = getBaseUrl(); + console.error(`[paste.trade] No API key found. Creating your identity...`); + const res = await fetch(`${baseUrl}/api/keys`, { method: "POST", headers: { "Content-Type": "application/json" }, diff --git a/scripts/extract.ts b/scripts/extract.ts index c0526cb..1b4b7e2 100644 --- a/scripts/extract.ts +++ b/scripts/extract.ts @@ -22,6 +22,7 @@ import { mkdirSync } from "fs"; import { tmpdir } from "os"; import { join } from "path"; import { getRuntimeSourceDir, readEnvValue } from "./runtime-paths"; +import { fetchWithSafeRedirects, parseSafeExternalUrl } from "./security"; // --------------------------------------------------------------------------- // X API tokens (optional) @@ -256,6 +257,14 @@ function extractVideoId(url: string): string | null { return m?.[1] ?? null; } +function sanitizeCliUrl(url: string): string { + const trimmed = url.trim(); + if (!trimmed || trimmed.startsWith("-")) { + throw new Error("Invalid URL for command execution."); + } + return trimmed; +} + // --------------------------------------------------------------------------- // YouTube transcript via yt-dlp // --------------------------------------------------------------------------- @@ -288,9 +297,10 @@ interface YoutubeMeta { /** Fetch YouTube video metadata via yt-dlp --dump-json */ async function fetchYoutubeMeta(url: string): Promise { + const safeUrl = sanitizeCliUrl(url); const empty: YoutubeMeta = { publishedAt: null, title: null, channel: null, channelHandle: null, channelUrl: null, description: null, durationSeconds: null }; try { - const result = await $`yt-dlp --dump-json --skip-download ${url}`.quiet().nothrow(); + const result = await $`yt-dlp --dump-json --skip-download -- ${safeUrl}`.quiet().nothrow(); if (result.exitCode !== 0) return empty; const meta = JSON.parse(result.stdout.toString()); @@ -333,6 +343,7 @@ async function streamStatus(message: string): Promise { } async function extractYoutube(url: string): Promise { + const safeUrl = sanitizeCliUrl(url); const videoId = extractVideoId(url); if (!videoId) throw new Error("Could not extract video ID from URL"); @@ -353,7 +364,7 @@ async function extractYoutube(url: string): Promise { const capFile = join(tmpdir(), `yt-transcript-${videoId}-${videoId}.en.json3`); // Step 2: attempt caption download — quiet captures stderr for diagnosis - const result = await $`yt-dlp --write-auto-sub --write-sub --skip-download --sub-lang en --sub-format json3 -o ${outTemplate} ${url}` + const result = await $`yt-dlp --write-auto-sub --write-sub --skip-download --sub-lang en --sub-format json3 -o ${outTemplate} -- ${safeUrl}` .quiet() .nothrow(); @@ -413,7 +424,7 @@ async function extractYoutube(url: string): Promise { } // Step 3: fetch metadata (parallel-safe, runs while we parse captions) - const metaPromise = fetchYoutubeMeta(url); + const metaPromise = fetchYoutubeMeta(safeUrl); // Step 4: parse the caption file const data = (await Bun.file(capFile).json()) as { @@ -1189,20 +1200,21 @@ function extractArticleMetadataFromHtml( } async function fetchArticleMetadata(url: string): Promise { + const safeUrl = parseSafeExternalUrl(url)?.href; + if (!safeUrl) return null; try { - const res = await fetch(url, { + const res = await fetchWithSafeRedirects(safeUrl, { headers: { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36", Accept: "text/html,application/xhtml+xml,*/*", }, - redirect: "follow", }); if (!res.ok) return null; const body = await res.text(); if (!body || body.length < 50) return null; const looksLikeHtml = / { + const safeUrl = parseSafeExternalUrl(url)?.href; + if (!safeUrl) { + return JSON.stringify({ source: "text", url, error: "Blocked unsafe or invalid URL." }); + } streamStatus("Extracting article..."); - const metadataPromise = fetchArticleMetadata(url); + const metadataPromise = fetchArticleMetadata(safeUrl); // Try markdown.new first (clean article extraction, handles JS-rendered pages) try { - const mdRes = await fetch(`https://markdown.new/${url}`, { + const mdRes = await fetch(`https://markdown.new/${encodeURIComponent(safeUrl)}`, { headers: { Accept: "text/markdown" }, }); if (mdRes.ok) { @@ -1255,7 +1271,7 @@ async function extractText(url: string): Promise { if (md.length > 100) { const images = extractImagesFromMarkdown(md); const metadata = await metadataPromise; - const payload = buildArticlePayload("markdown.new", url, md, images, metadata); + const payload = buildArticlePayload("markdown.new", safeUrl, md, images, metadata); const parsed = JSON.parse(payload) as { word_count?: number }; console.error(` markdown.new: ${parsed.word_count ?? 0} words, ${images.length} images extracted`); return payload; @@ -1266,23 +1282,22 @@ async function extractText(url: string): Promise { } // Fallback: raw fetch + regex strip - const res = await fetch(url, { + const res = await fetchWithSafeRedirects(safeUrl, { headers: { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36", Accept: "text/html,application/xhtml+xml", }, - redirect: "follow", }); if (!res.ok) { - return JSON.stringify({ source: "text", url, error: `HTTP ${res.status}` }); + return JSON.stringify({ source: "text", url: safeUrl, error: `HTTP ${res.status}` }); } const html = await res.text(); // Extract images before stripping HTML - const images = extractImagesFromHtml(html, url); - const htmlMetadata = extractArticleMetadataFromHtml(html, url, undefined, images); + const images = extractImagesFromHtml(html, safeUrl); + const htmlMetadata = extractArticleMetadataFromHtml(html, safeUrl, undefined, images); const text = html .replace(//gi, "") @@ -1293,7 +1308,7 @@ async function extractText(url: string): Promise { .replace(/\s+/g, " ") .trim(); - const payload = buildArticlePayload("text", url, text, images, htmlMetadata); + const payload = buildArticlePayload("text", safeUrl, text, images, htmlMetadata); const parsed = JSON.parse(payload) as { word_count?: number }; console.error(` raw fetch: ${parsed.word_count ?? 0} words, ${images.length} images extracted`); return payload; @@ -1342,10 +1357,12 @@ async function main() { const img = parsed.images[i]; const imgUrl = typeof img === "string" ? img : img?.url; if (!imgUrl) continue; + const safeImgUrl = parseSafeExternalUrl(imgUrl)?.href; + if (!safeImgUrl) continue; try { - const imgRes = await fetch(imgUrl); + const imgRes = await fetchWithSafeRedirects(safeImgUrl); if (imgRes.ok) { - const ext = imgUrl.match(/\.(jpg|jpeg|png|gif|webp)/i)?.[1] ?? "jpg"; + const ext = safeImgUrl.match(/\.(jpg|jpeg|png|gif|webp)/i)?.[1] ?? "jpg"; const imgPath = join(dir, `source-${hash}-img${i}.${ext}`); await Bun.write(imgPath, await imgRes.arrayBuffer()); imageFiles.push(imgPath); diff --git a/scripts/security.ts b/scripts/security.ts new file mode 100644 index 0000000..6ca1ca2 --- /dev/null +++ b/scripts/security.ts @@ -0,0 +1,126 @@ +import { spawn } from "child_process"; +import { isIP } from "net"; +import { relative, resolve } from "path"; + +const DEFAULT_BASE_URL = "https://paste.trade"; + +function isPrivateIp(hostname: string): boolean { + if (isIP(hostname) === 0) return false; + const lower = hostname.toLowerCase(); + + // IPv6 local/link-local/loopback + if (lower === "::1" || lower.startsWith("fe80:") || lower.startsWith("fc") || lower.startsWith("fd")) { + return true; + } + + // IPv4 private/link-local/loopback ranges + const parts = hostname.split(".").map((n) => Number(n)); + if (parts.length !== 4 || parts.some(Number.isNaN)) return true; + const [a, b] = parts; + if (a === 10 || a === 127) return true; + if (a === 169 && b === 254) return true; + if (a === 172 && b >= 16 && b <= 31) return true; + if (a === 192 && b === 168) return true; + return false; +} + +function hasForbiddenHost(hostname: string): boolean { + const h = hostname.toLowerCase(); + return ( + h === "localhost" || + h.endsWith(".localhost") || + h.endsWith(".local") || + h.endsWith(".internal") || + isPrivateIp(h) + ); +} + +export function parseSafeExternalUrl(raw: string): URL | null { + try { + const url = new URL(raw); + if (url.protocol !== "https:") return null; + if (hasForbiddenHost(url.hostname)) return null; + return url; + } catch { + return null; + } +} + +export function normalizeTrustedBaseUrl( + configured?: string, +): { baseUrl: string; trusted: boolean; reason?: string } { + const candidate = (configured ?? "").trim() || DEFAULT_BASE_URL; + try { + const parsed = new URL(candidate); + if (parsed.protocol === "https:" || parsed.protocol === "http:") { + return { baseUrl: parsed.origin, trusted: true }; + } + return { + baseUrl: candidate, + trusted: false, + reason: `Invalid base URL "${candidate}" blocked. Only http/https are allowed.`, + }; + } catch { + return { + baseUrl: candidate, + trusted: false, + reason: `Invalid base URL "${candidate}" blocked.`, + }; + } +} + +export async function openUrlInBrowser(rawUrl: string, allowedHost?: string): Promise { + let parsed: URL; + try { + parsed = new URL(rawUrl); + } catch { + return false; + } + + if (parsed.protocol !== "https:") return false; + if (allowedHost && parsed.hostname !== allowedHost) return false; + + const cmd = + process.platform === "darwin" + ? { bin: "open", args: [parsed.href] } + : process.platform === "linux" + ? { bin: "xdg-open", args: [parsed.href] } + : { bin: "rundll32", args: ["url.dll,FileProtocolHandler", parsed.href] }; + + return await new Promise((resolveDone) => { + const child = spawn(cmd.bin, cmd.args, { stdio: "ignore", shell: false }); + child.on("error", () => resolveDone(false)); + child.on("exit", (code) => resolveDone(code === 0)); + }); +} + +export function ensurePathInsideDir(filePath: string, allowedDir: string): string | null { + const resolvedFile = resolve(filePath); + const resolvedDir = resolve(allowedDir); + const rel = relative(resolvedDir, resolvedFile); + if (rel === "" || (!rel.startsWith("..") && rel !== ".")) return resolvedFile; + return null; +} + +export async function fetchWithSafeRedirects( + inputUrl: string, + init?: RequestInit, + maxRedirects = 5, +): Promise { + let current = parseSafeExternalUrl(inputUrl); + if (!current) throw new Error("Blocked unsafe or invalid URL."); + + for (let i = 0; i <= maxRedirects; i++) { + const res = await fetch(current.href, { ...init, redirect: "manual" }); + if (res.status >= 300 && res.status < 400) { + const location = res.headers.get("location"); + if (!location) return res; + const next = parseSafeExternalUrl(new URL(location, current).href); + if (!next) throw new Error("Blocked unsafe redirect target."); + current = next; + continue; + } + return res; + } + throw new Error("Too many redirects."); +} diff --git a/scripts/signin.ts b/scripts/signin.ts index b792328..51026d1 100644 --- a/scripts/signin.ts +++ b/scripts/signin.ts @@ -8,7 +8,7 @@ */ import { loadKey, getBaseUrl } from "./ensure-key"; -import { execSync } from "child_process"; +import { openUrlInBrowser } from "./security"; export async function signIn(): Promise { const apiKey = loadKey("PASTE_TRADE_KEY"); @@ -32,16 +32,11 @@ export async function signIn(): Promise { } const result = await res.json() as { url: string; expires_in: number }; + const allowedHost = new URL(baseUrl).hostname; // Open in browser try { - if (process.platform === "darwin") { - execSync(`open "${result.url}"`); - } else if (process.platform === "linux") { - execSync(`xdg-open "${result.url}"`); - } else { - execSync(`start "${result.url}"`); - } + await openUrlInBrowser(result.url, allowedHost); } catch { // If open fails, the user can manually visit the URL } diff --git a/scripts/source-excerpt.ts b/scripts/source-excerpt.ts index da4a7fc..e6d541b 100644 --- a/scripts/source-excerpt.ts +++ b/scripts/source-excerpt.ts @@ -18,6 +18,8 @@ import { readFileSync } from "fs"; import { applyRunId, extractRunIdArg } from "./run-id"; import { getStreamContext, pushEvent } from "./stream-context"; +import { getRuntimeSourceDir } from "./runtime-paths"; +import { ensurePathInsideDir } from "./security"; // ── Args ────────────────────────────────────────────────────────── @@ -61,7 +63,11 @@ function parseArgs(argv: string[]): ParsedArgs { // ── Source loading ──────────────────────────────────────────────── function loadSourceText(filePath: string): string { - const raw = readFileSync(filePath, "utf8"); + const allowedPath = ensurePathInsideDir(filePath, getRuntimeSourceDir()); + if (!allowedPath) { + throw new Error("Source file path must be inside the runtime sources directory."); + } + const raw = readFileSync(allowedPath, "utf8"); // Source files are JSON with a "transcript" or "text" field try { const parsed = JSON.parse(raw); @@ -252,7 +258,13 @@ const { runId, args: rawArgs } = extractRunIdArg(process.argv); applyRunId(runId); const args = parseArgs(rawArgs); -const sourceText = loadSourceText(args.file); +let sourceText = ""; +try { + sourceText = loadSourceText(args.file); +} catch (err: any) { + console.error(`[source-excerpt] ${err?.message ?? "Invalid source file path"}`); + process.exit(1); +} if (!sourceText.trim()) { console.error("[source-excerpt] Source file is empty or unreadable"); diff --git a/scripts/upload-source-text.ts b/scripts/upload-source-text.ts index 166431c..fb861d7 100644 --- a/scripts/upload-source-text.ts +++ b/scripts/upload-source-text.ts @@ -7,6 +7,8 @@ */ import { readFileSync } from "fs"; +import { getRuntimeSourceDir } from "./runtime-paths"; +import { ensurePathInsideDir } from "./security"; import { getAuthedBase, logHttp, readJsonInput, readResponseOrExit } from "./common"; const args = process.argv.slice(2); @@ -27,7 +29,12 @@ if (fileIdx !== -1) { console.error("[upload-source-text] --file requires a path"); process.exit(1); } - const rawText = readFileSync(filePath, "utf8"); + const allowedPath = ensurePathInsideDir(filePath, getRuntimeSourceDir()); + if (!allowedPath) { + console.error("[upload-source-text] --file must point to a file inside the runtime sources directory."); + process.exit(1); + } + const rawText = readFileSync(allowedPath, "utf8"); payload = { raw_text: rawText, provider: provider || "transcript_file",