diff --git a/CHANGELOG.md b/CHANGELOG.md
index 487b9feb..4f85c3f8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,6 +21,7 @@
 - Transcripts: `--timestamps` adds segment-level timings (`transcriptSegments` + `transcriptTimedText`) for YouTube, podcasts, and embedded captions.
 - Media-aware summarization in the Side Panel: Page vs Video/Audio dropdown, automatic media preference on video sites, plus visible word count/duration.
 - CLI: transcribe local audio/video files with mtime-aware transcript cache invalidation (thanks @mvance!).
+- CLI: add Cursor Agent CLI provider (`cli/agent`, `--cli agent`).
 - Browser extension: add Firefox sidebar build + multi-browser config (#31, thanks @vlnd0).
 - Chrome automation: add artifacts tool + REPL helpers for persistent session files (notes/JSON/CSV) and downloads.
 - Chrome automation: expand navigate tool with list/switch tab support and return matching skills after navigation.
diff --git a/README.md b/README.md
index 1631c028..7ab73e3a 100644
--- a/README.md
+++ b/README.md
@@ -243,7 +243,7 @@ Use `summarize --help` or `summarize help` for the full help text.
 - `--length short|medium|long|xl|xxl|s|m|l|`
 - `--language, --lang <lang>`: output language (`auto` = match source)
 - `--max-output-tokens <n>`: hard cap for LLM output tokens
-- `--cli [provider]`: use a CLI provider (`--model cli/<provider>`). If omitted, uses auto selection with CLI enabled.
+- `--cli [provider]`: use a CLI provider (`--model cli/<provider>`). Supports `claude`, `gemini`, `codex`, `agent`. If omitted, uses auto selection with CLI enabled.
 - `--stream auto|on|off`: stream LLM output (`auto` = TTY only; disabled in `--json` mode)
 - `--plain`: keep raw output (no ANSI/OSC Markdown rendering)
 - `--no-color`: disable ANSI colors
@@ -272,7 +272,7 @@ Why: CLI adds ~4s latency per attempt and higher variance.
 
 Shortcut: `--cli` (with no provider) uses auto selection with CLI enabled.
 When enabled, auto prepends CLI attempts in the order listed in `cli.enabled`
-(recommended: `["gemini"]`), then tries the native provider candidates
+(recommended: `["gemini"]` or `["agent"]`), then tries the native provider candidates
 (with OpenRouter fallbacks when configured).
 
 Enable CLI attempts:
diff --git a/docs/cli.md b/docs/cli.md
index 44753a8d..1307b726 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -6,13 +6,14 @@ read_when:
 
 # CLI models
 
-Summarize can use installed CLIs (Claude, Codex, Gemini) as local model backends.
+Summarize can use installed CLIs (Claude, Codex, Gemini, Cursor Agent) as local model backends.
 
 ## Model ids
 
 - `cli/claude/<model>` (e.g. `cli/claude/sonnet`)
 - `cli/codex/<model>` (e.g. `cli/codex/gpt-5.2`)
 - `cli/gemini/<model>` (e.g. `cli/gemini/gemini-3-flash-preview`)
+- `cli/agent/<model>` (e.g. `cli/agent/gpt-5.2`)
 
 Use `--cli [provider]` (case-insensitive) for the provider default, or `--model cli/<provider>/<model>` to pin a model.
 If `--cli` is provided without a provider, auto selection is used with CLI enabled.
@@ -22,12 +23,12 @@ If `--cli` is provided without a provider, auto selection is used with CLI enabl
 
 Auto mode does **not** use CLIs unless you set `cli.enabled` in config.
 Why: CLI adds ~4s latency per attempt and higher variance.
-Recommendation: enable only Gemini unless you have a reason to add others.
+Recommendation: enable only Gemini or Agent unless you have a reason to add others.
 
 Gemini CLI performance: summarize sets `GEMINI_CLI_NO_RELAUNCH=true` for Gemini CLI runs
 to avoid a costly self-relaunch (can be overridden by setting it yourself).
 
 When enabled, auto prepends CLI attempts in the order listed in `cli.enabled`
-(recommended: `["gemini"]`).
+(recommended: `["gemini"]` or `["agent"]`).
 
 Enable CLI attempts:
@@ -52,6 +53,7 @@ Note: when `cli.enabled` is set, it also acts as an allowlist for explicit `--cl
 Binary lookup:
 
 - `CLAUDE_PATH`, `CODEX_PATH`, `GEMINI_PATH` (optional overrides)
+- `AGENT_PATH` (optional override)
 - Otherwise uses `PATH`
 
 ## Attachments (images/files)
@@ -62,19 +64,24 @@ path-based prompt and enables the required tool flags:
 - Claude: `--tools Read --dangerously-skip-permissions`
 - Gemini: `--yolo` and `--include-directories <dir>`
 - Codex: `codex exec --output-last-message ...` and `-i <image>` for images
+- Agent: uses built-in file tools in `agent --print` mode (no extra flags)
 
 ## Config
 
 ```json
 {
   "cli": {
-    "enabled": ["claude", "gemini", "codex"],
+    "enabled": ["claude", "gemini", "codex", "agent"],
     "codex": { "model": "gpt-5.2" },
     "gemini": { "model": "gemini-3-flash-preview", "extraArgs": ["--verbose"] },
     "claude": {
       "model": "sonnet",
       "binary": "/usr/local/bin/claude",
       "extraArgs": ["--verbose"]
+    },
+    "agent": {
+      "model": "gpt-5.2",
+      "binary": "/usr/local/bin/agent"
     }
   }
 }
 ```
@@ -84,6 +91,7 @@ Notes:
 
 - CLI output is treated as text only (no token accounting).
 - If a CLI call fails, auto mode falls back to the next candidate.
+- Cursor Agent CLI uses the `agent` binary and relies on Cursor CLI auth (login or `CURSOR_API_KEY`).
 
 ## Generate free preset (OpenRouter)
diff --git a/docs/config.md b/docs/config.md
index 2c7a6f19..d91dfe10 100644
--- a/docs/config.md
+++ b/docs/config.md
@@ -272,9 +272,10 @@ Examples:
 ```json
 {
   "cli": {
-    "enabled": ["gemini"],
+    "enabled": ["gemini", "agent"],
     "codex": { "model": "gpt-5.2" },
-    "claude": { "binary": "/usr/local/bin/claude", "extraArgs": ["--verbose"] }
+    "claude": { "binary": "/usr/local/bin/claude", "extraArgs": ["--verbose"] },
+    "agent": { "binary": "/usr/local/bin/agent", "model": "gpt-5.2" }
   }
 }
 ```
@@ -282,7 +283,7 @@ Notes:
 
 - `cli.enabled` is an allowlist (auto uses CLIs only when set; explicit `--cli` / `--model cli/...` must be included).
-- Recommendation: keep `cli.enabled` to `["gemini"]` unless you have a reason to add others (extra latency/variance).
+- Recommendation: keep `cli.enabled` to `["gemini"]` or `["agent"]` unless you have a reason to add others (extra latency/variance).
 - `cli.<provider>.binary` overrides CLI binary discovery.
 - `cli.<provider>.extraArgs` appends extra CLI args.
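The docs above describe how the new `agent` provider's binary is located (`cli.agent.binary`, then `AGENT_PATH`, otherwise `PATH`). A minimal TypeScript sketch of that precedence, mirroring the `resolveCliBinary` change later in this patch; the function name and the standalone config shape here are illustrative, not part of the patch:

```ts
type CliProviderConfig = { binary?: string }

// Sketch of the lookup order the patch implements for the agent provider:
// explicit config binary, then AGENT_PATH, then the SUMMARIZE_CLI_AGENT
// override, and finally the bare `agent` binary resolved via PATH.
function resolveAgentBinarySketch(
  config: CliProviderConfig | undefined,
  env: Record<string, string | undefined>
): string {
  if (config?.binary && config.binary.trim().length > 0) return config.binary.trim()
  const fromPathVar = env.AGENT_PATH?.trim()
  if (fromPathVar) return fromPathVar
  const fromCliVar = env.SUMMARIZE_CLI_AGENT?.trim()
  if (fromCliVar) return fromCliVar
  return 'agent' // resolved via PATH
}
```

The `config` argument corresponds to the `"agent": { "binary": ... }` block shown in the JSON examples above.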
diff --git a/docs/llm.md b/docs/llm.md
index 7dba2b7f..914225e6 100644
--- a/docs/llm.md
+++ b/docs/llm.md
@@ -30,7 +30,7 @@ installed, auto mode can use local CLI models when `cli.enabled` is set (see `do
 - `ANTHROPIC_API_KEY` (required for `anthropic/...` models)
 - `ANTHROPIC_BASE_URL` (optional; override Anthropic API endpoint)
 - `SUMMARIZE_MODEL` (optional; overrides default model selection)
-- `CLAUDE_PATH` / `CODEX_PATH` / `GEMINI_PATH` (optional; override CLI binary paths)
+- `CLAUDE_PATH` / `CODEX_PATH` / `GEMINI_PATH` / `AGENT_PATH` (optional; override CLI binary paths)
 
 ## Flags
 
@@ -39,6 +39,7 @@ installed, auto mode can use local CLI models when `cli.enabled` is set (see `do
 - `--model `
   - `cli/codex/gpt-5.2`
   - `cli/claude/sonnet`
   - `cli/gemini/gemini-3-flash-preview`
+  - `cli/agent/gpt-5.2`
   - `google/gemini-3-flash-preview`
   - `openai/gpt-5-mini`
   - `zai/glm-4.7`
@@ -47,7 +48,7 @@ installed, auto mode can use local CLI models when `cli.enabled` is set (see `do
   - `anthropic/claude-sonnet-4-5`
   - `openrouter/meta-llama/llama-3.3-70b-instruct:free` (force OpenRouter)
 - `--cli [provider]`
-  - Examples: `--cli claude`, `--cli Gemini`, `--cli codex` (equivalent to `--model cli/<provider>`); `--cli` alone uses auto selection with CLI enabled.
+  - Examples: `--cli claude`, `--cli Gemini`, `--cli codex`, `--cli agent` (equivalent to `--model cli/<provider>`); `--cli` alone uses auto selection with CLI enabled.
 - `--model auto`
   - See `docs/model-auto.md`
 - `--model `
diff --git a/src/config.ts b/src/config.ts
index d1e6beca..36e74a19 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -6,7 +6,7 @@ import { isCliThemeName, listCliThemes } from './tty/theme.js'
 
 export type AutoRuleKind = 'text' | 'website' | 'youtube' | 'image' | 'video' | 'file'
 export type VideoMode = 'auto' | 'transcript' | 'understand'
-export type CliProvider = 'claude' | 'codex' | 'gemini'
+export type CliProvider = 'claude' | 'codex' | 'gemini' | 'agent'
 export type CliProviderConfig = {
   binary?: string
   extraArgs?: string[]
@@ -17,6 +17,7 @@ export type CliConfig = {
   claude?: CliProviderConfig
   codex?: CliProviderConfig
   gemini?: CliProviderConfig
+  agent?: CliProviderConfig
 }
 
 export type OpenAiConfig = {
@@ -215,7 +216,7 @@ function parseAutoRuleKind(value: unknown): AutoRuleKind | null {
 
 function parseCliProvider(value: unknown, path: string): CliProvider {
   const trimmed = typeof value === 'string' ? value.trim().toLowerCase() : ''
-  if (trimmed === 'claude' || trimmed === 'codex' || trimmed === 'gemini') {
+  if (trimmed === 'claude' || trimmed === 'codex' || trimmed === 'gemini' || trimmed === 'agent') {
    return trimmed as CliProvider
  }
  throw new Error(`Invalid config file ${path}: unknown CLI provider "${String(value)}".`)
@@ -852,6 +853,7 @@ export function loadSummarizeConfig({ env }: { env: Record
 0 ? value.promptOverride.trim()
@@ -868,6 +870,7 @@ export function loadSummarizeConfig({ env }: { env: Record
 const DEFAULT_BINARIES: Record<CliProvider, string> = {
   claude: 'claude',
   codex: 'codex',
   gemini: 'gemini',
+  agent: 'agent',
+}
+
+const PROVIDER_PATH_ENV: Record<CliProvider, string> = {
+  claude: 'CLAUDE_PATH',
+  codex: 'CODEX_PATH',
+  gemini: 'GEMINI_PATH',
+  agent: 'AGENT_PATH',
 }
 
 type RunCliModelOptions = {
@@ -50,8 +58,16 @@ export function resolveCliBinary(
   env: Record
 ): string {
   const providerConfig =
-    provider === 'claude' ? config?.claude : provider === 'codex' ? config?.codex : config?.gemini
+    provider === 'claude'
+      ? config?.claude
+      : provider === 'codex'
+        ? config?.codex
+        : provider === 'gemini'
+          ? config?.gemini
+          : config?.agent
   if (isNonEmptyString(providerConfig?.binary)) return providerConfig.binary.trim()
+  const pathKey = PROVIDER_PATH_ENV[provider]
+  if (isNonEmptyString(env[pathKey])) return env[pathKey].trim()
   const envKey = `SUMMARIZE_CLI_${provider.toUpperCase()}`
   if (isNonEmptyString(env[envKey])) return env[envKey].trim()
   return DEFAULT_BINARIES[provider]
@@ -319,6 +335,45 @@ export async function runCliModel({
     throw new Error('CLI returned empty output')
   }
 
+  if (provider === 'agent') {
+    args.push('--print', '--output-format', 'json')
+    if (!allowTools) {
+      args.push('--mode', 'ask')
+    }
+    if (model && model.trim().length > 0) {
+      args.push('--model', model.trim())
+    }
+    args.push(prompt)
+    const { stdout } = await execCliWithInput({
+      execFileImpl: execFileFn,
+      cmd: binary,
+      args,
+      input: '',
+      timeoutMs,
+      env: effectiveEnv,
+      cwd,
+    })
+    const trimmed = stdout.trim()
+    if (!trimmed) {
+      throw new Error('CLI returned empty output')
+    }
+    const parsed = parseJsonFromOutput(trimmed)
+    if (parsed && typeof parsed === 'object') {
+      const payload = parsed as Record<string, unknown>
+      const resultText =
+        payload.result ??
+        payload.response ??
+        payload.output ??
+        payload.message ??
+        payload.text ??
+        null
+      if (typeof resultText === 'string' && resultText.trim().length > 0) {
+        return { text: resultText.trim(), usage: null, costUsd: null }
+      }
+    }
+    return { text: trimmed, usage: null, costUsd: null }
+  }
+
   if (model && model.trim().length > 0) {
     args.push('--model', model.trim())
   }
diff --git a/src/model-auto.ts b/src/model-auto.ts
index 186e53d6..9f145d68 100644
--- a/src/model-auto.ts
+++ b/src/model-auto.ts
@@ -36,6 +36,7 @@ export type AutoModelAttempt = {
     | 'CLI_CLAUDE'
     | 'CLI_CODEX'
     | 'CLI_GEMINI'
+    | 'CLI_AGENT'
   debug: string
 }
 
@@ -197,6 +198,7 @@ const DEFAULT_CLI_MODELS: Record<CliProvider, string> = {
   claude: 'sonnet',
   codex: 'gpt-5.2',
   gemini: 'gemini-3-flash-preview',
+  agent: 'gpt-5.2',
 }
 
 function isCliProviderEnabled(provider: CliProvider, config: SummarizeConfig | null): boolean {
@@ -223,7 +225,8 @@ function parseCliCandidate(
     .map((entry) => entry.trim())
   if (parts.length < 2) return null
   const provider = parts[1]?.toLowerCase()
-  if (provider !== 'claude' && provider !== 'codex' && provider !== 'gemini') return null
+  if (provider !== 'claude' && provider !== 'codex' && provider !== 'gemini' && provider !== 'agent')
+    return null
   const model = parts.slice(2).join('/').trim()
   return { provider, model: model.length > 0 ? model : null }
 }
@@ -243,7 +246,9 @@ function requiredEnvForCandidate(modelId: string): AutoModelAttempt['requiredEnv
       ? 'CLI_CODEX'
       : parsed.provider === 'gemini'
         ? 'CLI_GEMINI'
-        : 'CLI_CLAUDE'
+        : parsed.provider === 'agent'
+          ? 'CLI_AGENT'
+          : 'CLI_CLAUDE'
   }
   if (isCandidateOpenRouter(modelId)) return 'OPENROUTER_API_KEY'
   const parsed = parseGatewayStyleModelId(normalizeGatewayStyleModelId(modelId))
@@ -365,7 +370,9 @@ function prependCliCandidates({
         ? cli?.gemini?.model
         : provider === 'codex'
           ? cli?.codex?.model
-          : cli?.claude?.model
+          : provider === 'agent'
+            ? cli?.agent?.model
+            : cli?.claude?.model
     add(provider, modelOverride)
   }
   if (cliCandidates.length === 0) return candidates
diff --git a/src/model-spec.ts b/src/model-spec.ts
index 78ea6e19..4f01c92a 100644
--- a/src/model-spec.ts
+++ b/src/model-spec.ts
@@ -5,6 +5,7 @@ const DEFAULT_CLI_MODELS: Record<CliProvider, string> = {
   claude: 'sonnet',
   codex: 'gpt-5.2',
   gemini: 'gemini-3-flash-preview',
+  agent: 'gpt-5.2',
 }
 
 export type FixedModelSpec =
@@ -39,7 +40,7 @@ export type FixedModelSpec =
       llmModelId: null
       openrouterProviders: null
       forceOpenRouter: false
-      requiredEnv: 'CLI_CLAUDE' | 'CLI_CODEX' | 'CLI_GEMINI'
+      requiredEnv: 'CLI_CLAUDE' | 'CLI_CODEX' | 'CLI_GEMINI' | 'CLI_AGENT'
       cliProvider: CliProvider
       cliModel: string | null
     }
@@ -100,14 +101,25 @@ export function parseRequestedModelId(raw: string): RequestedModel {
     .map((part) => part.trim())
     .filter((part) => part.length > 0)
   const providerRaw = parts[1]?.toLowerCase() ?? ''
-  if (providerRaw !== 'claude' && providerRaw !== 'codex' && providerRaw !== 'gemini') {
+  if (
+    providerRaw !== 'claude' &&
+    providerRaw !== 'codex' &&
+    providerRaw !== 'gemini' &&
+    providerRaw !== 'agent'
+  ) {
     throw new Error(`Invalid CLI model id "${trimmed}". Expected cli/<provider>/<model>.`)
   }
   const cliProvider = providerRaw as CliProvider
   const requestedModel = parts.slice(2).join('/').trim()
   const cliModel = requestedModel.length > 0 ? requestedModel : DEFAULT_CLI_MODELS[cliProvider]
   const requiredEnv =
-    cliProvider === 'claude' ? 'CLI_CLAUDE' : cliProvider === 'codex' ? 'CLI_CODEX' : 'CLI_GEMINI'
+    cliProvider === 'claude'
+      ? 'CLI_CLAUDE'
+      : cliProvider === 'codex'
+        ? 'CLI_CODEX'
+        : cliProvider === 'gemini'
+          ? 'CLI_GEMINI'
+          : 'CLI_AGENT'
   const userModelId = `cli/${cliProvider}/${cliModel}`
   return {
     kind: 'fixed',
diff --git a/src/run/env.ts b/src/run/env.ts
index bad649e6..ceac960d 100644
--- a/src/run/env.ts
+++ b/src/run/env.ts
@@ -50,7 +50,7 @@ export function resolveCliAvailability({
   config: ConfigForCli
 }): Partial> {
   const cliConfig = config?.cli ?? null
-  const providers: CliProvider[] = ['claude', 'codex', 'gemini']
+  const providers: CliProvider[] = ['claude', 'codex', 'gemini', 'agent']
   const availability: Partial> = {}
   for (const provider of providers) {
     if (isCliDisabled(provider, cliConfig)) {
@@ -72,7 +72,7 @@ export function parseCliUserModelId(modelId: string): {
     .split('/')
     .map((part) => part.trim())
   const provider = parts[1]?.toLowerCase()
-  if (provider !== 'claude' && provider !== 'codex' && provider !== 'gemini') {
+  if (provider !== 'claude' && provider !== 'codex' && provider !== 'gemini' && provider !== 'agent') {
     throw new Error(`Invalid CLI model id "${modelId}". Expected cli/<provider>/<model>.`)
   }
   const model = parts.slice(2).join('/').trim()
@@ -81,7 +81,7 @@ export function parseCliProviderArg(raw: string): CliProvider {
   const normalized = raw.trim().toLowerCase()
-  if (normalized === 'claude' || normalized === 'codex' || normalized === 'gemini') {
+  if (normalized === 'claude' || normalized === 'codex' || normalized === 'gemini' || normalized === 'agent') {
     return normalized as CliProvider
   }
   throw new Error(`Unsupported --cli: ${raw}`)
diff --git a/src/run/flows/url/types.ts b/src/run/flows/url/types.ts
index d777c3a9..b3a96a03 100644
--- a/src/run/flows/url/types.ts
+++ b/src/run/flows/url/types.ts
@@ -1,5 +1,5 @@
 import type { CacheState } from '../../../cache.js'
-import type { SummarizeConfig } from '../../../config.js'
+import type { CliProvider, SummarizeConfig } from '../../../config.js'
 import type {
   ExtractedLinkContent,
   LinkPreviewProgressEvent,
@@ -78,7 +78,7 @@ export type UrlFlowModel = {
   desiredOutputTokens: number | null
   configForModelSelection: SummarizeConfig | null
   envForAuto: Record
-  cliAvailability: Partial>
+  cliAvailability: Partial>
   openaiUseChatCompletions: boolean
   openaiWhisperUsdPerMinute: number
   apiStatus: {
diff --git a/src/run/help.ts b/src/run/help.ts
index cf6daca0..49f98353 100644
--- a/src/run/help.ts
+++ b/src/run/help.ts
@@ -126,7 +126,7 @@ export function buildProgram() {
     .addOption(
       new Option(
         '--cli [provider]',
-        'Use a CLI provider: claude, gemini, codex (equivalent to --model cli/<provider>). If omitted, use auto selection with CLI enabled.'
+        'Use a CLI provider: claude, gemini, codex, agent (equivalent to --model cli/<provider>). If omitted, use auto selection with CLI enabled.'
       )
     )
     .option('--extract', 'Print extracted content and exit (no LLM summary)', false)
@@ -261,6 +261,7 @@ ${heading('Env Vars')}
   CLAUDE_PATH optional (path to Claude CLI binary)
   CODEX_PATH optional (path to Codex CLI binary)
   GEMINI_PATH optional (path to Gemini CLI binary)
+  AGENT_PATH optional (path to Cursor Agent CLI binary)
   SUMMARIZE_MODEL optional (overrides default model selection)
   SUMMARIZE_THEME optional (${CLI_THEME_NAMES.join(', ')})
   SUMMARIZE_TRUECOLOR optional (force 24-bit color)
diff --git a/src/run/run-config.ts b/src/run/run-config.ts
index d70ebaa0..fe2a7cd2 100644
--- a/src/run/run-config.ts
+++ b/src/run/run-config.ts
@@ -57,7 +57,7 @@ export function resolveConfigState({
   const cliEnabledOverride: CliProvider[] | null = (() => {
     if (!cliFlagPresent || cliProviderArg) return null
     if (Array.isArray(config?.cli?.enabled)) return config.cli.enabled
-    return ['gemini', 'claude', 'codex']
+    return ['gemini', 'claude', 'codex', 'agent']
   })()
   const cliConfigForRun = cliEnabledOverride
     ? { ...(config?.cli ?? {}), enabled: cliEnabledOverride }
diff --git a/src/run/summary-engine.ts b/src/run/summary-engine.ts
index 88d97abf..92efb404 100644
--- a/src/run/summary-engine.ts
+++ b/src/run/summary-engine.ts
@@ -101,6 +101,9 @@ export function createSummaryEngine(deps: SummaryEngineDeps) {
     if (requiredEnv === 'CLI_GEMINI') {
       return Boolean(deps.cliAvailability.gemini)
     }
+    if (requiredEnv === 'CLI_AGENT') {
+      return Boolean(deps.cliAvailability.agent)
+    }
     if (requiredEnv === 'GEMINI_API_KEY') {
       return deps.keyFlags.googleConfigured
     }
@@ -129,6 +132,9 @@ export function createSummaryEngine(deps: SummaryEngineDeps) {
     if (attempt.requiredEnv === 'CLI_GEMINI') {
       return `Gemini CLI not found for model ${attempt.userModelId}. Install Gemini CLI or set GEMINI_PATH.`
     }
+    if (attempt.requiredEnv === 'CLI_AGENT') {
+      return `Cursor Agent CLI not found for model ${attempt.userModelId}. Install Cursor CLI or set AGENT_PATH.`
+    }
     return `Missing ${attempt.requiredEnv} for model ${attempt.userModelId}. Set the env var or choose a different --model.`
   }
diff --git a/src/run/types.ts b/src/run/types.ts
index a5fa8ba1..8f05b854 100644
--- a/src/run/types.ts
+++ b/src/run/types.ts
@@ -10,6 +10,7 @@ export type ModelAttemptRequiredEnv =
   | 'CLI_CLAUDE'
   | 'CLI_CODEX'
   | 'CLI_GEMINI'
+  | 'CLI_AGENT'
 
 export type ModelAttempt = {
   transport: 'native' | 'openrouter' | 'cli'
diff --git a/tests/llm.cli.more-branches-2.test.ts b/tests/llm.cli.more-branches-2.test.ts
index dadea2c2..81082c56 100644
--- a/tests/llm.cli.more-branches-2.test.ts
+++ b/tests/llm.cli.more-branches-2.test.ts
@@ -25,6 +25,7 @@ describe('llm/cli more branches', () => {
     )
 
     expect(resolveCliBinary('claude', null, {})).toBe('claude')
+    expect(resolveCliBinary('agent', null, {})).toBe('agent')
   })
 
   it('includes stderr in exec error messages', async () => {
diff --git a/tests/llm.cli.test.ts b/tests/llm.cli.test.ts
index e05ca854..75757768 100644
--- a/tests/llm.cli.test.ts
+++ b/tests/llm.cli.test.ts
@@ -146,6 +146,33 @@ describe('runCliModel', () => {
     expect(seen[0]).toContain('--bar')
   })
 
+  it('handles Agent CLI JSON output in ask mode', async () => {
+    const seen: string[][] = []
+    const execFileImpl = makeStub((args) => {
+      seen.push(args)
+      return { stdout: JSON.stringify({ result: 'ok' }) }
+    })
+    const result = await runCliModel({
+      provider: 'agent',
+      prompt: 'Test',
+      model: 'gpt-5.2',
+      allowTools: false,
+      timeoutMs: 1000,
+      env: {},
+      execFileImpl,
+      config: null,
+    })
+    expect(result.text).toBe('ok')
+    expect(seen[0]).toContain('--print')
+    expect(seen[0]).toContain('--output-format')
+    expect(seen[0]).toContain('json')
+    expect(seen[0]).toContain('--mode')
+    expect(seen[0]).toContain('ask')
+    expect(seen[0]).toContain('--model')
+    expect(seen[0]).toContain('gpt-5.2')
+    expect(seen[0]?.[seen[0].length - 1]).toBe('Test')
+  })
+
   it('reads the Codex output file', async () => {
     const execFileImpl: ExecFileFn = ((_cmd, args, _options, cb) => {
       const outputIndex = args.indexOf('--output-last-message')
diff --git a/tests/model-spec.test.ts b/tests/model-spec.test.ts
index 81d39293..bd6d8e0d 100644
--- a/tests/model-spec.test.ts
+++ b/tests/model-spec.test.ts
@@ -29,6 +29,15 @@ describe('model spec parsing', () => {
     expect(parsed.requiredEnv).toBe('CLI_CODEX')
   })
 
+  it('defaults agent cli models when missing', () => {
+    const parsed = parseRequestedModelId('cli/agent')
+    expect(parsed.kind).toBe('fixed')
+    expect(parsed.transport).toBe('cli')
+    expect(parsed.cliProvider).toBe('agent')
+    expect(parsed.cliModel).toBe('gpt-5.2')
+    expect(parsed.requiredEnv).toBe('CLI_AGENT')
+  })
+
   it('rejects invalid cli providers', () => {
     expect(() => parseRequestedModelId('cli/unknown/model')).toThrow(/Invalid CLI model id/)
   })
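For reference, the Agent branch added to `runCliModel` runs the CLI with `--print --output-format json` and then falls back through several payload fields before using raw text. A standalone TypeScript sketch of that extraction; `extractAgentText` is an illustrative name, and plain `JSON.parse` stands in for the patch's `parseJsonFromOutput` helper:

```ts
// Mirrors the fallback chain in the new `provider === 'agent'` branch:
// prefer result/response/output/message/text from the JSON payload,
// otherwise return the trimmed raw stdout.
function extractAgentText(stdout: string): string {
  const trimmed = stdout.trim()
  if (!trimmed) throw new Error('CLI returned empty output')
  try {
    const parsed: unknown = JSON.parse(trimmed)
    if (parsed && typeof parsed === 'object') {
      const payload = parsed as Record<string, unknown>
      for (const key of ['result', 'response', 'output', 'message', 'text']) {
        const value = payload[key]
        if (typeof value === 'string' && value.trim().length > 0) return value.trim()
      }
    }
  } catch {
    // stdout was not valid JSON; fall through to raw text
  }
  return trimmed
}
```

Under these assumptions, `extractAgentText(JSON.stringify({ result: 'ok' }))` yields `'ok'`, which is the behavior the new `tests/llm.cli.test.ts` case exercises.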