diff --git a/openspec/changes/agent-codex-prevent-session-severity-help-and-test-d-2026-04-23-00-01/notes.md b/openspec/changes/agent-codex-prevent-session-severity-help-and-test-d-2026-04-23-00-01/notes.md new file mode 100644 index 0000000..096c685 --- /dev/null +++ b/openspec/changes/agent-codex-prevent-session-severity-help-and-test-d-2026-04-23-00-01/notes.md @@ -0,0 +1,10 @@ +# agent-codex-prevent-session-severity-help-and-test-d-2026-04-23-00-01 (minimal / T1) + +- Move the `report session-severity` help contract into the report module so usage text, example args, and rubric summary come from the same source as the scorer. +- Keep the scored-output assertions hard-coded in tests while reusing the shared example args, so score changes still break tests instead of silently updating expectations. +- Add focused help coverage that proves the CLI help prints the shared session-severity contract text. +- Verification: + - `node --test test/report.test.js test/cli-args-dispatch.test.js` + - `node bin/multiagent-safety.js report help` + - `node bin/multiagent-safety.js report session-severity --task-size narrow-patch --tokens 3850000 --exec-count 18 --write-stdin-count 6 --completion-before-tail yes --fragmentation 14 --finish-path 6 --post-proof 4` + - `git diff --check` diff --git a/src/cli/main.js b/src/cli/main.js index 434c55f..6c3ef05 100755 --- a/src/cli/main.js +++ b/src/cli/main.js @@ -2431,15 +2431,20 @@ function report(rawArgs) { const options = parseReportArgs(rawArgs); const subcommand = options.subcommand || 'help'; if (subcommand === 'help' || subcommand === '--help' || subcommand === '-h') { + const sessionSeverityHelpDetails = sessionSeverityReport.renderSessionSeverityHelpDetails() + .split('\n') + .map((line) => ` ${line}`) + .join('\n'); console.log( `${TOOL_NAME} report commands:\n` + ` ${TOOL_NAME} report scorecard [--target ] [--repo github.com//] [--scorecard-json ] [--output-dir ] [--date YYYY-MM-DD] [--dry-run] [--json]\n` + - ` ${TOOL_NAME} report session-severity --task-size --tokens --exec-count --write-stdin-count --completion-before-tail [--expected-bound ] [--fragmentation ] [--finish-path ] [--post-proof ] [--json]\n` + + ` ${sessionSeverityReport.renderSessionSeverityCommand(TOOL_NAME)}\n` + + `${sessionSeverityHelpDetails}\n` + `\n` + `Examples:\n` + ` ${TOOL_NAME} report scorecard --repo github.com/recodeecom/multiagent-safety\n` + ` ${TOOL_NAME} report scorecard --scorecard-json ./scorecard.json --date 2026-04-10\n` + - ` ${TOOL_NAME} report session-severity --task-size narrow-patch --tokens 3850000 --exec-count 18 --write-stdin-count 6 --completion-before-tail yes --fragmentation 14 --finish-path 6 --post-proof 4`, + ` ${sessionSeverityReport.renderSessionSeverityExample(TOOL_NAME)}`, ); process.exitCode = 0; return; diff --git a/src/report/session-severity.js b/src/report/session-severity.js index 809c0e5..ee80bb1 100644 --- a/src/report/session-severity.js +++ b/src/report/session-severity.js @@ -4,7 +4,8 @@ const TASK_SIZE_UPPER_BOUNDS = { 'large-change': 8_000_000, }; -const TASK_SIZE_VALUES = new Set(Object.keys(TASK_SIZE_UPPER_BOUNDS)); +const TASK_SIZE_VALUES = Object.keys(TASK_SIZE_UPPER_BOUNDS); +const TASK_SIZE_SET = new Set(TASK_SIZE_VALUES); const FRAGMENTATION_PRESET_SCORES = { clean: 0, 'few-extra-checks': 5, @@ -32,6 +33,50 @@ const DRIVER_LABELS = { finishPath: 'finish-path discipline', postProof: 'post-proof drift', }; +const LABEL_BANDS = [ + { max: 15, label: 'Healthy' }, + { max: 30, label: 'Mildly fragmented' }, + { max: 50, label: 'Inefficient' }, + { max: 75, label: 'Runaway' }, + { max: 100, label: 'Catastrophic' }, +]; +const SESSION_SEVERITY_SUBCOMMAND = 'session-severity'; +const SESSION_SEVERITY_USAGE_ARGS = [ + `--task-size <${TASK_SIZE_VALUES.join('|')}>`, + '--tokens ', + '--exec-count ', + '--write-stdin-count ', + '--completion-before-tail ', + '[--expected-bound ]', + `[--fragmentation <${Object.keys(FRAGMENTATION_PRESET_SCORES).join('|')}|0-25>]`, + `[--finish-path <${Object.keys(FINISH_PATH_PRESET_SCORES).join('|')}|0-15>]`, + `[--post-proof <${Object.keys(POST_PROOF_PRESET_SCORES).join('|')}|0-15>]`, + '[--json]', +].join(' '); +const SESSION_SEVERITY_COMMAND_TAIL = `${SESSION_SEVERITY_SUBCOMMAND} ${SESSION_SEVERITY_USAGE_ARGS}`; +const SESSION_SEVERITY_EXAMPLE_ARGS = [ + '--task-size', + 'narrow-patch', + '--tokens', + '3850000', + '--exec-count', + '18', + '--write-stdin-count', + '6', + '--completion-before-tail', + 'yes', + '--fragmentation', + '14', + '--finish-path', + '6', + '--post-proof', + '4', +]; +const SESSION_SEVERITY_EXAMPLE_TAIL = `${SESSION_SEVERITY_SUBCOMMAND} ${SESSION_SEVERITY_EXAMPLE_ARGS.join(' ')}`; + +function formatInteger(value) { + return Number(value).toLocaleString('en-US'); +} function parseRequiredPositiveInteger(name, rawValue, { allowZero = true } = {}) { const parsed = Number.parseInt(String(rawValue || ''), 10); @@ -58,8 +103,8 @@ function clampScore(value, min, max) { function parseTaskSize(rawTaskSize) { const normalized = String(rawTaskSize || '').trim(); - if (!TASK_SIZE_VALUES.has(normalized)) { - throw new Error(`--task-size must be one of: ${Array.from(TASK_SIZE_VALUES).join(', ')}`); + if (!TASK_SIZE_SET.has(normalized)) { + throw new Error(`--task-size must be one of: ${TASK_SIZE_VALUES.join(', ')}`); } return normalized; } @@ -123,11 +168,7 @@ function scorePostProof(completionBeforeTail, override) { } function labelForTotal(total) { - if (total <= 15) return 'Healthy'; - if (total <= 30) return 'Mildly fragmented'; - if (total <= 50) return 'Inefficient'; - if (total <= 75) return 'Runaway'; - return 'Catastrophic'; + return LABEL_BANDS.find((band) => total <= band.max)?.label || LABEL_BANDS[LABEL_BANDS.length - 1].label; } function buildSessionSeverityReport(options) { @@ -205,9 +246,37 @@ function renderSessionSeverityReport(report) { ].join('\n'); } +function renderSessionSeverityHelpDetails() { + const taskSizeDefaults = TASK_SIZE_VALUES + .map((taskSize) => `${taskSize}=${formatInteger(TASK_SIZE_UPPER_BOUNDS[taskSize])}`) + .join(', '); + const labelBands = LABEL_BANDS + .map((band, index) => { + const min = index === 0 ? 0 : LABEL_BANDS[index - 1].max + 1; + return `${band.label}=${min}-${band.max}`; + }) + .join(', '); + return [`Task-size defaults: ${taskSizeDefaults}`, `Label bands: ${labelBands}`].join('\n'); +} + +function renderSessionSeverityCommand(toolName) { + return `${toolName} report ${SESSION_SEVERITY_COMMAND_TAIL}`; +} + +function renderSessionSeverityExample(toolName) { + return `${toolName} report ${SESSION_SEVERITY_EXAMPLE_TAIL}`; +} + module.exports = { + LABEL_BANDS, + SESSION_SEVERITY_COMMAND_TAIL, + SESSION_SEVERITY_EXAMPLE_ARGS, + SESSION_SEVERITY_EXAMPLE_TAIL, TASK_SIZE_UPPER_BOUNDS, buildSessionSeverityReport, renderSessionSeverityReport, + renderSessionSeverityHelpDetails, + renderSessionSeverityCommand, + renderSessionSeverityExample, labelForTotal, }; diff --git a/test/report.test.js b/test/report.test.js index 1f44a93..d50c518 100644 --- a/test/report.test.js +++ b/test/report.test.js @@ -60,6 +60,7 @@ const { sanitizeSlug, defineSpawnSuite, } = require('./helpers/install-test-helpers'); +const sessionSeverityReport = require('../src/report/session-severity'); defineSpawnSuite('report integration suite', () => { @@ -101,26 +102,7 @@ exit 1 test('report session-severity prints the weighted rubric summary', () => { const repoDir = initRepo(); - const result = runNode([ - 'report', - 'session-severity', - '--task-size', - 'narrow-patch', - '--tokens', - '3850000', - '--exec-count', - '18', - '--write-stdin-count', - '6', - '--completion-before-tail', - 'yes', - '--fragmentation', - '14', - '--finish-path', - '6', - '--post-proof', - '4', - ], repoDir); + const result = runNode(['report', 'session-severity', ...sessionSeverityReport.SESSION_SEVERITY_EXAMPLE_ARGS], repoDir); assert.equal(result.status, 0, result.stderr || result.stdout); assert.match(result.stdout, /Score 44\/100 — Inefficient\./); @@ -130,6 +112,19 @@ test('report session-severity prints the weighted rubric summary', () => { assert.match(result.stdout, /Total: 44/); }); +test('report help reuses the shared session-severity contract text', () => { + const repoDir = initRepo(); + const result = runNode(['report', 'help'], repoDir); + const helpDetailLines = sessionSeverityReport.renderSessionSeverityHelpDetails().split('\n'); + + assert.equal(result.status, 0, result.stderr || result.stdout); + assert.match(result.stdout, new RegExp(escapeRegexLiteral(sessionSeverityReport.SESSION_SEVERITY_COMMAND_TAIL))); + assert.match(result.stdout, new RegExp(escapeRegexLiteral(sessionSeverityReport.SESSION_SEVERITY_EXAMPLE_TAIL))); + for (const line of helpDetailLines) { + assert.match(result.stdout, new RegExp(escapeRegexLiteral(line))); + } +}); + test('report session-severity emits structured JSON when requested', () => { const repoDir = initRepo(); const result = runNode([