From 7c3e35bec0c70a92ec175b4701f68cd3bb71b6f4 Mon Sep 17 00:00:00 2001 From: NagyVikt Date: Thu, 23 Apr 2026 00:05:47 +0200 Subject: [PATCH] Keep session severity help and tests anchored to one contract The new session-severity report had already merged, but its help example and focused tests still repeated user-facing contract text in multiple places. This patch moves the usage tail, example args, and rubric summary into the report module so the CLI help and test inputs reuse one source while the scored-output expectations remain explicit. Constraint: Preserve the existing session-severity scoring output while reducing help/example drift Rejected: Snapshot the full help output in tests | too broad for a narrow T1 drift guard Confidence: high Scope-risk: narrow Directive: Keep score expectations hard-coded in focused tests even when help/example text is shared from the module Tested: node --test test/report.test.js test/cli-args-dispatch.test.js; node bin/multiagent-safety.js report help; node bin/multiagent-safety.js report session-severity --task-size narrow-patch --tokens 3850000 --exec-count 18 --write-stdin-count 6 --completion-before-tail yes --fragmentation 14 --finish-path 6 --post-proof 4; git diff --check Not-tested: remote PR merge path until guarded finish completes --- .../notes.md | 10 +++ src/cli/main.js | 9 +- src/report/session-severity.js | 85 +++++++++++++++++-- test/report.test.js | 35 ++++---- 4 files changed, 109 insertions(+), 30 deletions(-) create mode 100644 openspec/changes/agent-codex-prevent-session-severity-help-and-test-d-2026-04-23-00-01/notes.md diff --git a/openspec/changes/agent-codex-prevent-session-severity-help-and-test-d-2026-04-23-00-01/notes.md b/openspec/changes/agent-codex-prevent-session-severity-help-and-test-d-2026-04-23-00-01/notes.md new file mode 100644 index 0000000..096c685 --- /dev/null +++ b/openspec/changes/agent-codex-prevent-session-severity-help-and-test-d-2026-04-23-00-01/notes.md @@ -0,0 +1,10 @@ +# agent-codex-prevent-session-severity-help-and-test-d-2026-04-23-00-01 (minimal / T1) + +- Move the `report session-severity` help contract into the report module so usage text, example args, and rubric summary come from the same source as the scorer. +- Keep the scored-output assertions hard-coded in tests while reusing the shared example args, so score changes still break tests instead of silently updating expectations. +- Add focused help coverage that proves the CLI help prints the shared session-severity contract text. +- Verification: + - `node --test test/report.test.js test/cli-args-dispatch.test.js` + - `node bin/multiagent-safety.js report help` + - `node bin/multiagent-safety.js report session-severity --task-size narrow-patch --tokens 3850000 --exec-count 18 --write-stdin-count 6 --completion-before-tail yes --fragmentation 14 --finish-path 6 --post-proof 4` + - `git diff --check` diff --git a/src/cli/main.js b/src/cli/main.js index 434c55f..6c3ef05 100755 --- a/src/cli/main.js +++ b/src/cli/main.js @@ -2431,15 +2431,20 @@ function report(rawArgs) { const options = parseReportArgs(rawArgs); const subcommand = options.subcommand || 'help'; if (subcommand === 'help' || subcommand === '--help' || subcommand === '-h') { + const sessionSeverityHelpDetails = sessionSeverityReport.renderSessionSeverityHelpDetails() + .split('\n') + .map((line) => ` ${line}`) + .join('\n'); console.log( `${TOOL_NAME} report commands:\n` + ` ${TOOL_NAME} report scorecard [--target ] [--repo github.com//] [--scorecard-json ] [--output-dir ] [--date YYYY-MM-DD] [--dry-run] [--json]\n` + - ` ${TOOL_NAME} report session-severity --task-size --tokens --exec-count --write-stdin-count --completion-before-tail [--expected-bound ] [--fragmentation ] [--finish-path ] [--post-proof ] [--json]\n` + + ` ${sessionSeverityReport.renderSessionSeverityCommand(TOOL_NAME)}\n` + + `${sessionSeverityHelpDetails}\n` + `\n` + `Examples:\n` + ` ${TOOL_NAME} report scorecard --repo github.com/recodeecom/multiagent-safety\n` + ` ${TOOL_NAME} report scorecard --scorecard-json ./scorecard.json --date 2026-04-10\n` + - ` ${TOOL_NAME} report session-severity --task-size narrow-patch --tokens 3850000 --exec-count 18 --write-stdin-count 6 --completion-before-tail yes --fragmentation 14 --finish-path 6 --post-proof 4`, + ` ${sessionSeverityReport.renderSessionSeverityExample(TOOL_NAME)}`, ); process.exitCode = 0; return; diff --git a/src/report/session-severity.js b/src/report/session-severity.js index 809c0e5..ee80bb1 100644 --- a/src/report/session-severity.js +++ b/src/report/session-severity.js @@ -4,7 +4,8 @@ const TASK_SIZE_UPPER_BOUNDS = { 'large-change': 8_000_000, }; -const TASK_SIZE_VALUES = new Set(Object.keys(TASK_SIZE_UPPER_BOUNDS)); +const TASK_SIZE_VALUES = Object.keys(TASK_SIZE_UPPER_BOUNDS); +const TASK_SIZE_SET = new Set(TASK_SIZE_VALUES); const FRAGMENTATION_PRESET_SCORES = { clean: 0, 'few-extra-checks': 5, @@ -32,6 +33,50 @@ const DRIVER_LABELS = { finishPath: 'finish-path discipline', postProof: 'post-proof drift', }; +const LABEL_BANDS = [ + { max: 15, label: 'Healthy' }, + { max: 30, label: 'Mildly fragmented' }, + { max: 50, label: 'Inefficient' }, + { max: 75, label: 'Runaway' }, + { max: 100, label: 'Catastrophic' }, +]; +const SESSION_SEVERITY_SUBCOMMAND = 'session-severity'; +const SESSION_SEVERITY_USAGE_ARGS = [ + `--task-size <${TASK_SIZE_VALUES.join('|')}>`, + '--tokens ', + '--exec-count ', + '--write-stdin-count ', + '--completion-before-tail ', + '[--expected-bound ]', + `[--fragmentation <${Object.keys(FRAGMENTATION_PRESET_SCORES).join('|')}|0-25>]`, + `[--finish-path <${Object.keys(FINISH_PATH_PRESET_SCORES).join('|')}|0-15>]`, + `[--post-proof <${Object.keys(POST_PROOF_PRESET_SCORES).join('|')}|0-15>]`, + '[--json]', +].join(' '); +const SESSION_SEVERITY_COMMAND_TAIL = `${SESSION_SEVERITY_SUBCOMMAND} ${SESSION_SEVERITY_USAGE_ARGS}`; +const SESSION_SEVERITY_EXAMPLE_ARGS = [ + '--task-size', + 'narrow-patch', + '--tokens', + '3850000', + '--exec-count', + '18', + '--write-stdin-count', + '6', + '--completion-before-tail', + 'yes', + '--fragmentation', + '14', + '--finish-path', + '6', + '--post-proof', + '4', +]; +const SESSION_SEVERITY_EXAMPLE_TAIL = `${SESSION_SEVERITY_SUBCOMMAND} ${SESSION_SEVERITY_EXAMPLE_ARGS.join(' ')}`; + +function formatInteger(value) { + return Number(value).toLocaleString('en-US'); +} function parseRequiredPositiveInteger(name, rawValue, { allowZero = true } = {}) { const parsed = Number.parseInt(String(rawValue || ''), 10); @@ -58,8 +103,8 @@ function clampScore(value, min, max) { function parseTaskSize(rawTaskSize) { const normalized = String(rawTaskSize || '').trim(); - if (!TASK_SIZE_VALUES.has(normalized)) { - throw new Error(`--task-size must be one of: ${Array.from(TASK_SIZE_VALUES).join(', ')}`); + if (!TASK_SIZE_SET.has(normalized)) { + throw new Error(`--task-size must be one of: ${TASK_SIZE_VALUES.join(', ')}`); } return normalized; } @@ -123,11 +168,7 @@ function scorePostProof(completionBeforeTail, override) { } function labelForTotal(total) { - if (total <= 15) return 'Healthy'; - if (total <= 30) return 'Mildly fragmented'; - if (total <= 50) return 'Inefficient'; - if (total <= 75) return 'Runaway'; - return 'Catastrophic'; + return LABEL_BANDS.find((band) => total <= band.max)?.label || LABEL_BANDS[LABEL_BANDS.length - 1].label; } function buildSessionSeverityReport(options) { @@ -205,9 +246,37 @@ function renderSessionSeverityReport(report) { ].join('\n'); } +function renderSessionSeverityHelpDetails() { + const taskSizeDefaults = TASK_SIZE_VALUES + .map((taskSize) => `${taskSize}=${formatInteger(TASK_SIZE_UPPER_BOUNDS[taskSize])}`) + .join(', '); + const labelBands = LABEL_BANDS + .map((band, index) => { + const min = index === 0 ? 0 : LABEL_BANDS[index - 1].max + 1; + return `${band.label}=${min}-${band.max}`; + }) + .join(', '); + return [`Task-size defaults: ${taskSizeDefaults}`, `Label bands: ${labelBands}`].join('\n'); +} + +function renderSessionSeverityCommand(toolName) { + return `${toolName} report ${SESSION_SEVERITY_COMMAND_TAIL}`; +} + +function renderSessionSeverityExample(toolName) { + return `${toolName} report ${SESSION_SEVERITY_EXAMPLE_TAIL}`; +} + module.exports = { + LABEL_BANDS, + SESSION_SEVERITY_COMMAND_TAIL, + SESSION_SEVERITY_EXAMPLE_ARGS, + SESSION_SEVERITY_EXAMPLE_TAIL, TASK_SIZE_UPPER_BOUNDS, buildSessionSeverityReport, renderSessionSeverityReport, + renderSessionSeverityHelpDetails, + renderSessionSeverityCommand, + renderSessionSeverityExample, labelForTotal, }; diff --git a/test/report.test.js b/test/report.test.js index 1f44a93..d50c518 100644 --- a/test/report.test.js +++ b/test/report.test.js @@ -60,6 +60,7 @@ const { sanitizeSlug, defineSpawnSuite, } = require('./helpers/install-test-helpers'); +const sessionSeverityReport = require('../src/report/session-severity'); defineSpawnSuite('report integration suite', () => { @@ -101,26 +102,7 @@ exit 1 test('report session-severity prints the weighted rubric summary', () => { const repoDir = initRepo(); - const result = runNode([ - 'report', - 'session-severity', - '--task-size', - 'narrow-patch', - '--tokens', - '3850000', - '--exec-count', - '18', - '--write-stdin-count', - '6', - '--completion-before-tail', - 'yes', - '--fragmentation', - '14', - '--finish-path', - '6', - '--post-proof', - '4', - ], repoDir); + const result = runNode(['report', 'session-severity', ...sessionSeverityReport.SESSION_SEVERITY_EXAMPLE_ARGS], repoDir); assert.equal(result.status, 0, result.stderr || result.stdout); assert.match(result.stdout, /Score 44\/100 — Inefficient\./); @@ -130,6 +112,19 @@ test('report session-severity prints the weighted rubric summary', () => { assert.match(result.stdout, /Total: 44/); }); +test('report help reuses the shared session-severity contract text', () => { + const repoDir = initRepo(); + const result = runNode(['report', 'help'], repoDir); + const helpDetailLines = sessionSeverityReport.renderSessionSeverityHelpDetails().split('\n'); + + assert.equal(result.status, 0, result.stderr || result.stdout); + assert.match(result.stdout, new RegExp(escapeRegexLiteral(sessionSeverityReport.SESSION_SEVERITY_COMMAND_TAIL))); + assert.match(result.stdout, new RegExp(escapeRegexLiteral(sessionSeverityReport.SESSION_SEVERITY_EXAMPLE_TAIL))); + for (const line of helpDetailLines) { + assert.match(result.stdout, new RegExp(escapeRegexLiteral(line))); + } +}); + test('report session-severity emits structured JSON when requested', () => { const repoDir = initRepo(); const result = runNode([