Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# agent-codex-prevent-session-severity-help-and-test-d-2026-04-23-00-01 (minimal / T1)

- Move the `report session-severity` help contract into the report module so usage text, example args, and rubric summary come from the same source as the scorer.
- Keep the scored-output assertions hard-coded in tests while reusing the shared example args, so score changes still break tests instead of silently updating expectations.
- Add focused help coverage that proves the CLI help prints the shared session-severity contract text.
- Verification:
  - `node --test test/report.test.js test/cli-args-dispatch.test.js`
  - `node bin/multiagent-safety.js report help`
  - `node bin/multiagent-safety.js report session-severity --task-size narrow-patch --tokens 3850000 --exec-count 18 --write-stdin-count 6 --completion-before-tail yes --fragmentation 14 --finish-path 6 --post-proof 4`
  - `git diff --check`
9 changes: 7 additions & 2 deletions src/cli/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -2431,15 +2431,20 @@ function report(rawArgs) {
const options = parseReportArgs(rawArgs);
const subcommand = options.subcommand || 'help';
if (subcommand === 'help' || subcommand === '--help' || subcommand === '-h') {
const sessionSeverityHelpDetails = sessionSeverityReport.renderSessionSeverityHelpDetails()
.split('\n')
.map((line) => ` ${line}`)
.join('\n');
console.log(
`${TOOL_NAME} report commands:\n` +
` ${TOOL_NAME} report scorecard [--target <path>] [--repo github.com/<owner>/<repo>] [--scorecard-json <file>] [--output-dir <path>] [--date YYYY-MM-DD] [--dry-run] [--json]\n` +
` ${TOOL_NAME} report session-severity --task-size <narrow-patch|medium-change|large-change> --tokens <count> --exec-count <count> --write-stdin-count <count> --completion-before-tail <yes|no> [--expected-bound <count>] [--fragmentation <preset|0-25>] [--finish-path <preset|0-15>] [--post-proof <preset|0-15>] [--json]\n` +
` ${sessionSeverityReport.renderSessionSeverityCommand(TOOL_NAME)}\n` +
`${sessionSeverityHelpDetails}\n` +
`\n` +
`Examples:\n` +
` ${TOOL_NAME} report scorecard --repo github.com/recodeecom/multiagent-safety\n` +
` ${TOOL_NAME} report scorecard --scorecard-json ./scorecard.json --date 2026-04-10\n` +
` ${TOOL_NAME} report session-severity --task-size narrow-patch --tokens 3850000 --exec-count 18 --write-stdin-count 6 --completion-before-tail yes --fragmentation 14 --finish-path 6 --post-proof 4`,
` ${sessionSeverityReport.renderSessionSeverityExample(TOOL_NAME)}`,
);
process.exitCode = 0;
return;
Expand Down
85 changes: 77 additions & 8 deletions src/report/session-severity.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ const TASK_SIZE_UPPER_BOUNDS = {
'large-change': 8_000_000,
};

const TASK_SIZE_VALUES = new Set(Object.keys(TASK_SIZE_UPPER_BOUNDS));
const TASK_SIZE_VALUES = Object.keys(TASK_SIZE_UPPER_BOUNDS);
const TASK_SIZE_SET = new Set(TASK_SIZE_VALUES);
const FRAGMENTATION_PRESET_SCORES = {
clean: 0,
'few-extra-checks': 5,
Expand Down Expand Up @@ -32,6 +33,50 @@ const DRIVER_LABELS = {
finishPath: 'finish-path discipline',
postProof: 'post-proof drift',
};
// Severity labels keyed by score band. Bands are listed in ascending order and
// `max` is the inclusive upper bound of each band (labelForTotal compares with `<=`).
const LABEL_BANDS = [
{ max: 15, label: 'Healthy' },
{ max: 30, label: 'Mildly fragmented' },
{ max: 50, label: 'Inefficient' },
{ max: 75, label: 'Runaway' },
{ max: 100, label: 'Catastrophic' },
];
// Subcommand token shared by the usage line and the example line below.
const SESSION_SEVERITY_SUBCOMMAND = 'session-severity';
// Flag synopsis for the usage line. The accepted values for --task-size and the
// preset names for the three override flags are read from the same tables the
// scorer uses, so the help text cannot list a value the scorer does not know.
const SESSION_SEVERITY_USAGE_ARGS = [
`--task-size <${TASK_SIZE_VALUES.join('|')}>`,
'--tokens <count>',
'--exec-count <count>',
'--write-stdin-count <count>',
'--completion-before-tail <yes|no>',
'[--expected-bound <count>]',
`[--fragmentation <${Object.keys(FRAGMENTATION_PRESET_SCORES).join('|')}|0-25>]`,
`[--finish-path <${Object.keys(FINISH_PATH_PRESET_SCORES).join('|')}|0-15>]`,
`[--post-proof <${Object.keys(POST_PROOF_PRESET_SCORES).join('|')}|0-15>]`,
'[--json]',
].join(' ');
// Everything that follows "<tool> report " in the usage line.
const SESSION_SEVERITY_COMMAND_TAIL = `${SESSION_SEVERITY_SUBCOMMAND} ${SESSION_SEVERITY_USAGE_ARGS}`;
// Example invocation kept as individual argv tokens (flag, value, flag, value, ...)
// so callers can spread it straight into an argument list as well as join it for display.
const SESSION_SEVERITY_EXAMPLE_ARGS = [
'--task-size',
'narrow-patch',
'--tokens',
'3850000',
'--exec-count',
'18',
'--write-stdin-count',
'6',
'--completion-before-tail',
'yes',
'--fragmentation',
'14',
'--finish-path',
'6',
'--post-proof',
'4',
];
// Everything that follows "<tool> report " in the example line.
const SESSION_SEVERITY_EXAMPLE_TAIL = `${SESSION_SEVERITY_SUBCOMMAND} ${SESSION_SEVERITY_EXAMPLE_ARGS.join(' ')}`;

/**
 * Renders a numeric value with en-US digit grouping, e.g. 3850000 -> "3,850,000".
 *
 * @param {*} value - Any value `Number()` can coerce (number or numeric string).
 * @returns {string} The grouped, human-readable representation.
 */
function formatInteger(value) {
  const numeric = Number(value);
  return new Intl.NumberFormat('en-US').format(numeric);
}

function parseRequiredPositiveInteger(name, rawValue, { allowZero = true } = {}) {
const parsed = Number.parseInt(String(rawValue || ''), 10);
Expand All @@ -58,8 +103,8 @@ function clampScore(value, min, max) {

// Validates a raw --task-size value: stringifies and trims it, throws an Error
// listing the accepted values when it is not a known task size, and otherwise
// returns the trimmed value.
// NOTE(review): this span looks like a diff whose +/- markers were lost — the
// TASK_SIZE_VALUES.has(...) check and the TASK_SIZE_SET.has(...) check are
// presumably the removed and added versions of the same guard (the first `if`
// is never closed). Confirm against the real file before editing.
function parseTaskSize(rawTaskSize) {
const normalized = String(rawTaskSize || '').trim();
if (!TASK_SIZE_VALUES.has(normalized)) {
throw new Error(`--task-size must be one of: ${Array.from(TASK_SIZE_VALUES).join(', ')}`);
if (!TASK_SIZE_SET.has(normalized)) {
throw new Error(`--task-size must be one of: ${TASK_SIZE_VALUES.join(', ')}`);
}
return normalized;
}
Expand Down Expand Up @@ -123,11 +168,7 @@ function scorePostProof(completionBeforeTail, override) {
}

// Maps a total score to its severity label.
// NOTE(review): this span looks like a diff whose +/- markers were lost. The
// if-chain and the LABEL_BANDS lookup encode the same thresholds (<=15, <=30,
// <=50, <=75, otherwise 'Catastrophic'); read literally, the if-chain always
// returns first and the lookup line is unreachable. Presumably only the lookup
// is meant to remain — confirm against the real file.
function labelForTotal(total) {
if (total <= 15) return 'Healthy';
if (total <= 30) return 'Mildly fragmented';
if (total <= 50) return 'Inefficient';
if (total <= 75) return 'Runaway';
return 'Catastrophic';
// First band whose inclusive max covers the total; totals above every band fall back to the last label.
return LABEL_BANDS.find((band) => total <= band.max)?.label || LABEL_BANDS[LABEL_BANDS.length - 1].label;
}

function buildSessionSeverityReport(options) {
Expand Down Expand Up @@ -205,9 +246,37 @@ function renderSessionSeverityReport(report) {
].join('\n');
}

/**
 * Builds the two-line rubric summary shown under the session-severity usage line.
 *
 * Line 1 lists each task size with its formatted upper bound; line 2 lists each
 * label with the inclusive score range it covers.
 *
 * @returns {string} Two lines joined by a single newline, without indentation.
 */
function renderSessionSeverityHelpDetails() {
  const defaultsParts = [];
  for (const taskSize of TASK_SIZE_VALUES) {
    defaultsParts.push(`${taskSize}=${formatInteger(TASK_SIZE_UPPER_BOUNDS[taskSize])}`);
  }

  // Each band starts one above the previous band's max; the first band starts at 0.
  const bandParts = [];
  let lowerBound = 0;
  for (const { label, max } of LABEL_BANDS) {
    bandParts.push(`${label}=${lowerBound}-${max}`);
    lowerBound = max + 1;
  }

  const defaultsLine = `Task-size defaults: ${defaultsParts.join(', ')}`;
  const bandsLine = `Label bands: ${bandParts.join(', ')}`;
  return `${defaultsLine}\n${bandsLine}`;
}

/**
 * Renders the full usage line for the session-severity subcommand.
 *
 * @param {string} toolName - CLI name placed at the start of the line.
 * @returns {string} "<toolName> report " followed by the shared command tail.
 */
function renderSessionSeverityCommand(toolName) {
  const reportPrefix = `${toolName} report`;
  return `${reportPrefix} ${SESSION_SEVERITY_COMMAND_TAIL}`;
}

/**
 * Renders the worked example invocation for the session-severity subcommand.
 *
 * @param {string} toolName - CLI name placed at the start of the line.
 * @returns {string} "<toolName> report " followed by the shared example tail.
 */
function renderSessionSeverityExample(toolName) {
  const reportPrefix = `${toolName} report`;
  return `${reportPrefix} ${SESSION_SEVERITY_EXAMPLE_TAIL}`;
}

// Module surface: the rubric tables and help/example strings are exported next
// to the scorer and renderers so other modules can read the same values
// instead of keeping their own copies.
module.exports = {
LABEL_BANDS,
SESSION_SEVERITY_COMMAND_TAIL,
SESSION_SEVERITY_EXAMPLE_ARGS,
SESSION_SEVERITY_EXAMPLE_TAIL,
TASK_SIZE_UPPER_BOUNDS,
buildSessionSeverityReport,
renderSessionSeverityReport,
renderSessionSeverityHelpDetails,
renderSessionSeverityCommand,
renderSessionSeverityExample,
labelForTotal,
};
35 changes: 15 additions & 20 deletions test/report.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ const {
sanitizeSlug,
defineSpawnSuite,
} = require('./helpers/install-test-helpers');
const sessionSeverityReport = require('../src/report/session-severity');

defineSpawnSuite('report integration suite', () => {

Expand Down Expand Up @@ -101,26 +102,7 @@ exit 1

test('report session-severity prints the weighted rubric summary', () => {
const repoDir = initRepo();
const result = runNode([
'report',
'session-severity',
'--task-size',
'narrow-patch',
'--tokens',
'3850000',
'--exec-count',
'18',
'--write-stdin-count',
'6',
'--completion-before-tail',
'yes',
'--fragmentation',
'14',
'--finish-path',
'6',
'--post-proof',
'4',
], repoDir);
const result = runNode(['report', 'session-severity', ...sessionSeverityReport.SESSION_SEVERITY_EXAMPLE_ARGS], repoDir);

assert.equal(result.status, 0, result.stderr || result.stdout);
assert.match(result.stdout, /Score 44\/100 — Inefficient\./);
Expand All @@ -130,6 +112,19 @@ test('report session-severity prints the weighted rubric summary', () => {
assert.match(result.stdout, /Total: 44/);
});

// Help output must contain the usage tail, the example tail, and every rubric
// detail line exactly as the report module renders them.
test('report help reuses the shared session-severity contract text', () => {
  const repoDir = initRepo();
  const { status, stdout, stderr } = runNode(['report', 'help'], repoDir);
  const expectedFragments = [
    sessionSeverityReport.SESSION_SEVERITY_COMMAND_TAIL,
    sessionSeverityReport.SESSION_SEVERITY_EXAMPLE_TAIL,
    ...sessionSeverityReport.renderSessionSeverityHelpDetails().split('\n'),
  ];

  assert.equal(status, 0, stderr || stdout);
  expectedFragments.forEach((fragment) => {
    // Escaped so flag syntax such as `<a|b>` and `[--json]` is matched literally.
    assert.match(stdout, new RegExp(escapeRegexLiteral(fragment)));
  });
});

test('report session-severity emits structured JSON when requested', () => {
const repoDir = initRepo();
const result = runNode([
Expand Down