Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# agent-codex-add-session-severity-scoring-command-2026-04-22-23-28 (minimal / T1)

- Add `gx report session-severity` as a native GitGuardex report subcommand with the fixed weighted rubric for healthy / mildly fragmented / inefficient / runaway / catastrophic sessions.
- Keep the scoring logic in a small report module and thread it through the existing `gx report` help, parsing, and output surface instead of shipping a repo-local side script.
- Lock the new report surface with focused CLI arg parsing and report integration tests.
- Verification:
- `node --test test/cli-args-dispatch.test.js test/report.test.js`
- `node bin/multiagent-safety.js report help`
- `node bin/multiagent-safety.js report session-severity --task-size narrow-patch --tokens 3850000 --exec-count 18 --write-stdin-count 6 --completion-before-tail yes --fragmentation 14 --finish-path 6 --post-proof 4`
- `git diff --check`
72 changes: 72 additions & 0 deletions src/cli/args.js
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,15 @@ function parseReportArgs(rawArgs) {
scorecardJson: '',
outputDir: '',
date: '',
taskSize: '',
tokens: '',
execCount: '',
writeStdinCount: '',
completionBeforeTail: '',
expectedBound: '',
fragmentation: '',
finishPath: '',
postProof: '',
dryRun: false,
json: false,
};
Expand Down Expand Up @@ -390,6 +399,69 @@ function parseReportArgs(rawArgs) {
index += 1;
continue;
}
if (arg === '--task-size') {
const next = rawArgs[index + 1];
if (!next) throw new Error('--task-size requires a value');
options.taskSize = next;
index += 1;
continue;
}
if (arg === '--tokens') {
const next = rawArgs[index + 1];
if (!next) throw new Error('--tokens requires a value');
options.tokens = next;
index += 1;
continue;
}
if (arg === '--exec-count') {
const next = rawArgs[index + 1];
if (!next) throw new Error('--exec-count requires a value');
options.execCount = next;
index += 1;
continue;
}
if (arg === '--write-stdin-count') {
const next = rawArgs[index + 1];
if (!next) throw new Error('--write-stdin-count requires a value');
options.writeStdinCount = next;
index += 1;
continue;
}
if (arg === '--completion-before-tail') {
const next = rawArgs[index + 1];
if (!next) throw new Error('--completion-before-tail requires yes or no');
options.completionBeforeTail = next;
index += 1;
continue;
}
if (arg === '--expected-bound') {
const next = rawArgs[index + 1];
if (!next) throw new Error('--expected-bound requires a value');
options.expectedBound = next;
index += 1;
continue;
}
if (arg === '--fragmentation') {
const next = rawArgs[index + 1];
if (!next) throw new Error('--fragmentation requires a value');
options.fragmentation = next;
index += 1;
continue;
}
if (arg === '--finish-path') {
const next = rawArgs[index + 1];
if (!next) throw new Error('--finish-path requires a value');
options.finishPath = next;
index += 1;
continue;
}
if (arg === '--post-proof') {
const next = rawArgs[index + 1];
if (!next) throw new Error('--post-proof requires a value');
options.postProof = next;
index += 1;
continue;
}
if (arg === '--dry-run') {
options.dryRun = true;
continue;
Expand Down
17 changes: 16 additions & 1 deletion src/cli/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ const sandboxModule = require('../sandbox');
const toolchainModule = require('../toolchain');
const finishCommands = require('../finish');
const doctorModule = require('../doctor');
const sessionSeverityReport = require('../report/session-severity');
const {
fs,
path,
Expand Down Expand Up @@ -2433,15 +2434,29 @@ function report(rawArgs) {
console.log(
`${TOOL_NAME} report commands:\n` +
` ${TOOL_NAME} report scorecard [--target <path>] [--repo github.com/<owner>/<repo>] [--scorecard-json <file>] [--output-dir <path>] [--date YYYY-MM-DD] [--dry-run] [--json]\n` +
` ${TOOL_NAME} report session-severity --task-size <narrow-patch|medium-change|large-change> --tokens <count> --exec-count <count> --write-stdin-count <count> --completion-before-tail <yes|no> [--expected-bound <count>] [--fragmentation <preset|0-25>] [--finish-path <preset|0-15>] [--post-proof <preset|0-15>] [--json]\n` +
`\n` +
`Examples:\n` +
` ${TOOL_NAME} report scorecard --repo github.com/recodeecom/multiagent-safety\n` +
` ${TOOL_NAME} report scorecard --scorecard-json ./scorecard.json --date 2026-04-10`,
` ${TOOL_NAME} report scorecard --scorecard-json ./scorecard.json --date 2026-04-10\n` +
` ${TOOL_NAME} report session-severity --task-size narrow-patch --tokens 3850000 --exec-count 18 --write-stdin-count 6 --completion-before-tail yes --fragmentation 14 --finish-path 6 --post-proof 4`,
);
process.exitCode = 0;
return;
}

if (subcommand === 'session-severity') {
const payload = sessionSeverityReport.buildSessionSeverityReport(options);
if (options.json) {
process.stdout.write(`${JSON.stringify(payload, null, 2)}\n`);
process.exitCode = 0;
return;
}
console.log(sessionSeverityReport.renderSessionSeverityReport(payload));
process.exitCode = 0;
return;
}

if (subcommand !== 'scorecard') {
throw new Error(`Unknown report subcommand: ${subcommand}`);
}
Expand Down
2 changes: 1 addition & 1 deletion src/context.js
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ const CLI_COMMAND_DESCRIPTIONS = [
['release', 'Create or update the current GitHub release with README-generated notes'],
['agents', 'Start/stop repo-scoped review + cleanup bots'],
['prompt', 'Print AI setup checklist or named slices (--exec, --part, --list-parts, --snippet)'],
['report', 'Security/safety reports (e.g. OpenSSF scorecard)'],
['report', 'Security/safety reports (e.g. OpenSSF scorecard, session severity)'],
['help', 'Show this help output'],
['version', 'Print GitGuardex version'],
];
Expand Down
213 changes: 213 additions & 0 deletions src/report/session-severity.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
// Default expected token upper bound for each supported --task-size value.
// Used as the cost baseline whenever no explicit --expected-bound is supplied.
const TASK_SIZE_UPPER_BOUNDS = {
'narrow-patch': 1_800_000,
'medium-change': 4_000_000,
'large-change': 8_000_000,
};

// Accepted --task-size names, derived from the keys of the bounds table.
const TASK_SIZE_VALUES = new Set(Object.keys(TASK_SIZE_UPPER_BOUNDS));
// Named presets accepted by --fragmentation and the score each one maps to.
const FRAGMENTATION_PRESET_SCORES = {
clean: 0,
'few-extra-checks': 5,
'repeated-follow-ups': 10,
looping: 18,
'dominant-loop': 25,
};
// Named presets accepted by --finish-path and the score each one maps to.
const FINISH_PATH_PRESET_SCORES = {
'clear-early': 0,
'minor-hesitation': 5,
'late-decision': 10,
reopening: 15,
};
// Named presets accepted by --post-proof and the score each one maps to.
const POST_PROOF_PRESET_SCORES = {
'stops-soon': 0,
'small-tail': 5,
'notable-tail': 10,
'heavy-tail': 15,
};
// Ranking order for dimensions whose scores are equal: earlier entries sort first.
const DRIVER_TIE_BREAK = ['fragmentation', 'writeStdin', 'finishPath', 'postProof', 'cost'];
// Human-readable label for each scoring dimension, used when naming the
// primary and secondary drivers in the report.
const DRIVER_LABELS = {
cost: 'cost vs expected scope',
fragmentation: 'turn fragmentation',
writeStdin: 'write_stdin churn',
finishPath: 'finish-path discipline',
postProof: 'post-proof drift',
};

/**
 * Parse a required count option as a base-10 integer and enforce its lower bound.
 *
 * The whole trimmed value must be an optionally signed run of decimal digits,
 * so inputs such as "12abc", "3.7" or "0x10" are rejected instead of being
 * silently truncated by Number.parseInt. A numeric 0 is accepted when zero is
 * allowed (only null/undefined count as "missing").
 *
 * @param {string} name - Flag name used in the error message (e.g. "--tokens").
 * @param {string|number|null|undefined} rawValue - Raw option value.
 * @param {{allowZero?: boolean}} [options] - With allowZero=false, 0 is rejected as well.
 * @returns {number} The parsed integer.
 * @throws {Error} When the value is missing, malformed, or below the allowed minimum.
 */
function parseRequiredPositiveInteger(name, rawValue, { allowZero = true } = {}) {
  const text = String(rawValue ?? '').trim();
  // Validate the full string first: parseInt alone stops at the first
  // non-digit and would accept trailing garbage.
  const parsed = /^[+-]?\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
  const minimum = allowZero ? 0 : 1;
  if (!Number.isSafeInteger(parsed) || parsed < minimum) {
    throw new Error(`${name} requires ${allowZero ? 'a non-negative integer' : 'a positive integer'} value`);
  }
  return parsed;
}

/**
 * Parse a yes/no style option into a boolean.
 *
 * Accepts "yes"/"true"/"1" and "no"/"false"/"0" (case-insensitive, surrounding
 * whitespace ignored). Real booleans and the numbers 1/0 are accepted too:
 * only null/undefined are treated as missing, so a literal `false` or `0`
 * is not mistaken for an absent value.
 *
 * @param {string} name - Flag name used in the error message.
 * @param {string|boolean|number|null|undefined} rawValue - Raw option value.
 * @returns {boolean} The parsed flag.
 * @throws {Error} When the value is missing or not one of the accepted spellings.
 */
function parseBooleanFlag(name, rawValue) {
  const normalized = String(rawValue ?? '').trim().toLowerCase();
  if (normalized === 'yes' || normalized === 'true' || normalized === '1') {
    return true;
  }
  if (normalized === 'no' || normalized === 'false' || normalized === '0') {
    return false;
  }
  throw new Error(`${name} requires yes/no (or true/false, 1/0)`);
}

/**
 * Round a value to the nearest integer and confine it to [min, max].
 *
 * @param {number} value - Score to normalise.
 * @param {number} min - Lowest allowed result.
 * @param {number} max - Highest allowed result.
 * @returns {number} The rounded, bounded score.
 */
function clampScore(value, min, max) {
  const rounded = Math.round(value);
  const cappedAtMax = Math.min(max, rounded);
  return Math.max(min, cappedAtMax);
}

/**
 * Validate the --task-size option against the supported task-size names.
 *
 * @param {string} rawTaskSize - Raw option value; surrounding whitespace is ignored.
 * @returns {string} The trimmed task-size name.
 * @throws {Error} When the value is missing or not a supported name.
 */
function parseTaskSize(rawTaskSize) {
  const candidate = String(rawTaskSize || '').trim();
  if (TASK_SIZE_VALUES.has(candidate)) {
    return candidate;
  }
  const allowed = Array.from(TASK_SIZE_VALUES).join(', ');
  throw new Error(`--task-size must be one of: ${allowed}`);
}

/**
 * Pick the token upper bound used as the cost baseline.
 *
 * An explicit --expected-bound (must be a positive integer) wins; otherwise
 * the default bound for the task size is used.
 *
 * @param {string} taskSize - Already-validated task-size name.
 * @param {string} rawExpectedBound - Raw --expected-bound value, empty when not given.
 * @returns {number} The expected upper bound in tokens.
 * @throws {Error} When an explicit bound is supplied but is not a positive integer.
 */
function resolveExpectedUpperBound(taskSize, rawExpectedBound) {
  if (!rawExpectedBound) {
    return TASK_SIZE_UPPER_BOUNDS[taskSize];
  }
  return parseRequiredPositiveInteger('--expected-bound', rawExpectedBound, { allowZero: false });
}

/**
 * Score dimension A: how far actual token usage exceeds the expected bound.
 *
 * The tokens/expectedUpperBound ratio is matched against inclusive ceilings;
 * anything above the last ceiling gets the maximum score of 30.
 *
 * @param {number} tokens - Actual tokens used.
 * @param {number} expectedUpperBound - Expected upper bound for the task.
 * @returns {number} Cost score (0, 5, 10, 18, 24 or 30).
 */
function scoreCost(tokens, expectedUpperBound) {
  const ratio = tokens / expectedUpperBound;
  // [inclusive ratio ceiling, score] pairs, checked in ascending order.
  const bands = [
    [1.0, 0],
    [1.5, 5],
    [2.5, 10],
    [4.0, 18],
    [6.0, 24],
  ];
  for (const [ceiling, score] of bands) {
    if (ratio <= ceiling) {
      return score;
    }
  }
  return 30;
}

/**
 * Score dimension B: turn fragmentation.
 *
 * With an override, a known preset name maps to its preset score and any
 * other value is parsed as a non-negative integer and clamped to 0-25.
 * Without an override the score is derived from the exec count.
 *
 * @param {number} execCount - Number of exec calls in the session.
 * @param {string} override - Optional --fragmentation value (preset name or number).
 * @returns {number} Fragmentation score in the range 0-25.
 * @throws {Error} When the override is neither a preset nor a non-negative integer.
 */
function scoreFragmentation(execCount, override) {
  if (override) {
    const isPreset = Object.prototype.hasOwnProperty.call(FRAGMENTATION_PRESET_SCORES, override);
    if (isPreset) {
      return FRAGMENTATION_PRESET_SCORES[override];
    }
    const explicit = parseRequiredPositiveInteger('--fragmentation', override);
    return clampScore(explicit, 0, 25);
  }
  // [inclusive exec-count ceiling, score] pairs, checked in ascending order.
  const bands = [
    [4, 0],
    [8, 5],
    [16, 10],
    [28, 18],
  ];
  const match = bands.find(([ceiling]) => execCount <= ceiling);
  return match ? match[1] : 25;
}

/**
 * Score dimension C: write_stdin churn, derived from the write_stdin call count.
 *
 * @param {number} writeStdinCount - Number of write_stdin calls in the session.
 * @returns {number} Churn score (0, 5, 10 or 15).
 */
function scoreWriteStdin(writeStdinCount) {
  // [inclusive count ceiling, score] pairs, checked in ascending order.
  const bands = [
    [0, 0],
    [3, 5],
    [6, 10],
  ];
  for (const [ceiling, score] of bands) {
    if (writeStdinCount <= ceiling) {
      return score;
    }
  }
  return 15;
}

/**
 * Score dimension D: finish-path discipline.
 *
 * With an override, a known preset name maps to its preset score and any
 * other value is parsed as a non-negative integer and clamped to 0-15.
 * Without an override the score is 0 when completion came before the tail,
 * otherwise 5.
 *
 * @param {boolean} completionBeforeTail - Parsed --completion-before-tail flag.
 * @param {string} override - Optional --finish-path value (preset name or number).
 * @returns {number} Finish-path score in the range 0-15.
 * @throws {Error} When the override is neither a preset nor a non-negative integer.
 */
function scoreFinishPath(completionBeforeTail, override) {
  if (!override) {
    return completionBeforeTail ? 0 : 5;
  }
  const isPreset = Object.prototype.hasOwnProperty.call(FINISH_PATH_PRESET_SCORES, override);
  if (isPreset) {
    return FINISH_PATH_PRESET_SCORES[override];
  }
  const explicit = parseRequiredPositiveInteger('--finish-path', override);
  return clampScore(explicit, 0, 15);
}

/**
 * Score dimension E: post-proof drift.
 *
 * With an override, a known preset name maps to its preset score and any
 * other value is parsed as a non-negative integer and clamped to 0-15.
 * Without an override the score is 0 when completion came before the tail,
 * otherwise 10.
 *
 * @param {boolean} completionBeforeTail - Parsed --completion-before-tail flag.
 * @param {string} override - Optional --post-proof value (preset name or number).
 * @returns {number} Post-proof score in the range 0-15.
 * @throws {Error} When the override is neither a preset nor a non-negative integer.
 */
function scorePostProof(completionBeforeTail, override) {
  if (!override) {
    return completionBeforeTail ? 0 : 10;
  }
  const isPreset = Object.prototype.hasOwnProperty.call(POST_PROOF_PRESET_SCORES, override);
  if (isPreset) {
    return POST_PROOF_PRESET_SCORES[override];
  }
  const explicit = parseRequiredPositiveInteger('--post-proof', override);
  return clampScore(explicit, 0, 15);
}

/**
 * Map a total severity score to its rubric label.
 *
 * @param {number} total - Sum of the five dimension scores.
 * @returns {string} 'Healthy', 'Mildly fragmented', 'Inefficient', 'Runaway' or 'Catastrophic'.
 */
function labelForTotal(total) {
  // [inclusive total ceiling, label] pairs, checked in ascending order.
  const bands = [
    [15, 'Healthy'],
    [30, 'Mildly fragmented'],
    [50, 'Inefficient'],
    [75, 'Runaway'],
  ];
  const match = bands.find(([ceiling]) => total <= ceiling);
  return match ? match[1] : 'Catastrophic';
}

/**
 * Build the session-severity report payload from parsed CLI options.
 *
 * Validates the raw option values, scores the five rubric dimensions, sums
 * them into a total, labels the total, and ranks the non-zero dimensions to
 * name a primary driver and any secondary drivers.
 *
 * @param {object} options - Parsed report options (taskSize, tokens, execCount,
 *   writeStdinCount, completionBeforeTail, expectedBound, fragmentation,
 *   finishPath, postProof).
 * @returns {object} Report payload: the validated inputs, costRatio, per-dimension
 *   scores plus total, label, primaryDriver, secondaries and a one-line summary.
 * @throws {Error} When any required option is missing or invalid.
 */
function buildSessionSeverityReport(options) {
  // Validation order is kept stable so the first invalid flag is the one reported.
  const taskSize = parseTaskSize(options.taskSize);
  const tokens = parseRequiredPositiveInteger('--tokens', options.tokens);
  const execCount = parseRequiredPositiveInteger('--exec-count', options.execCount);
  const writeStdinCount = parseRequiredPositiveInteger('--write-stdin-count', options.writeStdinCount);
  const completionBeforeTail = parseBooleanFlag('--completion-before-tail', options.completionBeforeTail);
  const expectedUpperBound = resolveExpectedUpperBound(taskSize, options.expectedBound);

  const cost = scoreCost(tokens, expectedUpperBound);
  const fragmentation = scoreFragmentation(execCount, options.fragmentation);
  const writeStdin = scoreWriteStdin(writeStdinCount);
  const finishPath = scoreFinishPath(completionBeforeTail, options.finishPath);
  const postProof = scorePostProof(completionBeforeTail, options.postProof);
  const dimensionScores = { cost, fragmentation, writeStdin, finishPath, postProof };

  const total = cost + fragmentation + writeStdin + finishPath + postProof;
  const label = labelForTotal(total);

  // Only dimensions that contributed to the total can be drivers.
  const drivers = [];
  for (const [key, score] of Object.entries(dimensionScores)) {
    if (score > 0) {
      drivers.push({ key, score, label: DRIVER_LABELS[key] });
    }
  }
  // Highest score first; equal scores fall back to the fixed tie-break order.
  const tieBreakRank = (key) => DRIVER_TIE_BREAK.indexOf(key);
  drivers.sort((left, right) => (
    right.score === left.score
      ? tieBreakRank(left.key) - tieBreakRank(right.key)
      : right.score - left.score
  ));

  const primaryDriver = drivers.length > 0 ? drivers[0].label : 'none';
  const secondaries = drivers.slice(1).map((driver) => driver.label);
  const secondariesText = secondaries.length > 0 ? secondaries.join(', ') : 'none';

  return {
    taskSize,
    expectedUpperBound,
    tokens,
    execCount,
    writeStdinCount,
    completionBeforeTail,
    costRatio: tokens / expectedUpperBound,
    scores: {
      ...dimensionScores,
      total,
    },
    label,
    primaryDriver,
    secondaries,
    outputLine: `Score ${total}/100 — ${label}. Primary: ${primaryDriver}. Secondaries: ${secondariesText}.`,
  };
}

/**
 * Render a session-severity report payload as multi-line plain text.
 *
 * Output is the one-line summary, then the inputs, the per-dimension scores
 * (A-E), and the total/label/drivers, with blank lines between sections.
 *
 * @param {object} report - Payload with the fields produced by buildSessionSeverityReport.
 * @returns {string} Newline-joined report text (no trailing newline).
 */
function renderSessionSeverityReport(report) {
  const { scores, secondaries } = report;
  const completionText = report.completionBeforeTail ? 'yes' : 'no';
  const secondaryText = secondaries.length > 0 ? secondaries.join(', ') : 'none';

  const inputLines = [
    `Task size: ${report.taskSize}`,
    `Expected upper bound: ${report.expectedUpperBound}`,
    `Actual tokens: ${report.tokens}`,
    `Exec count: ${report.execCount}`,
    `write_stdin count: ${report.writeStdinCount}`,
    `Completion before tail churn: ${completionText}`,
    `Cost ratio: ${report.costRatio.toFixed(2)}x`,
  ];
  const scoreLines = [
    `A. Cost vs expected scope: ${scores.cost}`,
    `B. Turn fragmentation: ${scores.fragmentation}`,
    `C. write_stdin churn: ${scores.writeStdin}`,
    `D. Finish-path discipline: ${scores.finishPath}`,
    `E. Post-proof drift: ${scores.postProof}`,
  ];
  const summaryLines = [
    `Total: ${scores.total}`,
    `Label: ${report.label}`,
    `Primary driver: ${report.primaryDriver}`,
    `Secondary drivers: ${secondaryText}`,
  ];

  return [report.outputLine, '', ...inputLines, '', ...scoreLines, '', ...summaryLines].join('\n');
}

// Public surface of the session-severity report module: the default bounds
// table, the payload builder, the text renderer, and the total-to-label mapper.
module.exports = {
TASK_SIZE_UPPER_BOUNDS,
buildSessionSeverityReport,
renderSessionSeverityReport,
labelForTotal,
};
Loading