From 80c5b0bbddb1b400f02f50152df55d07d3fca6c1 Mon Sep 17 00:00:00 2001
From: NagyVikt <nagy.viktordp@gmail.com>
Date: Wed, 22 Apr 2026 23:47:45 +0200
Subject: [PATCH] Expose a built-in session severity rubric for Guardex lanes

The repo needed a first-class CLI surface for scoring session health without relying on side scripts. This threads a native report subcommand through the existing help, parsing, and shared context paths and locks the behavior with focused regression coverage.

Constraint: Keep the scoring flow inside the existing report command surface and T1 notes-only OpenSpec lane
Rejected: Ship a standalone side script | would drift from CLI parsing and help text
Confidence: high
Scope-risk: narrow
Directive: Keep help text, rubric weights, and report tests synchronized when the scoring model changes
Tested: node --test test/cli-args-dispatch.test.js test/report.test.js; node bin/multiagent-safety.js report help; node bin/multiagent-safety.js report session-severity --task-size narrow-patch --tokens 3850000 --exec-count 18 --write-stdin-count 6 --completion-before-tail yes --fragmentation 14 --finish-path 6 --post-proof 4; git diff --check
Not-tested: remote PR merge path until guarded finish completes
---
 .../notes.md                                  |  10 +
 src/cli/args.js                               |  72 ++++++
 src/cli/main.js                               |  17 +-
 src/context.js                                |   2 +-
 src/report/session-severity.js                | 213 ++++++++++++++++++
 test/cli-args-dispatch.test.js                |  39 ++++
 test/report.test.js                           |  63 ++++++
 7 files changed, 414 insertions(+), 2 deletions(-)
 create mode 100644 openspec/changes/agent-codex-add-session-severity-scoring-command-2026-04-22-23-28/notes.md
 create mode 100644 src/report/session-severity.js

diff --git a/openspec/changes/agent-codex-add-session-severity-scoring-command-2026-04-22-23-28/notes.md b/openspec/changes/agent-codex-add-session-severity-scoring-command-2026-04-22-23-28/notes.md
new file mode 100644
index 00000000..33cf06a1
--- /dev/null
+++ b/openspec/changes/agent-codex-add-session-severity-scoring-command-2026-04-22-23-28/notes.md
@@ -0,0 +1,10 @@
+# agent-codex-add-session-severity-scoring-command-2026-04-22-23-28 (minimal / T1)
+
+- Add `gx report session-severity` as a native GitGuardex report subcommand with the fixed weighted rubric for healthy / mildly fragmented / inefficient / runaway / catastrophic sessions.
+- Keep the scoring logic in a small report module and thread it through the existing `gx report` help, parsing, and output surface instead of shipping a repo-local side script.
+- Lock the new report surface with focused CLI arg parsing and report integration tests.
+- Verification:
+  - `node --test test/cli-args-dispatch.test.js test/report.test.js`
+  - `node bin/multiagent-safety.js report help`
+  - `node bin/multiagent-safety.js report session-severity --task-size narrow-patch --tokens 3850000 --exec-count 18 --write-stdin-count 6 --completion-before-tail yes --fragmentation 14 --finish-path 6 --post-proof 4`
+  - `git diff --check`
diff --git a/src/cli/args.js b/src/cli/args.js
index e7bf9f92..599e623c 100644
--- a/src/cli/args.js
+++ b/src/cli/args.js
@@ -349,6 +349,15 @@ function parseReportArgs(rawArgs) {
     scorecardJson: '',
     outputDir: '',
     date: '',
+    taskSize: '',
+    tokens: '',
+    execCount: '',
+    writeStdinCount: '',
+    completionBeforeTail: '',
+    expectedBound: '',
+    fragmentation: '',
+    finishPath: '',
+    postProof: '',
     dryRun: false,
     json: false,
   };
@@ -390,6 +399,69 @@ function parseReportArgs(rawArgs) {
       index += 1;
       continue;
     }
+    if (arg === '--task-size') {
+      const next = rawArgs[index + 1];
+      if (!next) throw new Error('--task-size requires a value');
+      options.taskSize = next;
+      index += 1;
+      continue;
+    }
+    if (arg === '--tokens') {
+      const next = rawArgs[index + 1];
+      if (!next) throw new Error('--tokens requires a value');
+      options.tokens = next;
+      index += 1;
+      continue;
+    }
+    if (arg === '--exec-count') {
+      const next = rawArgs[index + 1];
+      if (!next) throw new Error('--exec-count requires a value');
+      options.execCount = next;
+      index += 1;
+      continue;
+    }
+    if (arg === '--write-stdin-count') {
+      const next = rawArgs[index + 1];
+      if (!next) throw new Error('--write-stdin-count requires a value');
+      options.writeStdinCount = next;
+      index += 1;
+      continue;
+    }
+    if (arg === '--completion-before-tail') {
+      const next = rawArgs[index + 1];
+      if (!next) throw new Error('--completion-before-tail requires yes or no');
+      options.completionBeforeTail = next;
+      index += 1;
+      continue;
+    }
+    if (arg === '--expected-bound') {
+      const next = rawArgs[index + 1];
+      if (!next) throw new Error('--expected-bound requires a value');
+      options.expectedBound = next;
+      index += 1;
+      continue;
+    }
+    if (arg === '--fragmentation') {
+      const next = rawArgs[index + 1];
+      if (!next) throw new Error('--fragmentation requires a value');
+      options.fragmentation = next;
+      index += 1;
+      continue;
+    }
+    if (arg === '--finish-path') {
+      const next = rawArgs[index + 1];
+      if (!next) throw new Error('--finish-path requires a value');
+      options.finishPath = next;
+      index += 1;
+      continue;
+    }
+    if (arg === '--post-proof') {
+      const next = rawArgs[index + 1];
+      if (!next) throw new Error('--post-proof requires a value');
+      options.postProof = next;
+      index += 1;
+      continue;
+    }
     if (arg === '--dry-run') {
       options.dryRun = true;
       continue;
diff --git a/src/cli/main.js b/src/cli/main.js
index 56e99e90..434c55ff 100755
--- a/src/cli/main.js
+++ b/src/cli/main.js
@@ -5,6 +5,7 @@ const sandboxModule = require('../sandbox');
 const toolchainModule = require('../toolchain');
 const finishCommands = require('../finish');
 const doctorModule = require('../doctor');
+const sessionSeverityReport = require('../report/session-severity');
 const {
   fs,
   path,
@@ -2433,15 +2434,29 @@ function report(rawArgs) {
     console.log(
       `${TOOL_NAME} report commands:\n` +
       `  ${TOOL_NAME} report scorecard [--target <path>] [--repo github.com/<owner>/<repo>] [--scorecard-json <file>] [--output-dir <path>] [--date YYYY-MM-DD] [--dry-run] [--json]\n` +
+      `  ${TOOL_NAME} report session-severity --task-size <narrow-patch|medium-change|large-change> --tokens <count> --exec-count <count> --write-stdin-count <count> --completion-before-tail <yes|no> [--expected-bound <count>] [--fragmentation <preset|0-25>] [--finish-path <preset|0-15>] [--post-proof <preset|0-15>] [--json]\n` +
       `\n` +
       `Examples:\n` +
       `  ${TOOL_NAME} report scorecard --repo github.com/recodeecom/multiagent-safety\n` +
-      `  ${TOOL_NAME} report scorecard --scorecard-json ./scorecard.json --date 2026-04-10`,
+      `  ${TOOL_NAME} report scorecard --scorecard-json ./scorecard.json --date 2026-04-10\n` +
+      `  ${TOOL_NAME} report session-severity --task-size narrow-patch --tokens 3850000 --exec-count 18 --write-stdin-count 6 --completion-before-tail yes --fragmentation 14 --finish-path 6 --post-proof 4`,
     );
     process.exitCode = 0;
     return;
   }
 
+  if (subcommand === 'session-severity') {
+    const payload = sessionSeverityReport.buildSessionSeverityReport(options);
+    if (options.json) {
+      process.stdout.write(`${JSON.stringify(payload, null, 2)}\n`);
+      process.exitCode = 0;
+      return;
+    }
+    console.log(sessionSeverityReport.renderSessionSeverityReport(payload));
+    process.exitCode = 0;
+    return;
+  }
+
   if (subcommand !== 'scorecard') {
     throw new Error(`Unknown report subcommand: ${subcommand}`);
   }
diff --git a/src/context.js b/src/context.js
index 1d667b6a..cf2f21ce 100644
--- a/src/context.js
+++ b/src/context.js
@@ -361,7 +361,7 @@ const CLI_COMMAND_DESCRIPTIONS = [
   ['release', 'Create or update the current GitHub release with README-generated notes'],
   ['agents', 'Start/stop repo-scoped review + cleanup bots'],
   ['prompt', 'Print AI setup checklist or named slices (--exec, --part, --list-parts, --snippet)'],
-  ['report', 'Security/safety reports (e.g. OpenSSF scorecard)'],
+  ['report', 'Security/safety reports (e.g. OpenSSF scorecard, session severity)'],
   ['help', 'Show this help output'],
   ['version', 'Print GitGuardex version'],
 ];
diff --git a/src/report/session-severity.js b/src/report/session-severity.js
new file mode 100644
index 00000000..809c0e57
--- /dev/null
+++ b/src/report/session-severity.js
@@ -0,0 +1,213 @@
+const TASK_SIZE_UPPER_BOUNDS = { // default expected token upper bound per --task-size (used when --expected-bound is not given)
+  'narrow-patch': 1_800_000,
+  'medium-change': 4_000_000,
+  'large-change': 8_000_000,
+};
+
+const TASK_SIZE_VALUES = new Set(Object.keys(TASK_SIZE_UPPER_BOUNDS)); // the accepted --task-size names
+const FRAGMENTATION_PRESET_SCORES = { // named --fragmentation presets; other overrides are parsed as integers and clamped to 0-25
+  clean: 0,
+  'few-extra-checks': 5,
+  'repeated-follow-ups': 10,
+  looping: 18,
+  'dominant-loop': 25,
+};
+const FINISH_PATH_PRESET_SCORES = { // named --finish-path presets; other overrides are parsed as integers and clamped to 0-15
+  'clear-early': 0,
+  'minor-hesitation': 5,
+  'late-decision': 10,
+  reopening: 15,
+};
+const POST_PROOF_PRESET_SCORES = { // named --post-proof presets; other overrides are parsed as integers and clamped to 0-15
+  'stops-soon': 0,
+  'small-tail': 5,
+  'notable-tail': 10,
+  'heavy-tail': 15,
+};
+const DRIVER_TIE_BREAK = ['fragmentation', 'writeStdin', 'finishPath', 'postProof', 'cost']; // equal scores rank in this order (earlier wins)
+const DRIVER_LABELS = { // human-readable driver name for each score key
+  cost: 'cost vs expected scope',
+  fragmentation: 'turn fragmentation',
+  writeStdin: 'write_stdin churn',
+  finishPath: 'finish-path discipline',
+  postProof: 'post-proof drift',
+};
+
+function parseRequiredPositiveInteger(name, rawValue, { allowZero = true } = {}) { // Strictly parse a base-10 count; throws Error naming the flag when invalid.
+  const parsed = /^\s*\+?\d+\s*$/.test(String(rawValue ?? '')) ? Number(rawValue) : Number.NaN; // parseInt alone would accept '12abc', '3.9', '1e3'
+  if (!Number.isSafeInteger(parsed) || (!allowZero && parsed === 0)) { // digits-only input is never negative; only NaN, overflow and 0 remain
+    throw new Error(`${name} requires ${allowZero ? 'a non-negative integer' : 'a positive integer'} value`);
+  }
+  return parsed;
+}
+
+/**
+ * Parse a yes/no style flag (also true/false, 1/0; case-insensitive) into a boolean; throws Error otherwise.
+ * Uses `??` so a programmatic `false` or `0` reads as "no" instead of collapsing to '' and throwing.
+ */
+function parseBooleanFlag(name, rawValue) {
+  const normalized = String(rawValue ?? '').trim().toLowerCase();
+  if (['yes', 'true', '1'].includes(normalized)) return true;
+  if (['no', 'false', '0'].includes(normalized)) return false;
+  throw new Error(`${name} requires yes/no (or true/false, 1/0)`);
+}
+
+function clampScore(value, min, max) { // Round to the nearest integer first, then clamp the result into [min, max].
+  return Math.max(min, Math.min(max, Math.round(value)));
+}
+
+function parseTaskSize(rawTaskSize) {
+  // Whitespace is trimmed but case is not folded: the value must equal a known task-size name exactly.
+  const candidate = String(rawTaskSize || '').trim();
+  if (TASK_SIZE_VALUES.has(candidate)) return candidate;
+  const allowedNames = [...TASK_SIZE_VALUES].join(', ');
+  throw new Error(`--task-size must be one of: ${allowedNames}`);
+}
+
+function resolveExpectedUpperBound(taskSize, rawExpectedBound) {
+  // An explicit --expected-bound (must be > 0) wins; otherwise fall back to the per-task-size default.
+  if (!rawExpectedBound) return TASK_SIZE_UPPER_BOUNDS[taskSize];
+  const strictlyPositive = { allowZero: false };
+  return parseRequiredPositiveInteger('--expected-bound', rawExpectedBound, strictlyPositive);
+}
+
+function scoreCost(tokens, expectedUpperBound) {
+  // Each pair is [highest ratio still inside the band, score]; a ratio above every band scores the 30-point cap.
+  const bands = [[1.0, 0], [1.5, 5], [2.5, 10], [4.0, 18], [6.0, 24]];
+  const usageRatio = tokens / expectedUpperBound;
+  for (const [ceiling, points] of bands) {
+    if (usageRatio <= ceiling) return points;
+  }
+  return 30;
+}
+
+function scoreFragmentation(execCount, override) {
+  // Without an override, derive the score from the exec-count bands (up to 4, 8, 16, 28, then above).
+  if (!override) {
+    if (execCount <= 4) return 0;
+    if (execCount <= 8) return 5;
+    if (execCount <= 16) return 10;
+    return execCount <= 28 ? 18 : 25;
+  }
+  // With an override, a named preset wins; anything else must be a non-negative integer, clamped into 0-25.
+  const hasPreset = Object.prototype.hasOwnProperty.call(FRAGMENTATION_PRESET_SCORES, override);
+  if (hasPreset) return FRAGMENTATION_PRESET_SCORES[override];
+  return clampScore(parseRequiredPositiveInteger('--fragmentation', override), 0, 25);
+}
+
+function scoreWriteStdin(writeStdinCount) {
+  // Bands: none -> 0, up to 3 -> 5, up to 6 -> 10, anything beyond -> 15.
+  if (writeStdinCount <= 0) return 0;
+  if (writeStdinCount <= 3) return 5;
+  return writeStdinCount <= 6 ? 10 : 15;
+}
+
+function scoreFinishPath(completionBeforeTail, override) {
+  // No override: 0 when completionBeforeTail is truthy, otherwise a 5-point penalty.
+  if (!override) return completionBeforeTail ? 0 : 5;
+  // Override: a named preset wins; anything else must parse as a non-negative integer, clamped into 0-15.
+  const hasPreset = Object.prototype.hasOwnProperty.call(FINISH_PATH_PRESET_SCORES, override);
+  if (hasPreset) return FINISH_PATH_PRESET_SCORES[override];
+  const numericOverride = parseRequiredPositiveInteger('--finish-path', override);
+  return clampScore(numericOverride, 0, 15);
+}
+
+function scorePostProof(completionBeforeTail, override) {
+  // No override: 0 when completionBeforeTail is truthy, otherwise a 10-point penalty.
+  if (!override) return completionBeforeTail ? 0 : 10;
+  // Override: a named preset wins; anything else must parse as a non-negative integer, clamped into 0-15.
+  const hasPreset = Object.prototype.hasOwnProperty.call(POST_PROOF_PRESET_SCORES, override);
+  if (hasPreset) return POST_PROOF_PRESET_SCORES[override];
+  const numericOverride = parseRequiredPositiveInteger('--post-proof', override);
+  return clampScore(numericOverride, 0, 15);
+}
+
+function labelForTotal(total) {
+  // Tiers are [inclusive upper limit, label] in ascending order; the first limit that covers the total wins.
+  // Totals above the last limit (and NaN, which matches no limit) are reported as Catastrophic.
+  const tiers = [[15, 'Healthy'], [30, 'Mildly fragmented'], [50, 'Inefficient'], [75, 'Runaway']];
+  const matchedTier = tiers.find(([limit]) => total <= limit);
+  return matchedTier ? matchedTier[1] : 'Catastrophic';
+}
+
+function buildSessionSeverityReport(options) { // Validate the raw option values and score the session; any invalid value throws an Error.
+  const taskSize = parseTaskSize(options.taskSize);
+  const tokens = parseRequiredPositiveInteger('--tokens', options.tokens);
+  const execCount = parseRequiredPositiveInteger('--exec-count', options.execCount);
+  const writeStdinCount = parseRequiredPositiveInteger('--write-stdin-count', options.writeStdinCount);
+  const completionBeforeTail = parseBooleanFlag('--completion-before-tail', options.completionBeforeTail);
+  const expectedUpperBound = resolveExpectedUpperBound(taskSize, options.expectedBound); // explicit --expected-bound, else the task-size default
+  const costRatio = tokens / expectedUpperBound;
+  const scores = { // the five rubric dimensions; a fragmentation/finishPath/postProof override replaces the derived score
+    cost: scoreCost(tokens, expectedUpperBound),
+    fragmentation: scoreFragmentation(execCount, options.fragmentation),
+    writeStdin: scoreWriteStdin(writeStdinCount),
+    finishPath: scoreFinishPath(completionBeforeTail, options.finishPath),
+    postProof: scorePostProof(completionBeforeTail, options.postProof),
+  };
+  const total = scores.cost + scores.fragmentation + scores.writeStdin + scores.finishPath + scores.postProof; // at most 30 + 25 + 15 + 15 + 15 = 100
+  const label = labelForTotal(total);
+  const rankedDimensions = Object.entries(scores)
+    .map(([key, score]) => ({ key, score, label: DRIVER_LABELS[key] }))
+    .filter((entry) => entry.score > 0) // a dimension that scored 0 is never reported as a driver
+    .sort((left, right) => { // highest score first; equal scores follow DRIVER_TIE_BREAK order
+      if (right.score !== left.score) {
+        return right.score - left.score;
+      }
+      return DRIVER_TIE_BREAK.indexOf(left.key) - DRIVER_TIE_BREAK.indexOf(right.key);
+    });
+  const primaryDriver = rankedDimensions[0] ? rankedDimensions[0].label : 'none'; // 'none' when every dimension scored 0
+  const secondaries = rankedDimensions.slice(1).map((entry) => entry.label);
+
+  return { // plain object: serialised as-is for --json and consumed by renderSessionSeverityReport for text output
+    taskSize,
+    expectedUpperBound,
+    tokens,
+    execCount,
+    writeStdinCount,
+    completionBeforeTail,
+    costRatio,
+    scores: {
+      ...scores,
+      total,
+    },
+    label,
+    primaryDriver,
+    secondaries,
+    outputLine: `Score ${total}/100 — ${label}. Primary: ${primaryDriver}. Secondaries: ${
+      secondaries.length > 0 ? secondaries.join(', ') : 'none'
+    }.`,
+  };
+}
+
+function renderSessionSeverityReport(report) { // Format a report object as the multi-line plain-text summary.
+  const { scores, secondaries } = report;
+  const secondaryText = secondaries.length > 0 ? secondaries.join(', ') : 'none';
+  const lines = [
+    report.outputLine, '',
+    `Task size: ${report.taskSize}`,
+    `Expected upper bound: ${report.expectedUpperBound}`,
+    `Actual tokens: ${report.tokens}`,
+    `Exec count: ${report.execCount}`,
+    `write_stdin count: ${report.writeStdinCount}`,
+    `Completion before tail churn: ${report.completionBeforeTail ? 'yes' : 'no'}`,
+    `Cost ratio: ${report.costRatio.toFixed(2)}x`, '',
+    `A. Cost vs expected scope: ${scores.cost}`,
+    `B. Turn fragmentation: ${scores.fragmentation}`,
+    `C. write_stdin churn: ${scores.writeStdin}`,
+    `D. Finish-path discipline: ${scores.finishPath}`,
+    `E. Post-proof drift: ${scores.postProof}`, '',
+    `Total: ${scores.total}`,
+    `Label: ${report.label}`,
+    `Primary driver: ${report.primaryDriver}`,
+    `Secondary drivers: ${secondaryText}`,
+  ];
+  return lines.join('\n');
+}
+
+module.exports = { // src/cli/main.js calls buildSessionSeverityReport and renderSessionSeverityReport for `report session-severity`
+  TASK_SIZE_UPPER_BOUNDS,
+  buildSessionSeverityReport,
+  renderSessionSeverityReport,
+  labelForTotal,
+};
diff --git a/test/cli-args-dispatch.test.js b/test/cli-args-dispatch.test.js
index b71a2b02..9f1b08a2 100644
--- a/test/cli-args-dispatch.test.js
+++ b/test/cli-args-dispatch.test.js
@@ -15,6 +15,7 @@ const {
   parseSetupArgs,
   parseDoctorArgs,
   parseAgentsArgs,
+  parseReportArgs,
   parseCleanupArgs,
   parseMergeArgs,
   parseFinishArgs,
@@ -111,6 +112,43 @@ test('parseAgentsArgs applies interval overrides and validates the subcommand',
   });
 });
 
+test('parseReportArgs accepts the session-severity flag set', () => {
+  const options = parseReportArgs([
+    'session-severity',
+    '--task-size',
+    'medium-change',
+    '--tokens',
+    '2100000',
+    '--exec-count',
+    '12',
+    '--write-stdin-count',
+    '4',
+    '--completion-before-tail',
+    'no',
+    '--expected-bound',
+    '4000000',
+    '--fragmentation',
+    '10',
+    '--finish-path',
+    'late-decision',
+    '--post-proof',
+    'heavy-tail',
+    '--json',
+  ]);
+
+  assert.equal(options.subcommand, 'session-severity');
+  assert.equal(options.taskSize, 'medium-change');
+  assert.equal(options.tokens, '2100000');
+  assert.equal(options.execCount, '12');
+  assert.equal(options.writeStdinCount, '4');
+  assert.equal(options.completionBeforeTail, 'no');
+  assert.equal(options.expectedBound, '4000000');
+  assert.equal(options.fragmentation, '10');
+  assert.equal(options.finishPath, 'late-decision');
+  assert.equal(options.postProof, 'heavy-tail');
+  assert.equal(options.json, true);
+});
+
 test('parseCleanupArgs defaults idle minutes when watch mode is enabled', () => {
   const options = parseCleanupArgs(['--watch']);
   assert.equal(options.watch, true);
@@ -178,6 +216,7 @@ test('shared context keeps the drift-prone help text, gitignore paths, and relea
   assert.ok(MANAGED_GITIGNORE_PATHS.includes('!.vscode/'));
   assert.ok(MANAGED_GITIGNORE_PATHS.includes('.vscode/*'));
   assert.ok(MANAGED_GITIGNORE_PATHS.includes('!.vscode/settings.json'));
+  assert.match(descriptions.get('report'), /session severity/);
   assert.equal(MAINTAINER_RELEASE_REPO, repoRoot);
 });
 
diff --git a/test/report.test.js b/test/report.test.js
index 73a17a4d..1f44a93b 100644
--- a/test/report.test.js
+++ b/test/report.test.js
@@ -99,4 +99,67 @@ exit 1
   assert.match(remediation, /Verification loop/);
 });
 
+test('report session-severity prints the weighted rubric summary', () => {
+  const repoDir = initRepo();
+  const result = runNode([
+    'report',
+    'session-severity',
+    '--task-size',
+    'narrow-patch',
+    '--tokens',
+    '3850000',
+    '--exec-count',
+    '18',
+    '--write-stdin-count',
+    '6',
+    '--completion-before-tail',
+    'yes',
+    '--fragmentation',
+    '14',
+    '--finish-path',
+    '6',
+    '--post-proof',
+    '4',
+  ], repoDir);
+
+  assert.equal(result.status, 0, result.stderr || result.stdout);
+  assert.match(result.stdout, /Score 44\/100 — Inefficient\./);
+  assert.match(result.stdout, /Primary: turn fragmentation\./);
+  assert.match(result.stdout, /Secondaries: write_stdin churn, cost vs expected scope, finish-path discipline, post-proof drift\./);
+  assert.match(result.stdout, /A\. Cost vs expected scope: 10/);
+  assert.match(result.stdout, /Total: 44/);
+});
+
+test('report session-severity emits structured JSON when requested', () => {
+  const repoDir = initRepo();
+  const result = runNode([
+    'report',
+    'session-severity',
+    '--task-size',
+    'medium-change',
+    '--tokens',
+    '2100000',
+    '--exec-count',
+    '12',
+    '--write-stdin-count',
+    '4',
+    '--completion-before-tail',
+    'no',
+    '--fragmentation',
+    '10',
+    '--finish-path',
+    '10',
+    '--post-proof',
+    '10',
+    '--json',
+  ], repoDir);
+
+  assert.equal(result.status, 0, result.stderr || result.stdout);
+  const payload = JSON.parse(result.stdout);
+  assert.equal(payload.taskSize, 'medium-change');
+  assert.equal(payload.scores.total, 40);
+  assert.equal(payload.label, 'Inefficient');
+  assert.equal(payload.primaryDriver, 'turn fragmentation');
+});
+
 });