From 2a0d2a26ef61f6586fd9e7eb12aa52272b869fcb Mon Sep 17 00:00:00 2001 From: NagyVikt Date: Wed, 22 Apr 2026 22:40:04 +0200 Subject: [PATCH] Keep Guardex prompts from encouraging fragmented probe loops Guardex already preferred phase-based execution, but the copied task-loop prompt was still too sparse. This change makes the repo contract and gx prompt say low output is fine when bounded, and that long runs should collapse back to inspect once, patch once, verify once, then finish. Constraint: Prompt copy must stay short enough for gx prompt output Rejected: Only patch AGENTS.md | gx prompt would keep emitting weaker task-loop wording Confidence: high Scope-risk: narrow Directive: Keep task-loop copy phase-shaped; do not reintroduce vague micro-step prompt text Tested: node --test test/prompt.test.js; openspec validate --specs; git diff --check Not-tested: Full gx prompt output in a live protected-repo bootstrap --- AGENTS.md | 3 +++ .../proposal.md | 7 ++++++ .../tasks.md | 22 +++++++++++++++++++ src/context.js | 3 ++- test/prompt.test.js | 3 +++ 5 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 openspec/changes/tighten-token-fragmentation-prompts/proposal.md create mode 100644 openspec/changes/tighten-token-fragmentation-prompts/tasks.md diff --git a/AGENTS.md b/AGENTS.md index bad0d365..c23b2f30 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -21,6 +21,7 @@ When writing complex features or significant refactors, use an ExecPlan (as desc 2. grouped edits or grouped repo actions 3. focused verification 4. compact summary +- Low output alone is not a defect. A bounded run that finishes in roughly <=10 steps is usually fine; low output stretched across 20+ steps with rising input is fragmentation. - Treat obvious follow-on actions as part of the active phase; do not stop for tiny internal checkpoints. - If context grows or the session becomes fragmented, write a short working summary and continue from it. @@ -30,12 +31,14 @@ When writing complex features or significant refactors, use an ExecPlan (as desc - Prefer targeted reads: `rg`, `head`, `tail`, `git diff`, and exact line ranges. - Keep command output compact and relevant. - Avoid repeated status checks unless something changed. +- Treat repeated `sed` / `cat` peeks, tiny diagnostic retries, and repeated `write_stdin` as red flags. When they appear, stop the probe loop and reset to one bounded phase. ### Command Discipline - Batch related shell commands whenever safe. - Prefer one-shot non-interactive commands, scripts, or exact invocations over interactive loops or repeated stdin driving. - For diagnosis, gather the relevant evidence in one pass, then summarize once. +- If the session turns fragmented, collapse back to inspect once, patch once, verify once, and summarize once. ### Git And PR Workflow diff --git a/openspec/changes/tighten-token-fragmentation-prompts/proposal.md b/openspec/changes/tighten-token-fragmentation-prompts/proposal.md new file mode 100644 index 00000000..7354e27b --- /dev/null +++ b/openspec/changes/tighten-token-fragmentation-prompts/proposal.md @@ -0,0 +1,7 @@ +# Proposal: tighten Guardex token-fragmentation prompts + +Guardex already prefers phase-based execution, but the copied `gx prompt` task loop still underspecifies the anti-fragmentation pattern. This change makes the repo contract and prompt output teach the stronger classifier: low output alone is fine when the run is bounded, while long low-output sessions with repeated peeks or `write_stdin` loops are fragmentation. + +- add the bounded-vs-fragmented classifier to `AGENTS.md` +- teach `gx prompt` to say inspect once, patch once, verify once, then finish +- lock the prompt wording with focused tests diff --git a/openspec/changes/tighten-token-fragmentation-prompts/tasks.md b/openspec/changes/tighten-token-fragmentation-prompts/tasks.md new file mode 100644 index 00000000..c9d54064 --- /dev/null +++ b/openspec/changes/tighten-token-fragmentation-prompts/tasks.md @@ -0,0 +1,22 @@ +## 1. Spec + +- [x] 1.1 Capture why Guardex prompt surfaces should blame long fragmented runs, not low output alone. + +## 2. Implementation + +- [x] 2.1 Update `AGENTS.md` with the bounded-vs-fragmented classifier and stop-loop guidance. +- [x] 2.2 Update `src/context.js` task-loop prompt text to push inspect-once / patch-once / verify-once execution. +- [x] 2.3 Update `test/prompt.test.js` to lock the new prompt wording. + +## 3. Verification + +- [x] 3.1 Run `node --test test/prompt.test.js`. +- [x] 3.2 Run `openspec validate --specs`. + +Verification evidence: +- `node --test test/prompt.test.js` (pass) +- `openspec validate --specs` (no items found to validate) + +## 4. Cleanup + +- [ ] 4.1 Commit, push, open/update PR, merge, and clean up the worktree. diff --git a/src/context.js b/src/context.js index 2ff298e3..1d667b6a 100644 --- a/src/context.js +++ b/src/context.js @@ -425,7 +425,8 @@ const AI_SETUP_PARTS = [ label: 'Task loop', promptLines: [ 'gx branch start "" ""', - 'then gx locks claim --branch "" -> gx branch finish', + 'then gx locks claim --branch "" -> inspect once -> patch once -> verify once -> gx branch finish', + 'batch discovery, git/PR, and CI by phase; avoid repeated peeks or stdin loops', ], execLines: [ 'gx branch start "" ""', diff --git a/test/prompt.test.js b/test/prompt.test.js index 1ac14efa..7d983da0 100644 --- a/test/prompt.test.js +++ b/test/prompt.test.js @@ -73,6 +73,8 @@ test('prompt outputs AI setup instructions', () => { assert.match(result.stdout, /gx doctor/); assert.match(result.stdout, /gx branch start/); assert.match(result.stdout, /gx locks claim/); + assert.match(result.stdout, /inspect once -> patch once -> verify once -> gx branch finish/); + assert.match(result.stdout, /avoid repeated peeks or stdin loops/); assert.match(result.stdout, /gx finish --all/); assert.match(result.stdout, /\/opsx:propose/); assert.match(result.stdout, /https:\/\/github\.com\/apps\/pull/); @@ -101,6 +103,7 @@ test('prompt --part outputs only the selected checklist slices', () => { assert.equal(result.status, 0, result.stderr || result.stdout); assert.match(result.stdout, /^Task loop:/m); assert.match(result.stdout, /gx branch start "" ""/); + assert.match(result.stdout, /inspect once -> patch once -> verify once -> gx branch finish/); assert.match(result.stdout, /^Finish:/m); assert.match(result.stdout, /gx finish --all/); assert.doesNotMatch(result.stdout, /GitGuardex \(gx\) setup checklist/);