From c0633c951870f9f373858df42e42e846cac4ea79 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Mon, 30 Mar 2026 18:37:37 -0700 Subject: [PATCH 1/5] Fix attribution inflation from intermediate commits Entire-Checkpoint: 079c1c0e0eeb --- .../cli/strategy/manual_commit_attribution.go | 17 ++++++++--- .../manual_commit_attribution_test.go | 24 +++++++-------- .../strategy/manual_commit_condensation.go | 12 +++++--- .../cli/strategy/manual_commit_hooks.go | 30 ++++++++++++++----- 4 files changed, 55 insertions(+), 28 deletions(-) diff --git a/cmd/entire/cli/strategy/manual_commit_attribution.go b/cmd/entire/cli/strategy/manual_commit_attribution.go index 04c535bb1..3fd9513c8 100644 --- a/cmd/entire/cli/strategy/manual_commit_attribution.go +++ b/cmd/entire/cli/strategy/manual_commit_attribution.go @@ -179,8 +179,11 @@ func countLinesStr(content string) int { // 4. Estimate user self-modifications vs agent modifications using per-file tracking // 5. Compute percentages // -// attributionBaseCommit and headCommitHash are optional commit hashes for fast non-agent -// file detection via git diff-tree. When empty, falls back to go-git tree walk. +// parentCommitHash, attributionBaseCommit, and headCommitHash are optional commit hashes +// for fast non-agent file detection via git diff-tree. When a first parent exists, +// parentCommitHash→headCommitHash is preferred so only files from THIS commit count. +// For initial commits (no parent), falls back to attributionBaseCommit→headCommitHash. +// When hashes are empty, falls back to go-git tree walk. // // Note: Binary files (detected by null bytes) are silently excluded from attribution // calculations since line-based diffing only applies to text files. @@ -194,6 +197,7 @@ func CalculateAttributionWithAccumulated( filesTouched []string, promptAttributions []PromptAttribution, repoDir string, + parentCommitHash string, attributionBaseCommit string, headCommitHash string, ) *checkpoint.InitialAttribution { @@ -243,8 +247,13 @@ func CalculateAttributionWithAccumulated( } // Calculate total user edits to non-agent files (files not in filesTouched) - // These files are not in the shadow tree, so base→head captures ALL their user edits - allChangedFiles, err := getAllChangedFiles(ctx, baseTree, headTree, repoDir, attributionBaseCommit, headCommitHash) + // These files are not in the shadow tree. Prefer parent→head so only THIS + // commit's non-agent files count; initial commits fall back to session base→head. + diffBaseCommit := parentCommitHash + if diffBaseCommit == "" { + diffBaseCommit = attributionBaseCommit + } + allChangedFiles, err := getAllChangedFiles(ctx, baseTree, headTree, repoDir, diffBaseCommit, headCommitHash) if err != nil { logging.Warn(logging.WithComponent(ctx, "attribution"), "attribution: failed to enumerate changed files", diff --git a/cmd/entire/cli/strategy/manual_commit_attribution_test.go b/cmd/entire/cli/strategy/manual_commit_attribution_test.go index 478c6a951..5bc885be9 100644 --- a/cmd/entire/cli/strategy/manual_commit_attribution_test.go +++ b/cmd/entire/cli/strategy/manual_commit_attribution_test.go @@ -281,7 +281,7 @@ func TestCalculateAttributionWithAccumulated_BasicCase(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", ) require.NotNil(t, result, "expected non-nil result") @@ -338,7 +338,7 @@ func TestCalculateAttributionWithAccumulated_BugScenario(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", ) require.NotNil(t, result, "expected non-nil result") @@ -395,7 +395,7 @@ func TestCalculateAttributionWithAccumulated_DeletionOnly(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", ) require.NotNil(t, result, "expected non-nil result") @@ -444,7 +444,7 @@ func TestCalculateAttributionWithAccumulated_NoUserEdits(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", ) require.NotNil(t, result, "expected non-nil result") @@ -496,7 +496,7 @@ func TestCalculateAttributionWithAccumulated_NoAgentWork(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", ) require.NotNil(t, result, "expected non-nil result") @@ -550,7 +550,7 @@ func TestCalculateAttributionWithAccumulated_UserRemovesAllAgentLines(t *testing result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", ) require.NotNil(t, result, "expected non-nil result") @@ -619,7 +619,7 @@ func TestCalculateAttributionWithAccumulated_WithPromptAttributions(t *testing.T result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", ) require.NotNil(t, result, "expected non-nil result") @@ -662,7 +662,7 @@ func TestCalculateAttributionWithAccumulated_EmptyFilesTouched(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, []string{}, []PromptAttribution{}, "", "", "", + baseTree, shadowTree, headTree, []string{}, []PromptAttribution{}, "", "", "", "", ) if result != nil { @@ -716,7 +716,7 @@ func TestCalculateAttributionWithAccumulated_UserEditsNonAgentFile(t *testing.T) result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", ) require.NotNil(t, result, "expected non-nil result") @@ -1021,7 +1021,7 @@ func TestCalculateAttributionWithAccumulated_UserSelfModification(t *testing.T) result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", ) require.NotNil(t, result, "expected non-nil result") @@ -1092,7 +1092,7 @@ func TestCalculateAttributionWithAccumulated_MixedModifications(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", ) require.NotNil(t, result, "expected non-nil result") @@ -1173,7 +1173,7 @@ func TestCalculateAttributionWithAccumulated_UncommittedWorktreeFiles(t *testing result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", ) require.NotNil(t, result, "expected non-nil result") diff --git a/cmd/entire/cli/strategy/manual_commit_condensation.go b/cmd/entire/cli/strategy/manual_commit_condensation.go index 2e80e270e..a6e1e3b83 100644 --- a/cmd/entire/cli/strategy/manual_commit_condensation.go +++ b/cmd/entire/cli/strategy/manual_commit_condensation.go @@ -87,10 +87,11 @@ func (s *ManualCommitStrategy) getCheckpointLog(ctx context.Context, checkpointI // condenseOpts provides pre-resolved git objects to avoid redundant reads. type condenseOpts struct { - shadowRef *plumbing.Reference // Pre-resolved shadow branch ref (nil = resolve from repo) - headTree *object.Tree // Pre-resolved HEAD tree (passed through to calculateSessionAttributions) - repoDir string // Repository worktree path for git CLI commands - headCommitHash string // HEAD commit hash (passed through for attribution) + shadowRef *plumbing.Reference // Pre-resolved shadow branch ref (nil = resolve from repo) + headTree *object.Tree // Pre-resolved HEAD tree (passed through to calculateSessionAttributions) + repoDir string // Repository worktree path for git CLI commands + parentCommitHash string // HEAD's first parent hash for per-commit non-agent file detection + headCommitHash string // HEAD commit hash (passed through for attribution) } // CondenseSession condenses a session's shadow branch to permanent storage. @@ -197,6 +198,7 @@ func (s *ManualCommitStrategy) CondenseSession(ctx context.Context, repo *git.Re headTree: o.headTree, repoDir: o.repoDir, attributionBaseCommit: attrBase, + parentCommitHash: o.parentCommitHash, headCommitHash: o.headCommitHash, }) @@ -339,6 +341,7 @@ type attributionOpts struct { shadowTree *object.Tree // Shadow branch tree (already resolved by PostCommit) repoDir string // Repository worktree path for git CLI commands attributionBaseCommit string // Base commit hash for non-agent file detection (empty = fall back to go-git tree walk) + parentCommitHash string // HEAD's first parent hash (preferred diff base for non-agent files) headCommitHash string // HEAD commit hash for non-agent file detection (empty = fall back to go-git tree walk) } @@ -449,6 +452,7 @@ func calculateSessionAttributions(ctx context.Context, repo *git.Repository, sha sessionData.FilesTouched, state.PromptAttributions, o.repoDir, + o.parentCommitHash, o.attributionBaseCommit, o.headCommitHash, ) diff --git a/cmd/entire/cli/strategy/manual_commit_hooks.go b/cmd/entire/cli/strategy/manual_commit_hooks.go index c5e302def..2893ece54 100644 --- a/cmd/entire/cli/strategy/manual_commit_hooks.go +++ b/cmd/entire/cli/strategy/manual_commit_hooks.go @@ -639,11 +639,18 @@ func (h *postCommitActionHandler) HandleCondense(state *session.State) error { ) if shouldCondense { + parentCommitHash := "" + if h.commit.NumParents() > 0 { + if parent, err := h.commit.Parent(0); err == nil { + parentCommitHash = parent.Hash.String() + } + } h.condensed = h.s.condenseAndUpdateState(h.ctx, h.repo, h.checkpointID, state, h.head, h.shadowBranchName, h.shadowBranchesToDelete, h.committedFileSet, condenseOpts{ - shadowRef: h.shadowRef, - headTree: h.headTree, - repoDir: h.repoDir, - headCommitHash: h.newHead, + shadowRef: h.shadowRef, + headTree: h.headTree, + repoDir: h.repoDir, + parentCommitHash: parentCommitHash, + headCommitHash: h.newHead, }) } else { h.s.updateBaseCommitIfChanged(h.ctx, state, h.newHead) @@ -665,11 +672,18 @@ func (h *postCommitActionHandler) HandleCondenseIfFilesTouched(state *session.St ) if shouldCondense { + parentCommitHash := "" + if h.commit.NumParents() > 0 { + if parent, err := h.commit.Parent(0); err == nil { + parentCommitHash = parent.Hash.String() + } + } h.condensed = h.s.condenseAndUpdateState(h.ctx, h.repo, h.checkpointID, state, h.head, h.shadowBranchName, h.shadowBranchesToDelete, h.committedFileSet, condenseOpts{ - shadowRef: h.shadowRef, - headTree: h.headTree, - repoDir: h.repoDir, - headCommitHash: h.newHead, + shadowRef: h.shadowRef, + headTree: h.headTree, + repoDir: h.repoDir, + parentCommitHash: parentCommitHash, + headCommitHash: h.newHead, }) } else { h.s.updateBaseCommitIfChanged(h.ctx, state, h.newHead) From fab7ee6bf569a21983e7d3d9083b848e6309e717 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Tue, 31 Mar 2026 16:25:42 -0700 Subject: [PATCH 2/5] Fix attribution inflation from pre-session worktree dirt Pre-session dirty files (CLI config files from `entire enable`, leftover changes from previous sessions) were incorrectly counted as human contributions, deflating agent percentage. Root cause: PA1 (first prompt attribution) captures worktree state at session start. This data was used to correct agent line counts (correct) but also added to human contributions (wrong). Fix: - Split prompt attributions into baseline (PA1) and session (PA2+) - PA1 data still subtracted from agent work (correct agent calc) - PA1 contributions excluded from relevantAccumulatedUser - PA1 removals excluded from totalUserRemoved - Include PendingPromptAttribution during condensation for agents that skip SaveStep (e.g., Codex mid-turn commits) - Add .entire/ filter to attribution calc (matches existing PA filter) - Fix wrapcheck lint errors in updateCombinedAttributionForCheckpoint Verified end-to-end: 100% agent with config files committed alongside. Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: b0cb4216f6bc --- .../cli/strategy/manual_commit_attribution.go | 53 ++++- .../manual_commit_attribution_test.go | 216 ++++++++++++++++-- .../strategy/manual_commit_condensation.go | 28 ++- .../cli/strategy/manual_commit_hooks.go | 76 ++++++ cmd/entire/cli/strategy/manual_commit_test.go | 17 +- 5 files changed, 352 insertions(+), 38 deletions(-) diff --git a/cmd/entire/cli/strategy/manual_commit_attribution.go b/cmd/entire/cli/strategy/manual_commit_attribution.go index 3fd9513c8..4707e9c85 100644 --- a/cmd/entire/cli/strategy/manual_commit_attribution.go +++ b/cmd/entire/cli/strategy/manual_commit_attribution.go @@ -10,6 +10,7 @@ import ( "github.com/entireio/cli/cmd/entire/cli/checkpoint" "github.com/entireio/cli/cmd/entire/cli/gitops" "github.com/entireio/cli/cmd/entire/cli/logging" + "github.com/entireio/cli/cmd/entire/cli/paths" "github.com/go-git/go-git/v6/plumbing/object" "github.com/sergi/go-diff/diffmatchpatch" ) @@ -205,17 +206,33 @@ func CalculateAttributionWithAccumulated( return nil } - // Sum accumulated user lines from prompt attributions - // Also aggregate per-file user additions for accurate modification tracking + // Sum accumulated user lines from prompt attributions. + // Also aggregate per-file user additions for accurate modification tracking. + // + // BASELINE SPLIT: PA1 (CheckpointNumber <= 1) captures pre-session worktree + // state — files already dirty when the session started (CLI config files, + // leftover changes from previous sessions). PA1 data is accumulated normally + // for agent line correction (subtracted from totalAgentAndUserWork) but is + // tracked separately so it can be EXCLUDED from human contribution counts. + // Only PA2+ represents genuine human edits during the session. var accumulatedUserAdded, accumulatedUserRemoved int + var baselineUserRemoved int accumulatedUserAddedPerFile := make(map[string]int) + baselineUserAddedPerFile := make(map[string]int) + for _, pa := range promptAttributions { accumulatedUserAdded += pa.UserLinesAdded accumulatedUserRemoved += pa.UserLinesRemoved - // Merge per-file data from all prompt attributions for filePath, added := range pa.UserAddedPerFile { accumulatedUserAddedPerFile[filePath] += added } + // Track baseline (PA1) separately + if pa.CheckpointNumber <= 1 { + baselineUserRemoved += pa.UserLinesRemoved + for filePath, added := range pa.UserAddedPerFile { + baselineUserAddedPerFile[filePath] += added + } + } } // Calculate attribution for agent-touched files @@ -266,6 +283,9 @@ func CalculateAttributionWithAccumulated( if slices.Contains(filesTouched, filePath) { continue // Skip agent-touched files } + if strings.HasPrefix(filePath, ".entire/") || strings.HasPrefix(filePath, paths.EntireMetadataDir+"/") { + continue // Skip CLI metadata — matches filter in calculatePromptAttributionAtStart + } baseContent := getFileContent(baseTree, filePath) headContent := getFileContent(headTree, filePath) @@ -302,14 +322,27 @@ func CalculateAttributionWithAccumulated( // Post-checkpoint edits to non-agent files = total edits - accumulated portion (never negative) postToNonAgentFiles := max(0, allUserEditsToNonAgentFiles-accumulatedToCommittedNonAgentFiles) - // Total user contribution = accumulated (committed files only) + post-checkpoint edits - relevantAccumulatedUser := accumulatedToAgentFiles + accumulatedToCommittedNonAgentFiles + // Compute baseline (PA1) contributions to subtract from accumulated totals. + // PA1 data was used above for agent correction (accumulatedToAgentFiles) but should + // not be counted as human work — it's pre-session state. + var baselineToAgentFiles, baselineToCommittedNonAgentFiles int + for filePath, added := range baselineUserAddedPerFile { + if slices.Contains(filesTouched, filePath) { + baselineToAgentFiles += added + } else if _, ok := committedNonAgentSet[filePath]; ok { + baselineToCommittedNonAgentFiles += added + } + } + + // Total user contribution = session-time accumulated + post-checkpoint edits. + // Subtract baseline contributions so pre-session dirt doesn't count as human. + sessionAccumulatedToAgentFiles := max(0, accumulatedToAgentFiles-baselineToAgentFiles) + sessionAccumulatedToNonAgent := max(0, accumulatedToCommittedNonAgentFiles-baselineToCommittedNonAgentFiles) + relevantAccumulatedUser := sessionAccumulatedToAgentFiles + sessionAccumulatedToNonAgent totalUserAdded := relevantAccumulatedUser + postCheckpointUserAdded + postToNonAgentFiles - // TODO: accumulatedUserRemoved also includes removals from uncommitted files, - // but we don't have per-file tracking for removals yet. In practice, removals - // from uncommitted files are rare and the impact is minor (could slightly reduce - // totalCommitted via pureUserRemoved). Add UserRemovedPerFile if this becomes an issue. - totalUserRemoved := accumulatedUserRemoved + postCheckpointUserRemoved + // Exclude baseline removals (pre-session state) from human removal count. + sessionAccumulatedUserRemoved := max(0, accumulatedUserRemoved-baselineUserRemoved) + totalUserRemoved := sessionAccumulatedUserRemoved + postCheckpointUserRemoved // Estimate modified lines (user changed existing lines) // Lines that were both added and removed are treated as modifications. diff --git a/cmd/entire/cli/strategy/manual_commit_attribution_test.go b/cmd/entire/cli/strategy/manual_commit_attribution_test.go index 5bc885be9..28963ea0b 100644 --- a/cmd/entire/cli/strategy/manual_commit_attribution_test.go +++ b/cmd/entire/cli/strategy/manual_commit_attribution_test.go @@ -723,18 +723,12 @@ func TestCalculateAttributionWithAccumulated_UserEditsNonAgentFile(t *testing.T) // Expected calculation: // - Agent added 3 lines to file1.go (2 functions + 1 blank) - // - User added 2 lines to file2.go between checkpoints (from PromptAttribution) - // - User added 2 MORE lines to file2.go after last checkpoint (post-checkpoint) - // - Total user added: 2 + 2 = 4 + // - PA1 captured 2 lines to file2.go — but PA1 is pre-session baseline, excluded from human count + // - User added 2 MORE lines to file2.go after last checkpoint (post-checkpoint) — these count + // - Total user added: 0 (PA1 excluded) + 2 (post-checkpoint) = 2 // - agentLinesInCommit: 3 - // - Total: 3 + 4 = 7 - // - Agent percentage: 3/7 = 42.9% - // - // BUG (if not fixed): Post-checkpoint calculation only looks at file1.go, - // so it would miss the 2 post-checkpoint edits to file2.go: - // - Total user added: 2 + 0 = 2 (WRONG) - // - Total: 3 + 2 = 5 (WRONG) - // - Agent percentage: 3/5 = 60% (WRONG, inflated) + // - Total: 3 + 2 = 5 + // - Agent percentage: 3/5 = 60% t.Logf("Attribution: agent=%d, human_added=%d, total=%d, percentage=%.1f%%", result.AgentLines, result.HumanAdded, result.TotalCommitted, result.AgentPercentage) @@ -743,18 +737,18 @@ func TestCalculateAttributionWithAccumulated_UserEditsNonAgentFile(t *testing.T) t.Errorf("AgentLines = %d, want 3", result.AgentLines) } - if result.HumanAdded != 4 { - t.Errorf("HumanAdded = %d, want 4 (2 between + 2 post-checkpoint, including file agent never touched)", + if result.HumanAdded != 2 { + t.Errorf("HumanAdded = %d, want 2 (post-checkpoint only; PA1 pre-session edits excluded as baseline)", result.HumanAdded) } - if result.TotalCommitted != 7 { - t.Errorf("TotalCommitted = %d, want 7 (3 agent + 4 user)", result.TotalCommitted) + if result.TotalCommitted != 5 { + t.Errorf("TotalCommitted = %d, want 5 (3 agent + 2 post-checkpoint user)", result.TotalCommitted) } - // Agent percentage should be 3/7 = 42.9% - if result.AgentPercentage < 42.8 || result.AgentPercentage > 43.0 { - t.Errorf("AgentPercentage = %.1f%%, want ~42.9%% (not inflated)", result.AgentPercentage) + // Agent percentage should be 3/5 = 60% + if result.AgentPercentage < 59.9 || result.AgentPercentage > 60.1 { + t.Errorf("AgentPercentage = %.1f%%, want ~60.0%%", result.AgentPercentage) } } @@ -1237,3 +1231,189 @@ func TestCalculatePromptAttribution_PopulatesPerFile(t *testing.T) { t.Errorf("UserAddedPerFile[b.go] = %d, want 1", result.UserAddedPerFile["b.go"]) } } + +// TestCalculateAttributionWithAccumulated_PreSessionDirtOnAgentFiles verifies that +// pre-session worktree dirt (captured in PA1 / checkpoint 1) on files the agent later +// touches does NOT get counted as human contributions. +// +// Scenario: hooks.go has 3 pre-session dirty lines when session starts. +// Agent also modifies hooks.go (adds 5 more lines). Shadow captures all 8 new lines. +// At commit time, the 3 pre-session lines should be excluded from human count. +func TestCalculateAttributionWithAccumulated_PreSessionDirtOnAgentFiles(t *testing.T) { + t.Parallel() + + // Base: hooks.go has 3 lines + baseTree := buildTestTree(t, map[string]string{ + "hooks.go": "package strategy\n\nfunc warn() {}\n", + }) + + // Shadow captures base (3 lines) + pre-session dirt (3 new lines) + agent work (5 new lines) + // = 11 total lines, 8 added relative to base + shadowContent := "package strategy\n\n// pre1\n// pre2\n// pre3\nfunc agentA() {}\nfunc agentB() {}\nfunc agentC() {}\nfunc agentD() {}\nfunc agentE() {}\nfunc warn() {}\n" + shadowTree := buildTestTree(t, map[string]string{ + "hooks.go": shadowContent, + }) + + // Head = shadow (user didn't edit after agent) + headTree := shadowTree + + filesTouched := []string{"hooks.go"} + + // PA1 captured the 3 pre-session dirty lines at session start + promptAttributions := []PromptAttribution{ + { + CheckpointNumber: 1, + UserLinesAdded: 3, + UserLinesRemoved: 0, + UserAddedPerFile: map[string]int{"hooks.go": 3}, + }, + } + + result := CalculateAttributionWithAccumulated( + context.Background(), + baseTree, shadowTree, headTree, filesTouched, promptAttributions, + "", "", "", "", + ) + + require.NotNil(t, result) + + // base→shadow adds 8 lines. PA1 says 3 are pre-session. + // totalAgentAdded = 8 - 3 = 5 (correct agent subtraction). + // Pre-session 3 lines should NOT appear in HumanAdded. + require.Equal(t, 5, result.AgentLines, "agent should get credit for 5 lines") + require.Equal(t, 0, result.HumanAdded, "pre-session dirt should not count as human") + require.Equal(t, 5, result.TotalCommitted, "total should be agent-only") + require.InDelta(t, 100.0, result.AgentPercentage, 0.1, "should be 100%% agent") +} + +// TestCalculateAttributionWithAccumulated_PreSessionConfigFiles verifies that +// non-agent files dirty at session start (e.g., CLI config files from `entire enable`) +// do NOT get counted as human contributions. +// +// Uses flat file names because buildTestTree doesn't support nested paths. +// The attribution code only checks filesTouched membership and UserAddedPerFile keys, +// so flat names are equivalent for testing. +func TestCalculateAttributionWithAccumulated_PreSessionConfigFiles(t *testing.T) { + t.Parallel() + + // Base: empty repo + baseTree := buildTestTree(t, map[string]string{ + "empty": "", + }) + + // Shadow: agent created hello.py (5 lines). Config file also present (10 lines). + shadowTree := buildTestTree(t, map[string]string{ + "empty": "", + "hello.py": "line1\nline2\nline3\nline4\nline5\n", + "config.json": "k1\nk2\nk3\nk4\nk5\nk6\nk7\nk8\nk9\nk10\n", + }) + + // Head = shadow (user didn't edit) + headTree := shadowTree + + filesTouched := []string{"hello.py"} + + // PA1 captured the config file at session start (pre-session dirty) + promptAttributions := []PromptAttribution{ + { + CheckpointNumber: 1, + UserLinesAdded: 10, + UserLinesRemoved: 0, + UserAddedPerFile: map[string]int{"config.json": 10}, + }, + } + + result := CalculateAttributionWithAccumulated( + context.Background(), + baseTree, shadowTree, headTree, filesTouched, promptAttributions, + "", "", "", "", + ) + + require.NotNil(t, result) + + // Agent created hello.py (5 lines). Config file is pre-session baseline — excluded. + require.Equal(t, 5, result.AgentLines, "agent should get 5 lines for hello.py") + require.Equal(t, 0, result.HumanAdded, "pre-session config should not count as human") + require.Equal(t, 5, result.TotalCommitted, "total should be agent-only") + require.InDelta(t, 100.0, result.AgentPercentage, 0.1, "should be 100%% agent") +} + +// TestCalculateAttributionWithAccumulated_DuringSessionHumanEdits verifies that +// human edits made DURING the session (captured by PA2+) are still correctly +// counted as human contributions after the baseline fix. +// +// This is a correctness guard — the fix must not break this. +func TestCalculateAttributionWithAccumulated_DuringSessionHumanEdits(t *testing.T) { + t.Parallel() + + baseTree := buildTestTree(t, map[string]string{ + "main.go": "", + }) + + // Shadow: 12 lines total — 10 agent + 2 user (added between turns) + shadowTree := buildTestTree(t, map[string]string{ + "main.go": "a1\na2\na3\na4\na5\na6\na7\na8\nu1\nu2\na9\na10\n", + }) + + headTree := shadowTree + + filesTouched := []string{"main.go"} + + promptAttributions := []PromptAttribution{ + { + CheckpointNumber: 1, + UserLinesAdded: 0, // Clean worktree at session start + UserLinesRemoved: 0, + UserAddedPerFile: map[string]int{}, + }, + { + CheckpointNumber: 2, + UserLinesAdded: 2, // User added 2 lines between turn 1 and 2 + UserLinesRemoved: 0, + UserAddedPerFile: map[string]int{"main.go": 2}, + }, + } + + result := CalculateAttributionWithAccumulated( + context.Background(), + baseTree, shadowTree, headTree, filesTouched, promptAttributions, + "", "", "", "", + ) + + require.NotNil(t, result) + + // 12 total lines in shadow. PA2 says user added 2. Agent = 12 - 2 = 10. + require.Equal(t, 10, result.AgentLines, "agent should get 10 lines") + require.Equal(t, 2, result.HumanAdded, "user's 2 lines from PA2 should count") + require.Equal(t, 12, result.TotalCommitted) + require.InDelta(t, 83.3, result.AgentPercentage, 0.1) +} + +// TestCalculateAttributionWithAccumulated_EmptyPA verifies that sessions with +// no prompt attributions (old CLI versions, edge cases) still work correctly. +func TestCalculateAttributionWithAccumulated_EmptyPA(t *testing.T) { + t.Parallel() + + baseTree := buildTestTree(t, map[string]string{ + "main.go": "", + }) + + shadowTree := buildTestTree(t, map[string]string{ + "main.go": "line1\nline2\nline3\n", + }) + + headTree := shadowTree + filesTouched := []string{"main.go"} + + // No prompt attributions at all (old session or edge case) + result := CalculateAttributionWithAccumulated( + context.Background(), + baseTree, shadowTree, headTree, filesTouched, nil, + "", "", "", "", + ) + + require.NotNil(t, result) + require.Equal(t, 3, result.AgentLines) + require.Equal(t, 0, result.HumanAdded) + require.InDelta(t, 100.0, result.AgentPercentage, 0.1) +} diff --git a/cmd/entire/cli/strategy/manual_commit_condensation.go b/cmd/entire/cli/strategy/manual_commit_condensation.go index a6e1e3b83..cf068bb1e 100644 --- a/cmd/entire/cli/strategy/manual_commit_condensation.go +++ b/cmd/entire/cli/strategy/manual_commit_condensation.go @@ -266,6 +266,7 @@ func (s *ManualCommitStrategy) CondenseSession(ctx context.Context, repo *git.Re TokenUsage: sessionData.TokenUsage, SessionMetrics: buildSessionMetrics(state), InitialAttribution: attribution, + PromptAttributionsJSON: marshalPromptAttributions(state.PromptAttributions), Summary: summary, } @@ -289,6 +290,19 @@ func (s *ManualCommitStrategy) CondenseSession(ctx context.Context, repo *git.Re // buildSessionMetrics creates a SessionMetrics from session state if any metrics are available. // Returns nil if no hook-provided metrics exist (e.g., for agents that don't report them). +// marshalPromptAttributions encodes PromptAttributions to JSON for diagnostic persistence. +// Returns nil if there are no attributions to persist. +func marshalPromptAttributions(pas []PromptAttribution) json.RawMessage { + if len(pas) == 0 { + return nil + } + data, err := json.Marshal(pas) + if err != nil { + return nil + } + return data +} + func buildSessionMetrics(state *SessionState) *cpkg.SessionMetrics { if state.SessionDurationMs == 0 && state.SessionTurnCount == 0 && state.ContextTokens == 0 && state.ContextWindowSize == 0 { return nil @@ -430,9 +444,19 @@ func calculateSessionAttributions(ctx context.Context, repo *git.Repository, sha slog.String("attribution_base", attrBase)) } + // Include PendingPromptAttribution if it was never moved to PromptAttributions. + // This happens when an agent commits mid-turn without calling SaveStep (e.g., Codex). + // PendingPromptAttribution is set during UserPromptSubmit but only moved to + // PromptAttributions during SaveStep. Without this, mid-turn commits have no PA + // data and pre-session worktree dirt cannot be identified for baseline exclusion. + promptAttrs := state.PromptAttributions + if state.PendingPromptAttribution != nil { + promptAttrs = append(promptAttrs, *state.PendingPromptAttribution) + } + // Log accumulated prompt attributions for debugging var totalUserAdded, totalUserRemoved int - for i, pa := range state.PromptAttributions { + for i, pa := range promptAttrs { totalUserAdded += pa.UserLinesAdded totalUserRemoved += pa.UserLinesRemoved logging.Debug(logCtx, "prompt attribution data", @@ -450,7 +474,7 @@ func calculateSessionAttributions(ctx context.Context, repo *git.Repository, sha shadowTree, headTree, sessionData.FilesTouched, - state.PromptAttributions, + promptAttrs, o.repoDir, o.parentCommitHash, o.attributionBaseCommit, diff --git a/cmd/entire/cli/strategy/manual_commit_hooks.go b/cmd/entire/cli/strategy/manual_commit_hooks.go index 2893ece54..9922a1c5a 100644 --- a/cmd/entire/cli/strategy/manual_commit_hooks.go +++ b/cmd/entire/cli/strategy/manual_commit_hooks.go @@ -877,6 +877,12 @@ func (s *ManualCommitStrategy) PostCommit(ctx context.Context) error { //nolint: } processSessionsLoop.End() + if err := s.updateCombinedAttributionForCheckpoint(ctx, repo, checkpointID); err != nil { + logging.Warn(logCtx, "failed to update combined checkpoint attribution", + slog.String("checkpoint_id", checkpointID.String()), + slog.String("error", err.Error())) + } + // Clean up shadow branches — only delete when ALL sessions on the branch are non-active // or were condensed during this PostCommit. _, cleanupBranchesSpan := perf.Start(ctx, "cleanup_shadow_branches") @@ -903,6 +909,76 @@ func (s *ManualCommitStrategy) PostCommit(ctx context.Context) error { //nolint: return nil } +func (s *ManualCommitStrategy) updateCombinedAttributionForCheckpoint(ctx context.Context, repo *git.Repository, checkpointID id.CheckpointID) error { + store := checkpoint.NewGitStore(repo) + + summary, err := store.ReadCommitted(ctx, checkpointID) + if err != nil { + return fmt.Errorf("reading checkpoint summary: %w", err) + } + if summary == nil || len(summary.Sessions) <= 1 { + return nil + } + + combined := aggregateCheckpointAttribution() + for i := range len(summary.Sessions) { + content, readErr := store.ReadSessionContent(ctx, checkpointID, i) + if readErr != nil || content == nil || content.Metadata.InitialAttribution == nil { + continue + } + combined.add(content.Metadata.InitialAttribution) + } + + if !combined.hasData() { + return nil + } + + if err := store.UpdateCheckpointSummary(ctx, checkpointID, combined.snapshot()); err != nil { + return fmt.Errorf("updating combined attribution: %w", err) + } + + return nil +} + +type checkpointAttributionAggregate struct { + agentLines int + humanAdded int + humanModified int + humanRemoved int + totalCommitted int +} + +func aggregateCheckpointAttribution() *checkpointAttributionAggregate { + return &checkpointAttributionAggregate{} +} + +func (a *checkpointAttributionAggregate) add(attr *checkpoint.InitialAttribution) { + a.agentLines += attr.AgentLines + a.humanAdded += attr.HumanAdded + a.humanModified += attr.HumanModified + a.humanRemoved += attr.HumanRemoved + a.totalCommitted += attr.TotalCommitted +} + +func (a *checkpointAttributionAggregate) hasData() bool { + return a.agentLines != 0 || a.humanAdded != 0 || a.humanModified != 0 || a.humanRemoved != 0 || a.totalCommitted != 0 +} + +func (a *checkpointAttributionAggregate) snapshot() *checkpoint.InitialAttribution { + attr := &checkpoint.InitialAttribution{ + CalculatedAt: time.Now().UTC(), + AgentLines: a.agentLines, + HumanAdded: a.humanAdded, + HumanModified: a.humanModified, + HumanRemoved: a.humanRemoved, + TotalCommitted: a.totalCommitted, + } + if a.totalCommitted > 0 { + attr.AgentPercentage = float64(a.agentLines) / float64(a.totalCommitted) * 100 + } + return attr +} + // postCommitProcessSession handles a single session within the PostCommit loop. // Pre-resolved git objects (headTree, parentTree) are shared across all sessions; // per-session shadow ref/tree are resolved once here and threaded through sub-calls. diff --git a/cmd/entire/cli/strategy/manual_commit_test.go b/cmd/entire/cli/strategy/manual_commit_test.go index 5f4102b16..338adbc4c 100644 --- a/cmd/entire/cli/strategy/manual_commit_test.go +++ b/cmd/entire/cli/strategy/manual_commit_test.go @@ -2453,19 +2453,20 @@ func TestCondenseSession_AttributionWithoutShadowBranch_MixedHumanAgent(t *testi t.Logf("Attribution (mixed, no shadow): agent=%d, human_added=%d, total=%d, percentage=%.1f%%", attr.AgentLines, attr.HumanAdded, attr.TotalCommitted, attr.AgentPercentage) - // src/app.go has 7 lines (agent), docs/notes.md has 4 lines (human) + // src/app.go has 7 lines (agent). docs/notes.md was added before the session + // (captured by PA1) so it's pre-session baseline — excluded from human count. if attr.AgentLines != 7 { t.Errorf("AgentLines = %d, want 7 (src/app.go has 7 lines)", attr.AgentLines) } - if attr.HumanAdded != 4 { - t.Errorf("HumanAdded = %d, want 4 (docs/notes.md has 4 lines)", attr.HumanAdded) + if attr.HumanAdded != 0 { + t.Errorf("HumanAdded = %d, want 0 (docs/notes.md is pre-session baseline, excluded)", attr.HumanAdded) } - if attr.TotalCommitted != 11 { - t.Errorf("TotalCommitted = %d, want 11 (7 agent + 4 human)", attr.TotalCommitted) + if attr.TotalCommitted != 7 { + t.Errorf("TotalCommitted = %d, want 7 (agent-only, pre-session excluded)", attr.TotalCommitted) } - // Agent wrote 7/11 = 63.6% - if attr.AgentPercentage < 60 || attr.AgentPercentage > 70 { - t.Errorf("AgentPercentage = %.1f%%, want ~63.6%% (7/11)", attr.AgentPercentage) + // Agent wrote 7/7 = 100% + if attr.AgentPercentage < 99.0 { + t.Errorf("AgentPercentage = %.1f%%, want ~100%% (pre-session human file excluded)", attr.AgentPercentage) } } From 51fe59e405fd33cd9c012b6b85680e6c776f616a Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Tue, 31 Mar 2026 16:49:49 -0700 Subject: [PATCH 3/5] Add PromptAttributionsJSON, UpdateCheckpointSummary, and CombinedAttribution Checkpoint package changes required by the attribution baseline fix: - PromptAttributionsJSON field on WriteCommittedOptions and CommittedMetadata - UpdateCheckpointSummary method on GitStore for multi-session aggregation - CombinedAttribution field on CheckpointSummary - Preserve existing CombinedAttribution during summary rewrites Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: b8963737336c --- cmd/entire/cli/checkpoint/checkpoint.go | 28 +++++-- cmd/entire/cli/checkpoint/committed.go | 97 +++++++++++++++++++++-- cmd/entire/cli/checkpoint/v2_committed.go | 1 + 3 files changed, 110 insertions(+), 16 deletions(-) diff --git a/cmd/entire/cli/checkpoint/checkpoint.go b/cmd/entire/cli/checkpoint/checkpoint.go index 40133fbcf..5932e7bdd 100644 --- a/cmd/entire/cli/checkpoint/checkpoint.go +++ b/cmd/entire/cli/checkpoint/checkpoint.go @@ -8,6 +8,7 @@ package checkpoint import ( "context" + "encoding/json" "errors" "time" @@ -282,6 +283,12 @@ type WriteCommittedOptions struct { // comparing checkpoint tree (agent work) to committed tree (may include human edits) InitialAttribution *InitialAttribution + // PromptAttributionsJSON is the raw PromptAttributions data, JSON-encoded. + // Persisted for diagnostic purposes — shows exactly which prompt recorded + // which "user" lines, enabling root cause analysis of attribution bugs. + // Uses json.RawMessage to avoid importing session package. + PromptAttributionsJSON json.RawMessage + // Summary is an optional AI-generated summary for this checkpoint. // This field may be nil when: // - summarization is disabled in settings @@ -402,6 +409,10 @@ type CommittedMetadata struct { // InitialAttribution is line-level attribution calculated at commit time InitialAttribution *InitialAttribution `json:"initial_attribution,omitempty"` + + // PromptAttributions is the raw per-prompt attribution data used to compute InitialAttribution. + // Diagnostic field — shows which prompt recorded which "user" lines. + PromptAttributions json.RawMessage `json:"prompt_attributions,omitempty"` } // GetTranscriptStart returns the transcript line offset at which this checkpoint's data begins. @@ -443,14 +454,15 @@ type SessionFilePaths struct { // //nolint:revive // Named CheckpointSummary to avoid conflict with existing Summary struct type CheckpointSummary struct { - CLIVersion string `json:"cli_version,omitempty"` - CheckpointID id.CheckpointID `json:"checkpoint_id"` - Strategy string `json:"strategy"` - Branch string `json:"branch,omitempty"` - CheckpointsCount int `json:"checkpoints_count"` - FilesTouched []string `json:"files_touched"` - Sessions []SessionFilePaths `json:"sessions"` - TokenUsage *agent.TokenUsage `json:"token_usage,omitempty"` + CLIVersion string `json:"cli_version,omitempty"` + CheckpointID id.CheckpointID `json:"checkpoint_id"` + Strategy string `json:"strategy"` + Branch string `json:"branch,omitempty"` + CheckpointsCount int `json:"checkpoints_count"` + FilesTouched []string `json:"files_touched"` + Sessions []SessionFilePaths `json:"sessions"` + TokenUsage *agent.TokenUsage `json:"token_usage,omitempty"` + CombinedAttribution *InitialAttribution `json:"combined_attribution,omitempty"` } // SessionMetrics contains hook-provided session metrics from agents that report diff --git a/cmd/entire/cli/checkpoint/committed.go b/cmd/entire/cli/checkpoint/committed.go index 47a75beb8..0f70d47fa 100644 --- a/cmd/entire/cli/checkpoint/committed.go +++ b/cmd/entire/cli/checkpoint/committed.go @@ -383,6 +383,7 @@ func (s *GitStore) writeSessionToSubdirectory(ctx context.Context, opts WriteCom TokenUsage: opts.TokenUsage, SessionMetrics: opts.SessionMetrics, InitialAttribution: opts.InitialAttribution, + PromptAttributions: opts.PromptAttributionsJSON, Summary: redactSummary(opts.Summary), CLIVersion: versioninfo.Version, } @@ -414,15 +415,25 @@ func (s *GitStore) writeCheckpointSummary(opts WriteCommittedOptions, basePath s return fmt.Errorf("failed to aggregate session stats: %w", err) } + var combinedAttribution *InitialAttribution + rootMetadataPath := basePath + paths.MetadataFileName + if entry, exists := entries[rootMetadataPath]; exists { + existingSummary, readErr := s.readSummaryFromBlob(entry.Hash) + if readErr == nil { + combinedAttribution = existingSummary.CombinedAttribution + } + } + summary := CheckpointSummary{ - CheckpointID: opts.CheckpointID, - CLIVersion: versioninfo.Version, - Strategy: opts.Strategy, - Branch: opts.Branch, - CheckpointsCount: checkpointsCount, - FilesTouched: filesTouched, - Sessions: sessions, - TokenUsage: tokenUsage, + CheckpointID: opts.CheckpointID, + CLIVersion: versioninfo.Version, + Strategy: opts.Strategy, + Branch: opts.Branch, + CheckpointsCount: checkpointsCount, + FilesTouched: filesTouched, + Sessions: sessions, + TokenUsage: tokenUsage, + CombinedAttribution: combinedAttribution, } metadataJSON, err := jsonutil.MarshalIndentWithNewline(summary, "", " ") @@ -441,6 +452,76 @@ func (s *GitStore) writeCheckpointSummary(opts WriteCommittedOptions, basePath s return nil } +// UpdateCheckpointSummary updates root-level checkpoint metadata fields that depend +// on the full set of sessions already written to the checkpoint. +func (s *GitStore) UpdateCheckpointSummary(ctx context.Context, checkpointID id.CheckpointID, combinedAttribution *InitialAttribution) error { + if err := ctx.Err(); err != nil { + return err //nolint:wrapcheck // Propagating context cancellation + } + + if err := s.ensureSessionsBranch(); err != nil { + return fmt.Errorf("failed to ensure sessions branch: %w", err) + } + + parentHash, rootTreeHash, err := s.getSessionsBranchRef() + if err != nil { + return err + } + + basePath := checkpointID.Path() + "/" + checkpointPath := checkpointID.Path() + entries, err := s.flattenCheckpointEntries(rootTreeHash, checkpointPath) + if err != nil { + return err + } + + rootMetadataPath := basePath + paths.MetadataFileName + entry, exists := entries[rootMetadataPath] + if !exists { + return ErrCheckpointNotFound + } + + summary, err := s.readSummaryFromBlob(entry.Hash) + if err != nil { + return fmt.Errorf("failed to read checkpoint summary: %w", err) + } + summary.CombinedAttribution = combinedAttribution + + metadataJSON, err := jsonutil.MarshalIndentWithNewline(summary, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal checkpoint summary: %w", err) + } + metadataHash, err := CreateBlobFromContent(s.repo, metadataJSON) + if err != nil { + return fmt.Errorf("failed to create checkpoint summary blob: %w", err) + } + entries[rootMetadataPath] = object.TreeEntry{ + Name: rootMetadataPath, + Mode: filemode.Regular, + Hash: metadataHash, + } + + newTreeHash, err := s.spliceCheckpointSubtree(rootTreeHash, checkpointID, basePath, entries) + if err != nil { + return err + } + + authorName, authorEmail := GetGitAuthorFromRepo(s.repo) + commitMsg := fmt.Sprintf("Update checkpoint summary for %s", checkpointID) + newCommitHash, err := s.createCommit(newTreeHash, parentHash, commitMsg, authorName, authorEmail) + if err != nil { + return err + } + + refName := plumbing.NewBranchReferenceName(paths.MetadataBranchName) + newRef := plumbing.NewHashReference(refName, newCommitHash) + if err := s.repo.Storer.SetReference(newRef); err != nil { + return fmt.Errorf("failed to set branch reference: %w", err) + } + + return nil +} + // findSessionIndex returns the index of an existing session with the given ID, // or the next available index if not found. This prevents duplicate session entries. func (s *GitStore) findSessionIndex(ctx context.Context, basePath string, existingSummary *CheckpointSummary, entries map[string]object.TreeEntry, sessionID string) int { diff --git a/cmd/entire/cli/checkpoint/v2_committed.go b/cmd/entire/cli/checkpoint/v2_committed.go index d5a7a4f34..f9f19bb3f 100644 --- a/cmd/entire/cli/checkpoint/v2_committed.go +++ b/cmd/entire/cli/checkpoint/v2_committed.go @@ -329,6 +329,7 @@ func (s *V2GitStore) writeMainSessionToSubdirectory(opts WriteCommittedOptions, TokenUsage: opts.TokenUsage, SessionMetrics: opts.SessionMetrics, InitialAttribution: opts.InitialAttribution, + PromptAttributions: opts.PromptAttributionsJSON, Summary: redactSummary(opts.Summary), CLIVersion: versioninfo.Version, } From 654be48cff640d3b11a3a0bf94cef3646373f66f Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Tue, 31 Mar 2026 18:03:27 -0700 Subject: [PATCH 4/5] Address PR review: consistent parentTree line counting, deduplicate parentCommitHash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes all 4 issues from Copilot and Cursor Bugbot review: 1. Precompute parentCommitHash on postCommitActionHandler struct using ParentHashes[0] (avoids extra object read, no silent error) 2. Remove duplicated 6-line parentCommitHash computation from HandleCondense and HandleCondenseIfFilesTouched 3. Thread parentTree through condenseOpts/attributionOpts and use it for non-agent file line counting — ensures diffLines uses parent→HEAD (consistent with parentCommitHash file scoping) instead of sessionBase→HEAD which over-counted intermediate commit changes 4. Add ParentTreeForNonAgentLines test proving the fix (TDD verified: HumanAdded=8 without fix → HumanAdded=3 with fix) Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: 12f5c4373467 --- .../cli/strategy/manual_commit_attribution.go | 12 ++- .../manual_commit_attribution_test.go | 101 +++++++++++++++--- .../strategy/manual_commit_condensation.go | 4 + .../cli/strategy/manual_commit_hooks.go | 37 +++---- 4 files changed, 115 insertions(+), 39 deletions(-) diff --git a/cmd/entire/cli/strategy/manual_commit_attribution.go b/cmd/entire/cli/strategy/manual_commit_attribution.go index 4707e9c85..844e8a463 100644 --- a/cmd/entire/cli/strategy/manual_commit_attribution.go +++ b/cmd/entire/cli/strategy/manual_commit_attribution.go @@ -201,6 +201,7 @@ func CalculateAttributionWithAccumulated( parentCommitHash string, attributionBaseCommit string, headCommitHash string, + parentTree *object.Tree, ) *checkpoint.InitialAttribution { if len(filesTouched) == 0 { return nil @@ -287,9 +288,16 @@ func CalculateAttributionWithAccumulated( continue // Skip CLI metadata — matches filter in calculatePromptAttributionAtStart } - baseContent := getFileContent(baseTree, filePath) + // Use parentTree for line counting when available so only THIS commit's + // changes are counted (consistent with parentCommitHash file scoping). + // For initial commits or when parentTree is nil, fall back to baseTree. + nonAgentDiffTree := parentTree + if nonAgentDiffTree == nil { + nonAgentDiffTree = baseTree + } + diffBaseContent := getFileContent(nonAgentDiffTree, filePath) headContent := getFileContent(headTree, filePath) - _, userAdded, _ := diffLines(baseContent, headContent) + _, userAdded, _ := diffLines(diffBaseContent, headContent) allUserEditsToNonAgentFiles += userAdded } diff --git a/cmd/entire/cli/strategy/manual_commit_attribution_test.go b/cmd/entire/cli/strategy/manual_commit_attribution_test.go index 28963ea0b..b6138b1cb 100644 --- a/cmd/entire/cli/strategy/manual_commit_attribution_test.go +++ b/cmd/entire/cli/strategy/manual_commit_attribution_test.go @@ -281,7 +281,7 @@ func TestCalculateAttributionWithAccumulated_BasicCase(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, ) require.NotNil(t, result, "expected non-nil result") @@ -338,7 +338,7 @@ func TestCalculateAttributionWithAccumulated_BugScenario(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, ) require.NotNil(t, result, "expected non-nil result") @@ -395,7 +395,7 @@ func TestCalculateAttributionWithAccumulated_DeletionOnly(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, ) require.NotNil(t, result, "expected non-nil result") @@ -444,7 +444,7 @@ func TestCalculateAttributionWithAccumulated_NoUserEdits(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, ) require.NotNil(t, result, "expected non-nil result") @@ -496,7 +496,7 @@ func TestCalculateAttributionWithAccumulated_NoAgentWork(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, ) require.NotNil(t, result, "expected non-nil result") @@ -550,7 +550,7 @@ func TestCalculateAttributionWithAccumulated_UserRemovesAllAgentLines(t *testing result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, ) require.NotNil(t, result, "expected non-nil result") @@ -619,7 +619,7 @@ func TestCalculateAttributionWithAccumulated_WithPromptAttributions(t *testing.T result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, ) require.NotNil(t, result, "expected non-nil result") @@ -662,7 +662,7 @@ func TestCalculateAttributionWithAccumulated_EmptyFilesTouched(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, []string{}, []PromptAttribution{}, "", "", "", "", + baseTree, shadowTree, headTree, []string{}, []PromptAttribution{}, "", "", "", "", nil, ) if result != nil { @@ -716,7 +716,7 @@ func TestCalculateAttributionWithAccumulated_UserEditsNonAgentFile(t *testing.T) result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, ) require.NotNil(t, result, "expected non-nil result") @@ -1015,7 +1015,7 @@ func TestCalculateAttributionWithAccumulated_UserSelfModification(t *testing.T) result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, ) require.NotNil(t, result, "expected non-nil result") @@ -1086,7 +1086,7 @@ func TestCalculateAttributionWithAccumulated_MixedModifications(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, ) require.NotNil(t, result, "expected non-nil result") @@ -1167,7 +1167,7 @@ func TestCalculateAttributionWithAccumulated_UncommittedWorktreeFiles(t *testing result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, ) require.NotNil(t, result, "expected non-nil result") @@ -1272,7 +1272,7 @@ func TestCalculateAttributionWithAccumulated_PreSessionDirtOnAgentFiles(t *testi result := CalculateAttributionWithAccumulated( context.Background(), baseTree, shadowTree, headTree, filesTouched, promptAttributions, - "", "", "", "", + "", "", "", "", nil, ) require.NotNil(t, result) @@ -1326,7 +1326,7 @@ func TestCalculateAttributionWithAccumulated_PreSessionConfigFiles(t *testing.T) result := CalculateAttributionWithAccumulated( context.Background(), baseTree, shadowTree, headTree, filesTouched, promptAttributions, - "", "", "", "", + "", "", "", "", nil, ) require.NotNil(t, result) @@ -1377,7 +1377,7 @@ func TestCalculateAttributionWithAccumulated_DuringSessionHumanEdits(t *testing. result := CalculateAttributionWithAccumulated( context.Background(), baseTree, shadowTree, headTree, filesTouched, promptAttributions, - "", "", "", "", + "", "", "", "", nil, ) require.NotNil(t, result) @@ -1409,7 +1409,7 @@ func TestCalculateAttributionWithAccumulated_EmptyPA(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), baseTree, shadowTree, headTree, filesTouched, nil, - "", "", "", "", + "", "", "", "", nil, ) require.NotNil(t, result) @@ -1417,3 +1417,72 @@ func TestCalculateAttributionWithAccumulated_EmptyPA(t *testing.T) { require.Equal(t, 0, result.HumanAdded) require.InDelta(t, 100.0, result.AgentPercentage, 0.1) } + +// TestCalculateAttributionWithAccumulated_ParentTreeForNonAgentLines verifies that +// non-agent file line counting uses parentTree (not baseTree) when provided. +// This prevents inflation in multi-commit sessions where a non-agent file was +// modified in an intermediate commit AND the current commit. +// +// Scenario (multi-commit session): +// - Session starts at commit A: readme.md has 2 lines +// - Commit B: user adds 5 lines to readme.md (intermediate commit) +// - Commit C (current): agent modifies main.go, user adds 3 more lines to readme.md +// +// Without parentTree: diffLines(baseTree=A, headTree=C) counts ALL 8 lines → inflated +// With parentTree: diffLines(parentTree=B, headTree=C) counts only 3 lines → correct +func TestCalculateAttributionWithAccumulated_ParentTreeForNonAgentLines(t *testing.T) { + t.Parallel() + + // baseTree = commit A: readme.md has 2 lines, main.go is empty + baseTree := buildTestTree(t, map[string]string{ + "main.go": "", + "readme.md": "line1\nline2\n", + }) + + // parentTree = commit B: readme.md grew to 7 lines (user added 5 in intermediate commit) + parentTree := buildTestTree(t, map[string]string{ + "main.go": "", + "readme.md": "line1\nline2\ninter1\ninter2\ninter3\ninter4\ninter5\n", + }) + + // shadowTree: agent added 4 lines to main.go (checkpoint state) + shadowTree := buildTestTree(t, map[string]string{ + "main.go": "func a() {}\nfunc b() {}\nfunc c() {}\nfunc d() {}\n", + "readme.md": "line1\nline2\ninter1\ninter2\ninter3\ninter4\ninter5\n", + }) + + // headTree = commit C: agent's main.go + user added 3 more lines to readme.md + headTree := buildTestTree(t, map[string]string{ + "main.go": "func a() {}\nfunc b() {}\nfunc c() {}\nfunc d() {}\n", + "readme.md": "line1\nline2\ninter1\ninter2\ninter3\ninter4\ninter5\nnew1\nnew2\nnew3\n", + }) + + filesTouched := []string{"main.go"} + + // No prompt attributions (clean worktree at session start) + promptAttributions := []PromptAttribution{} + + // WITH parentTree: should only count 3 new readme.md lines (parent→head) + result := CalculateAttributionWithAccumulated( + context.Background(), + baseTree, shadowTree, headTree, filesTouched, promptAttributions, + "", "", "", "", parentTree, + ) + + require.NotNil(t, result) + require.Equal(t, 4, result.AgentLines, "agent added 4 lines to main.go") + require.Equal(t, 3, result.HumanAdded, "only 3 lines from THIS commit, not all 8 since session start") + require.Equal(t, 7, result.TotalCommitted, "4 agent + 3 human") + require.InDelta(t, 57.1, result.AgentPercentage, 0.2, "4/7 = 57.1%") + + // WITHOUT parentTree (nil): would count all 8 lines since session start — verify the bug + resultNoPT := CalculateAttributionWithAccumulated( + context.Background(), + baseTree, shadowTree, headTree, filesTouched, promptAttributions, + "", "", "", "", nil, + ) + + require.NotNil(t, resultNoPT) + // Without parentTree, falls back to baseTree: counts 8 lines (all since session start) + require.Equal(t, 8, resultNoPT.HumanAdded, "without parentTree, all 8 lines counted (inflated)") +} diff --git a/cmd/entire/cli/strategy/manual_commit_condensation.go b/cmd/entire/cli/strategy/manual_commit_condensation.go index cf068bb1e..45414c8fa 100644 --- a/cmd/entire/cli/strategy/manual_commit_condensation.go +++ b/cmd/entire/cli/strategy/manual_commit_condensation.go @@ -89,6 +89,7 @@ func (s *ManualCommitStrategy) getCheckpointLog(ctx context.Context, checkpointI type condenseOpts struct { shadowRef *plumbing.Reference // Pre-resolved shadow branch ref (nil = resolve from repo) headTree *object.Tree // Pre-resolved HEAD tree (passed through to calculateSessionAttributions) + parentTree *object.Tree // Pre-resolved parent tree (nil for initial commits, for consistent non-agent line counting) repoDir string // Repository worktree path for git CLI commands parentCommitHash string // HEAD's first parent hash for per-commit non-agent file detection headCommitHash string // HEAD commit hash (passed through for attribution) @@ -196,6 +197,7 @@ func (s *ManualCommitStrategy) CondenseSession(ctx context.Context, repo *git.Re attribution := calculateSessionAttributions(ctx, repo, ref, sessionData, state, attributionOpts{ headTree: o.headTree, + parentTree: o.parentTree, repoDir: o.repoDir, attributionBaseCommit: attrBase, parentCommitHash: o.parentCommitHash, @@ -353,6 +355,7 @@ func sessionStateBackfillTokenUsage(ctx context.Context, ag agent.Agent, agentTy type attributionOpts struct { headTree *object.Tree // HEAD commit tree (already resolved by PostCommit) shadowTree *object.Tree // Shadow branch tree (already resolved by PostCommit) + parentTree *object.Tree // Parent commit tree (nil for initial commits, for consistent non-agent line counting) repoDir string // Repository worktree path for git CLI commands attributionBaseCommit string // Base commit hash for non-agent file detection (empty = fall back to go-git tree walk) parentCommitHash string // HEAD's first parent hash (preferred diff base for non-agent files) @@ -479,6 +482,7 @@ func calculateSessionAttributions(ctx context.Context, repo *git.Repository, sha o.parentCommitHash, o.attributionBaseCommit, o.headCommitHash, + o.parentTree, ) if attribution != nil { diff --git a/cmd/entire/cli/strategy/manual_commit_hooks.go b/cmd/entire/cli/strategy/manual_commit_hooks.go index 9922a1c5a..ec03781d9 100644 --- a/cmd/entire/cli/strategy/manual_commit_hooks.go +++ b/cmd/entire/cli/strategy/manual_commit_hooks.go @@ -617,10 +617,11 @@ type postCommitActionHandler struct { // Cached git objects — resolved once per PostCommit invocation to avoid // redundant reads across filesOverlapWithContent, filesWithRemainingAgentChanges, // CondenseSession, and calculateSessionAttributions. - headTree *object.Tree // HEAD commit tree (shared across all sessions) - parentTree *object.Tree // HEAD's first parent tree (shared, nil for initial commits) - shadowRef *plumbing.Reference // Per-session shadow branch ref (nil if branch doesn't exist) - shadowTree *object.Tree // Per-session shadow commit tree (nil if branch doesn't exist) + headTree *object.Tree // HEAD commit tree (shared across all sessions) + parentTree *object.Tree // HEAD's first parent tree (shared, nil for initial commits) + parentCommitHash string // HEAD's first parent hash (empty for initial commits) + shadowRef *plumbing.Reference // Per-session shadow branch ref (nil if branch doesn't exist) + shadowTree *object.Tree // Per-session shadow commit tree (nil if branch doesn't exist) // Output: set by handler methods, read by caller after TransitionAndLog. condensed bool @@ -639,17 +640,12 @@ func (h *postCommitActionHandler) HandleCondense(state *session.State) error { ) if shouldCondense { - parentCommitHash := "" - if h.commit.NumParents() > 0 { - if parent, err := h.commit.Parent(0); err == nil { - parentCommitHash = parent.Hash.String() - } - } h.condensed = h.s.condenseAndUpdateState(h.ctx, h.repo, h.checkpointID, state, h.head, h.shadowBranchName, h.shadowBranchesToDelete, h.committedFileSet, condenseOpts{ shadowRef: h.shadowRef, headTree: h.headTree, + parentTree: h.parentTree, repoDir: h.repoDir, - parentCommitHash: parentCommitHash, + parentCommitHash: h.parentCommitHash, headCommitHash: h.newHead, }) } else { @@ -672,17 +668,12 @@ func (h *postCommitActionHandler) HandleCondenseIfFilesTouched(state *session.St ) if shouldCondense { - parentCommitHash := "" - if h.commit.NumParents() > 0 { - if parent, err := h.commit.Parent(0); err == nil { - parentCommitHash = parent.Hash.String() - } - } h.condensed = h.s.condenseAndUpdateState(h.ctx, h.repo, h.checkpointID, state, h.head, h.shadowBranchName, h.shadowBranchesToDelete, h.committedFileSet, condenseOpts{ shadowRef: h.shadowRef, headTree: h.headTree, + parentTree: h.parentTree, repoDir: h.repoDir, - parentCommitHash: parentCommitHash, + parentCommitHash: h.parentCommitHash, headCommitHash: h.newHead, }) } else { @@ -851,7 +842,9 @@ func (s *ManualCommitStrategy) PostCommit(ctx context.Context) error { //nolint: headTree = t } var parentTree *object.Tree - if commit.NumParents() > 0 { + var parentCommitHash string + if commit.NumParents() > 0 && len(commit.ParentHashes) > 0 { + parentCommitHash = commit.ParentHashes[0].String() if parent, err := commit.Parent(0); err == nil { if t, err := parent.Tree(); err == nil { parentTree = t @@ -871,8 +864,8 @@ func (s *ManualCommitStrategy) PostCommit(ctx context.Context) error { //nolint: } iterCtx, iterSpan := processSessionsLoop.Iteration(loopCtx) s.postCommitProcessSession(iterCtx, repo, state, &transitionCtx, checkpointID, - head, commit, newHead, worktreePath, headTree, parentTree, committedFileSet, - shadowBranchesToDelete, uncondensedActiveOnBranch) + head, commit, newHead, worktreePath, headTree, parentTree, parentCommitHash, + committedFileSet, shadowBranchesToDelete, uncondensedActiveOnBranch) iterSpan.End() } processSessionsLoop.End() @@ -993,6 +986,7 @@ func (s *ManualCommitStrategy) postCommitProcessSession( newHead string, repoDir string, headTree, parentTree *object.Tree, + parentCommitHash string, committedFileSet map[string]struct{}, shadowBranchesToDelete map[string]struct{}, uncondensedActiveOnBranch map[string]bool, @@ -1084,6 +1078,7 @@ func (s *ManualCommitStrategy) postCommitProcessSession( filesTouchedBefore: filesTouchedBefore, headTree: headTree, parentTree: parentTree, + parentCommitHash: parentCommitHash, shadowRef: shadowRef, shadowTree: shadowTree, } From 48ad3571e48c6e66c64a517ad568a9a6514ade07 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Fri, 3 Apr 2026 16:31:14 -0700 Subject: [PATCH 5/5] fix: accurate per-session attribution in multi-session checkpoints Three fixes for multi-session attribution: 1. Cross-session file exclusion: Thread allAgentFiles (union of all sessions' FilesTouched) through the attribution pipeline. Files created by other agent sessions are no longer counted as human work. 2. Exclude .entire/ from commit session fallback: When the commit session has no FilesTouched and falls back to all committed files, filter out .entire/ metadata created by `entire enable`. 3. PA1 baseline uses base tree for new sessions: New sessions (StepCount == 0) always diff against the base commit tree, not the shared shadow branch which may contain other sessions' state. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../cli/strategy/manual_commit_attribution.go | 25 ++++++-- .../manual_commit_attribution_test.go | 36 ++++++------ .../strategy/manual_commit_condensation.go | 37 ++++++++---- .../cli/strategy/manual_commit_hooks.go | 57 ++++++++++++------- 4 files changed, 99 insertions(+), 56 deletions(-) diff --git a/cmd/entire/cli/strategy/manual_commit_attribution.go b/cmd/entire/cli/strategy/manual_commit_attribution.go index 844e8a463..058473902 100644 --- a/cmd/entire/cli/strategy/manual_commit_attribution.go +++ b/cmd/entire/cli/strategy/manual_commit_attribution.go @@ -202,6 +202,7 @@ func CalculateAttributionWithAccumulated( attributionBaseCommit string, headCommitHash string, parentTree *object.Tree, + allAgentFiles map[string]struct{}, ) *checkpoint.InitialAttribution { if len(filesTouched) == 0 { return nil @@ -281,11 +282,8 @@ func CalculateAttributionWithAccumulated( } var allUserEditsToNonAgentFiles int for _, filePath := range allChangedFiles { - if slices.Contains(filesTouched, filePath) { - continue // Skip agent-touched files - } - if strings.HasPrefix(filePath, ".entire/") || strings.HasPrefix(filePath, paths.EntireMetadataDir+"/") { - continue // Skip CLI metadata — matches filter in calculatePromptAttributionAtStart + if isAgentOrMetadataFile(filePath, filesTouched, allAgentFiles) { + continue } // Use parentTree for line counting when available so only THIS commit's @@ -309,7 +307,7 @@ func CalculateAttributionWithAccumulated( // that should not affect attribution. committedNonAgentSet := make(map[string]struct{}, len(allChangedFiles)) for _, f := range allChangedFiles { - if !slices.Contains(filesTouched, f) { + if !isAgentOrMetadataFile(f, filesTouched, allAgentFiles) { committedNonAgentSet[f] = struct{}{} } } @@ -348,6 +346,7 @@ func CalculateAttributionWithAccumulated( sessionAccumulatedToNonAgent := max(0, accumulatedToCommittedNonAgentFiles-baselineToCommittedNonAgentFiles) relevantAccumulatedUser := sessionAccumulatedToAgentFiles + sessionAccumulatedToNonAgent totalUserAdded := relevantAccumulatedUser + postCheckpointUserAdded + postToNonAgentFiles + // Exclude baseline removals (pre-session state) from human removal count. sessionAccumulatedUserRemoved := max(0, accumulatedUserRemoved-baselineUserRemoved) totalUserRemoved := sessionAccumulatedUserRemoved + postCheckpointUserRemoved @@ -488,3 +487,17 @@ func CalculatePromptAttribution( return result } + +// isAgentOrMetadataFile returns true if the file was touched by any agent session +// (this session or another) or is CLI metadata that should be excluded from attribution. +func isAgentOrMetadataFile(filePath string, filesTouched []string, allAgentFiles map[string]struct{}) bool { + if slices.Contains(filesTouched, filePath) { + return true + } + if allAgentFiles != nil { + if _, ok := allAgentFiles[filePath]; ok { + return true + } + } + return strings.HasPrefix(filePath, ".entire/") || strings.HasPrefix(filePath, paths.EntireMetadataDir+"/") +} diff --git a/cmd/entire/cli/strategy/manual_commit_attribution_test.go b/cmd/entire/cli/strategy/manual_commit_attribution_test.go index b6138b1cb..7c96f6482 100644 --- a/cmd/entire/cli/strategy/manual_commit_attribution_test.go +++ b/cmd/entire/cli/strategy/manual_commit_attribution_test.go @@ -281,7 +281,7 @@ func TestCalculateAttributionWithAccumulated_BasicCase(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, nil, ) require.NotNil(t, result, "expected non-nil result") @@ -338,7 +338,7 @@ func TestCalculateAttributionWithAccumulated_BugScenario(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, nil, ) require.NotNil(t, result, "expected non-nil result") @@ -395,7 +395,7 @@ func TestCalculateAttributionWithAccumulated_DeletionOnly(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, nil, ) require.NotNil(t, result, "expected non-nil result") @@ -444,7 +444,7 @@ func TestCalculateAttributionWithAccumulated_NoUserEdits(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, nil, ) require.NotNil(t, result, "expected non-nil result") @@ -496,7 +496,7 @@ func TestCalculateAttributionWithAccumulated_NoAgentWork(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, nil, ) require.NotNil(t, result, "expected non-nil result") @@ -550,7 +550,7 @@ func TestCalculateAttributionWithAccumulated_UserRemovesAllAgentLines(t *testing result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, nil, ) require.NotNil(t, result, "expected non-nil result") @@ -619,7 +619,7 @@ func TestCalculateAttributionWithAccumulated_WithPromptAttributions(t *testing.T result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, nil, ) require.NotNil(t, result, "expected non-nil result") @@ -662,7 +662,7 @@ func TestCalculateAttributionWithAccumulated_EmptyFilesTouched(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, []string{}, []PromptAttribution{}, "", "", "", "", nil, + baseTree, shadowTree, headTree, []string{}, []PromptAttribution{}, "", "", "", "", nil, nil, ) if result != nil { @@ -716,7 +716,7 @@ func TestCalculateAttributionWithAccumulated_UserEditsNonAgentFile(t *testing.T) result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, nil, ) require.NotNil(t, result, "expected non-nil result") @@ -1015,7 +1015,7 @@ func TestCalculateAttributionWithAccumulated_UserSelfModification(t *testing.T) result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, nil, ) require.NotNil(t, result, "expected non-nil result") @@ -1086,7 +1086,7 @@ func TestCalculateAttributionWithAccumulated_MixedModifications(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, nil, ) require.NotNil(t, result, "expected non-nil result") @@ -1167,7 +1167,7 @@ func TestCalculateAttributionWithAccumulated_UncommittedWorktreeFiles(t *testing result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, + baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", "", nil, nil, ) require.NotNil(t, result, "expected non-nil result") @@ -1272,7 +1272,7 @@ func TestCalculateAttributionWithAccumulated_PreSessionDirtOnAgentFiles(t *testi result := CalculateAttributionWithAccumulated( context.Background(), baseTree, shadowTree, headTree, filesTouched, promptAttributions, - "", "", "", "", nil, + "", "", "", "", nil, nil, ) require.NotNil(t, result) @@ -1326,7 +1326,7 @@ func TestCalculateAttributionWithAccumulated_PreSessionConfigFiles(t *testing.T) result := CalculateAttributionWithAccumulated( context.Background(), baseTree, shadowTree, headTree, filesTouched, promptAttributions, - "", "", "", "", nil, + "", "", "", "", nil, nil, ) require.NotNil(t, result) @@ -1377,7 +1377,7 @@ func TestCalculateAttributionWithAccumulated_DuringSessionHumanEdits(t *testing. result := CalculateAttributionWithAccumulated( context.Background(), baseTree, shadowTree, headTree, filesTouched, promptAttributions, - "", "", "", "", nil, + "", "", "", "", nil, nil, ) require.NotNil(t, result) @@ -1409,7 +1409,7 @@ func TestCalculateAttributionWithAccumulated_EmptyPA(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), baseTree, shadowTree, headTree, filesTouched, nil, - "", "", "", "", nil, + "", "", "", "", nil, nil, ) require.NotNil(t, result) @@ -1466,7 +1466,7 @@ func TestCalculateAttributionWithAccumulated_ParentTreeForNonAgentLines(t *testi result := CalculateAttributionWithAccumulated( context.Background(), baseTree, shadowTree, headTree, filesTouched, promptAttributions, - "", "", "", "", parentTree, + "", "", "", "", parentTree, nil, ) require.NotNil(t, result) @@ -1479,7 +1479,7 @@ func TestCalculateAttributionWithAccumulated_ParentTreeForNonAgentLines(t *testi resultNoPT := CalculateAttributionWithAccumulated( context.Background(), baseTree, shadowTree, headTree, filesTouched, promptAttributions, - "", "", "", "", nil, + "", "", "", "", nil, nil, ) require.NotNil(t, resultNoPT) diff --git a/cmd/entire/cli/strategy/manual_commit_condensation.go b/cmd/entire/cli/strategy/manual_commit_condensation.go index 45414c8fa..4930a291e 100644 --- a/cmd/entire/cli/strategy/manual_commit_condensation.go +++ b/cmd/entire/cli/strategy/manual_commit_condensation.go @@ -93,6 +93,7 @@ type condenseOpts struct { repoDir string // Repository worktree path for git CLI commands parentCommitHash string // HEAD's first parent hash for per-commit non-agent file detection headCommitHash string // HEAD commit hash (passed through for attribution) + allAgentFiles map[string]struct{} // Union of all sessions' FilesTouched for cross-session exclusion (nil = single-session) } // CondenseSession condenses a session's shadow branch to permanent storage. @@ -173,10 +174,7 @@ func (s *ManualCommitStrategy) CondenseSession(ctx context.Context, repo *git.Re } if len(sessionData.FilesTouched) == 0 && !hadFilesBeforeFiltering { - sessionData.FilesTouched = make([]string, 0, len(committedFiles)) - for f := range committedFiles { - sessionData.FilesTouched = append(sessionData.FilesTouched, f) - } + sessionData.FilesTouched = committedFilesExcludingMetadata(committedFiles) } } @@ -202,6 +200,7 @@ func (s *ManualCommitStrategy) CondenseSession(ctx context.Context, repo *git.Re attributionBaseCommit: attrBase, parentCommitHash: o.parentCommitHash, headCommitHash: o.headCommitHash, + allAgentFiles: o.allAgentFiles, }) // Get current branch name @@ -353,13 +352,14 @@ func sessionStateBackfillTokenUsage(ctx context.Context, ag agent.Agent, agentTy // attributionOpts provides pre-resolved git objects to avoid redundant reads. type attributionOpts struct { - headTree *object.Tree // HEAD commit tree (already resolved by PostCommit) - shadowTree *object.Tree // Shadow branch tree (already resolved by PostCommit) - parentTree *object.Tree // Parent commit tree (nil for initial commits, for consistent non-agent line counting) - repoDir string // Repository worktree path for git CLI commands - attributionBaseCommit string // Base commit hash for non-agent file detection (empty = fall back to go-git tree walk) - parentCommitHash string // HEAD's first parent hash (preferred diff base for non-agent files) - headCommitHash string // HEAD commit hash for non-agent file detection (empty = fall back to go-git tree walk) + headTree *object.Tree // HEAD commit tree (already resolved by PostCommit) + shadowTree *object.Tree // Shadow branch tree (already resolved by PostCommit) + parentTree *object.Tree // Parent commit tree (nil for initial commits, for consistent non-agent line counting) + repoDir string // Repository worktree path for git CLI commands + attributionBaseCommit string // Base commit hash for non-agent file detection (empty = fall back to go-git tree walk) + parentCommitHash string // HEAD's first parent hash (preferred diff base for non-agent files) + headCommitHash string // HEAD commit hash for non-agent file detection (empty = fall back to go-git tree walk) + allAgentFiles map[string]struct{} // Union of all sessions' FilesTouched (nil = single-session) } func calculateSessionAttributions(ctx context.Context, repo *git.Repository, shadowRef *plumbing.Reference, sessionData *ExtractedSessionData, state *SessionState, opts ...attributionOpts) *cpkg.InitialAttribution { @@ -483,6 +483,7 @@ func calculateSessionAttributions(ctx context.Context, repo *git.Repository, sha o.attributionBaseCommit, o.headCommitHash, o.parentTree, + o.allAgentFiles, ) if attribution != nil { @@ -501,6 +502,20 @@ func calculateSessionAttributions(ctx context.Context, repo *git.Repository, sha return attribution } +// committedFilesExcludingMetadata returns committed files with CLI metadata paths filtered out. +// `.entire/` files are created by `entire enable`, not by the agent, and should not be +// attributed as agent work when used as a fallback for sessions with no FilesTouched. +func committedFilesExcludingMetadata(committedFiles map[string]struct{}) []string { + result := make([]string, 0, len(committedFiles)) + for f := range committedFiles { + if strings.HasPrefix(f, ".entire/") || strings.HasPrefix(f, paths.EntireMetadataDir+"/") { + continue + } + result = append(result, f) + } + return result +} + // extractSessionData extracts session data from the shadow branch. // filesTouched is the list of files tracked during the session (from SessionState.FilesTouched). // agentType identifies the agent (e.g., "Gemini CLI", "Claude Code") to determine transcript format. diff --git a/cmd/entire/cli/strategy/manual_commit_hooks.go b/cmd/entire/cli/strategy/manual_commit_hooks.go index 604dfe9a0..2ff9073aa 100644 --- a/cmd/entire/cli/strategy/manual_commit_hooks.go +++ b/cmd/entire/cli/strategy/manual_commit_hooks.go @@ -623,6 +623,7 @@ type postCommitActionHandler struct { parentCommitHash string // HEAD's first parent hash (empty for initial commits) shadowRef *plumbing.Reference // Per-session shadow branch ref (nil if branch doesn't exist) shadowTree *object.Tree // Per-session shadow commit tree (nil if branch doesn't exist) + allAgentFiles map[string]struct{} // Union of all sessions' FilesTouched for cross-session attribution // Output: set by handler methods, read by caller after TransitionAndLog. condensed bool @@ -648,6 +649,7 @@ func (h *postCommitActionHandler) HandleCondense(state *session.State) error { repoDir: h.repoDir, parentCommitHash: h.parentCommitHash, headCommitHash: h.newHead, + allAgentFiles: h.allAgentFiles, }) } else { h.s.updateBaseCommitIfChanged(h.ctx, state, h.newHead) @@ -676,6 +678,7 @@ func (h *postCommitActionHandler) HandleCondenseIfFilesTouched(state *session.St repoDir: h.repoDir, parentCommitHash: h.parentCommitHash, headCommitHash: h.newHead, + allAgentFiles: h.allAgentFiles, }) } else { h.s.updateBaseCommitIfChanged(h.ctx, state, h.newHead) @@ -896,6 +899,16 @@ func (s *ManualCommitStrategy) PostCommit(ctx context.Context) error { //nolint: committedFileSet := filesChangedInCommit(ctx, worktreePath, commit, headTree, parentTree) resolveTreesSpan.End() + // Compute union of all sessions' FilesTouched for cross-session attribution. + // This lets each session's attribution exclude files created by other agent sessions, + // preventing files from session B being counted as "human work" in session A. + allAgentFiles := make(map[string]struct{}) + for _, state := range sessions { + for _, f := range state.FilesTouched { + allAgentFiles[f] = struct{}{} + } + } + loopCtx, processSessionsLoop := perf.StartLoop(ctx, "process_sessions") for _, state := range sessions { // Skip fully-condensed ended sessions — no work remains. @@ -906,7 +919,7 @@ func (s *ManualCommitStrategy) PostCommit(ctx context.Context) error { //nolint: iterCtx, iterSpan := processSessionsLoop.Iteration(loopCtx) s.postCommitProcessSession(iterCtx, repo, state, &transitionCtx, checkpointID, head, commit, newHead, worktreePath, headTree, parentTree, parentCommitHash, - committedFileSet, shadowBranchesToDelete, uncondensedActiveOnBranch) + committedFileSet, shadowBranchesToDelete, uncondensedActiveOnBranch, allAgentFiles) iterSpan.End() } processSessionsLoop.End() @@ -1035,6 +1048,7 @@ func (s *ManualCommitStrategy) postCommitProcessSession( committedFileSet map[string]struct{}, shadowBranchesToDelete map[string]struct{}, uncondensedActiveOnBranch map[string]bool, + allAgentFiles map[string]struct{}, ) { logCtx := logging.WithComponent(ctx, "checkpoint") shadowBranchName := getShadowBranchNameForCommit(state.BaseCommit, state.WorktreeID) @@ -1126,6 +1140,7 @@ func (s *ManualCommitStrategy) postCommitProcessSession( parentCommitHash: parentCommitHash, shadowRef: shadowRef, shadowTree: shadowTree, + allAgentFiles: allAgentFiles, } if err := TransitionAndLog(ctx, state, session.EventGitCommit, *transitionCtx, handler); err != nil { @@ -2071,33 +2086,33 @@ func (s *ManualCommitStrategy) calculatePromptAttributionAtStart( nextCheckpointNum := state.StepCount + 1 result := PromptAttribution{CheckpointNumber: nextCheckpointNum} - // Get last checkpoint tree from shadow branch (if it exists) - // For the first checkpoint, no shadow branch exists yet - this is fine, - // CalculatePromptAttribution will use baseTree as the reference instead. + // Get last checkpoint tree from shadow branch (if it exists). + // For a new session (StepCount == 0), always use baseTree as the reference. + // The shadow branch may contain checkpoints from OTHER concurrent sessions, + // and using that tree would miss pre-session worktree dirt (e.g., .claude/settings.json) + // because it appears unchanged when compared to another session's snapshot. var lastCheckpointTree *object.Tree - shadowBranchName := checkpoint.ShadowBranchNameForCommit(state.BaseCommit, state.WorktreeID) - refName := plumbing.NewBranchReferenceName(shadowBranchName) - ref, err := repo.Reference(refName, true) - if err != nil { - logging.Debug(logCtx, "prompt attribution: no shadow branch yet (first checkpoint)", - slog.String("shadow_branch", shadowBranchName)) - // Continue with lastCheckpointTree = nil - } else { - shadowCommit, err := repo.CommitObject(ref.Hash()) - if err != nil { + if state.StepCount > 0 { + // Existing session with prior checkpoints — use shadow branch as reference. + shadowBranchName := checkpoint.ShadowBranchNameForCommit(state.BaseCommit, state.WorktreeID) + refName := plumbing.NewBranchReferenceName(shadowBranchName) + if ref, err := repo.Reference(refName, true); err != nil { + logging.Debug(logCtx, "prompt attribution: no shadow branch", + slog.String("shadow_branch", shadowBranchName)) + } else if shadowCommit, err := repo.CommitObject(ref.Hash()); err != nil { logging.Debug(logCtx, "prompt attribution: failed to get shadow commit", slog.String("shadow_ref", ref.Hash().String()), slog.String("error", err.Error())) - // Continue with lastCheckpointTree = nil + } else if tree, err := shadowCommit.Tree(); err != nil { + logging.Debug(logCtx, "prompt attribution: failed to get shadow tree", + slog.String("error", err.Error())) } else { - lastCheckpointTree, err = shadowCommit.Tree() - if err != nil { - logging.Debug(logCtx, "prompt attribution: failed to get shadow tree", - slog.String("error", err.Error())) - // Continue with lastCheckpointTree = nil - } + lastCheckpointTree = tree } } + // For new sessions (StepCount == 0), lastCheckpointTree stays nil. + // CalculatePromptAttribution falls back to baseTree, ensuring pre-session + // worktree dirt is captured even when the shadow branch has other sessions' data. // Get base tree for agent lines calculation var baseTree *object.Tree