Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 20 additions & 8 deletions cmd/entire/cli/checkpoint/checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package checkpoint

import (
"context"
"encoding/json"
"errors"
"time"

Expand Down Expand Up @@ -282,6 +283,12 @@ type WriteCommittedOptions struct {
// comparing checkpoint tree (agent work) to committed tree (may include human edits)
InitialAttribution *InitialAttribution

// PromptAttributionsJSON is the raw PromptAttributions data, JSON-encoded.
// Persisted for diagnostic purposes — shows exactly which prompt recorded
// which "user" lines, enabling root cause analysis of attribution bugs.
// Uses json.RawMessage to avoid importing session package.
PromptAttributionsJSON json.RawMessage

// Summary is an optional AI-generated summary for this checkpoint.
// This field may be nil when:
// - summarization is disabled in settings
Expand Down Expand Up @@ -402,6 +409,10 @@ type CommittedMetadata struct {

// InitialAttribution is line-level attribution calculated at commit time
InitialAttribution *InitialAttribution `json:"initial_attribution,omitempty"`

// PromptAttributions is the raw per-prompt attribution data used to compute InitialAttribution.
// Diagnostic field — shows which prompt recorded which "user" lines.
PromptAttributions json.RawMessage `json:"prompt_attributions,omitempty"`
}

// GetTranscriptStart returns the transcript line offset at which this checkpoint's data begins.
Expand Down Expand Up @@ -443,14 +454,15 @@ type SessionFilePaths struct {
//
//nolint:revive // Named CheckpointSummary to avoid conflict with existing Summary struct
type CheckpointSummary struct {
	// CLIVersion is the CLI version string recorded when the summary was written.
	CLIVersion string `json:"cli_version,omitempty"`

	// CheckpointID identifies the checkpoint this summary belongs to.
	CheckpointID id.CheckpointID `json:"checkpoint_id"`

	// Strategy and Branch are copied from the write options for the checkpoint.
	Strategy string `json:"strategy"`
	Branch   string `json:"branch,omitempty"`

	// CheckpointsCount, FilesTouched, Sessions and TokenUsage are filled in
	// together when the root summary is written.
	CheckpointsCount int                `json:"checkpoints_count"`
	FilesTouched     []string           `json:"files_touched"`
	Sessions         []SessionFilePaths `json:"sessions"`
	TokenUsage       *agent.TokenUsage  `json:"token_usage,omitempty"`

	// CombinedAttribution is an optional root-level attribution value. It is
	// omitted from the JSON when nil, is set through UpdateCheckpointSummary,
	// and is carried over from the existing summary when the root metadata is
	// rewritten.
	CombinedAttribution *InitialAttribution `json:"combined_attribution,omitempty"`
}

// SessionMetrics contains hook-provided session metrics from agents that report
Expand Down
97 changes: 89 additions & 8 deletions cmd/entire/cli/checkpoint/committed.go
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,7 @@ func (s *GitStore) writeSessionToSubdirectory(ctx context.Context, opts WriteCom
TokenUsage: opts.TokenUsage,
SessionMetrics: opts.SessionMetrics,
InitialAttribution: opts.InitialAttribution,
PromptAttributions: opts.PromptAttributionsJSON,
Summary: redactSummary(opts.Summary),
CLIVersion: versioninfo.Version,
}
Expand Down Expand Up @@ -414,15 +415,25 @@ func (s *GitStore) writeCheckpointSummary(opts WriteCommittedOptions, basePath s
return fmt.Errorf("failed to aggregate session stats: %w", err)
}

var combinedAttribution *InitialAttribution
rootMetadataPath := basePath + paths.MetadataFileName
if entry, exists := entries[rootMetadataPath]; exists {
existingSummary, readErr := s.readSummaryFromBlob(entry.Hash)
if readErr == nil {
combinedAttribution = existingSummary.CombinedAttribution
}
}

summary := CheckpointSummary{
CheckpointID: opts.CheckpointID,
CLIVersion: versioninfo.Version,
Strategy: opts.Strategy,
Branch: opts.Branch,
CheckpointsCount: checkpointsCount,
FilesTouched: filesTouched,
Sessions: sessions,
TokenUsage: tokenUsage,
CheckpointID: opts.CheckpointID,
CLIVersion: versioninfo.Version,
Strategy: opts.Strategy,
Branch: opts.Branch,
CheckpointsCount: checkpointsCount,
FilesTouched: filesTouched,
Sessions: sessions,
TokenUsage: tokenUsage,
CombinedAttribution: combinedAttribution,
}

metadataJSON, err := jsonutil.MarshalIndentWithNewline(summary, "", " ")
Expand All @@ -441,6 +452,76 @@ func (s *GitStore) writeCheckpointSummary(opts WriteCommittedOptions, basePath s
return nil
}

// UpdateCheckpointSummary replaces the CombinedAttribution stored in the
// root-level metadata file of an existing checkpoint and commits the result to
// the metadata branch (paths.MetadataBranchName).
//
// The stored summary is read back, only its CombinedAttribution field is
// overwritten with combinedAttribution, and the re-marshaled summary is spliced
// into the checkpoint subtree as a new commit on top of the current branch tip.
//
// It returns ctx.Err() if the context is already done on entry, and
// ErrCheckpointNotFound if the checkpoint has no root metadata entry.
func (s *GitStore) UpdateCheckpointSummary(ctx context.Context, checkpointID id.CheckpointID, combinedAttribution *InitialAttribution) error {
	if ctxErr := ctx.Err(); ctxErr != nil {
		return ctxErr //nolint:wrapcheck // Propagating context cancellation
	}

	if branchErr := s.ensureSessionsBranch(); branchErr != nil {
		return fmt.Errorf("failed to ensure sessions branch: %w", branchErr)
	}

	parentHash, rootTreeHash, err := s.getSessionsBranchRef()
	if err != nil {
		return err
	}

	// Derive every path from a single Path() lookup: the checkpoint directory,
	// the same directory with a trailing slash, and the root metadata file in it.
	checkpointDir := checkpointID.Path()
	prefix := checkpointDir + "/"
	summaryPath := prefix + paths.MetadataFileName

	entries, err := s.flattenCheckpointEntries(rootTreeHash, checkpointDir)
	if err != nil {
		return err
	}

	current, found := entries[summaryPath]
	if !found {
		return ErrCheckpointNotFound
	}

	summary, err := s.readSummaryFromBlob(current.Hash)
	if err != nil {
		return fmt.Errorf("failed to read checkpoint summary: %w", err)
	}
	summary.CombinedAttribution = combinedAttribution

	encoded, err := jsonutil.MarshalIndentWithNewline(summary, "", " ")
	if err != nil {
		return fmt.Errorf("failed to marshal checkpoint summary: %w", err)
	}

	blobHash, err := CreateBlobFromContent(s.repo, encoded)
	if err != nil {
		return fmt.Errorf("failed to create checkpoint summary blob: %w", err)
	}

	// Swap the metadata entry for the freshly written blob; all other entries
	// of the checkpoint are passed through unchanged.
	entries[summaryPath] = object.TreeEntry{
		Name: summaryPath,
		Mode: filemode.Regular,
		Hash: blobHash,
	}

	treeHash, err := s.spliceCheckpointSubtree(rootTreeHash, checkpointID, prefix, entries)
	if err != nil {
		return err
	}

	authorName, authorEmail := GetGitAuthorFromRepo(s.repo)
	commitHash, err := s.createCommit(
		treeHash,
		parentHash,
		fmt.Sprintf("Update checkpoint summary for %s", checkpointID),
		authorName,
		authorEmail,
	)
	if err != nil {
		return err
	}

	// Advance the metadata branch to the new commit.
	branchRef := plumbing.NewHashReference(
		plumbing.NewBranchReferenceName(paths.MetadataBranchName),
		commitHash,
	)
	if setErr := s.repo.Storer.SetReference(branchRef); setErr != nil {
		return fmt.Errorf("failed to set branch reference: %w", setErr)
	}

	return nil
}

// findSessionIndex returns the index of an existing session with the given ID,
// or the next available index if not found. This prevents duplicate session entries.
func (s *GitStore) findSessionIndex(ctx context.Context, basePath string, existingSummary *CheckpointSummary, entries map[string]object.TreeEntry, sessionID string) int {
Expand Down
1 change: 1 addition & 0 deletions cmd/entire/cli/checkpoint/v2_committed.go
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ func (s *V2GitStore) writeMainSessionToSubdirectory(opts WriteCommittedOptions,
TokenUsage: opts.TokenUsage,
SessionMetrics: opts.SessionMetrics,
InitialAttribution: opts.InitialAttribution,
PromptAttributions: opts.PromptAttributionsJSON,
Summary: redactSummary(opts.Summary),
CLIVersion: versioninfo.Version,
}
Expand Down
82 changes: 66 additions & 16 deletions cmd/entire/cli/strategy/manual_commit_attribution.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/entireio/cli/cmd/entire/cli/checkpoint"
"github.com/entireio/cli/cmd/entire/cli/gitops"
"github.com/entireio/cli/cmd/entire/cli/logging"
"github.com/entireio/cli/cmd/entire/cli/paths"
"github.com/go-git/go-git/v6/plumbing/object"
"github.com/sergi/go-diff/diffmatchpatch"
)
Expand Down Expand Up @@ -179,8 +180,11 @@ func countLinesStr(content string) int {
// 4. Estimate user self-modifications vs agent modifications using per-file tracking
// 5. Compute percentages
//
// attributionBaseCommit and headCommitHash are optional commit hashes for fast non-agent
// file detection via git diff-tree. When empty, falls back to go-git tree walk.
// parentCommitHash, attributionBaseCommit, and headCommitHash are optional commit hashes
// for fast non-agent file detection via git diff-tree. When a first parent exists,
// parentCommitHash→headCommitHash is preferred so only files from THIS commit count.
// For initial commits (no parent), falls back to attributionBaseCommit→headCommitHash.
// When hashes are empty, falls back to go-git tree walk.
//
// Note: Binary files (detected by null bytes) are silently excluded from attribution
// calculations since line-based diffing only applies to text files.
Expand All @@ -194,24 +198,42 @@ func CalculateAttributionWithAccumulated(
filesTouched []string,
promptAttributions []PromptAttribution,
repoDir string,
parentCommitHash string,
attributionBaseCommit string,
headCommitHash string,
parentTree *object.Tree,
) *checkpoint.InitialAttribution {
if len(filesTouched) == 0 {
return nil
}

// Sum accumulated user lines from prompt attributions
// Also aggregate per-file user additions for accurate modification tracking
// Sum accumulated user lines from prompt attributions.
// Also aggregate per-file user additions for accurate modification tracking.
//
// BASELINE SPLIT: PA1 (CheckpointNumber <= 1) captures pre-session worktree
// state — files already dirty when the session started (CLI config files,
// leftover changes from previous sessions). PA1 data is accumulated normally
// for agent line correction (subtracted from totalAgentAndUserWork) but is
// tracked separately so it can be EXCLUDED from human contribution counts.
// Only PA2+ represents genuine human edits during the session.
var accumulatedUserAdded, accumulatedUserRemoved int
var baselineUserRemoved int
accumulatedUserAddedPerFile := make(map[string]int)
baselineUserAddedPerFile := make(map[string]int)

for _, pa := range promptAttributions {
accumulatedUserAdded += pa.UserLinesAdded
accumulatedUserRemoved += pa.UserLinesRemoved
// Merge per-file data from all prompt attributions
for filePath, added := range pa.UserAddedPerFile {
accumulatedUserAddedPerFile[filePath] += added
}
// Track baseline (PA1) separately
if pa.CheckpointNumber <= 1 {
baselineUserRemoved += pa.UserLinesRemoved
for filePath, added := range pa.UserAddedPerFile {
baselineUserAddedPerFile[filePath] += added
}
}
}

// Calculate attribution for agent-touched files
Expand Down Expand Up @@ -243,8 +265,13 @@ func CalculateAttributionWithAccumulated(
}

// Calculate total user edits to non-agent files (files not in filesTouched)
// These files are not in the shadow tree, so base→head captures ALL their user edits
allChangedFiles, err := getAllChangedFiles(ctx, baseTree, headTree, repoDir, attributionBaseCommit, headCommitHash)
// These files are not in the shadow tree. Prefer parent→head so only THIS
// commit's non-agent files count; initial commits fall back to session base→head.
diffBaseCommit := parentCommitHash
if diffBaseCommit == "" {
diffBaseCommit = attributionBaseCommit
}
allChangedFiles, err := getAllChangedFiles(ctx, baseTree, headTree, repoDir, diffBaseCommit, headCommitHash)
if err != nil {
logging.Warn(logging.WithComponent(ctx, "attribution"),
"attribution: failed to enumerate changed files",
Expand All @@ -257,10 +284,20 @@ func CalculateAttributionWithAccumulated(
if slices.Contains(filesTouched, filePath) {
continue // Skip agent-touched files
}
if strings.HasPrefix(filePath, ".entire/") || strings.HasPrefix(filePath, paths.EntireMetadataDir+"/") {
continue // Skip CLI metadata — matches filter in calculatePromptAttributionAtStart
}

baseContent := getFileContent(baseTree, filePath)
// Use parentTree for line counting when available so only THIS commit's
// changes are counted (consistent with parentCommitHash file scoping).
// For initial commits or when parentTree is nil, fall back to baseTree.
nonAgentDiffTree := parentTree
if nonAgentDiffTree == nil {
nonAgentDiffTree = baseTree
}
diffBaseContent := getFileContent(nonAgentDiffTree, filePath)
headContent := getFileContent(headTree, filePath)
_, userAdded, _ := diffLines(baseContent, headContent)
_, userAdded, _ := diffLines(diffBaseContent, headContent)
allUserEditsToNonAgentFiles += userAdded
}

Expand Down Expand Up @@ -293,14 +330,27 @@ func CalculateAttributionWithAccumulated(
// Post-checkpoint edits to non-agent files = total edits - accumulated portion (never negative)
postToNonAgentFiles := max(0, allUserEditsToNonAgentFiles-accumulatedToCommittedNonAgentFiles)

// Total user contribution = accumulated (committed files only) + post-checkpoint edits
relevantAccumulatedUser := accumulatedToAgentFiles + accumulatedToCommittedNonAgentFiles
// Compute baseline (PA1) contributions to subtract from accumulated totals.
// PA1 data was used above for agent correction (accumulatedToAgentFiles) but should
// not be counted as human work — it's pre-session state.
var baselineToAgentFiles, baselineToCommittedNonAgentFiles int
for filePath, added := range baselineUserAddedPerFile {
if slices.Contains(filesTouched, filePath) {
baselineToAgentFiles += added
} else if _, ok := committedNonAgentSet[filePath]; ok {
baselineToCommittedNonAgentFiles += added
}
}

// Total user contribution = session-time accumulated + post-checkpoint edits.
// Subtract baseline contributions so pre-session dirt doesn't count as human.
sessionAccumulatedToAgentFiles := max(0, accumulatedToAgentFiles-baselineToAgentFiles)
sessionAccumulatedToNonAgent := max(0, accumulatedToCommittedNonAgentFiles-baselineToCommittedNonAgentFiles)
relevantAccumulatedUser := sessionAccumulatedToAgentFiles + sessionAccumulatedToNonAgent
totalUserAdded := relevantAccumulatedUser + postCheckpointUserAdded + postToNonAgentFiles
// TODO: accumulatedUserRemoved also includes removals from uncommitted files,
// but we don't have per-file tracking for removals yet. In practice, removals
// from uncommitted files are rare and the impact is minor (could slightly reduce
// totalCommitted via pureUserRemoved). Add UserRemovedPerFile if this becomes an issue.
totalUserRemoved := accumulatedUserRemoved + postCheckpointUserRemoved
// Exclude baseline removals (pre-session state) from human removal count.
sessionAccumulatedUserRemoved := max(0, accumulatedUserRemoved-baselineUserRemoved)
totalUserRemoved := sessionAccumulatedUserRemoved + postCheckpointUserRemoved

// Estimate modified lines (user changed existing lines)
// Lines that were both added and removed are treated as modifications.
Expand Down
Loading
Loading