diff --git a/README.md b/README.md index 48e6c90..ff5ce09 100644 --- a/README.md +++ b/README.md @@ -210,6 +210,7 @@ Available stats: | `profile` | Per-developer report: scope, specialization index, contribution type, pace, collaboration, top files | | `top-commits` | Largest commits ranked by lines changed (includes message if extracted with `--include-commit-messages`) | | `pareto` | Concentration (80% threshold) across files, devs (two lenses: commits and churn), and directories | +| `structure` | Repo layout as a `tree(1)`-style view, dirs sorted by aggregate churn, capped by `--tree-depth` (default 3) | Output formats: `table` (default, human-readable), `csv` (single clean table per `--stat`, header row on line 1), `json` (unified object with all sections). diff --git a/cmd/gitcortex/main.go b/cmd/gitcortex/main.go index a879368..0aee55b 100644 --- a/cmd/gitcortex/main.go +++ b/cmd/gitcortex/main.go @@ -107,7 +107,7 @@ func isValidStat(s string) bool { switch s { case "summary", "contributors", "hotspots", "directories", "activity", "busfactor", "coupling", "churn-risk", "working-patterns", - "dev-network", "profile", "top-commits", "pareto": + "dev-network", "profile", "top-commits", "pareto", "structure": return true } return false @@ -125,6 +125,7 @@ type statsFlags struct { churnHalfLife int networkMinFiles int email string + treeDepth int } func addStatsFlags(cmd *cobra.Command, sf *statsFlags) { @@ -132,13 +133,14 @@ func addStatsFlags(cmd *cobra.Command, sf *statsFlags) { cmd.Flags().StringVar(&sf.format, "format", "table", "Output format: table, csv, json") cmd.Flags().IntVar(&sf.topN, "top", 10, "Number of top entries to show (0 = all)") cmd.Flags().StringVar(&sf.granularity, "granularity", "month", "Activity granularity: day, week, month, year") - cmd.Flags().StringVar(&sf.stat, "stat", "", "Show a specific stat: summary, contributors, hotspots, directories, activity, busfactor, coupling, churn-risk, working-patterns, dev-network, profile, 
top-commits, pareto") + cmd.Flags().StringVar(&sf.stat, "stat", "", "Show a specific stat: summary, contributors, hotspots, directories, activity, busfactor, coupling, churn-risk, working-patterns, dev-network, profile, top-commits, pareto, structure") cmd.Flags().IntVar(&sf.couplingMaxFiles, "coupling-max-files", 50, "Max files per commit for coupling analysis") cmd.Flags().IntVar(&sf.couplingMinChanges, "coupling-min-changes", 5, "Min co-changes for coupling results") cmd.Flags().IntVar(&sf.churnHalfLife, "churn-half-life", 90, "Half-life in days for churn decay (churn-risk)") cmd.Flags().IntVar(&sf.networkMinFiles, "network-min-files", 5, "Min shared files for dev-network edges") cmd.Flags().StringVar(&sf.email, "email", "", "Filter by developer email (for profile stat)") cmd.Flags().StringVar(&sf.since, "since", "", "Filter to recent period (e.g. 7d, 4w, 3m, 1y)") + cmd.Flags().IntVar(&sf.treeDepth, "tree-depth", 3, "Max depth for --stat structure (0 = unlimited)") } func validateStatsFlags(sf *statsFlags) error { @@ -149,7 +151,7 @@ func validateStatsFlags(sf *statsFlags) error { return fmt.Errorf("invalid --granularity %q; must be one of: day, week, month, year", sf.granularity) } if sf.stat != "" && !isValidStat(sf.stat) { - return fmt.Errorf("invalid --stat %q; valid: summary, contributors, hotspots, directories, activity, busfactor, coupling, churn-risk, working-patterns, dev-network, profile, top-commits, pareto", sf.stat) + return fmt.Errorf("invalid --stat %q; valid: summary, contributors, hotspots, directories, activity, busfactor, coupling, churn-risk, working-patterns, dev-network, profile, top-commits, pareto, structure", sf.stat) } return nil } @@ -331,6 +333,22 @@ func renderStats(ds *stats.Dataset, sf *statsFlags) error { return err } } + if sf.stat == "structure" { + root := reportpkg.BuildRepoTree(stats.FileHotspots(ds, 0), sf.treeDepth) + // CSV skips the stderr banner — downstream parsers sometimes + // tail stderr onto stdout, and a stray 
"=== ... ===" would + // break the single-table contract. + if sf.format != "csv" { + depthLabel := "unlimited" + if sf.treeDepth > 0 { + depthLabel = fmt.Sprintf("%d", sf.treeDepth) + } + fmt.Fprintf(os.Stderr, "\n=== Repo Structure (depth %s) ===\n", depthLabel) + } + if err := reportpkg.RenderTreeForFormat(os.Stdout, root, sf.format); err != nil { + return err + } + } return nil } @@ -378,6 +396,9 @@ func renderStatsJSON(f *stats.Formatter, ds *stats.Dataset, sf *statsFlags) erro if showAll || sf.stat == "top-commits" { report["top_commits"] = stats.TopCommits(ds, sf.topN) } + if sf.stat == "structure" { + report["structure"] = reportpkg.BuildRepoTree(stats.FileHotspots(ds, 0), sf.treeDepth) + } return f.PrintReport(report) } diff --git a/docs/METRICS.md b/docs/METRICS.md index 4f28b74..1b7a0f6 100644 --- a/docs/METRICS.md +++ b/docs/METRICS.md @@ -254,6 +254,20 @@ Two dev lenses are surfaced because commit count alone is a flawed proxy for con **How to interpret**: "20 files concentrate 80% of all churn" describes where change lands — it can indicate a healthy core module under active development, or a bottleneck if combined with low bus factor. Cross-reference with the Churn Risk section before drawing conclusions. +## Repo Structure + +A `tree(1)`-style view of the repository's directory layout, built from paths seen in history (`FileHotspots`), not from the filesystem at HEAD. Deleted files are included — the view answers "what shaped the codebase", not "what is present today". + +**Aggregation**: +- File nodes: `Commits` and `Churn` are the per-file values. +- Directory nodes: `Churn` and `Files` sum over all descendants; `Commits` is intentionally left at zero. Per-file commit counts do not sum to a distinct commit count — one commit that touches three files would add to three children. `Files` is the distinct descendant count. + +**Ordering**: within each level, directories come first (architectural shape reads top-down), then files. 
Within each of those two groups, entries are ordered by churn descending, then name ascending. + +**Truncation**: the CLI caps depth at `--tree-depth` (default 3, 0 = unlimited). The HTML report additionally caps children at 50 per directory to keep the page under ~1MB on kernel-scale repos; the tail is collapsed into a `… N more hidden (ranked by churn)` counter. + +**When to use**: before drilling into hotspots or churn-risk, skim the structure to locate the modules those files live in. The tree is navigational context; ranked tables are where judgment happens. + ## Data Flow ``` diff --git a/docs/RUNBOOK.md b/docs/RUNBOOK.md index a35c0c9..4cc90c8 100644 --- a/docs/RUNBOOK.md +++ b/docs/RUNBOOK.md @@ -155,6 +155,7 @@ Section headers go to stderr, data to stdout. To capture only data: ./gitcortex stats --input data.jsonl --stat profile ./gitcortex stats --input data.jsonl --stat profile --email alice@company.com ./gitcortex stats --input data.jsonl --stat top-commits --top 20 +./gitcortex stats --input data.jsonl --stat structure --tree-depth 3 ``` ### Time filtering diff --git a/internal/report/report.go b/internal/report/report.go index d332455..cddc3a1 100644 --- a/internal/report/report.go +++ b/internal/report/report.go @@ -47,8 +47,27 @@ type ReportData struct { // hotspot" from "there are 48 legacy-hotspots in total". Populated // alongside ChurnRisk in Generate(). ChurnRiskLabelCounts []LabelCount + + // Structure holds a pruned repo-structure tree rendered as a + // collapsible architecture view. Truncated to htmlTreeDepth levels + // so mature repos (linux-scale) don't blow up the HTML. Never nil + // after Generate: an empty dataset yields a root with no Children. + Structure *TreeNode } +// htmlTreeDepth caps the repo-structure tree baked into the HTML report. +// Three levels resolves top-level modules and their immediate children, +// enough to read the architecture at a glance without drowning the page +// on kernel-scale repos. The CLI's --stat structure view uses --tree-depth instead. 
+const htmlTreeDepth = 3 + +// htmlTreeMaxChildrenPerDir keeps wide directories (e.g. repos with +// hundreds of sibling files at one level) from ballooning the HTML. +// Children are pre-sorted dirs-first then churn-desc, so the top 50 +// preserves the architectural shape and pushes long tails into a +// "… N more" counter. CLI does not apply this cap. +const htmlTreeMaxChildrenPerDir = 50 + // LabelCount pairs a Churn Risk label with its total count and sort // priority, so the template can render chips in the same label order // used by the table below. @@ -337,7 +356,9 @@ func Generate(w io.Writer, ds *stats.Dataset, repoName string, topN int, sf stat Pareto: ComputePareto(ds), PatternGrid: grid, MaxPattern: maxP, + Structure: BuildRepoTree(stats.FileHotspots(ds, 0), htmlTreeDepth), } + CapChildrenPerDir(data.Structure, htmlTreeMaxChildrenPerDir) return tmpl.Execute(w, data) } diff --git a/internal/report/template.go b/internal/report/template.go index cf7e009..0947812 100644 --- a/internal/report/template.go +++ b/internal/report/template.go @@ -44,13 +44,14 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col .chip-active { background: #2da44e; color: #fff; } .chip-cold { background: #eaeef2; color: #656d76; } .chip-note { font-size: 11px; color: #656d76; font-style: italic; margin-left: 4px; } -.glossary { background: #fff; border: 1px solid #d0d7de; border-radius: 6px; padding: 10px 16px; margin-bottom: 24px; } -.glossary summary { cursor: pointer; font-weight: 600; font-size: 13px; color: #24292f; } -.glossary[open] summary { margin-bottom: 8px; } -.glossary dl { font-size: 12px; color: #24292f; margin: 0; } -.glossary dt { font-weight: 600; margin-top: 8px; } -.glossary dt:first-child { margin-top: 0; } -.glossary dd { color: #656d76; margin: 2px 0 0; } +.accordion { background: #fff; border: 1px solid #d0d7de; border-radius: 6px; padding: 10px 16px; margin-bottom: 24px; } +.accordion + .accordion { margin-top: -16px; } 
+.accordion summary { cursor: pointer; font-weight: 600; font-size: 13px; color: #24292f; } +.accordion[open] summary { margin-bottom: 8px; } +.accordion dl { font-size: 12px; color: #24292f; margin: 0; } +.accordion dt { font-weight: 600; margin-top: 8px; } +.accordion dt:first-child { margin-top: 0; } +.accordion dd { color: #656d76; margin: 2px 0 0; } @@ -58,7 +59,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col

{{.RepoName}} report

{{.Summary.FirstCommitDate}} to {{.Summary.LastCommitDate}}

-
+
Glossary — what do these terms mean?

gitcortex is a repository behavior analyzer, not a code analyzer. These metrics describe what people and processes did in git — who touched what, when, and with whom — not the quality of the source code itself. A file classified as silo or legacy-hotspot reveals a human or process pattern; it is not a judgment on the code (a well-written library maintained by one person will classify as silo regardless of how good it is). Labels point at where to look, not what to conclude.

@@ -91,6 +92,17 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col
+{{if .Structure}}{{if .Structure.Children}} +
+ Repo Structure — architecture at a glance +

Directories sorted by total churn within each level; files listed after. Counts reflect history, so deleted files still appear (they shaped the codebase). Tree capped at the top 3 levels; deeper subtrees collapsed to keep the report readable.

+
+
{{.Structure.Name}} ({{thousands .Structure.Files}} files, {{humanize .Structure.Churn}} churn)
+ {{template "treechildren" .Structure}} +
+
+{{end}}{{end}} +
Commits
{{humanize .Summary.TotalCommits}}
Developers
{{humanize .Summary.TotalDevs}}
@@ -382,6 +394,30 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{end}} {{end}} +{{define "treechildren"}} + +{{end}} + diff --git a/internal/report/tree.go b/internal/report/tree.go new file mode 100644 index 0000000..9da75eb --- /dev/null +++ b/internal/report/tree.go @@ -0,0 +1,308 @@ +package report + +import ( + "encoding/csv" + "fmt" + "io" + "sort" + "strconv" + "strings" + + "github.com/lex0c/gitcortex/internal/stats" +) + +// TreeNode is a single node in the repo-structure tree. Files carry +// Commits + Churn for that file; directories aggregate Churn + Files over +// all descendants but leave Commits = 0, because summing per-file commit +// counts across a directory double-counts any commit that touches +// multiple files — a trap DirStat.Commits fell into before it was +// renamed to FileTouches (see internal/stats/stats.go). The tree is +// derived from paths seen in git history (stats.FileHotspots), so it +// includes files that existed at some point — not just those present at +// HEAD. That matches the rest of the report's historical lens. +type TreeNode struct { + Name string + Path string + IsDir bool + // Commits is populated for file leaves only; dirs leave it zero so + // JSON consumers don't mistake the per-file sum for a distinct-commit + // count. + Commits int + Churn int64 + Files int + Children []*TreeNode + // Depth is the distance from root (root = 0). Pre-computed so HTML + // template can indent without recursion. + Depth int + // Truncated flags a directory whose subtree was cut by the depth + // limit. CLI/HTML surfaces show an ellipsis marker so the reader + // knows there's more below. + Truncated bool + // HiddenChildren counts children dropped by a per-dir render cap + // (applied for the HTML surface so wide directories don't blow up + // the page). CLI does not cap. Zero when no cap was applied. + HiddenChildren int + + // childIndex is an O(1) lookup for BuildRepoTree. 
Unexported so it + // doesn't show up in JSON. Cleared after build to release memory + // (the Children slice is the durable view; the map is scaffolding). + childIndex map[childKey]*TreeNode `json:"-"` +} + +// childKey disambiguates file/dir with the same name across history +// (delete-file then mkdir). Both coexist as siblings under the same +// parent and the index keeps them addressable. +type childKey struct { + name string + isDir bool +} + +// BuildRepoTree builds a repo structure tree from the hotspots slice. +// maxDepth limits how many levels are expanded (root counts as 0); +// 0 = no limit. Nodes whose subtree would extend past maxDepth are +// marked Truncated and their aggregate counts still reflect everything +// underneath, so renderers can signal "... N more" without losing the +// totals. +// +// Complexity: O(F × D) where F is the number of hotspots and D is the +// average path depth. A per-node map index (childIndex) keeps child +// lookup at O(1); without it, wide flat directories degrade the build +// to quadratic. Pruning happens during descent (not as a post-pass), so +// nodes past the cap are never allocated. +func BuildRepoTree(hotspots []stats.FileStat, maxDepth int) *TreeNode { + root := &TreeNode{Name: ".", Path: "", IsDir: true} + + for _, h := range hotspots { + if h.Path == "" { + continue + } + parts := strings.Split(h.Path, "/") + cur := root + cur.Churn += h.Churn + cur.Files++ + + for i, part := range parts { + isLeaf := i == len(parts)-1 + currentDepth := i + 1 + + // Depth cap: stop creating deeper nodes, but leave the + // aggregates already applied on ancestors intact. cur here + // is the node at exactly maxDepth; mark it Truncated so + // renderers can show "subtree hidden". 
+ if maxDepth > 0 && currentDepth > maxDepth { + cur.Truncated = true + break + } + + child := cur.getChild(part, !isLeaf) + if child == nil { + child = &TreeNode{ + Name: part, + Path: strings.Join(parts[:i+1], "/"), + IsDir: !isLeaf, + Depth: currentDepth, + } + cur.putChild(child) + } + if isLeaf { + // Defense-in-depth: FileHotspots iterates a map keyed + // by path, so no duplicates reach this loop in + // practice. If dupes ever did arrive, ancestor .Files + // would also over-count — this += is not sufficient on + // its own, just a cheap safety net. + child.Commits += h.Commits + child.Churn += h.Churn + } else { + // Dir: aggregate churn + descendant file count only. + // Commits is intentionally left at zero (see type docs). + child.Churn += h.Churn + child.Files++ + } + cur = child + } + } + + sortTree(root) + clearChildIndex(root) + return root +} + +// getChild / putChild keep child lookup O(1) during BuildRepoTree. The +// map is pure build-time scaffolding; clearChildIndex drops it so the +// tree kept in ReportData is exactly the exported fields. +func (n *TreeNode) getChild(name string, isDir bool) *TreeNode { + if n.childIndex == nil { + return nil + } + return n.childIndex[childKey{name, isDir}] +} + +func (n *TreeNode) putChild(c *TreeNode) { + if n.childIndex == nil { + n.childIndex = make(map[childKey]*TreeNode) + } + n.childIndex[childKey{c.Name, c.IsDir}] = c + n.Children = append(n.Children, c) +} + +func clearChildIndex(n *TreeNode) { + n.childIndex = nil + for _, c := range n.Children { + clearChildIndex(c) + } +} + +// CapChildrenPerDir keeps the top `limit` children of each directory +// and records how many were dropped in HiddenChildren so the renderer +// can show "… N more hidden". Applied only to the HTML surface — a +// chromium-scale dir at depth 2 can have thousands of leaves, and the +// tree section was meant to tame "too much output" not reintroduce it. 
+// The CLI intentionally skips this cap because a piped tree is expected +// to be exhaustive within the --tree-depth limit. +// +// Children are already sorted (dirs first, churn desc within kind), so +// the top N favours structure over noise: at a wide dir with dozens of +// subdirs and hundreds of files, the dirs remain visible and the tail +// of thin files collapses into the counter. +func CapChildrenPerDir(n *TreeNode, limit int) { + if limit <= 0 { + return + } + if len(n.Children) > limit { + n.HiddenChildren = len(n.Children) - limit + n.Children = n.Children[:limit] + } + for _, c := range n.Children { + CapChildrenPerDir(c, limit) + } +} + +// sortTree orders children deterministically: directories first (so the +// architectural shape reads top-down), then by churn desc as a proxy for +// importance, then by name asc for ties. +func sortTree(n *TreeNode) { + sort.Slice(n.Children, func(i, j int) bool { + a, b := n.Children[i], n.Children[j] + if a.IsDir != b.IsDir { + return a.IsDir + } + if a.Churn != b.Churn { + return a.Churn > b.Churn + } + return a.Name < b.Name + }) + for _, c := range n.Children { + sortTree(c) + } +} + +// RenderTreeText prints the tree in the style of `tree(1)`: unicode +// box-drawing prefixes, directories annotated with file/churn counts, +// files with commits/churn. The output is UTF-8; callers that need +// ASCII-only should wrap with a transform. +func RenderTreeText(w io.Writer, root *TreeNode) error { + if _, err := fmt.Fprintf(w, "%s\n", root.Name); err != nil { + return err + } + return renderChildren(w, root, "") +} + +func renderChildren(w io.Writer, n *TreeNode, prefix string) error { + for i, c := range n.Children { + last := i == len(n.Children)-1 + branch := "├── " + next := "│ " + if last { + branch = "└── " + next = " " + } + if _, err := fmt.Fprintf(w, "%s%s%s\n", prefix, branch, formatNodeLabel(c)); err != nil { + return err + } + if c.Truncated { + if _, err := fmt.Fprintf(w, "%s%s... 
(subtree hidden, use --tree-depth to expand)\n", prefix, next); err != nil { + return err + } + continue + } + if len(c.Children) > 0 { + if err := renderChildren(w, c, prefix+next); err != nil { + return err + } + } + } + return nil +} + +func formatNodeLabel(n *TreeNode) string { + if n.IsDir { + return fmt.Sprintf("%s/ (%d files, %s churn)", n.Name, n.Files, humanize(n.Churn)) + } + return fmt.Sprintf("%s (%d commits, %s churn)", n.Name, n.Commits, humanize(n.Churn)) +} + +// RenderTreeCSV emits the tree as a flat CSV, one row per node in DFS +// preorder so the traversal order matches the text renderer. Honors the +// "single clean table per --stat" contract: one header row, then one +// data row per node, so downstream tools can parse it like any other +// stat's CSV. Commits is 0 for dir rows (see TreeNode doc — per-file +// commit sums would double-count), so consumers wanting a directory- +// level "activity" signal should use Churn or Files instead. +func RenderTreeCSV(w io.Writer, root *TreeNode) error { + cw := csv.NewWriter(w) + if err := cw.Write([]string{"path", "type", "depth", "commits", "churn", "files", "truncated"}); err != nil { + return err + } + if err := writeTreeCSVRow(cw, root); err != nil { + return err + } + cw.Flush() + return cw.Error() +} + +// RenderTreeForFormat dispatches tree rendering to the writer matching +// the CLI's --format. Centralizing the switch here means the earlier +// bug ("--format csv silently wrote a Unicode tree") can't recur: every +// CLI caller goes through this function, and the table-driven test in +// tree_test.go asserts one writer per format. Unknown formats fall +// through to the text renderer for backward compatibility with users +// who pipe into their own tooling and pass through unrelated --format +// values (e.g. "table"). 
+func RenderTreeForFormat(w io.Writer, root *TreeNode, format string) error { + switch format { + case "csv": + return RenderTreeCSV(w, root) + default: + return RenderTreeText(w, root) + } +} + +func writeTreeCSVRow(cw *csv.Writer, n *TreeNode) error { + kind := "file" + if n.IsDir { + kind = "dir" + } + // The root node's Path is empty by construction; emit "." so + // consumers don't get a blank cell as the first row. + path := n.Path + if path == "" { + path = n.Name + } + if err := cw.Write([]string{ + path, + kind, + strconv.Itoa(n.Depth), + strconv.Itoa(n.Commits), + strconv.FormatInt(n.Churn, 10), + strconv.Itoa(n.Files), + strconv.FormatBool(n.Truncated), + }); err != nil { + return err + } + for _, c := range n.Children { + if err := writeTreeCSVRow(cw, c); err != nil { + return err + } + } + return nil +} diff --git a/internal/report/tree_test.go b/internal/report/tree_test.go new file mode 100644 index 0000000..797fc2c --- /dev/null +++ b/internal/report/tree_test.go @@ -0,0 +1,338 @@ +package report + +import ( + "bytes" + "strings" + "testing" + + "github.com/lex0c/gitcortex/internal/stats" +) + +func TestBuildRepoTreeAggregatesAndSorts(t *testing.T) { + hotspots := []stats.FileStat{ + {Path: "cmd/gitcortex/main.go", Commits: 5, Churn: 100}, + {Path: "internal/stats/stats.go", Commits: 20, Churn: 900}, + {Path: "internal/stats/reader.go", Commits: 10, Churn: 400}, + {Path: "internal/report/report.go", Commits: 8, Churn: 300}, + {Path: "README.md", Commits: 2, Churn: 30}, + } + + root := BuildRepoTree(hotspots, 0) + + if root.Files != 5 { + t.Fatalf("root.Files = %d, want 5", root.Files) + } + if root.Churn != 1730 { + t.Fatalf("root.Churn = %d, want 1730", root.Churn) + } + + // Directories first, sorted by churn desc. internal (1600) > cmd (100) > README (file). 
+ if len(root.Children) < 3 { + t.Fatalf("root.Children = %d, want >= 3", len(root.Children)) + } + if !root.Children[0].IsDir || root.Children[0].Name != "internal" { + t.Errorf("first child = %s (dir=%v), want internal/", root.Children[0].Name, root.Children[0].IsDir) + } + if root.Children[0].Files != 3 { + t.Errorf("internal/.Files = %d, want 3", root.Children[0].Files) + } + if root.Children[0].Churn != 1600 { + t.Errorf("internal/.Churn = %d, want 1600", root.Children[0].Churn) + } + + // README.md is a leaf at root level, should come after all dirs. + last := root.Children[len(root.Children)-1] + if last.IsDir || last.Name != "README.md" { + t.Errorf("last root child = %s (dir=%v), want README.md leaf", last.Name, last.IsDir) + } + + // Within internal/, stats/ (churn 1300) should come before report/ (churn 300). + internal := root.Children[0] + if internal.Children[0].Name != "stats" { + t.Errorf("first internal child = %s, want stats (higher churn)", internal.Children[0].Name) + } +} + +func TestBuildRepoTreePrunesAndFlagsTruncation(t *testing.T) { + hotspots := []stats.FileStat{ + {Path: "a/b/c/deep.go", Commits: 1, Churn: 10}, + {Path: "a/b/other.go", Commits: 1, Churn: 10}, + } + root := BuildRepoTree(hotspots, 2) + + // Root=0, a=1, b=2. b should be truncated (no children), but counts kept. 
+ a := root.Children[0] + if a.Name != "a" { + t.Fatalf("want a, got %s", a.Name) + } + b := a.Children[0] + if b.Name != "b" { + t.Fatalf("want b, got %s", b.Name) + } + if !b.Truncated { + t.Errorf("b should be truncated at depth 2, got Truncated=false") + } + if len(b.Children) != 0 { + t.Errorf("b.Children should be empty after prune, got %d", len(b.Children)) + } + if b.Files != 2 { + t.Errorf("b.Files = %d, want 2 (aggregation preserved)", b.Files) + } +} + +func TestRenderTreeCSVEmitsHeaderAndPreorderRows(t *testing.T) { + hotspots := []stats.FileStat{ + {Path: "cmd/main.go", Commits: 7, Churn: 42}, + {Path: "README.md", Commits: 3, Churn: 5}, + } + root := BuildRepoTree(hotspots, 0) + + var buf bytes.Buffer + if err := RenderTreeCSV(&buf, root); err != nil { + t.Fatal(err) + } + lines := strings.Split(strings.TrimRight(buf.String(), "\n"), "\n") + + // Header + root + cmd + main.go + README.md = 5 rows. + if len(lines) != 5 { + t.Fatalf("got %d rows, want 5:\n%s", len(lines), buf.String()) + } + wantHeader := "path,type,depth,commits,churn,files,truncated" + if lines[0] != wantHeader { + t.Errorf("header = %q, want %q", lines[0], wantHeader) + } + // Root: path resolved to ".", dir, aggregate. + if !strings.HasPrefix(lines[1], ".,dir,0,0,47,2,false") { + t.Errorf("root row = %q, want prefix .,dir,0,0,47,2,false", lines[1]) + } + // Dir row for cmd/: commits should be 0 (not aggregated from children). + foundCmdDir := false + for _, ln := range lines[2:] { + if strings.HasPrefix(ln, "cmd,dir,") { + foundCmdDir = true + if !strings.Contains(ln, ",0,42,1,false") { + t.Errorf("cmd dir row = %q, want commits=0 churn=42 files=1", ln) + } + } + } + if !foundCmdDir { + t.Errorf("missing cmd dir row:\n%s", buf.String()) + } + // File row for cmd/main.go: full path, commits preserved. 
+ foundLeaf := false + for _, ln := range lines[2:] { + if strings.HasPrefix(ln, "cmd/main.go,file,") { + foundLeaf = true + if !strings.Contains(ln, ",7,42,0,false") { + t.Errorf("main.go row = %q, want commits=7 churn=42 files=0", ln) + } + } + } + if !foundLeaf { + t.Errorf("missing cmd/main.go row:\n%s", buf.String()) + } +} + +func TestRenderTreeTextProducesBoxPrefixes(t *testing.T) { + hotspots := []stats.FileStat{ + {Path: "cmd/main.go", Commits: 1, Churn: 10}, + {Path: "README.md", Commits: 1, Churn: 5}, + } + root := BuildRepoTree(hotspots, 0) + + var buf bytes.Buffer + if err := RenderTreeText(&buf, root); err != nil { + t.Fatal(err) + } + out := buf.String() + + // Root name, dir branch, nested file, and final sibling file. + for _, want := range []string{".\n", "├── cmd/", "│ └── main.go", "└── README.md"} { + if !strings.Contains(out, want) { + t.Errorf("output missing %q:\n%s", want, out) + } + } +} + +func TestBuildRepoTreeEmpty(t *testing.T) { + root := BuildRepoTree(nil, 0) + if root == nil { + t.Fatal("empty tree should still return a root node") + } + if len(root.Children) != 0 { + t.Errorf("empty input: root.Children = %d, want 0", len(root.Children)) + } +} + +// Regression: dir nodes must NOT carry aggregated Commits. Summing per- +// file commit counts double-counts any commit that touches multiple +// files in the directory (if the same 10 commits touched all 3 files here, +// the broken aggregation would report Commits=30 instead of 10). 
+func TestBuildRepoTreeDirCommitsZero(t *testing.T) { + hotspots := []stats.FileStat{ + {Path: "foo/a.go", Commits: 10, Churn: 100}, + {Path: "foo/b.go", Commits: 10, Churn: 100}, + {Path: "foo/c.go", Commits: 10, Churn: 100}, + } + root := BuildRepoTree(hotspots, 0) + foo := root.Children[0] + if !foo.IsDir || foo.Name != "foo" { + t.Fatalf("expected foo/ dir, got %s (dir=%v)", foo.Name, foo.IsDir) + } + if foo.Commits != 0 { + t.Errorf("dir foo/.Commits = %d, want 0 (aggregating per-file commits double-counts)", foo.Commits) + } + if foo.Files != 3 { + t.Errorf("foo/.Files = %d, want 3", foo.Files) + } + if foo.Churn != 300 { + t.Errorf("foo/.Churn = %d, want 300 (churn aggregation still valid)", foo.Churn) + } + // Root also left at zero on Commits. + if root.Commits != 0 { + t.Errorf("root.Commits = %d, want 0", root.Commits) + } +} + +// Regression: the --format csv path once silently wrote a Unicode tree. +// Assert RenderTreeForFormat routes each format to the correct writer +// by peeking at the first bytes of output — CSV starts with the header +// row, text starts with the root label. Cheap and stable. 
+func TestRenderTreeForFormatDispatches(t *testing.T) { + hotspots := []stats.FileStat{{Path: "cmd/main.go", Commits: 1, Churn: 10}} + root := BuildRepoTree(hotspots, 0) + + cases := []struct { + format string + wantHead string + }{ + {"csv", "path,type,depth,commits,churn,files,truncated\n"}, + {"table", ".\n"}, // text renderer starts with root name + {"", ".\n"}, // empty format falls through to default (text) + {"garbage", ".\n"}, // unknown format falls through to default (text) + } + for _, c := range cases { + t.Run(c.format, func(t *testing.T) { + var buf bytes.Buffer + if err := RenderTreeForFormat(&buf, root, c.format); err != nil { + t.Fatal(err) + } + if !strings.HasPrefix(buf.String(), c.wantHead) { + t.Errorf("format=%q: output did not start with %q; got:\n%s", + c.format, c.wantHead, buf.String()) + } + }) + } +} + +// Truncation markers: the text renderer must surface the "subtree +// hidden" hint with the correct flag name, and the CSV row for a +// truncated dir must carry truncated=true. Both are user-visible and +// neither was previously asserted. +func TestTruncationMarkersInRenderers(t *testing.T) { + hotspots := []stats.FileStat{{Path: "a/b/c.go", Commits: 1, Churn: 10}} + root := BuildRepoTree(hotspots, 2) + + var txt bytes.Buffer + if err := RenderTreeText(&txt, root); err != nil { + t.Fatal(err) + } + // Must reference the actual flag, not the --depth typo. + if !strings.Contains(txt.String(), "subtree hidden, use --tree-depth to expand") { + t.Errorf("text render missing truncation hint with correct flag:\n%s", txt.String()) + } + + var cs bytes.Buffer + if err := RenderTreeCSV(&cs, root); err != nil { + t.Fatal(err) + } + // The dir row at depth 2 (b/) is the truncated one; it must set + // truncated=true in the last column. 
+ found := false + for _, line := range strings.Split(cs.String(), "\n") { + if strings.HasPrefix(line, "a/b,dir,") && strings.HasSuffix(line, ",true") { + found = true + break + } + } + if !found { + t.Errorf("CSV missing truncated=true on a/b dir row:\n%s", cs.String()) + } +} + +// HTML cap: CapChildrenPerDir should retain the top-N and set +// HiddenChildren on the overflow, preserving churn-desc order from the +// prior sort. CLI callers that never invoke it must not see counts +// change. +func TestCapChildrenPerDir(t *testing.T) { + // 5 siblings, cap at 2. + var hotspots []stats.FileStat + for i, name := range []string{"a.go", "b.go", "c.go", "d.go", "e.go"} { + hotspots = append(hotspots, stats.FileStat{ + Path: "dir/" + name, + Commits: 1, + Churn: int64(100 - i*10), // a=100, b=90, c=80, d=70, e=60 + }) + } + root := BuildRepoTree(hotspots, 0) + dir := root.Children[0] + if dir.Name != "dir" || len(dir.Children) != 5 { + t.Fatalf("expected dir with 5 children, got %s with %d", dir.Name, len(dir.Children)) + } + + CapChildrenPerDir(root, 2) + if dir.HiddenChildren != 3 { + t.Errorf("HiddenChildren = %d, want 3", dir.HiddenChildren) + } + if len(dir.Children) != 2 { + t.Fatalf("capped children = %d, want 2", len(dir.Children)) + } + // Top two must be the highest-churn survivors (a.go, b.go) in that + // order — the cap is a prefix trim on the already-sorted slice. + if dir.Children[0].Name != "a.go" || dir.Children[1].Name != "b.go" { + t.Errorf("top-2 after cap = [%s, %s], want [a.go, b.go]", + dir.Children[0].Name, dir.Children[1].Name) + } +} + +// Regression: file and directory can share a name across history +// (path deleted, then recreated as a directory). Each must get its own +// node rather than the second path corrupting the first. 
+func TestBuildRepoTreeFileDirNameCollision(t *testing.T) { + hotspots := []stats.FileStat{ + {Path: "foo", Commits: 3, Churn: 30}, // file at root called "foo" + {Path: "foo/bar.go", Commits: 5, Churn: 50}, // later a dir with the same name + } + root := BuildRepoTree(hotspots, 0) + + var fileNode, dirNode *TreeNode + for _, c := range root.Children { + if c.Name != "foo" { + continue + } + if c.IsDir { + dirNode = c + } else { + fileNode = c + } + } + if fileNode == nil { + t.Fatal("expected file node named foo at root, got none") + } + if dirNode == nil { + t.Fatal("expected dir node named foo at root, got none") + } + if fileNode.Commits != 3 || fileNode.Churn != 30 { + t.Errorf("file foo: commits=%d churn=%d, want 3/30", fileNode.Commits, fileNode.Churn) + } + if dirNode.Files != 1 || dirNode.Churn != 50 { + t.Errorf("dir foo/: files=%d churn=%d, want 1/50", dirNode.Files, dirNode.Churn) + } + // The dir should hold bar.go, not the file node. + if len(dirNode.Children) != 1 || dirNode.Children[0].Name != "bar.go" { + t.Errorf("dir foo/ children = %+v, want [bar.go]", dirNode.Children) + } + if len(fileNode.Children) != 0 { + t.Errorf("file foo has children; node was corrupted into a dir: %+v", fileNode.Children) + } +}