From 09f25dca86994533330312179bb8b9ed0a75b0cc Mon Sep 17 00:00:00 2001 From: lex0c Date: Sun, 19 Apr 2026 20:00:32 -0300 Subject: [PATCH 1/5] Add repo structure tree to CLI and HTML report Architecture-view section built from stats.FileHotspots, shared between the CLI (gitcortex stats --stat structure [--tree-depth N]) and the HTML report (collapsed accordion below the glossary, capped at 3 levels). Directories aggregate Churn + Files but leave Commits at zero to avoid the double-counting trap DirStat hit before the FileTouches rename; same-name file/dir collisions across history get separate sibling nodes instead of corrupting each other. Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/gitcortex/main.go | 18 +++- internal/report/report.go | 13 +++ internal/report/template.go | 33 +++++++ internal/report/tree.go | 184 +++++++++++++++++++++++++++++++++++ internal/report/tree_test.go | 184 +++++++++++++++++++++++++++++++++++ 5 files changed, 429 insertions(+), 3 deletions(-) create mode 100644 internal/report/tree.go create mode 100644 internal/report/tree_test.go diff --git a/cmd/gitcortex/main.go b/cmd/gitcortex/main.go index a879368..cd17400 100644 --- a/cmd/gitcortex/main.go +++ b/cmd/gitcortex/main.go @@ -107,7 +107,7 @@ func isValidStat(s string) bool { switch s { case "summary", "contributors", "hotspots", "directories", "activity", "busfactor", "coupling", "churn-risk", "working-patterns", - "dev-network", "profile", "top-commits", "pareto": + "dev-network", "profile", "top-commits", "pareto", "structure": return true } return false @@ -125,6 +125,7 @@ type statsFlags struct { churnHalfLife int networkMinFiles int email string + treeDepth int } func addStatsFlags(cmd *cobra.Command, sf *statsFlags) { @@ -132,13 +133,14 @@ func addStatsFlags(cmd *cobra.Command, sf *statsFlags) { cmd.Flags().StringVar(&sf.format, "format", "table", "Output format: table, csv, json") cmd.Flags().IntVar(&sf.topN, "top", 10, "Number of top entries to show (0 = all)") 
cmd.Flags().StringVar(&sf.granularity, "granularity", "month", "Activity granularity: day, week, month, year") - cmd.Flags().StringVar(&sf.stat, "stat", "", "Show a specific stat: summary, contributors, hotspots, directories, activity, busfactor, coupling, churn-risk, working-patterns, dev-network, profile, top-commits, pareto") + cmd.Flags().StringVar(&sf.stat, "stat", "", "Show a specific stat: summary, contributors, hotspots, directories, activity, busfactor, coupling, churn-risk, working-patterns, dev-network, profile, top-commits, pareto, structure") cmd.Flags().IntVar(&sf.couplingMaxFiles, "coupling-max-files", 50, "Max files per commit for coupling analysis") cmd.Flags().IntVar(&sf.couplingMinChanges, "coupling-min-changes", 5, "Min co-changes for coupling results") cmd.Flags().IntVar(&sf.churnHalfLife, "churn-half-life", 90, "Half-life in days for churn decay (churn-risk)") cmd.Flags().IntVar(&sf.networkMinFiles, "network-min-files", 5, "Min shared files for dev-network edges") cmd.Flags().StringVar(&sf.email, "email", "", "Filter by developer email (for profile stat)") cmd.Flags().StringVar(&sf.since, "since", "", "Filter to recent period (e.g. 
7d, 4w, 3m, 1y)") + cmd.Flags().IntVar(&sf.treeDepth, "tree-depth", 3, "Max depth for --stat structure (0 = unlimited)") } func validateStatsFlags(sf *statsFlags) error { @@ -149,7 +151,7 @@ func validateStatsFlags(sf *statsFlags) error { return fmt.Errorf("invalid --granularity %q; must be one of: day, week, month, year", sf.granularity) } if sf.stat != "" && !isValidStat(sf.stat) { - return fmt.Errorf("invalid --stat %q; valid: summary, contributors, hotspots, directories, activity, busfactor, coupling, churn-risk, working-patterns, dev-network, profile, top-commits, pareto", sf.stat) + return fmt.Errorf("invalid --stat %q; valid: summary, contributors, hotspots, directories, activity, busfactor, coupling, churn-risk, working-patterns, dev-network, profile, top-commits, pareto, structure", sf.stat) } return nil } @@ -331,6 +333,13 @@ func renderStats(ds *stats.Dataset, sf *statsFlags) error { return err } } + if sf.stat == "structure" { + fmt.Fprintf(os.Stderr, "\n=== Repo Structure (depth %d) ===\n", sf.treeDepth) + root := reportpkg.BuildRepoTree(stats.FileHotspots(ds, 0), sf.treeDepth) + if err := reportpkg.RenderTreeText(os.Stdout, root); err != nil { + return err + } + } return nil } @@ -378,6 +387,9 @@ func renderStatsJSON(f *stats.Formatter, ds *stats.Dataset, sf *statsFlags) erro if showAll || sf.stat == "top-commits" { report["top_commits"] = stats.TopCommits(ds, sf.topN) } + if sf.stat == "structure" { + report["structure"] = reportpkg.BuildRepoTree(stats.FileHotspots(ds, 0), sf.treeDepth) + } return f.PrintReport(report) } diff --git a/internal/report/report.go b/internal/report/report.go index d332455..4467816 100644 --- a/internal/report/report.go +++ b/internal/report/report.go @@ -47,8 +47,20 @@ type ReportData struct { // hotspot" from "there are 48 legacy-hotspots in total". Populated // alongside ChurnRisk in Generate(). 
ChurnRiskLabelCounts []LabelCount + + // Structure holds a pruned repo-structure tree rendered as a + // collapsible architecture view. Truncated to htmlTreeDepth levels + // so mature repos (linux-scale) don't blow up the HTML. nil when + // the dataset has no files. + Structure *TreeNode } +// htmlTreeDepth caps the repo-structure tree baked into the HTML report. +// Three levels resolves top-level modules and their immediate children, +// enough to read the architecture at a glance without drowning the page +// on kernel-scale repos. CLI users can override via --tree-depth. +const htmlTreeDepth = 3 + // LabelCount pairs a Churn Risk label with its total count and sort // priority, so the template can render chips in the same label order // used by the table below. @@ -337,6 +349,7 @@ func Generate(w io.Writer, ds *stats.Dataset, repoName string, topN int, sf stat Pareto: ComputePareto(ds), PatternGrid: grid, MaxPattern: maxP, + Structure: BuildRepoTree(stats.FileHotspots(ds, 0), htmlTreeDepth), } return tmpl.Execute(w, data) diff --git a/internal/report/template.go b/internal/report/template.go index cf7e009..9c97376 100644 --- a/internal/report/template.go +++ b/internal/report/template.go @@ -45,6 +45,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col .chip-cold { background: #eaeef2; color: #656d76; } .chip-note { font-size: 11px; color: #656d76; font-style: italic; margin-left: 4px; } .glossary { background: #fff; border: 1px solid #d0d7de; border-radius: 6px; padding: 10px 16px; margin-bottom: 24px; } +.glossary + .glossary { margin-top: -16px; } .glossary summary { cursor: pointer; font-weight: 600; font-size: 13px; color: #24292f; } .glossary[open] summary { margin-bottom: 8px; } .glossary dl { font-size: 12px; color: #24292f; margin: 0; } @@ -91,6 +92,17 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col +{{if .Structure}}{{if .Structure.Children}} +
+ Repo Structure — architecture at a glance +

Directories sorted by total churn within each level; files listed after. Counts reflect history, so deleted files still appear (they shaped the codebase). Tree capped at the top 3 levels; deeper subtrees collapsed to keep the report readable.

+
+
{{.Structure.Name}} ({{thousands .Structure.Files}} files, {{humanize .Structure.Churn}} churn)
+ {{template "treechildren" .Structure}} +
+
+{{end}}{{end}} +
Commits
{{humanize .Summary.TotalCommits}}
Developers
{{humanize .Summary.TotalDevs}}
@@ -382,6 +394,27 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{end}} {{end}} +{{define "treechildren"}} +
    + {{range .Children}} +
  • + {{if .IsDir}} + {{if .Children}} +
    + 📁 {{.Name}}/ ({{thousands .Files}} files · {{humanize .Churn}} churn) + {{template "treechildren" .}} +
    + {{else}} + 📁 {{.Name}}/ ({{thousands .Files}} files · {{humanize .Churn}} churn){{if .Truncated}} … subtree hidden{{end}} + {{end}} + {{else}} + 📄 {{.Name}} ({{thousands .Commits}} commits · {{humanize .Churn}} churn) + {{end}} +
  • + {{end}} +
+{{end}} +
Generated by gitcortex · {{.GeneratedAt}}
diff --git a/internal/report/tree.go b/internal/report/tree.go new file mode 100644 index 0000000..91edaba --- /dev/null +++ b/internal/report/tree.go @@ -0,0 +1,184 @@ +package report + +import ( + "fmt" + "io" + "sort" + "strings" + + "github.com/lex0c/gitcortex/internal/stats" +) + +// TreeNode is a single node in the repo-structure tree. Files carry +// Commits + Churn for that file; directories aggregate Churn + Files over +// all descendants but leave Commits = 0, because summing per-file commit +// counts across a directory double-counts any commit that touches +// multiple files โ€” a trap DirStat.Commits fell into before it was +// renamed to FileTouches (see internal/stats/stats.go). The tree is +// derived from paths seen in git history (stats.FileHotspots), so it +// includes files that existed at some point โ€” not just those present at +// HEAD. That matches the rest of the report's historical lens. +type TreeNode struct { + Name string + Path string + IsDir bool + // Commits is populated for file leaves only; dirs leave it zero so + // JSON consumers don't mistake the per-file sum for a distinct-commit + // count. + Commits int + Churn int64 + Files int + Children []*TreeNode + // Depth is the distance from root (root = 0). Pre-computed so HTML + // template can indent without recursion. + Depth int + // Truncated flags a directory whose subtree was cut by the depth + // limit. CLI/HTML surfaces show an ellipsis marker so the reader + // knows there's more below. + Truncated bool +} + +// BuildRepoTree builds a repo structure tree from the hotspots slice. +// maxDepth limits how many levels are expanded (root counts as 0); +// 0 = no limit. Nodes whose subtree is pruned are marked Truncated so +// renderers can signal "... N more" to the reader. 
+func BuildRepoTree(hotspots []stats.FileStat, maxDepth int) *TreeNode { + root := &TreeNode{Name: ".", Path: "", IsDir: true} + + for _, h := range hotspots { + if h.Path == "" { + continue + } + parts := strings.Split(h.Path, "/") + cur := root + cur.Churn += h.Churn + cur.Files++ + + for i, part := range parts { + isLeaf := i == len(parts)-1 + child := findChild(cur, part, !isLeaf) + if child == nil { + child = &TreeNode{ + Name: part, + Path: strings.Join(parts[:i+1], "/"), + IsDir: !isLeaf, + Depth: i + 1, + } + cur.Children = append(cur.Children, child) + } + if isLeaf { + // Leaf: accumulate so rename-induced duplicates within a + // single Dataset (same canonical path emitted twice under + // pathological rename chains) don't silently overwrite. + child.Commits += h.Commits + child.Churn += h.Churn + } else { + // Dir: aggregate churn + descendant file count only. + // Commits is intentionally left at zero (see type docs). + child.Churn += h.Churn + child.Files++ + } + cur = child + } + } + + sortTree(root) + if maxDepth > 0 { + pruneDepth(root, maxDepth) + } + return root +} + +// findChild returns the existing sibling that matches both name AND +// directory-ness. A path history where the same name refers to a file in +// one commit and a directory in another (delete-file then mkdir) would +// otherwise corrupt the tree: the file node would grow dir children, or +// the dir node would get leaf values overwritten. Matching on the pair +// lets both coexist as sibling nodes; rare in practice, silent-wrong if +// unhandled. +func findChild(n *TreeNode, name string, wantDir bool) *TreeNode { + for _, c := range n.Children { + if c.Name == name && c.IsDir == wantDir { + return c + } + } + return nil +} + +// sortTree orders children deterministically: directories first (so the +// architectural shape reads top-down), then by churn desc as a proxy for +// importance, then by name asc for ties. 
+func sortTree(n *TreeNode) { + sort.Slice(n.Children, func(i, j int) bool { + a, b := n.Children[i], n.Children[j] + if a.IsDir != b.IsDir { + return a.IsDir + } + if a.Churn != b.Churn { + return a.Churn > b.Churn + } + return a.Name < b.Name + }) + for _, c := range n.Children { + sortTree(c) + } +} + +// pruneDepth drops children past maxDepth levels below root. The node at +// the cut line keeps its aggregate counts but is marked Truncated so the +// renderer can emit "... N more" instead of silently hiding data. +func pruneDepth(n *TreeNode, maxDepth int) { + if n.Depth >= maxDepth && len(n.Children) > 0 { + n.Truncated = true + n.Children = nil + return + } + for _, c := range n.Children { + pruneDepth(c, maxDepth) + } +} + +// RenderTreeText prints the tree in the style of `tree(1)`: unicode +// box-drawing prefixes, directories annotated with file/churn counts, +// files with commits/churn. The output is UTF-8; callers that need +// ASCII-only should wrap with a transform. +func RenderTreeText(w io.Writer, root *TreeNode) error { + if _, err := fmt.Fprintf(w, "%s\n", root.Name); err != nil { + return err + } + return renderChildren(w, root, "") +} + +func renderChildren(w io.Writer, n *TreeNode, prefix string) error { + for i, c := range n.Children { + last := i == len(n.Children)-1 + branch := "โ”œโ”€โ”€ " + next := "โ”‚ " + if last { + branch = "โ””โ”€โ”€ " + next = " " + } + if _, err := fmt.Fprintf(w, "%s%s%s\n", prefix, branch, formatNodeLabel(c)); err != nil { + return err + } + if c.Truncated { + if _, err := fmt.Fprintf(w, "%s%s... 
(subtree hidden, use --depth to expand)\n", prefix, next); err != nil { + return err + } + continue + } + if len(c.Children) > 0 { + if err := renderChildren(w, c, prefix+next); err != nil { + return err + } + } + } + return nil +} + +func formatNodeLabel(n *TreeNode) string { + if n.IsDir { + return fmt.Sprintf("%s/ (%d files, %s churn)", n.Name, n.Files, humanize(n.Churn)) + } + return fmt.Sprintf("%s (%d commits, %s churn)", n.Name, n.Commits, humanize(n.Churn)) +} diff --git a/internal/report/tree_test.go b/internal/report/tree_test.go new file mode 100644 index 0000000..6850ac8 --- /dev/null +++ b/internal/report/tree_test.go @@ -0,0 +1,184 @@ +package report + +import ( + "bytes" + "strings" + "testing" + + "github.com/lex0c/gitcortex/internal/stats" +) + +func TestBuildRepoTreeAggregatesAndSorts(t *testing.T) { + hotspots := []stats.FileStat{ + {Path: "cmd/gitcortex/main.go", Commits: 5, Churn: 100}, + {Path: "internal/stats/stats.go", Commits: 20, Churn: 900}, + {Path: "internal/stats/reader.go", Commits: 10, Churn: 400}, + {Path: "internal/report/report.go", Commits: 8, Churn: 300}, + {Path: "README.md", Commits: 2, Churn: 30}, + } + + root := BuildRepoTree(hotspots, 0) + + if root.Files != 5 { + t.Fatalf("root.Files = %d, want 5", root.Files) + } + if root.Churn != 1730 { + t.Fatalf("root.Churn = %d, want 1730", root.Churn) + } + + // Directories first, sorted by churn desc. internal (1600) > cmd (100) > README (file). 
+ if len(root.Children) < 3 { + t.Fatalf("root.Children = %d, want >= 3", len(root.Children)) + } + if !root.Children[0].IsDir || root.Children[0].Name != "internal" { + t.Errorf("first child = %s (dir=%v), want internal/", root.Children[0].Name, root.Children[0].IsDir) + } + if root.Children[0].Files != 3 { + t.Errorf("internal/.Files = %d, want 3", root.Children[0].Files) + } + if root.Children[0].Churn != 1600 { + t.Errorf("internal/.Churn = %d, want 1600", root.Children[0].Churn) + } + + // README.md is a leaf at root level, should come after all dirs. + last := root.Children[len(root.Children)-1] + if last.IsDir || last.Name != "README.md" { + t.Errorf("last root child = %s (dir=%v), want README.md leaf", last.Name, last.IsDir) + } + + // Within internal/, stats/ (churn 1300) should come before report/ (churn 300). + internal := root.Children[0] + if internal.Children[0].Name != "stats" { + t.Errorf("first internal child = %s, want stats (higher churn)", internal.Children[0].Name) + } +} + +func TestBuildRepoTreePrunesAndFlagsTruncation(t *testing.T) { + hotspots := []stats.FileStat{ + {Path: "a/b/c/deep.go", Commits: 1, Churn: 10}, + {Path: "a/b/other.go", Commits: 1, Churn: 10}, + } + root := BuildRepoTree(hotspots, 2) + + // Root=0, a=1, b=2. b should be truncated (no children), but counts kept. 
+ a := root.Children[0] + if a.Name != "a" { + t.Fatalf("want a, got %s", a.Name) + } + b := a.Children[0] + if b.Name != "b" { + t.Fatalf("want b, got %s", b.Name) + } + if !b.Truncated { + t.Errorf("b should be truncated at depth 2, got Truncated=false") + } + if len(b.Children) != 0 { + t.Errorf("b.Children should be empty after prune, got %d", len(b.Children)) + } + if b.Files != 2 { + t.Errorf("b.Files = %d, want 2 (aggregation preserved)", b.Files) + } +} + +func TestRenderTreeTextProducesBoxPrefixes(t *testing.T) { + hotspots := []stats.FileStat{ + {Path: "cmd/main.go", Commits: 1, Churn: 10}, + {Path: "README.md", Commits: 1, Churn: 5}, + } + root := BuildRepoTree(hotspots, 0) + + var buf bytes.Buffer + if err := RenderTreeText(&buf, root); err != nil { + t.Fatal(err) + } + out := buf.String() + + // Root name, dir branch, nested file, and final sibling file. + for _, want := range []string{".\n", "โ”œโ”€โ”€ cmd/", "โ”‚ โ””โ”€โ”€ main.go", "โ””โ”€โ”€ README.md"} { + if !strings.Contains(out, want) { + t.Errorf("output missing %q:\n%s", want, out) + } + } +} + +func TestBuildRepoTreeEmpty(t *testing.T) { + root := BuildRepoTree(nil, 0) + if root == nil { + t.Fatal("empty tree should still return a root node") + } + if len(root.Children) != 0 { + t.Errorf("empty input: root.Children = %d, want 0", len(root.Children)) + } +} + +// Regression: dir nodes must NOT carry aggregated Commits. Summing per- +// file commit counts double-counts any commit that touches multiple +// files in the directory (a single commit touching all 3 files here +// would be reported as Commits=30 under the broken aggregation). 
+func TestBuildRepoTreeDirCommitsZero(t *testing.T) { + hotspots := []stats.FileStat{ + {Path: "foo/a.go", Commits: 10, Churn: 100}, + {Path: "foo/b.go", Commits: 10, Churn: 100}, + {Path: "foo/c.go", Commits: 10, Churn: 100}, + } + root := BuildRepoTree(hotspots, 0) + foo := root.Children[0] + if !foo.IsDir || foo.Name != "foo" { + t.Fatalf("expected foo/ dir, got %s (dir=%v)", foo.Name, foo.IsDir) + } + if foo.Commits != 0 { + t.Errorf("dir foo/.Commits = %d, want 0 (aggregating per-file commits double-counts)", foo.Commits) + } + if foo.Files != 3 { + t.Errorf("foo/.Files = %d, want 3", foo.Files) + } + if foo.Churn != 300 { + t.Errorf("foo/.Churn = %d, want 300 (churn aggregation still valid)", foo.Churn) + } + // Root also left at zero on Commits. + if root.Commits != 0 { + t.Errorf("root.Commits = %d, want 0", root.Commits) + } +} + +// Regression: file and directory can share a name across history +// (path deleted, then recreated as a directory). Each must get its own +// node rather than the second path corrupting the first. 
+func TestBuildRepoTreeFileDirNameCollision(t *testing.T) { + hotspots := []stats.FileStat{ + {Path: "foo", Commits: 3, Churn: 30}, // file at root called "foo" + {Path: "foo/bar.go", Commits: 5, Churn: 50}, // later a dir with the same name + } + root := BuildRepoTree(hotspots, 0) + + var fileNode, dirNode *TreeNode + for _, c := range root.Children { + if c.Name != "foo" { + continue + } + if c.IsDir { + dirNode = c + } else { + fileNode = c + } + } + if fileNode == nil { + t.Fatal("expected file node named foo at root, got none") + } + if dirNode == nil { + t.Fatal("expected dir node named foo at root, got none") + } + if fileNode.Commits != 3 || fileNode.Churn != 30 { + t.Errorf("file foo: commits=%d churn=%d, want 3/30", fileNode.Commits, fileNode.Churn) + } + if dirNode.Files != 1 || dirNode.Churn != 50 { + t.Errorf("dir foo/: files=%d churn=%d, want 1/50", dirNode.Files, dirNode.Churn) + } + // The dir should hold bar.go, not the file node. + if len(dirNode.Children) != 1 || dirNode.Children[0].Name != "bar.go" { + t.Errorf("dir foo/ children = %+v, want [bar.go]", dirNode.Children) + } + if len(fileNode.Children) != 0 { + t.Errorf("file foo has children; node was corrupted into a dir: %+v", fileNode.Children) + } +} From 0f8c717571055c00cc445034f26d64f4771629d9 Mon Sep 17 00:00:00 2001 From: lex0c Date: Sun, 19 Apr 2026 20:08:19 -0300 Subject: [PATCH 2/5] Honor --format csv for --stat structure The structure branch previously always wrote the Unicode tree to stdout, breaking the "single clean table per --stat" CSV contract that downstream parsers rely on. Add RenderTreeCSV (one row per node in DFS preorder with path/type/depth/commits/churn/files/truncated columns) and route --format csv through it. Dir rows still carry commits=0 so the CSV matches the struct semantics documented on TreeNode. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/gitcortex/main.go | 15 ++++++++-- internal/report/tree.go | 51 ++++++++++++++++++++++++++++++++++ internal/report/tree_test.go | 53 ++++++++++++++++++++++++++++++++++++ 3 files changed, 116 insertions(+), 3 deletions(-) diff --git a/cmd/gitcortex/main.go b/cmd/gitcortex/main.go index cd17400..39efca3 100644 --- a/cmd/gitcortex/main.go +++ b/cmd/gitcortex/main.go @@ -334,10 +334,19 @@ func renderStats(ds *stats.Dataset, sf *statsFlags) error { } } if sf.stat == "structure" { - fmt.Fprintf(os.Stderr, "\n=== Repo Structure (depth %d) ===\n", sf.treeDepth) root := reportpkg.BuildRepoTree(stats.FileHotspots(ds, 0), sf.treeDepth) - if err := reportpkg.RenderTreeText(os.Stdout, root); err != nil { - return err + if sf.format == "csv" { + // CSV path: no header banner on stderr โ€” downstream parsers + // sometimes tail stderr onto stdout, and a stray "=== ... ===" + // breaks the single-table contract. + if err := reportpkg.RenderTreeCSV(os.Stdout, root); err != nil { + return err + } + } else { + fmt.Fprintf(os.Stderr, "\n=== Repo Structure (depth %d) ===\n", sf.treeDepth) + if err := reportpkg.RenderTreeText(os.Stdout, root); err != nil { + return err + } } } diff --git a/internal/report/tree.go b/internal/report/tree.go index 91edaba..8bf76c9 100644 --- a/internal/report/tree.go +++ b/internal/report/tree.go @@ -1,9 +1,11 @@ package report import ( + "encoding/csv" "fmt" "io" "sort" + "strconv" "strings" "github.com/lex0c/gitcortex/internal/stats" @@ -182,3 +184,52 @@ func formatNodeLabel(n *TreeNode) string { } return fmt.Sprintf("%s (%d commits, %s churn)", n.Name, n.Commits, humanize(n.Churn)) } + +// RenderTreeCSV emits the tree as a flat CSV, one row per node in DFS +// preorder so the traversal order matches the text renderer. Honors the +// "single clean table per --stat" contract: downstream tools can read +// the same columns whether the user asked for `--stat structure` or +// another stat. 
Commits is 0 for dir rows (see TreeNode doc โ€” per-file +// commit sums would double-count), so consumers wanting a directory- +// level "activity" signal should use Churn or Files instead. +func RenderTreeCSV(w io.Writer, root *TreeNode) error { + cw := csv.NewWriter(w) + if err := cw.Write([]string{"path", "type", "depth", "commits", "churn", "files", "truncated"}); err != nil { + return err + } + if err := writeTreeCSVRow(cw, root); err != nil { + return err + } + cw.Flush() + return cw.Error() +} + +func writeTreeCSVRow(cw *csv.Writer, n *TreeNode) error { + kind := "file" + if n.IsDir { + kind = "dir" + } + // The root node's Path is empty by construction; emit "." so + // consumers don't get a blank cell as the first row. + path := n.Path + if path == "" { + path = n.Name + } + if err := cw.Write([]string{ + path, + kind, + strconv.Itoa(n.Depth), + strconv.Itoa(n.Commits), + strconv.FormatInt(n.Churn, 10), + strconv.Itoa(n.Files), + strconv.FormatBool(n.Truncated), + }); err != nil { + return err + } + for _, c := range n.Children { + if err := writeTreeCSVRow(cw, c); err != nil { + return err + } + } + return nil +} diff --git a/internal/report/tree_test.go b/internal/report/tree_test.go index 6850ac8..0cf64e4 100644 --- a/internal/report/tree_test.go +++ b/internal/report/tree_test.go @@ -80,6 +80,59 @@ func TestBuildRepoTreePrunesAndFlagsTruncation(t *testing.T) { } } +func TestRenderTreeCSVEmitsHeaderAndPreorderRows(t *testing.T) { + hotspots := []stats.FileStat{ + {Path: "cmd/main.go", Commits: 7, Churn: 42}, + {Path: "README.md", Commits: 3, Churn: 5}, + } + root := BuildRepoTree(hotspots, 0) + + var buf bytes.Buffer + if err := RenderTreeCSV(&buf, root); err != nil { + t.Fatal(err) + } + lines := strings.Split(strings.TrimRight(buf.String(), "\n"), "\n") + + // Header + root + cmd + main.go + README.md = 5 rows. 
+ if len(lines) != 5 { + t.Fatalf("got %d rows, want 5:\n%s", len(lines), buf.String()) + } + wantHeader := "path,type,depth,commits,churn,files,truncated" + if lines[0] != wantHeader { + t.Errorf("header = %q, want %q", lines[0], wantHeader) + } + // Root: path resolved to ".", dir, aggregate. + if !strings.HasPrefix(lines[1], ".,dir,0,0,47,2,false") { + t.Errorf("root row = %q, want prefix .,dir,0,0,47,2,false", lines[1]) + } + // Dir row for cmd/: commits should be 0 (not aggregated from children). + foundCmdDir := false + for _, ln := range lines[2:] { + if strings.HasPrefix(ln, "cmd,dir,") { + foundCmdDir = true + if !strings.Contains(ln, ",0,42,1,false") { + t.Errorf("cmd dir row = %q, want commits=0 churn=42 files=1", ln) + } + } + } + if !foundCmdDir { + t.Errorf("missing cmd dir row:\n%s", buf.String()) + } + // File row for cmd/main.go: full path, commits preserved. + foundLeaf := false + for _, ln := range lines[2:] { + if strings.HasPrefix(ln, "cmd/main.go,file,") { + foundLeaf = true + if !strings.Contains(ln, ",7,42,0,false") { + t.Errorf("main.go row = %q, want commits=7 churn=42 files=0", ln) + } + } + } + if !foundLeaf { + t.Errorf("missing cmd/main.go row:\n%s", buf.String()) + } +} + func TestRenderTreeTextProducesBoxPrefixes(t *testing.T) { hotspots := []stats.FileStat{ {Path: "cmd/main.go", Commits: 1, Churn: 10}, From 859888cdaaee6d37695f0671cbbdc1ec9588792d Mon Sep 17 00:00:00 2001 From: lex0c Date: Sun, 19 Apr 2026 20:09:45 -0300 Subject: [PATCH 3/5] Fix truncation hint to reference --tree-depth flag The subtree-hidden message pointed users at --depth, but the flag is actually named --tree-depth; copy-pasting from the hint produced an unknown-flag error. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/report/tree.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/report/tree.go b/internal/report/tree.go index 8bf76c9..15a891f 100644 --- a/internal/report/tree.go +++ b/internal/report/tree.go @@ -164,7 +164,7 @@ func renderChildren(w io.Writer, n *TreeNode, prefix string) error { return err } if c.Truncated { - if _, err := fmt.Fprintf(w, "%s%s... (subtree hidden, use --depth to expand)\n", prefix, next); err != nil { + if _, err := fmt.Fprintf(w, "%s%s... (subtree hidden, use --tree-depth to expand)\n", prefix, next); err != nil { return err } continue From 71668f8fc45b004c7b97f462e92ffba231cdf126 Mon Sep 17 00:00:00 2001 From: lex0c Date: Sun, 19 Apr 2026 20:19:08 -0300 Subject: [PATCH 4/5] Address repo-structure review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Honest comment on the leaf Commits += accumulator (defense-in-depth, not a real case โ€” FileHotspots is map-keyed, no dupes reach here). - Rename the glossary CSS class to .accordion so the Repo Structure
no longer reads as misusing a glossary-specific class. - Banner prints "depth unlimited" when --tree-depth 0 is passed, rather than showing a misleading "depth 0". - Replace O(N) findChild linear scan with per-node map index so wide flat directories no longer degrade the build to quadratic. - Prune depth during descent instead of a post-pass, so nodes past the cap are never allocated. - CapChildrenPerDir caps the HTML surface to 50 children per dir, pushing long tails into a "โ€ฆ N more hidden" counter so wide repos don't balloon the report. CLI intentionally left uncapped. - RenderTreeForFormat centralizes the text/CSV dispatch so a future format addition can't repeat the "csv silently wrote Unicode" bug. - New tests: format dispatch table, truncation markers in text + CSV renderers, HTML per-dir cap. Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/gitcortex/main.go | 22 +++--- internal/report/report.go | 8 ++ internal/report/template.go | 23 +++--- internal/report/tree.go | 147 ++++++++++++++++++++++++++--------- internal/report/tree_test.go | 101 ++++++++++++++++++++++++ 5 files changed, 243 insertions(+), 58 deletions(-) diff --git a/cmd/gitcortex/main.go b/cmd/gitcortex/main.go index 39efca3..0aee55b 100644 --- a/cmd/gitcortex/main.go +++ b/cmd/gitcortex/main.go @@ -335,18 +335,18 @@ func renderStats(ds *stats.Dataset, sf *statsFlags) error { } if sf.stat == "structure" { root := reportpkg.BuildRepoTree(stats.FileHotspots(ds, 0), sf.treeDepth) - if sf.format == "csv" { - // CSV path: no header banner on stderr โ€” downstream parsers - // sometimes tail stderr onto stdout, and a stray "=== ... ===" - // breaks the single-table contract. 
- if err := reportpkg.RenderTreeCSV(os.Stdout, root); err != nil { - return err - } - } else { - fmt.Fprintf(os.Stderr, "\n=== Repo Structure (depth %d) ===\n", sf.treeDepth) - if err := reportpkg.RenderTreeText(os.Stdout, root); err != nil { - return err + // CSV skips the stderr banner โ€” downstream parsers sometimes + // tail stderr onto stdout, and a stray "=== ... ===" would + // break the single-table contract. + if sf.format != "csv" { + depthLabel := "unlimited" + if sf.treeDepth > 0 { + depthLabel = fmt.Sprintf("%d", sf.treeDepth) } + fmt.Fprintf(os.Stderr, "\n=== Repo Structure (depth %s) ===\n", depthLabel) + } + if err := reportpkg.RenderTreeForFormat(os.Stdout, root, sf.format); err != nil { + return err } } diff --git a/internal/report/report.go b/internal/report/report.go index 4467816..cddc3a1 100644 --- a/internal/report/report.go +++ b/internal/report/report.go @@ -61,6 +61,13 @@ type ReportData struct { // on kernel-scale repos. CLI users can override via --tree-depth. const htmlTreeDepth = 3 +// htmlTreeMaxChildrenPerDir keeps wide directories (e.g. repos with +// hundreds of sibling files at one level) from ballooning the HTML. +// Children are pre-sorted dirs-first then churn-desc, so the top 50 +// preserves the architectural shape and pushes long tails into a +// "โ€ฆ N more" counter. CLI does not apply this cap. +const htmlTreeMaxChildrenPerDir = 50 + // LabelCount pairs a Churn Risk label with its total count and sort // priority, so the template can render chips in the same label order // used by the table below. 
@@ -351,6 +358,7 @@ func Generate(w io.Writer, ds *stats.Dataset, repoName string, topN int, sf stat MaxPattern: maxP, Structure: BuildRepoTree(stats.FileHotspots(ds, 0), htmlTreeDepth), } + CapChildrenPerDir(data.Structure, htmlTreeMaxChildrenPerDir) return tmpl.Execute(w, data) } diff --git a/internal/report/template.go b/internal/report/template.go index 9c97376..0947812 100644 --- a/internal/report/template.go +++ b/internal/report/template.go @@ -44,14 +44,14 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col .chip-active { background: #2da44e; color: #fff; } .chip-cold { background: #eaeef2; color: #656d76; } .chip-note { font-size: 11px; color: #656d76; font-style: italic; margin-left: 4px; } -.glossary { background: #fff; border: 1px solid #d0d7de; border-radius: 6px; padding: 10px 16px; margin-bottom: 24px; } -.glossary + .glossary { margin-top: -16px; } -.glossary summary { cursor: pointer; font-weight: 600; font-size: 13px; color: #24292f; } -.glossary[open] summary { margin-bottom: 8px; } -.glossary dl { font-size: 12px; color: #24292f; margin: 0; } -.glossary dt { font-weight: 600; margin-top: 8px; } -.glossary dt:first-child { margin-top: 0; } -.glossary dd { color: #656d76; margin: 2px 0 0; } +.accordion { background: #fff; border: 1px solid #d0d7de; border-radius: 6px; padding: 10px 16px; margin-bottom: 24px; } +.accordion + .accordion { margin-top: -16px; } +.accordion summary { cursor: pointer; font-weight: 600; font-size: 13px; color: #24292f; } +.accordion[open] summary { margin-bottom: 8px; } +.accordion dl { font-size: 12px; color: #24292f; margin: 0; } +.accordion dt { font-weight: 600; margin-top: 8px; } +.accordion dt:first-child { margin-top: 0; } +.accordion dd { color: #656d76; margin: 2px 0 0; } @@ -59,7 +59,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col

{{.RepoName}} report

{{.Summary.FirstCommitDate}} to {{.Summary.LastCommitDate}}

-
+
Glossary — what do these terms mean?

gitcortex is a repository behavior analyzer, not a code analyzer. These metrics describe what people and processes did in git — who touched what, when, and with whom — not the quality of the source code itself. A file classified as silo or legacy-hotspot reveals a human or process pattern; it is not a judgment on the code (a well-written library maintained by one person will classify as silo regardless of how good it is). Labels point at where to look, not what to conclude.

@@ -93,7 +93,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col
{{if .Structure}}{{if .Structure.Children}} -
+
Repo Structure — architecture at a glance

Directories sorted by total churn within each level; files listed after. Counts reflect history, so deleted files still appear (they shaped the codebase). Tree capped at the top 3 levels; deeper subtrees collapsed to keep the report readable.

@@ -412,6 +412,9 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{end}} {{end}} + {{if gt .HiddenChildren 0}} +
  • … {{thousands .HiddenChildren}} more hidden (ranked by churn)
  • + {{end}} {{end}} diff --git a/internal/report/tree.go b/internal/report/tree.go index 15a891f..9da75eb 100644 --- a/internal/report/tree.go +++ b/internal/report/tree.go @@ -38,12 +38,37 @@ type TreeNode struct { // limit. CLI/HTML surfaces show an ellipsis marker so the reader // knows there's more below. Truncated bool + // HiddenChildren counts children dropped by a per-dir render cap + // (applied for the HTML surface so wide directories don't blow up + // the page). CLI does not cap. Zero when no cap was applied. + HiddenChildren int + + // childIndex is an O(1) lookup for BuildRepoTree. Unexported so it + // doesn't show up in JSON. Cleared after build to release memory + // (the Children slice is the durable view; the map is scaffolding). + childIndex map[childKey]*TreeNode `json:"-"` +} + +// childKey disambiguates file/dir with the same name across history +// (delete-file then mkdir). Both coexist as siblings under the same +// parent and the index keeps them addressable. +type childKey struct { + name string + isDir bool } // BuildRepoTree builds a repo structure tree from the hotspots slice. // maxDepth limits how many levels are expanded (root counts as 0); -// 0 = no limit. Nodes whose subtree is pruned are marked Truncated so -// renderers can signal "... N more" to the reader. +// 0 = no limit. Nodes whose subtree would extend past maxDepth are +// marked Truncated and their aggregate counts still reflect everything +// underneath, so renderers can signal "... N more" without losing the +// totals. +// +// Complexity: O(F ร— D) where F is the number of hotspots and D is the +// average path depth. A per-node map index (childIndex) keeps child +// lookup at O(1); without it, wide flat directories degrade the build +// to quadratic. Pruning happens during descent (not as a post-pass), so +// nodes past the cap are never allocated. 
func BuildRepoTree(hotspots []stats.FileStat, maxDepth int) *TreeNode { root := &TreeNode{Name: ".", Path: "", IsDir: true} @@ -58,20 +83,33 @@ func BuildRepoTree(hotspots []stats.FileStat, maxDepth int) *TreeNode { for i, part := range parts { isLeaf := i == len(parts)-1 - child := findChild(cur, part, !isLeaf) + currentDepth := i + 1 + + // Depth cap: stop creating deeper nodes, but leave the + // aggregates already applied on ancestors intact. cur here + // is the node at exactly maxDepth; mark it Truncated so + // renderers can show "subtree hidden". + if maxDepth > 0 && currentDepth > maxDepth { + cur.Truncated = true + break + } + + child := cur.getChild(part, !isLeaf) if child == nil { child = &TreeNode{ Name: part, Path: strings.Join(parts[:i+1], "/"), IsDir: !isLeaf, - Depth: i + 1, + Depth: currentDepth, } - cur.Children = append(cur.Children, child) + cur.putChild(child) } if isLeaf { - // Leaf: accumulate so rename-induced duplicates within a - // single Dataset (same canonical path emitted twice under - // pathological rename chains) don't silently overwrite. + // Defense-in-depth: FileHotspots iterates a map keyed + // by path, so no duplicates reach this loop in + // practice. If dupes ever did arrive, ancestor .Files + // would also over-count โ€” this += is not sufficient on + // its own, just a cheap safety net. child.Commits += h.Commits child.Churn += h.Churn } else { @@ -85,26 +123,58 @@ func BuildRepoTree(hotspots []stats.FileStat, maxDepth int) *TreeNode { } sortTree(root) - if maxDepth > 0 { - pruneDepth(root, maxDepth) - } + clearChildIndex(root) return root } -// findChild returns the existing sibling that matches both name AND -// directory-ness. A path history where the same name refers to a file in -// one commit and a directory in another (delete-file then mkdir) would -// otherwise corrupt the tree: the file node would grow dir children, or -// the dir node would get leaf values overwritten. 
Matching on the pair -// lets both coexist as sibling nodes; rare in practice, silent-wrong if -// unhandled. -func findChild(n *TreeNode, name string, wantDir bool) *TreeNode { +// getChild / putChild keep child lookup O(1) during BuildRepoTree. The +// map is pure build-time scaffolding; clearChildIndex drops it so the +// tree kept in ReportData is exactly the exported fields. +func (n *TreeNode) getChild(name string, isDir bool) *TreeNode { + if n.childIndex == nil { + return nil + } + return n.childIndex[childKey{name, isDir}] +} + +func (n *TreeNode) putChild(c *TreeNode) { + if n.childIndex == nil { + n.childIndex = make(map[childKey]*TreeNode) + } + n.childIndex[childKey{c.Name, c.IsDir}] = c + n.Children = append(n.Children, c) +} + +func clearChildIndex(n *TreeNode) { + n.childIndex = nil for _, c := range n.Children { - if c.Name == name && c.IsDir == wantDir { - return c - } + clearChildIndex(c) + } +} + +// CapChildrenPerDir keeps the top `limit` children of each directory +// and records how many were dropped in HiddenChildren so the renderer +// can show "โ€ฆ N more hidden". Applied only to the HTML surface โ€” a +// chromium-scale dir at depth 2 can have thousands of leaves, and the +// tree section was meant to tame "too much output" not reintroduce it. +// The CLI intentionally skips this cap because a piped tree is expected +// to be exhaustive within the --tree-depth limit. +// +// Children are already sorted (dirs first, churn desc within kind), so +// the top N favours structure over noise: at a wide dir with dozens of +// subdirs and hundreds of files, the dirs remain visible and the tail +// of thin files collapses into the counter. 
+func CapChildrenPerDir(n *TreeNode, limit int) { + if limit <= 0 { + return + } + if len(n.Children) > limit { + n.HiddenChildren = len(n.Children) - limit + n.Children = n.Children[:limit] + } + for _, c := range n.Children { + CapChildrenPerDir(c, limit) } - return nil } // sortTree orders children deterministically: directories first (so the @@ -126,20 +196,6 @@ func sortTree(n *TreeNode) { } } -// pruneDepth drops children past maxDepth levels below root. The node at -// the cut line keeps its aggregate counts but is marked Truncated so the -// renderer can emit "... N more" instead of silently hiding data. -func pruneDepth(n *TreeNode, maxDepth int) { - if n.Depth >= maxDepth && len(n.Children) > 0 { - n.Truncated = true - n.Children = nil - return - } - for _, c := range n.Children { - pruneDepth(c, maxDepth) - } -} - // RenderTreeText prints the tree in the style of `tree(1)`: unicode // box-drawing prefixes, directories annotated with file/churn counts, // files with commits/churn. The output is UTF-8; callers that need @@ -204,6 +260,23 @@ func RenderTreeCSV(w io.Writer, root *TreeNode) error { return cw.Error() } +// RenderTreeForFormat dispatches tree rendering to the writer matching +// the CLI's --format. Centralizing the switch here means the earlier +// bug ("--format csv silently wrote a Unicode tree") can't recur: every +// CLI caller goes through this function, and the table-driven test in +// tree_test.go asserts one writer per format. Unknown formats fall +// through to the text renderer for backward compatibility with users +// who pipe into their own tooling and pass through unrelated --format +// values (e.g. "table"). 
+func RenderTreeForFormat(w io.Writer, root *TreeNode, format string) error { + switch format { + case "csv": + return RenderTreeCSV(w, root) + default: + return RenderTreeText(w, root) + } +} + func writeTreeCSVRow(cw *csv.Writer, n *TreeNode) error { kind := "file" if n.IsDir { diff --git a/internal/report/tree_test.go b/internal/report/tree_test.go index 0cf64e4..797fc2c 100644 --- a/internal/report/tree_test.go +++ b/internal/report/tree_test.go @@ -194,6 +194,107 @@ func TestBuildRepoTreeDirCommitsZero(t *testing.T) { } } +// Regression: the --format csv path once silently wrote a Unicode tree. +// Assert RenderTreeForFormat routes each format to the correct writer +// by peeking at the first bytes of output โ€” CSV starts with the header +// row, text starts with the root label. Cheap and stable. +func TestRenderTreeForFormatDispatches(t *testing.T) { + hotspots := []stats.FileStat{{Path: "cmd/main.go", Commits: 1, Churn: 10}} + root := BuildRepoTree(hotspots, 0) + + cases := []struct { + format string + wantHead string + }{ + {"csv", "path,type,depth,commits,churn,files,truncated\n"}, + {"table", ".\n"}, // text renderer starts with root name + {"", ".\n"}, // empty format falls through to default (text) + {"garbage", ".\n"}, // unknown format falls through to default (text) + } + for _, c := range cases { + t.Run(c.format, func(t *testing.T) { + var buf bytes.Buffer + if err := RenderTreeForFormat(&buf, root, c.format); err != nil { + t.Fatal(err) + } + if !strings.HasPrefix(buf.String(), c.wantHead) { + t.Errorf("format=%q: output did not start with %q; got:\n%s", + c.format, c.wantHead, buf.String()) + } + }) + } +} + +// Truncation markers: the text renderer must surface the "subtree +// hidden" hint with the correct flag name, and the CSV row for a +// truncated dir must carry truncated=true. Both are user-visible and +// neither was previously asserted. 
+func TestTruncationMarkersInRenderers(t *testing.T) { + hotspots := []stats.FileStat{{Path: "a/b/c.go", Commits: 1, Churn: 10}} + root := BuildRepoTree(hotspots, 2) + + var txt bytes.Buffer + if err := RenderTreeText(&txt, root); err != nil { + t.Fatal(err) + } + // Must reference the actual flag, not the --depth typo. + if !strings.Contains(txt.String(), "subtree hidden, use --tree-depth to expand") { + t.Errorf("text render missing truncation hint with correct flag:\n%s", txt.String()) + } + + var cs bytes.Buffer + if err := RenderTreeCSV(&cs, root); err != nil { + t.Fatal(err) + } + // The dir row at depth 2 (b/) is the truncated one; it must set + // truncated=true in the last column. + found := false + for _, line := range strings.Split(cs.String(), "\n") { + if strings.HasPrefix(line, "a/b,dir,") && strings.HasSuffix(line, ",true") { + found = true + break + } + } + if !found { + t.Errorf("CSV missing truncated=true on a/b dir row:\n%s", cs.String()) + } +} + +// HTML cap: CapChildrenPerDir should retain the top-N and set +// HiddenChildren on the overflow, preserving churn-desc order from the +// prior sort. CLI callers that never invoke it must not see counts +// change. +func TestCapChildrenPerDir(t *testing.T) { + // 5 siblings, cap at 2. 
+ var hotspots []stats.FileStat + for i, name := range []string{"a.go", "b.go", "c.go", "d.go", "e.go"} { + hotspots = append(hotspots, stats.FileStat{ + Path: "dir/" + name, + Commits: 1, + Churn: int64(100 - i*10), // a=100, b=90, c=80, d=70, e=60 + }) + } + root := BuildRepoTree(hotspots, 0) + dir := root.Children[0] + if dir.Name != "dir" || len(dir.Children) != 5 { + t.Fatalf("expected dir with 5 children, got %s with %d", dir.Name, len(dir.Children)) + } + + CapChildrenPerDir(root, 2) + if dir.HiddenChildren != 3 { + t.Errorf("HiddenChildren = %d, want 3", dir.HiddenChildren) + } + if len(dir.Children) != 2 { + t.Fatalf("capped children = %d, want 2", len(dir.Children)) + } + // Top two must be the highest-churn survivors (a.go, b.go) in that + // order โ€” the cap is a prefix trim on the already-sorted slice. + if dir.Children[0].Name != "a.go" || dir.Children[1].Name != "b.go" { + t.Errorf("top-2 after cap = [%s, %s], want [a.go, b.go]", + dir.Children[0].Name, dir.Children[1].Name) + } +} + // Regression: file and directory can share a name across history // (path deleted, then recreated as a directory). Each must get its own // node rather than the second path corrupting the first. From 22bef6851f9101e1caae479f729d55ce71b6723e Mon Sep 17 00:00:00 2001 From: lex0c Date: Sun, 19 Apr 2026 20:30:15 -0300 Subject: [PATCH 5/5] Document the structure stat Mention --stat structure in the README stat table and RUNBOOK command list, and add a Repo Structure section in METRICS.md explaining the history-lens aggregation semantics (dir Commits=0, child count cap, tree-depth default). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 1 + docs/METRICS.md | 14 ++++++++++++++ docs/RUNBOOK.md | 1 + 3 files changed, 16 insertions(+) diff --git a/README.md b/README.md index 48e6c90..ff5ce09 100644 --- a/README.md +++ b/README.md @@ -210,6 +210,7 @@ Available stats: | `profile` | Per-developer report: scope, specialization index, contribution type, pace, collaboration, top files | | `top-commits` | Largest commits ranked by lines changed (includes message if extracted with `--include-commit-messages`) | | `pareto` | Concentration (80% threshold) across files, devs (two lenses: commits and churn), and directories | +| `structure` | Repo layout as a `tree(1)`-style view, dirs sorted by aggregate churn, capped by `--tree-depth` (default 3) | Output formats: `table` (default, human-readable), `csv` (single clean table per `--stat`, header row on line 1), `json` (unified object with all sections). diff --git a/docs/METRICS.md b/docs/METRICS.md index 4f28b74..1b7a0f6 100644 --- a/docs/METRICS.md +++ b/docs/METRICS.md @@ -254,6 +254,20 @@ Two dev lenses are surfaced because commit count alone is a flawed proxy for con **How to interpret**: "20 files concentrate 80% of all churn" describes where change lands โ€” it can indicate a healthy core module under active development, or a bottleneck if combined with low bus factor. Cross-reference with the Churn Risk section before drawing conclusions. +## Repo Structure + +A `tree(1)`-style view of the repository's directory layout, built from paths seen in history (`FileHotspots`), not from the filesystem at HEAD. Deleted files are included โ€” the view answers "what shaped the codebase", not "what is present today". + +**Aggregation**: +- File nodes: `Commits` and `Churn` are the per-file values. +- Directory nodes: `Churn` and `Files` sum over all descendants; `Commits` is intentionally left at zero. 
Per-file commit counts do not sum to a distinct commit count — one commit that touches three files would add to three children. `Files` is the distinct descendant count. + +**Ordering**: within each level, directories come first (architectural shape reads top-down), then files. Ties are broken by churn descending, then name ascending. + +**Truncation**: the CLI caps depth at `--tree-depth` (default 3, 0 = unlimited). The HTML report additionally caps children at 50 per directory to keep the page under ~1MB on kernel-scale repos; the tail is collapsed into a `… N more hidden (ranked by churn)` counter. + +**When to use**: before drilling into hotspots or churn-risk, skim the structure to locate the modules those files live in. The tree is navigational context; ranked tables are where judgment happens. + ## Data Flow ``` diff --git a/docs/RUNBOOK.md b/docs/RUNBOOK.md index a35c0c9..4cc90c8 100644 --- a/docs/RUNBOOK.md +++ b/docs/RUNBOOK.md @@ -155,6 +155,7 @@ Section headers go to stderr, data to stdout. To capture only data: ./gitcortex stats --input data.jsonl --stat profile ./gitcortex stats --input data.jsonl --stat profile --email alice@company.com ./gitcortex stats --input data.jsonl --stat top-commits --top 20 +./gitcortex stats --input data.jsonl --stat structure --tree-depth 3 ``` ### Time filtering