diff --git a/internal/report/profile_template.go b/internal/report/profile_template.go
index 0bda0ee..5bb8f0f 100644
--- a/internal/report/profile_template.go
+++ b/internal/report/profile_template.go
@@ -127,6 +127,9 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col
{{.SharedFiles}} files · {{humanize .SharedLines}} lines
{{end}}
+ {{if gt .Profile.CollaboratorsHidden 0}}
+ +{{.Profile.CollaboratorsHidden}} more collaborators not shown
+ {{end}}
{{end}}
@@ -145,6 +148,9 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col
|
{{end}}
+{{if gt .Profile.TopFilesHidden 0}}
+| +{{.Profile.TopFilesHidden}} more files not shown |
+{{end}}
{{end}}
diff --git a/internal/report/report.go b/internal/report/report.go
index 6b1b741..80e875f 100644
--- a/internal/report/report.go
+++ b/internal/report/report.go
@@ -54,6 +54,20 @@ type ReportData struct {
// so mature repos (linux-scale) don't blow up the HTML. nil when
// the dataset has no files.
Structure *TreeNode
+
+ // TotalDirectories / TotalExtensions / TotalBusFactorFiles are
+ // the full universe sizes (before top-N truncation) for sections
+ // whose denominators aren't in Summary. Templates render "20 of
+ // 127" headers so the reader sees the scale of what's been
+ // truncated. TotalBusFactorFiles specifically excludes files
+ // with empty devLines (pure-rename-only files post-ingest-fix)
+ // because BusFactor skips those — using Summary.TotalFiles here
+ // would make the header lie on rename-heavy repos. Summary
+ // already carries TotalDevs / TotalFiles / TotalCommits for the
+ // rest.
+ TotalDirectories int
+ TotalExtensions int
+ TotalBusFactorFiles int
}
// htmlTreeDepth caps the repo-structure tree baked into the HTML report.
@@ -359,6 +373,9 @@ func Generate(w io.Writer, ds *stats.Dataset, repoName string, topN int, sf stat
PatternGrid: grid,
MaxPattern: maxP,
Structure: BuildRepoTree(stats.FileHotspots(ds, 0), htmlTreeDepth),
+ TotalDirectories: stats.DirectoryCount(ds),
+ TotalExtensions: stats.ExtensionCount(ds),
+ TotalBusFactorFiles: stats.BusFactorCount(ds),
}
CapChildrenPerDir(data.Structure, htmlTreeMaxChildrenPerDir)
diff --git a/internal/report/template.go b/internal/report/template.go
index a293a96..d6a3a64 100644
--- a/internal/report/template.go
+++ b/internal/report/template.go
@@ -185,7 +185,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col
{{end}}
{{if .Contributors}}
-Top Contributors
+Top Contributors{{if lt (len .Contributors) .Summary.TotalDevs}} {{thousands (len .Contributors)}} of {{thousands .Summary.TotalDevs}}{{end}}
Ranked by commit count. High commit count with low lines may indicate small fixes; low count with high lines may indicate large features. · {{docRef "contributors"}}
| Name | Email | Commits | | Additions | Deletions |
@@ -204,7 +204,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col
{{end}}
{{if .Hotspots}}
-File Hotspots
+File Hotspots{{if lt (len .Hotspots) .Summary.TotalFiles}} {{thousands (len .Hotspots)}} of {{thousands .Summary.TotalFiles}}{{end}}
Most frequently changed files. High churn with few devs = knowledge silo. High churn with many devs = shared bottleneck. · {{docRef "hotspots"}}
| Path | Commits | Churn | | Devs |
@@ -222,7 +222,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col
{{end}}
{{if .Directories}}
-Directories
+Directories{{if lt (len .Directories) .TotalDirectories}} {{thousands (len .Directories)}} of {{thousands .TotalDirectories}}{{end}}
Module-level health. File touches is the sum of per-file commit counts (one commit touching N files contributes N), not distinct commits. Low bus factor = knowledge concentrated in few people. · {{docRef "directories"}}
| Directory | File Touches | Churn | Files | Devs | Bus Factor |
@@ -240,7 +240,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col
{{end}}
{{if .Extensions}}
-Extensions
+Extensions{{if lt (len .Extensions) .TotalExtensions}} {{thousands (len .Extensions)}} of {{thousands .TotalExtensions}}{{end}}
File extensions ranked by recent churn — "where is the team spending effort now", not "what exists at HEAD". Cross-read with Directories: a repo with high .yaml recent churn concentrated in one dir is config-as-code; spread across many dirs is config sprawl. · {{docRef "extensions"}}
| Ext | Files | Churn | Recent Churn | | Devs | First Seen | Last Seen |
@@ -261,7 +261,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col
{{end}}
{{if .ChurnRisk}}
-Churn Risk
+Churn Risk{{if lt (len .ChurnRisk) .Summary.TotalFiles}} {{thousands (len .ChurnRisk)}} of {{thousands .Summary.TotalFiles}}{{end}}
Files ranked by recent churn. Label classifies context so you can judge action: legacy-hotspot (old code + concentrated + declining) is the urgent alarm; silo suggests knowledge transfer; active-core is young code with a single author (often fine); active is shared healthy work; cold is quiet.{{if (index .ChurnRisk 0).AgePercentile}} Age P__ / Trend P__ under the label show where this file sits in the repo's distribution: age P90 means older than 90% of tracked files; trend P10 means declining more sharply than 90%. Classification boundaries are the P75 age and P25 trend of this dataset (see {{docRef "churn-risk"}}).{{end}}
{{if .ChurnRiskLabelCounts}}
@@ -290,7 +290,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col
{{end}}
{{if .BusFactor}}
-
Bus Factor Risk
+
Bus Factor Risk{{if lt (len .BusFactor) .TotalBusFactorFiles}} {{thousands (len .BusFactor)}} of {{thousands .TotalBusFactorFiles}}{{end}}
Files with fewest developers owning 80%+ of changes. Bus factor 1 = if that person leaves, nobody else knows the code. · {{docRef "bus-factor"}}
| Path | Bus Factor | Top Devs |
@@ -337,7 +337,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col
{{end}}
{{if .TopCommits}}
-Top Commits
+Top Commits{{if lt (len .TopCommits) .Summary.TotalCommits}} {{thousands (len .TopCommits)}} of {{thousands .Summary.TotalCommits}}{{end}}
Largest commits by lines changed. Unusually large commits may be imports, generated code, or risky big-bang changes worth reviewing. · {{docRef "top-commits"}}
| SHA | Author | Date | Lines | Files | {{if and (gt (len .TopCommits) 0) (index .TopCommits 0).Message}}Message | {{end}}
@@ -398,7 +398,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col
{{printf "%.1f" .Pace}} commits/active day
Collaboration
- {{if .Collaborators}}{{range $i, $c := .Collaborators}}{{if $i}}, {{end}}{{$c.Email}} ({{thousands $c.SharedFiles}} files, {{thousands $c.SharedLines}} lines){{end}}{{else}}solo contributor{{end}}
+ {{if .Collaborators}}{{range $i, $c := .Collaborators}}{{if $i}}, {{end}}{{$c.Email}} ({{thousands $c.SharedFiles}} files, {{thousands $c.SharedLines}} lines){{end}}{{if gt .CollaboratorsHidden 0}} (+{{.CollaboratorsHidden}} more){{end}}{{else}}solo contributor{{end}}
Weekend
{{printf "%.1f" .WeekendPct}}%
@@ -414,6 +414,9 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col
{{thousands .Churn}} churn
{{end}}
+ {{if gt .TopFilesHidden 0}}
+ +{{.TopFilesHidden}} more files not shown
+ {{end}}
{{end}}
diff --git a/internal/stats/extension_test.go b/internal/stats/extension_test.go
index 7b48146..6a5d974 100644
--- a/internal/stats/extension_test.go
+++ b/internal/stats/extension_test.go
@@ -1,6 +1,7 @@
package stats
import (
+ "fmt"
"strings"
"testing"
"time"
@@ -56,6 +57,62 @@ func TestExtractExtensionPolicy(t *testing.T) {
}
}
+// DirectoryCount and ExtensionCount supply the "M" in the "N of M"
+// header badges of the main report. Each must equal the length of
+// the untruncated DirectoryStats / ExtensionStats output — otherwise
+// a "showing 20 of 127" header is contradicted when the user expands
+// to --top 0 and finds a different number.
+func TestDirectoryCountAndExtensionCount(t *testing.T) {
+ ds := &Dataset{
+ files: map[string]*fileEntry{
+ "cmd/main.go": {devLines: map[string]int64{"a": 1}}, // "cmd" dir, ".go" ext
+ "cmd/util.go": {devLines: map[string]int64{"a": 1}}, // same dir and ext: must not double-count
+ "internal/a.go": {devLines: map[string]int64{"a": 1}}, // "internal" dir
+ "internal/b.go": {devLines: map[string]int64{"a": 1}},
+ "docs/x.md": {devLines: map[string]int64{"a": 1}}, // "docs" dir, ".md" ext
+ "README.md": {devLines: map[string]int64{"a": 1}}, // no "/" in path → "." bucket
+ "Makefile": {devLines: map[string]int64{"a": 1}}, // "." bucket, (none) ext
+ },
+ }
+ // Distinct dirs: "cmd", "internal", "docs", "." → 4
+ if n := DirectoryCount(ds); n != 4 {
+ t.Errorf("DirectoryCount = %d, want 4", n)
+ }
+ // Distinct exts: ".go", ".md", "(none)" → 3
+ if n := ExtensionCount(ds); n != 3 {
+ t.Errorf("ExtensionCount = %d, want 3", n)
+ }
+ // Consistency invariant: each count must equal the length of the
+ // stats function's full (n = 0) output, or the "N of M" header lies.
+ if got, want := DirectoryCount(ds), len(DirectoryStats(ds, 0)); got != want {
+ t.Errorf("DirectoryCount (%d) != len(DirectoryStats(_, 0)) (%d)", got, want)
+ }
+ if got, want := ExtensionCount(ds), len(ExtensionStats(ds, 0)); got != want {
+ t.Errorf("ExtensionCount (%d) != len(ExtensionStats(_, 0)) (%d)", got, want)
+ }
+}
+
+// BusFactorCount must exclude pure-rename files (empty devLines).
+// The second assertion pins it to len(BusFactor(ds, 0)), so a file
+// BusFactor does not report cannot be part of the denominator, and
+// Summary.TotalFiles would over-count here. The dataset has one file
+// that carries dev lines and one that doesn't; the expected count is 1.
+func TestBusFactorCountExcludesEmptyDevLines(t *testing.T) {
+ ds := &Dataset{
+ UniqueFileCount: 2, // Summary total; includes both
+ files: map[string]*fileEntry{
+ "src/authored.go": {devLines: map[string]int64{"alice@x": 10}},
+ "src/pure-rename-only": {devLines: map[string]int64{}}, // no authored lines
+ },
+ }
+ if got := BusFactorCount(ds); got != 1 {
+ t.Errorf("BusFactorCount = %d, want 1 (pure-rename file must be excluded)", got)
+ }
+ if got, want := BusFactorCount(ds), len(BusFactor(ds, 0)); got != want { // count vs. untruncated output
+ t.Errorf("BusFactorCount (%d) != len(BusFactor(_, 0)) (%d) — header would lie", got, want)
+ }
+}
+
func TestExtensionStatsAggregation(t *testing.T) {
// Hand-built dataset so aggregation is inspectable: two .go files
// with distinct devs, one .yaml shared by both, and a Makefile
@@ -667,6 +724,62 @@ func TestDevProfileHiddenCounters(t *testing.T) {
}
}
+// Completes the Hidden-counter family: TopFiles truncates at 10,
+// Collaborators at 5. Both used to drop buckets silently — the "+N
+// more" surfaced for Scope/Extensions had no counterpart here, so a
+// dev with 25 touched files or 12 frequent collaborators looked like
+// they had exactly 10 / 5. Build a dev with 18 files (12 solo + 6
+// shared) and 6 collaborators; assert the counters report the drop count.
+func TestDevProfileHiddenCountersTopFilesAndCollaborators(t *testing.T) {
+ files := map[string]*fileEntry{}
+ // 12 solo files for alice; with the 6 shared files below she has 18 → TopFilesHidden = 8.
+ for i := 0; i < 12; i++ {
+ path := fmt.Sprintf("dir%d/file%d.go", i, i)
+ files[path] = &fileEntry{
+ devLines: map[string]int64{"alice@x": int64(100 - i*5)},
+ devCommits: map[string]int{"alice@x": 1},
+ }
+ }
+ // Seed 6 shared files, each between alice and one distinct other
+ // dev → alice has 6 collaborators total; top-5 truncation gives
+ // CollaboratorsHidden = 1.
+ for i := 0; i < 6; i++ {
+ path := fmt.Sprintf("shared/collab%d.go", i)
+ collab := fmt.Sprintf("bob%d@x", i)
+ files[path] = &fileEntry{
+ devLines: map[string]int64{"alice@x": 50, collab: 50},
+ devCommits: map[string]int{"alice@x": 1, collab: 1},
+ }
+ }
+ contribs := map[string]*ContributorStat{
+ "alice@x": {Email: "alice@x", Commits: 18, FilesTouched: 18, ActiveDays: 1},
+ }
+ for i := 0; i < 6; i++ {
+ contribs[fmt.Sprintf("bob%d@x", i)] = &ContributorStat{
+ Email: fmt.Sprintf("bob%d@x", i), Commits: 1, FilesTouched: 1, ActiveDays: 1,
+ }
+ }
+ ds := &Dataset{
+ contributors: contribs,
+ files: files,
+ commits: map[string]*commitEntry{},
+ workGrid: [7][24]int{},
+ }
+ p := DevProfiles(ds, "alice@x", 0)[0]
+ if len(p.TopFiles) != 10 {
+ t.Fatalf("TopFiles len = %d, want 10 (truncated from 18)", len(p.TopFiles))
+ }
+ if p.TopFilesHidden != 8 {
+ t.Errorf("TopFilesHidden = %d, want 8 (18 - 10)", p.TopFilesHidden)
+ }
+ if len(p.Collaborators) != 5 {
+ t.Fatalf("Collaborators len = %d, want 5 (truncated from 6)", len(p.Collaborators))
+ }
+ if p.CollaboratorsHidden != 1 {
+ t.Errorf("CollaboratorsHidden = %d, want 1 (6 - 5)", p.CollaboratorsHidden)
+ }
+}
+
// Silent when nothing to hide — the counters must be zero so the
// renderers don't emit "+0 more" (noise) for the common case.
func TestDevProfileHiddenCountersZeroWhenFits(t *testing.T) {
@@ -687,6 +800,12 @@ func TestDevProfileHiddenCountersZeroWhenFits(t *testing.T) {
t.Errorf("Hidden counters: Scope=%d Ext=%d, want 0/0 (dev has ≤5 buckets each)",
p.ScopeHidden, p.ExtensionsHidden)
}
+ // TopFiles cap is 10, Collaborators cap is 5 — bob has 3 files
+ // and zero collaborators, so both must stay at 0.
+ if p.TopFilesHidden != 0 || p.CollaboratorsHidden != 0 {
+ t.Errorf("Hidden counters: TopFiles=%d Collab=%d, want 0/0",
+ p.TopFilesHidden, p.CollaboratorsHidden)
+ }
}
// Truncate to top-5 when a dev's extension set is larger. Under the
diff --git a/internal/stats/format.go b/internal/stats/format.go
index 895c766..3701812 100644
--- a/internal/stats/format.go
+++ b/internal/stats/format.go
@@ -500,6 +500,9 @@ func (f *Formatter) PrintProfiles(profiles []DevProfile) error {
}
fmt.Fprintf(f.w, "%s (%d shared)", c.Email, c.SharedFiles)
}
+ if p.CollaboratorsHidden > 0 {
+ fmt.Fprintf(f.w, " (+%d more)", p.CollaboratorsHidden)
+ }
} else {
fmt.Fprintf(f.w, "solo contributor")
}
@@ -512,6 +515,9 @@ func (f *Formatter) PrintProfiles(profiles []DevProfile) error {
for _, tf := range p.TopFiles {
fmt.Fprintf(f.w, " %-50s %3d commits %6d churn\n", tf.Path, tf.Commits, tf.Churn)
}
+ if p.TopFilesHidden > 0 {
+ fmt.Fprintf(f.w, " ... (+%d more files not shown)\n", p.TopFilesHidden)
+ }
}
if len(p.MonthlyActivity) > 0 {
diff --git a/internal/stats/stats.go b/internal/stats/stats.go
index 8f9b9e6..a0b985c 100644
--- a/internal/stats/stats.go
+++ b/internal/stats/stats.go
@@ -376,6 +376,35 @@ func DirectoryStats(ds *Dataset, n int) []DirStat {
return result
}
+// DirectoryCount reports how many distinct parent directories the
+// tracked files sit in — the size of the set `DirectoryStats` ranks
+// before any top-N cut. Report headers ("20 of 127") use it as the
+// denominator without building the full DirectoryStats(ds, 0) slice.
+// A file's directory is everything before the final "/" in its path;
+// a path with no "/" is bucketed under ".".
+func DirectoryCount(ds *Dataset) int {
+	seen := make(map[string]struct{})
+	for p := range ds.files {
+		parent := "."
+		if cut := strings.LastIndex(p, "/"); cut != -1 {
+			parent = p[:cut]
+		}
+		seen[parent] = struct{}{}
+	}
+	return len(seen)
+}
+
+// ExtensionCount reports how many distinct extension buckets the
+// tracked files fall into. Paths are keyed by extractExtension so the
+// total is meant to equal len(ExtensionStats(ds, 0)).
+func ExtensionCount(ds *Dataset) int {
+	buckets := map[string]struct{}{}
+	for p := range ds.files {
+		buckets[extractExtension(p)] = struct{}{}
+	}
+	return len(buckets)
+}
+
// ExtensionStat rolls history up per file extension. The historical
// lens is the point: "which extension is the team spending effort on"
// answers a different question than "which extension exists in the
@@ -624,6 +653,22 @@ func ActivityOverTime(ds *Dataset, granularity string) []ActivityBucket {
return result
}
+// BusFactorCount reports how many tracked files have at least one
+// entry in devLines — the denominator for the Bus Factor "N of M"
+// header. Summary.TotalFiles is not a substitute: files whose
+// devLines is empty (pure-rename-only files, once ingest gates
+// devLines on non-zero churn) are left out of BusFactor's output,
+// so counting them would overstate M on rename-heavy repos.
+func BusFactorCount(ds *Dataset) int {
+	authored := len(ds.files)
+	for _, entry := range ds.files {
+		if len(entry.devLines) == 0 {
+			authored-- // no dev-attributed lines: not in BusFactor's universe
+		}
+	}
+	return authored
+}
+
func BusFactor(ds *Dataset, n int) []BusFactorResult {
type devLines struct {
email string
@@ -1263,7 +1308,14 @@ type DevProfile struct {
FirstDate string
LastDate string
TopFiles []DevFileContrib
- Scope []DirScope
+ // TopFilesHidden counts the files dropped by the top-10 truncation
+ // so CLI/HTML can surface "+N more" next to the visible list. Same
+ // motivation as ScopeHidden/ExtensionsHidden below: silent
+ // truncation makes a reader wonder whether the list is the dev's
+ // whole footprint or just a sample. Zero when the dev's touched
+ // file count fits in 10.
+ TopFilesHidden int
+ Scope []DirScope
// ScopeHidden / ExtensionsHidden count the buckets dropped by the
// top-5 truncation so CLI and HTML can surface "+N more" — without
// this, a reader sees Pct summing to e.g. 85% and wonders if the
@@ -1276,6 +1328,11 @@ type DevProfile struct {
ContribType string // "growth", "balanced", "refactor"
Pace float64 // commits per active day
Collaborators []DevCollaborator
+ // CollaboratorsHidden mirrors the Scope/Extensions pattern for
+ // the top-5 collaborator truncation. On a wide team a dev may
+ // share files with dozens of people; top-5 is UI-driven (one
+ // line per collaborator) and the rest should not vanish silently.
+ CollaboratorsHidden int
MonthlyActivity []ActivityBucket
WorkGrid [7][24]int
WeekendPct float64
@@ -1495,6 +1552,7 @@ func DevProfiles(ds *Dataset, filterEmail string, n int) []DevProfile {
}
var topFiles []DevFileContrib
+ topFilesHidden := 0
if files, ok := devFiles[email]; ok {
for path, fa := range files {
topFiles = append(topFiles, DevFileContrib{Path: path, Commits: fa.commits, Churn: fa.churn})
@@ -1509,6 +1567,7 @@ func DevProfiles(ds *Dataset, filterEmail string, n int) []DevProfile {
return topFiles[i].Path < topFiles[j].Path
})
if len(topFiles) > 10 {
+ topFilesHidden = len(topFiles) - 10
topFiles = topFiles[:10]
}
}
@@ -1700,7 +1759,9 @@ func DevProfiles(ds *Dataset, filterEmail string, n int) []DevProfile {
}
return collabs[i].Email < collabs[j].Email
})
+ collabsHidden := 0
if len(collabs) > 5 {
+ collabsHidden = len(collabs) - 5
collabs = collabs[:5]
}
@@ -1709,11 +1770,12 @@ func DevProfiles(ds *Dataset, filterEmail string, n int) []DevProfile {
Commits: cs.Commits, Additions: cs.Additions, Deletions: cs.Deletions,
LinesChanged: cs.Additions + cs.Deletions, FilesTouched: cs.FilesTouched,
ActiveDays: cs.ActiveDays, FirstDate: cs.FirstDate, LastDate: cs.LastDate,
- TopFiles: topFiles, Scope: scope, ScopeHidden: scopeHidden,
+ TopFiles: topFiles, TopFilesHidden: topFilesHidden,
+ Scope: scope, ScopeHidden: scopeHidden,
Extensions: extensions, ExtensionsHidden: extensionsHidden,
Specialization: specialization,
ContribRatio: contribRatio, ContribType: contribType,
- Pace: pace, Collaborators: collabs,
+ Pace: pace, Collaborators: collabs, CollaboratorsHidden: collabsHidden,
MonthlyActivity: monthly, WorkGrid: grid, WeekendPct: wpct,
})
}