diff --git a/internal/report/profile_template.go b/internal/report/profile_template.go index 0bda0ee..5bb8f0f 100644 --- a/internal/report/profile_template.go +++ b/internal/report/profile_template.go @@ -127,6 +127,9 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{.SharedFiles}} files · {{humanize .SharedLines}} lines {{end}} + {{if gt .Profile.CollaboratorsHidden 0}} + +{{.Profile.CollaboratorsHidden}} more collaborators not shown + {{end}} {{end}} @@ -145,6 +148,9 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col
{{end}} +{{if gt .Profile.TopFilesHidden 0}} ++{{.Profile.TopFilesHidden}} more files not shown +{{end}} {{end}} diff --git a/internal/report/report.go b/internal/report/report.go index 6b1b741..80e875f 100644 --- a/internal/report/report.go +++ b/internal/report/report.go @@ -54,6 +54,20 @@ type ReportData struct { // so mature repos (linux-scale) don't blow up the HTML. nil when // the dataset has no files. Structure *TreeNode + + // TotalDirectories / TotalExtensions / TotalBusFactorFiles are + // the full universe sizes (before top-N truncation) for sections + // whose denominators aren't in Summary. Templates render "20 of + // 127" headers so the reader sees the scale of what's been + // truncated. TotalBusFactorFiles specifically excludes files + // with empty devLines (pure-rename-only files post-ingest-fix) + // because BusFactor skips those — using Summary.TotalFiles here + // would make the header lie on rename-heavy repos. Summary + // already carries TotalDevs / TotalFiles / TotalCommits for the + // rest. + TotalDirectories int + TotalExtensions int + TotalBusFactorFiles int } // htmlTreeDepth caps the repo-structure tree baked into the HTML report. @@ -359,6 +373,9 @@ func Generate(w io.Writer, ds *stats.Dataset, repoName string, topN int, sf stat PatternGrid: grid, MaxPattern: maxP, Structure: BuildRepoTree(stats.FileHotspots(ds, 0), htmlTreeDepth), + TotalDirectories: stats.DirectoryCount(ds), + TotalExtensions: stats.ExtensionCount(ds), + TotalBusFactorFiles: stats.BusFactorCount(ds), } CapChildrenPerDir(data.Structure, htmlTreeMaxChildrenPerDir) diff --git a/internal/report/template.go b/internal/report/template.go index a293a96..d6a3a64 100644 --- a/internal/report/template.go +++ b/internal/report/template.go @@ -185,7 +185,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{end}} {{if .Contributors}} -

Top Contributors

+

Top Contributors{{if lt (len .Contributors) .Summary.TotalDevs}} {{thousands (len .Contributors)}} of {{thousands .Summary.TotalDevs}}{{end}}

Ranked by commit count. High commit count with low lines may indicate small fixes; low count with high lines may indicate large features. · {{docRef "contributors"}}

@@ -204,7 +204,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{end}} {{if .Hotspots}} -

File Hotspots

+

File Hotspots{{if lt (len .Hotspots) .Summary.TotalFiles}} {{thousands (len .Hotspots)}} of {{thousands .Summary.TotalFiles}}{{end}}

Most frequently changed files. High churn with few devs = knowledge silo. High churn with many devs = shared bottleneck. · {{docRef "hotspots"}}

NameEmailCommitsAdditionsDeletions
@@ -222,7 +222,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{end}} {{if .Directories}} -

Directories

+

Directories{{if lt (len .Directories) .TotalDirectories}} {{thousands (len .Directories)}} of {{thousands .TotalDirectories}}{{end}}

Module-level health. File touches is the sum of per-file commit counts (one commit touching N files contributes N), not distinct commits. Low bus factor = knowledge concentrated in few people. · {{docRef "directories"}}

PathCommitsChurnDevs
@@ -240,7 +240,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{end}} {{if .Extensions}} -

Extensions

+

Extensions{{if lt (len .Extensions) .TotalExtensions}} {{thousands (len .Extensions)}} of {{thousands .TotalExtensions}}{{end}}

File extensions ranked by recent churn — "where is the team spending effort now", not "what exists at HEAD". Cross-read with Directories: a repo with high .yaml recent churn concentrated in one dir is config-as-code; spread across many dirs is config sprawl. · {{docRef "extensions"}}

DirectoryFile TouchesChurnFilesDevsBus Factor
@@ -261,7 +261,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{end}} {{if .ChurnRisk}} -

Churn Risk

+

Churn Risk{{if lt (len .ChurnRisk) .Summary.TotalFiles}} {{thousands (len .ChurnRisk)}} of {{thousands .Summary.TotalFiles}}{{end}}

Files ranked by recent churn. Label classifies context so you can judge action: legacy-hotspot (old code + concentrated + declining) is the urgent alarm; silo suggests knowledge transfer; active-core is young code with a single author (often fine); active is shared healthy work; cold is quiet.{{if (index .ChurnRisk 0).AgePercentile}} Age P__ / Trend P__ under the label show where this file sits in the repo's distribution: age P90 means older than 90% of tracked files; trend P10 means declining more sharply than 90%. Classification boundaries are the P75 age and P25 trend of this dataset (see {{docRef "churn-risk"}}).{{end}}

{{if .ChurnRiskLabelCounts}}
@@ -290,7 +290,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{end}} {{if .BusFactor}} -

Bus Factor Risk

+

Bus Factor Risk{{if lt (len .BusFactor) .TotalBusFactorFiles}} {{thousands (len .BusFactor)}} of {{thousands .TotalBusFactorFiles}}{{end}}

Files with fewest developers owning 80%+ of changes. Bus factor 1 = if that person leaves, nobody else knows the code. · {{docRef "bus-factor"}}

ExtFilesChurnRecent ChurnDevsFirst SeenLast Seen
@@ -337,7 +337,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{end}} {{if .TopCommits}} -

Top Commits

+

Top Commits{{if lt (len .TopCommits) .Summary.TotalCommits}} {{thousands (len .TopCommits)}} of {{thousands .Summary.TotalCommits}}{{end}}

Largest commits by lines changed. Unusually large commits may be imports, generated code, or risky big-bang changes worth reviewing. · {{docRef "top-commits"}}

PathBus FactorTop Devs
{{if and (gt (len .TopCommits) 0) (index .TopCommits 0).Message}}{{end}} @@ -398,7 +398,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{printf "%.1f" .Pace}} commits/active dayCollaboration - {{if .Collaborators}}{{range $i, $c := .Collaborators}}{{if $i}}, {{end}}{{$c.Email}} ({{thousands $c.SharedFiles}} files, {{thousands $c.SharedLines}} lines){{end}}{{else}}solo contributor{{end}} + {{if .Collaborators}}{{range $i, $c := .Collaborators}}{{if $i}}, {{end}}{{$c.Email}} ({{thousands $c.SharedFiles}} files, {{thousands $c.SharedLines}} lines){{end}}{{if gt .CollaboratorsHidden 0}} (+{{.CollaboratorsHidden}} more){{end}}{{else}}solo contributor{{end}}Weekend{{printf "%.1f" .WeekendPct}}% @@ -414,6 +414,9 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{thousands .Churn}} churn {{end}} + {{if gt .TopFilesHidden 0}} +
+{{.TopFilesHidden}} more files not shown
+ {{end}} {{end}} diff --git a/internal/stats/extension_test.go b/internal/stats/extension_test.go index 7b48146..6a5d974 100644 --- a/internal/stats/extension_test.go +++ b/internal/stats/extension_test.go @@ -1,6 +1,7 @@ package stats import ( + "fmt" "strings" "testing" "time" @@ -56,6 +57,62 @@ func TestExtractExtensionPolicy(t *testing.T) { } } +// DirectoryCount and ExtensionCount back the "N of M" header badges +// in the main report. They must match what DirectoryStats and +// ExtensionStats would produce pre-truncation — otherwise "showing 20 +// of 127" lies when the user expands to --top 0 and finds a +// different number. +func TestDirectoryCountAndExtensionCount(t *testing.T) { + ds := &Dataset{ + files: map[string]*fileEntry{ + "cmd/main.go": {devLines: map[string]int64{"a": 1}}, + "cmd/util.go": {devLines: map[string]int64{"a": 1}}, + "internal/a.go": {devLines: map[string]int64{"a": 1}}, + "internal/b.go": {devLines: map[string]int64{"a": 1}}, + "docs/x.md": {devLines: map[string]int64{"a": 1}}, + "README.md": {devLines: map[string]int64{"a": 1}}, // "." bucket + "Makefile": {devLines: map[string]int64{"a": 1}}, // "." bucket, (none) ext + }, + } + // Distinct dirs: "cmd", "internal", "docs", "." → 4 + if n := DirectoryCount(ds); n != 4 { + t.Errorf("DirectoryCount = %d, want 4", n) + } + // Distinct exts: ".go", ".md", "(none)" → 3 + if n := ExtensionCount(ds); n != 3 { + t.Errorf("ExtensionCount = %d, want 3", n) + } + // Consistency invariant: must match len of the stats function's + // full output. If they ever drift, the "N of M" header lies. 
+ if got, want := DirectoryCount(ds), len(DirectoryStats(ds, 0)); got != want { + t.Errorf("DirectoryCount (%d) != len(DirectoryStats(_, 0)) (%d)", got, want) + } + if got, want := ExtensionCount(ds), len(ExtensionStats(ds, 0)); got != want { + t.Errorf("ExtensionCount (%d) != len(ExtensionStats(_, 0)) (%d)", got, want) + } +} + +// BusFactorCount must exclude pure-rename files (empty devLines) — +// those are skipped by BusFactor itself and so cannot be part of its +// denominator. Using Summary.TotalFiles would over-count here. Build +// a dataset with one file that carries dev lines and one that +// doesn't; assert BusFactorCount == 1 and matches the real output. +func TestBusFactorCountExcludesEmptyDevLines(t *testing.T) { + ds := &Dataset{ + UniqueFileCount: 2, // Summary total; includes both + files: map[string]*fileEntry{ + "src/authored.go": {devLines: map[string]int64{"alice@x": 10}}, + "src/pure-rename-only": {devLines: map[string]int64{}}, // no authored lines + }, + } + if got := BusFactorCount(ds); got != 1 { + t.Errorf("BusFactorCount = %d, want 1 (pure-rename file must be excluded)", got) + } + if got, want := BusFactorCount(ds), len(BusFactor(ds, 0)); got != want { + t.Errorf("BusFactorCount (%d) != len(BusFactor(_, 0)) (%d) — header would lie", got, want) + } +} + func TestExtensionStatsAggregation(t *testing.T) { // Hand-built dataset so aggregation is inspectable: two .go files // with distinct devs, one .yaml shared by both, and a Makefile @@ -667,6 +724,62 @@ func TestDevProfileHiddenCounters(t *testing.T) { } } +// Completes the Hidden-counter family: TopFiles truncates at 10, +// Collaborators at 5. Both used to drop buckets silently — the "+N +// more" surfaced for Scope/Extensions had no counterpart here, so a +// dev with 25 touched files or 12 frequent collaborators looked like +// they had exactly 10 / 5. Build a dev with 18 files and 6 +// collaborators; assert the counters report the true drop count. 
+func TestDevProfileHiddenCountersTopFilesAndCollaborators(t *testing.T) { + files := map[string]*fileEntry{} + // 12 solo files for alice; plus the 6 shared below = 18 → TopFilesHidden = 8. + for i := 0; i < 12; i++ { + path := fmt.Sprintf("dir%d/file%d.go", i, i) + files[path] = &fileEntry{ + devLines: map[string]int64{"alice@x": int64(100 - i*5)}, + devCommits: map[string]int{"alice@x": 1}, + } + } + // Seed 6 shared files between alice and each of 6 other devs → + // alice has 6 collaborators total; top-5 truncation gives + // CollaboratorsHidden = 1. + for i := 0; i < 6; i++ { + path := fmt.Sprintf("shared/collab%d.go", i) + collab := fmt.Sprintf("bob%d@x", i) + files[path] = &fileEntry{ + devLines: map[string]int64{"alice@x": 50, collab: 50}, + devCommits: map[string]int{"alice@x": 1, collab: 1}, + } + } + contribs := map[string]*ContributorStat{ + "alice@x": {Email: "alice@x", Commits: 18, FilesTouched: 18, ActiveDays: 1}, + } + for i := 0; i < 6; i++ { + contribs[fmt.Sprintf("bob%d@x", i)] = &ContributorStat{ + Email: fmt.Sprintf("bob%d@x", i), Commits: 1, FilesTouched: 1, ActiveDays: 1, + } + } + ds := &Dataset{ + contributors: contribs, + files: files, + commits: map[string]*commitEntry{}, + workGrid: [7][24]int{}, + } + p := DevProfiles(ds, "alice@x", 0)[0] + if len(p.TopFiles) != 10 { + t.Fatalf("TopFiles len = %d, want 10 (truncated from 18)", len(p.TopFiles)) + } + if p.TopFilesHidden != 8 { + t.Errorf("TopFilesHidden = %d, want 8 (18 - 10)", p.TopFilesHidden) + } + if len(p.Collaborators) != 5 { + t.Fatalf("Collaborators len = %d, want 5 (truncated from 6)", len(p.Collaborators)) + } + if p.CollaboratorsHidden != 1 { + t.Errorf("CollaboratorsHidden = %d, want 1 (6 - 5)", p.CollaboratorsHidden) + } +} + // Silent when nothing to hide — the counters must be zero so the // renderers don't emit "+0 more" (noise) for the common case. 
func TestDevProfileHiddenCountersZeroWhenFits(t *testing.T) { @@ -687,6 +800,12 @@ func TestDevProfileHiddenCountersZeroWhenFits(t *testing.T) { t.Errorf("Hidden counters: Scope=%d Ext=%d, want 0/0 (dev has ≤5 buckets each)", p.ScopeHidden, p.ExtensionsHidden) } + // TopFiles cap is 10, Collaborators cap is 5 — bob has 3 files + // and zero collaborators, so both must stay at 0. + if p.TopFilesHidden != 0 || p.CollaboratorsHidden != 0 { + t.Errorf("Hidden counters: TopFiles=%d Collab=%d, want 0/0", + p.TopFilesHidden, p.CollaboratorsHidden) + } } // Truncate to top-5 when a dev's extension set is larger. Under the diff --git a/internal/stats/format.go b/internal/stats/format.go index 895c766..3701812 100644 --- a/internal/stats/format.go +++ b/internal/stats/format.go @@ -500,6 +500,9 @@ func (f *Formatter) PrintProfiles(profiles []DevProfile) error { } fmt.Fprintf(f.w, "%s (%d shared)", c.Email, c.SharedFiles) } + if p.CollaboratorsHidden > 0 { + fmt.Fprintf(f.w, " (+%d more)", p.CollaboratorsHidden) + } } else { fmt.Fprintf(f.w, "solo contributor") } @@ -512,6 +515,9 @@ func (f *Formatter) PrintProfiles(profiles []DevProfile) error { for _, tf := range p.TopFiles { fmt.Fprintf(f.w, " %-50s %3d commits %6d churn\n", tf.Path, tf.Commits, tf.Churn) } + if p.TopFilesHidden > 0 { + fmt.Fprintf(f.w, " ... (+%d more files not shown)\n", p.TopFilesHidden) + } } if len(p.MonthlyActivity) > 0 { diff --git a/internal/stats/stats.go b/internal/stats/stats.go index 8f9b9e6..a0b985c 100644 --- a/internal/stats/stats.go +++ b/internal/stats/stats.go @@ -376,6 +376,35 @@ func DirectoryStats(ds *Dataset, n int) []DirStat { return result } +// DirectoryCount returns the total number of distinct directories +// across all tracked files — the universe `DirectoryStats` ranks +// before truncation. Useful for headers like "Top 20 of 127" so the +// reader sees the size of the repo's dir tree without waiting for +// DirectoryStats(ds, 0) to materialize a slice. 
Same derivation +// (last "/" split, "." for root-level files) to stay consistent. +func DirectoryCount(ds *Dataset) int { + dirs := make(map[string]struct{}) + for path := range ds.files { + dir := "." + if i := strings.LastIndex(path, "/"); i >= 0 { + dir = path[:i] + } + dirs[dir] = struct{}{} + } + return len(dirs) +} + +// ExtensionCount returns the total number of distinct extension +// buckets ExtensionStats would produce. Same derivation via +// extractExtension so the count matches what ranking would show. +func ExtensionCount(ds *Dataset) int { + exts := make(map[string]struct{}) + for path := range ds.files { + exts[extractExtension(path)] = struct{}{} + } + return len(exts) +} + // ExtensionStat rolls history up per file extension. The historical // lens is the point: "which extension is the team spending effort on" // answers a different question than "which extension exists in the @@ -624,6 +653,22 @@ func ActivityOverTime(ds *Dataset, granularity string) []ActivityBucket { return result } +// BusFactorCount returns the universe size for BusFactor — files +// with at least one dev authoring lines. Cannot use +// Summary.TotalFiles because BusFactor skips files where +// fe.devLines is empty (pure-rename-only files after the ingest +// fix that gates devLines on non-zero churn), which would make a +// "N of TotalFiles" header lie for rename-heavy repos. +func BusFactorCount(ds *Dataset) int { + n := 0 + for _, fe := range ds.files { + if len(fe.devLines) > 0 { + n++ + } + } + return n +} + func BusFactor(ds *Dataset, n int) []BusFactorResult { type devLines struct { email string @@ -1263,7 +1308,14 @@ type DevProfile struct { FirstDate string LastDate string TopFiles []DevFileContrib - Scope []DirScope + // TopFilesHidden counts the files dropped by the top-10 truncation + // so CLI/HTML can surface "+N more" next to the visible list. 
Same + // motivation as ScopeHidden/ExtensionsHidden below: silent + // truncation makes a reader wonder whether the list is the dev's + // whole footprint or just a sample. Zero when the dev's touched + // file count fits in 10. + TopFilesHidden int + Scope []DirScope // ScopeHidden / ExtensionsHidden count the buckets dropped by the // top-5 truncation so CLI and HTML can surface "+N more" — without // this, a reader sees Pct summing to e.g. 85% and wonders if the @@ -1276,6 +1328,11 @@ type DevProfile struct { ContribType string // "growth", "balanced", "refactor" Pace float64 // commits per active day Collaborators []DevCollaborator + // CollaboratorsHidden mirrors the Scope/Extensions pattern for + // the top-5 collaborator truncation. On a wide team a dev may + // share files with dozens of people; top-5 is UI-driven (one + // line per collaborator) and the rest should not vanish silently. + CollaboratorsHidden int MonthlyActivity []ActivityBucket WorkGrid [7][24]int WeekendPct float64 @@ -1495,6 +1552,7 @@ func DevProfiles(ds *Dataset, filterEmail string, n int) []DevProfile { } var topFiles []DevFileContrib + topFilesHidden := 0 if files, ok := devFiles[email]; ok { for path, fa := range files { topFiles = append(topFiles, DevFileContrib{Path: path, Commits: fa.commits, Churn: fa.churn}) @@ -1509,6 +1567,7 @@ func DevProfiles(ds *Dataset, filterEmail string, n int) []DevProfile { return topFiles[i].Path < topFiles[j].Path }) if len(topFiles) > 10 { + topFilesHidden = len(topFiles) - 10 topFiles = topFiles[:10] } } @@ -1700,7 +1759,9 @@ func DevProfiles(ds *Dataset, filterEmail string, n int) []DevProfile { } return collabs[i].Email < collabs[j].Email }) + collabsHidden := 0 if len(collabs) > 5 { + collabsHidden = len(collabs) - 5 collabs = collabs[:5] } @@ -1709,11 +1770,12 @@ func DevProfiles(ds *Dataset, filterEmail string, n int) []DevProfile { Commits: cs.Commits, Additions: cs.Additions, Deletions: cs.Deletions, LinesChanged: cs.Additions + 
cs.Deletions, FilesTouched: cs.FilesTouched, ActiveDays: cs.ActiveDays, FirstDate: cs.FirstDate, LastDate: cs.LastDate, - TopFiles: topFiles, Scope: scope, ScopeHidden: scopeHidden, + TopFiles: topFiles, TopFilesHidden: topFilesHidden, + Scope: scope, ScopeHidden: scopeHidden, Extensions: extensions, ExtensionsHidden: extensionsHidden, Specialization: specialization, ContribRatio: contribRatio, ContribType: contribType, - Pace: pace, Collaborators: collabs, + Pace: pace, Collaborators: collabs, CollaboratorsHidden: collabsHidden, MonthlyActivity: monthly, WorkGrid: grid, WeekendPct: wpct, }) }
SHAAuthorDateLinesFilesMessage