From cbf1c986d2db6e9fb2d32fdf2a13716b54ec656e Mon Sep 17 00:00:00 2001 From: lex0c Date: Mon, 20 Apr 2026 00:29:13 -0300 Subject: [PATCH 1/2] Surface hidden counts for TopFiles and Collaborators in DevProfile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completes the hidden-counter family started with Scope/Extensions. TopFiles truncates at 10 and Collaborators at 5; both used to drop buckets silently. A dev with 25 touched files or 12 frequent collaborators looked like they had exactly 10/5 — reader had no way to tell "this is the whole footprint" apart from "this is a sample of a larger set". The k8s-release-robot profile is a good example: 62 distinct collaborators and 18 top files, previously collapsed to the displayed top-N without trace. Add TopFilesHidden and CollaboratorsHidden fields on DevProfile; populate at the truncation sites. Render in three surfaces matching the established pattern: - CLI PrintProfiles: inline "(+N more)" on the Collaboration line, standalone " ... (+N more files not shown)" after Top files. - HTML main report profile card: italic span on Collaboration, italic div below Top files. - HTML dedicated profile: italic chip after Collaborators, colspan-4 row at the bottom of the Top Files table. Silent when Hidden == 0 (the common case) so reports of tight teams don't gain noise. Tests: TestDevProfileHiddenCountersTopFilesAndCollaborators constructs a dev with 18 authored files and 6 collaborators, asserts TopFilesHidden=8 and CollaboratorsHidden=1. TestDevProfileHiddenCountersZeroWhenFits extended to pin both new counters to 0 in the no-truncation case. Verified on kubernetes/k8s-release-robot: CLI shows "(+57 more)" on Collaboration and "(+8 more files not shown)" at bottom of Top files; HTML dedicated profile shows both markers. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/report/profile_template.go | 6 +++ internal/report/template.go | 5 ++- internal/stats/extension_test.go | 63 +++++++++++++++++++++++++++++ internal/stats/format.go | 6 +++ internal/stats/stats.go | 23 +++++++++-- 5 files changed, 99 insertions(+), 4 deletions(-) diff --git a/internal/report/profile_template.go b/internal/report/profile_template.go index 0bda0ee..5bb8f0f 100644 --- a/internal/report/profile_template.go +++ b/internal/report/profile_template.go @@ -127,6 +127,9 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{.SharedFiles}} files · {{humanize .SharedLines}} lines {{end}} + {{if gt .Profile.CollaboratorsHidden 0}} + +{{.Profile.CollaboratorsHidden}} more collaborators not shown + {{end}} {{end}} @@ -145,6 +148,9 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col
{{end}} +{{if gt .Profile.TopFilesHidden 0}} ++{{.Profile.TopFilesHidden}} more files not shown +{{end}} {{end}} diff --git a/internal/report/template.go b/internal/report/template.go index a293a96..f78aa1f 100644 --- a/internal/report/template.go +++ b/internal/report/template.go @@ -398,7 +398,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{printf "%.1f" .Pace}} commits/active day Collaboration - {{if .Collaborators}}{{range $i, $c := .Collaborators}}{{if $i}}, {{end}}{{$c.Email}} ({{thousands $c.SharedFiles}} files, {{thousands $c.SharedLines}} lines){{end}}{{else}}solo contributor{{end}} + {{if .Collaborators}}{{range $i, $c := .Collaborators}}{{if $i}}, {{end}}{{$c.Email}} ({{thousands $c.SharedFiles}} files, {{thousands $c.SharedLines}} lines){{end}}{{if gt .CollaboratorsHidden 0}} (+{{.CollaboratorsHidden}} more){{end}}{{else}}solo contributor{{end}} Weekend {{printf "%.1f" .WeekendPct}}% @@ -414,6 +414,9 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{thousands .Churn}} churn {{end}} + {{if gt .TopFilesHidden 0}} +
+{{.TopFilesHidden}} more files not shown
+ {{end}} {{end}} diff --git a/internal/stats/extension_test.go b/internal/stats/extension_test.go index 7b48146..3127b7d 100644 --- a/internal/stats/extension_test.go +++ b/internal/stats/extension_test.go @@ -1,6 +1,7 @@ package stats import ( + "fmt" "strings" "testing" "time" @@ -667,6 +668,62 @@ func TestDevProfileHiddenCounters(t *testing.T) { } } +// Completes the Hidden-counter family: TopFiles truncates at 10, +// Collaborators at 5. Both used to drop buckets silently — the "+N +// more" surfaced for Scope/Extensions had no counterpart here, so a +// dev with 25 touched files or 12 frequent collaborators looked like +// they had exactly 10 / 5. Build a dev with 18 files and 6 +// collaborators; assert the counters report the true drop count. +func TestDevProfileHiddenCountersTopFilesAndCollaborators(t *testing.T) { + files := map[string]*fileEntry{} + // 12 solo files by alice; with the 6 shared files below that is 18 → TopFilesHidden = 8. + for i := 0; i < 12; i++ { + path := fmt.Sprintf("dir%d/file%d.go", i, i) + files[path] = &fileEntry{ + devLines: map[string]int64{"alice@x": int64(100 - i*5)}, + devCommits: map[string]int{"alice@x": 1}, + } + } + // Seed 6 shared files between alice and each of 6 other devs → + // alice has 6 collaborators total; top-5 truncation gives + // CollaboratorsHidden = 1. 
+ for i := 0; i < 6; i++ { + path := fmt.Sprintf("shared/collab%d.go", i) + collab := fmt.Sprintf("bob%d@x", i) + files[path] = &fileEntry{ + devLines: map[string]int64{"alice@x": 50, collab: 50}, + devCommits: map[string]int{"alice@x": 1, collab: 1}, + } + } + contribs := map[string]*ContributorStat{ + "alice@x": {Email: "alice@x", Commits: 18, FilesTouched: 18, ActiveDays: 1}, + } + for i := 0; i < 6; i++ { + contribs[fmt.Sprintf("bob%d@x", i)] = &ContributorStat{ + Email: fmt.Sprintf("bob%d@x", i), Commits: 1, FilesTouched: 1, ActiveDays: 1, + } + } + ds := &Dataset{ + contributors: contribs, + files: files, + commits: map[string]*commitEntry{}, + workGrid: [7][24]int{}, + } + p := DevProfiles(ds, "alice@x", 0)[0] + if len(p.TopFiles) != 10 { + t.Fatalf("TopFiles len = %d, want 10 (truncated from 18)", len(p.TopFiles)) + } + if p.TopFilesHidden != 8 { + t.Errorf("TopFilesHidden = %d, want 8 (18 - 10)", p.TopFilesHidden) + } + if len(p.Collaborators) != 5 { + t.Fatalf("Collaborators len = %d, want 5 (truncated from 6)", len(p.Collaborators)) + } + if p.CollaboratorsHidden != 1 { + t.Errorf("CollaboratorsHidden = %d, want 1 (6 - 5)", p.CollaboratorsHidden) + } +} + // Silent when nothing to hide — the counters must be zero so the // renderers don't emit "+0 more" (noise) for the common case. func TestDevProfileHiddenCountersZeroWhenFits(t *testing.T) { @@ -687,6 +744,12 @@ func TestDevProfileHiddenCountersZeroWhenFits(t *testing.T) { t.Errorf("Hidden counters: Scope=%d Ext=%d, want 0/0 (dev has ≤5 buckets each)", p.ScopeHidden, p.ExtensionsHidden) } + // TopFiles cap is 10, Collaborators cap is 5 — bob has 3 files + // and zero collaborators, so both must stay at 0. + if p.TopFilesHidden != 0 || p.CollaboratorsHidden != 0 { + t.Errorf("Hidden counters: TopFiles=%d Collab=%d, want 0/0", + p.TopFilesHidden, p.CollaboratorsHidden) + } } // Truncate to top-5 when a dev's extension set is larger. 
Under the diff --git a/internal/stats/format.go b/internal/stats/format.go index 895c766..3701812 100644 --- a/internal/stats/format.go +++ b/internal/stats/format.go @@ -500,6 +500,9 @@ func (f *Formatter) PrintProfiles(profiles []DevProfile) error { } fmt.Fprintf(f.w, "%s (%d shared)", c.Email, c.SharedFiles) } + if p.CollaboratorsHidden > 0 { + fmt.Fprintf(f.w, " (+%d more)", p.CollaboratorsHidden) + } } else { fmt.Fprintf(f.w, "solo contributor") } @@ -512,6 +515,9 @@ func (f *Formatter) PrintProfiles(profiles []DevProfile) error { for _, tf := range p.TopFiles { fmt.Fprintf(f.w, " %-50s %3d commits %6d churn\n", tf.Path, tf.Commits, tf.Churn) } + if p.TopFilesHidden > 0 { + fmt.Fprintf(f.w, " ... (+%d more files not shown)\n", p.TopFilesHidden) + } } if len(p.MonthlyActivity) > 0 { diff --git a/internal/stats/stats.go b/internal/stats/stats.go index 8f9b9e6..ec86eed 100644 --- a/internal/stats/stats.go +++ b/internal/stats/stats.go @@ -1263,7 +1263,14 @@ type DevProfile struct { FirstDate string LastDate string TopFiles []DevFileContrib - Scope []DirScope + // TopFilesHidden counts the files dropped by the top-10 truncation + // so CLI/HTML can surface "+N more" next to the visible list. Same + // motivation as ScopeHidden/ExtensionsHidden below: silent + // truncation makes a reader wonder whether the list is the dev's + // whole footprint or just a sample. Zero when the dev's touched + // file count fits in 10. + TopFilesHidden int + Scope []DirScope // ScopeHidden / ExtensionsHidden count the buckets dropped by the // top-5 truncation so CLI and HTML can surface "+N more" — without // this, a reader sees Pct summing to e.g. 85% and wonders if the @@ -1276,6 +1283,11 @@ type DevProfile struct { ContribType string // "growth", "balanced", "refactor" Pace float64 // commits per active day Collaborators []DevCollaborator + // CollaboratorsHidden mirrors the Scope/Extensions pattern for + // the top-5 collaborator truncation. 
On a wide team a dev may + // share files with dozens of people; top-5 is UI-driven (one + // line per collaborator) and the rest should not vanish silently. + CollaboratorsHidden int MonthlyActivity []ActivityBucket WorkGrid [7][24]int WeekendPct float64 @@ -1495,6 +1507,7 @@ func DevProfiles(ds *Dataset, filterEmail string, n int) []DevProfile { } var topFiles []DevFileContrib + topFilesHidden := 0 if files, ok := devFiles[email]; ok { for path, fa := range files { topFiles = append(topFiles, DevFileContrib{Path: path, Commits: fa.commits, Churn: fa.churn}) @@ -1509,6 +1522,7 @@ func DevProfiles(ds *Dataset, filterEmail string, n int) []DevProfile { return topFiles[i].Path < topFiles[j].Path }) if len(topFiles) > 10 { + topFilesHidden = len(topFiles) - 10 topFiles = topFiles[:10] } } @@ -1700,7 +1714,9 @@ func DevProfiles(ds *Dataset, filterEmail string, n int) []DevProfile { } return collabs[i].Email < collabs[j].Email }) + collabsHidden := 0 if len(collabs) > 5 { + collabsHidden = len(collabs) - 5 collabs = collabs[:5] } @@ -1709,11 +1725,12 @@ func DevProfiles(ds *Dataset, filterEmail string, n int) []DevProfile { Commits: cs.Commits, Additions: cs.Additions, Deletions: cs.Deletions, LinesChanged: cs.Additions + cs.Deletions, FilesTouched: cs.FilesTouched, ActiveDays: cs.ActiveDays, FirstDate: cs.FirstDate, LastDate: cs.LastDate, - TopFiles: topFiles, Scope: scope, ScopeHidden: scopeHidden, + TopFiles: topFiles, TopFilesHidden: topFilesHidden, + Scope: scope, ScopeHidden: scopeHidden, Extensions: extensions, ExtensionsHidden: extensionsHidden, Specialization: specialization, ContribRatio: contribRatio, ContribType: contribType, - Pace: pace, Collaborators: collabs, + Pace: pace, Collaborators: collabs, CollaboratorsHidden: collabsHidden, MonthlyActivity: monthly, WorkGrid: grid, WeekendPct: wpct, }) } From 25252ee1cf05dee5d0d432ea9bb9ff390653d3fa Mon Sep 17 00:00:00 2001 From: lex0c Date: Mon, 20 Apr 2026 00:37:50 -0300 Subject: [PATCH 2/2] Annotate ranked 
table headers with 'N of M' total counts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sibling to the profile hidden counters from the previous commit, but different pattern: top-level tables in the HTML report now show their universe size inline in the h2 (e.g. "Top Contributors 20 of 5,295") when truncation is in effect. Silent when the displayed list is the whole set. Rationale vs "+N more" rows at table bottoms: user passed --top N explicitly, so they know truncation happened — the useful signal is *how big is the repo*, not *was something hidden*. 5,295 devs vs 40 devs is diagnostic information the reader wants at a glance. Bottom-of-table "+N more" would also compound across 7 tables visually. Header badge: one line per section, no reading weight added. Applied to 7 tables where the denominator is well-defined (stat's output universe matches a clean total): Contributors — Summary.TotalDevs File Hotspots — Summary.TotalFiles Directories — ReportData.TotalDirectories (new) Extensions — ReportData.TotalExtensions (new) Churn Risk — Summary.TotalFiles Bus Factor Risk — ReportData.TotalBusFactorFiles (new; see below) Top Commits — Summary.TotalCommits Skipped Coupling and Dev Network because --coupling-min-changes and --network-min-files filter before ranking, so "total pairs" isn't a clean denominator. BusFactor's universe isn't TotalFiles. The stat skips files where fe.devLines is empty (pure-rename-only files after the earlier ingest fix). Using Summary.TotalFiles as the denominator would lie on rename-heavy repos: kubernetes has 76,538 files but only 71,510 (93.4%) carry authored lines — a 5,028-file gap. Added BusFactorCount that excludes empty-devLines files and use it instead. DirectoryCount / ExtensionCount similarly dedicated to keep the helper lightweight (DirectoryStats with n=0 does expensive per-dir bus factor sorts we don't need just to count). 
Tests: TestDirectoryCountAndExtensionCount pins the counts on a small fixture AND asserts the invariant `Count == len(stat(ds, 0))`, so any future drift in derivation surfaces in CI. TestBusFactorCountExcludesEmptyDevLines covers the pure-rename skip specifically. Smoke: pi-hole shows "20 of 207" (8 pure-rename files excluded), kubernetes "20 of 71,510" (5,028 excluded). Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/report/report.go | 17 ++++++++++ internal/report/template.go | 14 ++++---- internal/stats/extension_test.go | 56 ++++++++++++++++++++++++++++++++ internal/stats/stats.go | 45 +++++++++++++++++++++++++ 4 files changed, 125 insertions(+), 7 deletions(-) diff --git a/internal/report/report.go b/internal/report/report.go index 6b1b741..80e875f 100644 --- a/internal/report/report.go +++ b/internal/report/report.go @@ -54,6 +54,20 @@ type ReportData struct { // so mature repos (linux-scale) don't blow up the HTML. nil when // the dataset has no files. Structure *TreeNode + + // TotalDirectories / TotalExtensions / TotalBusFactorFiles are + // the full universe sizes (before top-N truncation) for sections + // whose denominators aren't in Summary. Templates render "20 of + // 127" headers so the reader sees the scale of what's been + // truncated. TotalBusFactorFiles specifically excludes files + // with empty devLines (pure-rename-only files post-ingest-fix) + // because BusFactor skips those — using Summary.TotalFiles here + // would make the header lie on rename-heavy repos. Summary + // already carries TotalDevs / TotalFiles / TotalCommits for the + // rest. + TotalDirectories int + TotalExtensions int + TotalBusFactorFiles int } // htmlTreeDepth caps the repo-structure tree baked into the HTML report. 
@@ -359,6 +373,9 @@ func Generate(w io.Writer, ds *stats.Dataset, repoName string, topN int, sf stat PatternGrid: grid, MaxPattern: maxP, Structure: BuildRepoTree(stats.FileHotspots(ds, 0), htmlTreeDepth), + TotalDirectories: stats.DirectoryCount(ds), + TotalExtensions: stats.ExtensionCount(ds), + TotalBusFactorFiles: stats.BusFactorCount(ds), } CapChildrenPerDir(data.Structure, htmlTreeMaxChildrenPerDir) diff --git a/internal/report/template.go b/internal/report/template.go index f78aa1f..d6a3a64 100644 --- a/internal/report/template.go +++ b/internal/report/template.go @@ -185,7 +185,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{end}} {{if .Contributors}} -

Top Contributors

+

Top Contributors{{if lt (len .Contributors) .Summary.TotalDevs}} {{thousands (len .Contributors)}} of {{thousands .Summary.TotalDevs}}{{end}}

Ranked by commit count. High commit count with low lines may indicate small fixes; low count with high lines may indicate large features. · {{docRef "contributors"}}

@@ -204,7 +204,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{end}} {{if .Hotspots}} -

File Hotspots

+

File Hotspots{{if lt (len .Hotspots) .Summary.TotalFiles}} {{thousands (len .Hotspots)}} of {{thousands .Summary.TotalFiles}}{{end}}

Most frequently changed files. High churn with few devs = knowledge silo. High churn with many devs = shared bottleneck. · {{docRef "hotspots"}}

NameEmailCommitsAdditionsDeletions
@@ -222,7 +222,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{end}} {{if .Directories}} -

Directories

+

Directories{{if lt (len .Directories) .TotalDirectories}} {{thousands (len .Directories)}} of {{thousands .TotalDirectories}}{{end}}

Module-level health. File touches is the sum of per-file commit counts (one commit touching N files contributes N), not distinct commits. Low bus factor = knowledge concentrated in few people. · {{docRef "directories"}}

PathCommitsChurnDevs
@@ -240,7 +240,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{end}} {{if .Extensions}} -

Extensions

+

Extensions{{if lt (len .Extensions) .TotalExtensions}} {{thousands (len .Extensions)}} of {{thousands .TotalExtensions}}{{end}}

File extensions ranked by recent churn — "where is the team spending effort now", not "what exists at HEAD". Cross-read with Directories: a repo with high .yaml recent churn concentrated in one dir is config-as-code; spread across many dirs is config sprawl. · {{docRef "extensions"}}

DirectoryFile TouchesChurnFilesDevsBus Factor
@@ -261,7 +261,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{end}} {{if .ChurnRisk}} -

Churn Risk

+

Churn Risk{{if lt (len .ChurnRisk) .Summary.TotalFiles}} {{thousands (len .ChurnRisk)}} of {{thousands .Summary.TotalFiles}}{{end}}

Files ranked by recent churn. Label classifies context so you can judge action: legacy-hotspot (old code + concentrated + declining) is the urgent alarm; silo suggests knowledge transfer; active-core is young code with a single author (often fine); active is shared healthy work; cold is quiet.{{if (index .ChurnRisk 0).AgePercentile}} Age P__ / Trend P__ under the label show where this file sits in the repo's distribution: age P90 means older than 90% of tracked files; trend P10 means declining more sharply than 90%. Classification boundaries are the P75 age and P25 trend of this dataset (see {{docRef "churn-risk"}}).{{end}}

{{if .ChurnRiskLabelCounts}}
@@ -290,7 +290,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{end}} {{if .BusFactor}} -

Bus Factor Risk

+

Bus Factor Risk{{if lt (len .BusFactor) .TotalBusFactorFiles}} {{thousands (len .BusFactor)}} of {{thousands .TotalBusFactorFiles}}{{end}}

Files with fewest developers owning 80%+ of changes. Bus factor 1 = if that person leaves, nobody else knows the code. · {{docRef "bus-factor"}}

ExtFilesChurnRecent ChurnDevsFirst SeenLast Seen
@@ -337,7 +337,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{end}} {{if .TopCommits}} -

Top Commits

+

Top Commits{{if lt (len .TopCommits) .Summary.TotalCommits}} {{thousands (len .TopCommits)}} of {{thousands .Summary.TotalCommits}}{{end}}

Largest commits by lines changed. Unusually large commits may be imports, generated code, or risky big-bang changes worth reviewing. · {{docRef "top-commits"}}

PathBus FactorTop Devs
{{if and (gt (len .TopCommits) 0) (index .TopCommits 0).Message}}{{end}} diff --git a/internal/stats/extension_test.go b/internal/stats/extension_test.go index 3127b7d..6a5d974 100644 --- a/internal/stats/extension_test.go +++ b/internal/stats/extension_test.go @@ -57,6 +57,62 @@ func TestExtractExtensionPolicy(t *testing.T) { } } +// DirectoryCount and ExtensionCount back the "N of M" header badges +// in the main report. They must match what DirectoryStats and +// ExtensionStats would produce pre-truncation — otherwise "showing 20 +// of 127" lies when the user expands to --top 0 and finds a +// different number. +func TestDirectoryCountAndExtensionCount(t *testing.T) { + ds := &Dataset{ + files: map[string]*fileEntry{ + "cmd/main.go": {devLines: map[string]int64{"a": 1}}, + "cmd/util.go": {devLines: map[string]int64{"a": 1}}, + "internal/a.go": {devLines: map[string]int64{"a": 1}}, + "internal/b.go": {devLines: map[string]int64{"a": 1}}, + "docs/x.md": {devLines: map[string]int64{"a": 1}}, + "README.md": {devLines: map[string]int64{"a": 1}}, // "." bucket + "Makefile": {devLines: map[string]int64{"a": 1}}, // "." bucket, (none) ext + }, + } + // Distinct dirs: "cmd", "internal", "docs", "." → 4 + if n := DirectoryCount(ds); n != 4 { + t.Errorf("DirectoryCount = %d, want 4", n) + } + // Distinct exts: ".go", ".md", "(none)" → 3 + if n := ExtensionCount(ds); n != 3 { + t.Errorf("ExtensionCount = %d, want 3", n) + } + // Consistency invariant: must match len of the stats function's + // full output. If they ever drift, the "N of M" header lies. 
+ if got, want := DirectoryCount(ds), len(DirectoryStats(ds, 0)); got != want { + t.Errorf("DirectoryCount (%d) != len(DirectoryStats(_, 0)) (%d)", got, want) + } + if got, want := ExtensionCount(ds), len(ExtensionStats(ds, 0)); got != want { + t.Errorf("ExtensionCount (%d) != len(ExtensionStats(_, 0)) (%d)", got, want) + } +} + +// BusFactorCount must exclude pure-rename files (empty devLines) — +// those are skipped by BusFactor itself and so cannot be part of its +// denominator. Using Summary.TotalFiles would over-count here. Build +// a dataset with one file that carries dev lines and one that +// doesn't; assert BusFactorCount == 1 and matches the real output. +func TestBusFactorCountExcludesEmptyDevLines(t *testing.T) { + ds := &Dataset{ + UniqueFileCount: 2, // Summary total; includes both + files: map[string]*fileEntry{ + "src/authored.go": {devLines: map[string]int64{"alice@x": 10}}, + "src/pure-rename-only": {devLines: map[string]int64{}}, // no authored lines + }, + } + if got := BusFactorCount(ds); got != 1 { + t.Errorf("BusFactorCount = %d, want 1 (pure-rename file must be excluded)", got) + } + if got, want := BusFactorCount(ds), len(BusFactor(ds, 0)); got != want { + t.Errorf("BusFactorCount (%d) != len(BusFactor(_, 0)) (%d) — header would lie", got, want) + } +} + func TestExtensionStatsAggregation(t *testing.T) { // Hand-built dataset so aggregation is inspectable: two .go files // with distinct devs, one .yaml shared by both, and a Makefile diff --git a/internal/stats/stats.go b/internal/stats/stats.go index ec86eed..a0b985c 100644 --- a/internal/stats/stats.go +++ b/internal/stats/stats.go @@ -376,6 +376,35 @@ func DirectoryStats(ds *Dataset, n int) []DirStat { return result } +// DirectoryCount returns the total number of distinct directories +// across all tracked files — the universe `DirectoryStats` ranks +// before truncation. 
Useful for headers like "Top 20 of 127" so the +// reader sees the size of the repo's dir tree without waiting for +// DirectoryStats(ds, 0) to materialize a slice. Same derivation +// (last "/" split, "." for root-level files) to stay consistent. +func DirectoryCount(ds *Dataset) int { + dirs := make(map[string]struct{}) + for path := range ds.files { + dir := "." + if i := strings.LastIndex(path, "/"); i >= 0 { + dir = path[:i] + } + dirs[dir] = struct{}{} + } + return len(dirs) +} + +// ExtensionCount returns the total number of distinct extension +// buckets ExtensionStats would produce. Same derivation via +// extractExtension so the count matches what ranking would show. +func ExtensionCount(ds *Dataset) int { + exts := make(map[string]struct{}) + for path := range ds.files { + exts[extractExtension(path)] = struct{}{} + } + return len(exts) +} + // ExtensionStat rolls history up per file extension. The historical // lens is the point: "which extension is the team spending effort on" // answers a different question than "which extension exists in the @@ -624,6 +653,22 @@ func ActivityOverTime(ds *Dataset, granularity string) []ActivityBucket { return result } +// BusFactorCount returns the universe size for BusFactor — files +// with at least one dev authoring lines. Cannot use +// Summary.TotalFiles because BusFactor skips files where +// fe.devLines is empty (pure-rename-only files after the ingest +// fix that gates devLines on non-zero churn), which would make a +// "N of TotalFiles" header lie for rename-heavy repos. +func BusFactorCount(ds *Dataset) int { + n := 0 + for _, fe := range ds.files { + if len(fe.devLines) > 0 { + n++ + } + } + return n +} + func BusFactor(ds *Dataset, n int) []BusFactorResult { type devLines struct { email string
SHAAuthorDateLinesFilesMessage