kdeal · kdeal · Jan 4, 2026 · Jan 4, 2026 · coderabbitai · Jan 4, 2026
diff --git a/README.md b/README.md
@@ -16,3 +16,6 @@ Rust command-line tool that streamlines and automates pieces of a personal workf
 
 ## wkfl-app
 Tauri desktop application scaffold that complements the wkfl tooling with a GUI experiment.
+
+## mimir_utils
+Go-based command-line utilities for exploring Mimir and Prometheus TSDB blocks.
diff --git a/mimir_utils/cmd/mimir_utils/main.go b/mimir_utils/cmd/mimir_utils/main.go
@@ -0,0 +1,18 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"os"
+
+	"mimir_utils/internal/cli"
+)
+
+func main() {
+	flag.Usage = cli.RootUsage
+
+	if err := cli.Execute(os.Args[1:]); err != nil {
+		fmt.Fprintln(os.Stderr, "error:", err)
+		os.Exit(1)
+	}
+}
diff --git a/mimir_utils/go.mod b/mimir_utils/go.mod
@@ -0,0 +1,5 @@
+module mimir_utils
+
+go 1.25.1
+
+require github.com/prometheus/prometheus/v3 v3.8.1
diff --git a/mimir_utils/internal/analyzer/top_metrics.go b/mimir_utils/internal/analyzer/top_metrics.go
@@ -0,0 +1,146 @@
+package analyzer
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+
+	"github.com/prometheus/prometheus/v3/model/labels"
+	"github.com/prometheus/prometheus/v3/tsdb/chunkenc"
+	"github.com/prometheus/prometheus/v3/tsdb/chunks"
+	"github.com/prometheus/prometheus/v3/tsdb/index"
+)
+
+// MetricStat captures byte usage information for a metric across a set of blocks.
+type MetricStat struct {
+	Name   string
+	Bytes  int64
+	Series int
+	Chunks int
+}
+
+// TopNMetrics walks the provided directory for TSDB blocks and returns the top metrics by bytes used.
+func TopNMetrics(root string, limit int) ([]MetricStat, error) {
+	aggregate := map[string]*MetricStat{}
+
+	blockDirs, err := findBlockDirs(root)
+	if err != nil {
+		return nil, err
+	}
+
+	for _, blockDir := range blockDirs {
+		if err := accumulateBlock(blockDir, aggregate); err != nil {
+			return nil, fmt.Errorf("block %s: %w", blockDir, err)
+		}
+	}
+
+	stats := make([]MetricStat, 0, len(aggregate))
+	for _, stat := range aggregate {
+		stats = append(stats, *stat)
+	}
+
+	sort.Slice(stats, func(i, j int) bool {
+		if stats[i].Bytes == stats[j].Bytes {
+			return stats[i].Name < stats[j].Name
+		}
+		return stats[i].Bytes > stats[j].Bytes
+	})
+
+	if limit > 0 && len(stats) > limit {
+		stats = stats[:limit]
+	}
+
+	return stats, nil
+}
+
+func findBlockDirs(root string) ([]string, error) {
+	entries, err := os.ReadDir(root)
+	if err != nil {
+		return nil, err
+	}
+
+	var blocks []string
+	for _, entry := range entries {
+		if !entry.IsDir() {
+			continue
+		}
+		dirPath := filepath.Join(root, entry.Name())
+		if _, err := os.Stat(filepath.Join(dirPath, "meta.json")); err == nil {
+			blocks = append(blocks, dirPath)
+		}
+	}
+
+	if len(blocks) == 0 {
+		return nil, fmt.Errorf("no TSDB blocks found in %s", root)
+	}
+
+	return blocks, nil
+}
+
+func accumulateBlock(blockDir string, aggregate map[string]*MetricStat) error {
+	indexPath := filepath.Join(blockDir, "index")
+	chunkDir := filepath.Join(blockDir, "chunks")
+
+	indexReader, err := index.NewFileReader(indexPath)
+	if err != nil {
+		return fmt.Errorf("open index: %w", err)
+	}
+	defer indexReader.Close()
+
+	pool := chunkenc.NewPool()
+	chunkReader, err := chunks.NewDirReader(chunkDir, pool)
+	if err != nil {
+		return fmt.Errorf("open chunks: %w", err)
+	}
+	defer chunkReader.Close()
+
+	name, value := index.AllPostingsKey()
+	postings, err := indexReader.Postings(name, value)
+	if err != nil {
+		return fmt.Errorf("load postings: %w", err)
+	}
+
+	for postings.Next() {
+		ref := postings.At()
+		var lset labels.Labels
+		var metas []chunks.Meta
+
+		if err := indexReader.Series(ref, &lset, &metas); err != nil {
+			return fmt.Errorf("read series %d: %w", ref, err)
+		}
+
+		metricName := lset.Get("__name__")
+		if metricName == "" {
+			metricName = "(no_metric_name)"
+		}
+
+		var seriesBytes int64
+		for _, meta := range metas {
+			chk, err := chunkReader.Chunk(meta.Ref)
+			if err != nil {
+				if strings.Contains(err.Error(), "reference") {
+					return fmt.Errorf("chunk %d: %w", meta.Ref, err)
+				}
+				return fmt.Errorf("read chunk %d: %w", meta.Ref, err)
+			}
+			seriesBytes += int64(len(chk.Bytes()))
+		}
+
+		stat, ok := aggregate[metricName]
+		if !ok {
+			stat = &MetricStat{Name: metricName}
+			aggregate[metricName] = stat
+		}
+		stat.Bytes += seriesBytes
+		stat.Series++
+		stat.Chunks += len(metas)
+	}
+
+	if err := postings.Err(); err != nil {
+		return fmt.Errorf("postings iteration: %w", err)
+	}
+
+	return nil
+}
diff --git a/mimir_utils/internal/cli/main.go b/mimir_utils/internal/cli/main.go
@@ -0,0 +1,39 @@
+package cli
+
+import (
+	"errors"
+	"flag"
+	"fmt"
+)
+
+// RootUsage prints a helpful summary of the available subcommands.
+func RootUsage() {
+	fmt.Fprintf(flag.CommandLine.Output(), `mimir_utils is a collection of small tools.
+
+Usage:
+  mimir_utils <subcommand> [options]
+
+Available subcommands:
+  top-metrics    Analyze TSDB blocks and print the metrics using the most bytes.
+
+`)
+}
+
+// Execute parses the subcommand and invokes it with the provided arguments.
+func Execute(args []string) error {
+	if len(args) == 0 {
+		RootUsage()
+		return errors.New("no subcommand specified")
+	}
+
+	switch args[0] {
+	case "top-metrics":
+		return runTopMetrics(args[1:])
+	case "help", "-h", "--help":
+		RootUsage()
+		return nil
+	default:
+		RootUsage()
+		return fmt.Errorf("unknown subcommand %q", args[0])
+	}
+}
diff --git a/mimir_utils/internal/cli/top_metrics.go b/mimir_utils/internal/cli/top_metrics.go
@@ -0,0 +1,68 @@
+package cli
+
+import (
+	"flag"
+	"fmt"
+	"math"
+	"os"
+	"text/tabwriter"
+
+	"mimir_utils/internal/analyzer"
+)
+
+func runTopMetrics(args []string) error {
+	fs := flag.NewFlagSet("top-metrics", flag.ContinueOnError)
+	dir := fs.String("dir", "", "Directory containing TSDB blocks")
+	limit := fs.Int("limit", 10, "Number of metrics to display (0 for all)")
+
+	fs.Usage = func() {
+		fmt.Fprintf(fs.Output(), `Usage: mimir_utils top-metrics [options]
+
+Options:
+`)
+		fs.PrintDefaults()
+	}
+
+	if err := fs.Parse(args); err != nil {
+		return err
+	}
+
+	if *dir == "" {
+		fs.Usage()
+		return fmt.Errorf("the -dir flag is required")
+	}
+
+	stats, err := analyzer.TopNMetrics(*dir, *limit)
+	if err != nil {
+		return err
+	}
+
+	if len(stats) == 0 {
+		fmt.Println("No metrics found.")
+		return nil
+	}
+
+	w := tabwriter.NewWriter(os.Stdout, 0, 2, 2, ' ', 0)
+	fmt.Fprintln(w, "METRIC\tBYTES\tSERIES\tCHUNKS")
+	for _, stat := range stats {
+		fmt.Fprintf(w, "%s\t%s\t%d\t%d\n", stat.Name, humanReadableBytes(stat.Bytes), stat.Series, stat.Chunks)
+	}
+	return w.Flush()
+}
+
+func humanReadableBytes(bytes int64) string {
+	const unit = 1024.0
+	units := []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB"}
+	val := float64(bytes)
+	exp := 0
+
+	for val >= unit && exp < len(units)-1 {
+		val /= unit
+		exp++
+	}
+
+	if val >= 10 || exp == 0 {
+		return fmt.Sprintf("%.0f %s", val, units[exp])
+	}
+	return fmt.Sprintf("%.1f %s", val, units[exp])
+}
diff --git a/mimir_utils/internal/cli/top_metrics_test.go b/mimir_utils/internal/cli/top_metrics_test.go
@@ -0,0 +1,29 @@
+package cli
+
+import "testing"
+
+func TestHumanReadableBytes(t *testing.T) {
+	tests := []struct {
+		name string
+		in   int64
+		out  string
+	}{
+		{"zero bytes", 0, "0 B"},
+		{"single byte", 1, "1 B"},
+		{"just below kibibyte", 1023, "1023 B"},
+		{"one kibibyte", 1024, "1.0 KiB"},
+		{"fractional kibibyte", 1536, "1.5 KiB"},
+		{"ten kibibytes", 10 * 1024, "10 KiB"},
+		{"one mebibyte", 1024 * 1024, "1.0 MiB"},
+		{"many gibibytes", 25 * 1024 * 1024 * 1024, "25 GiB"},
+		{"overflow past units", 1 << 62, "4 PiB"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := humanReadableBytes(tt.in); got != tt.out {
+				t.Fatalf("humanReadableBytes(%d) = %q, want %q", tt.in, got, tt.out)
+			}
+		})
+	}
+}