-
Notifications
You must be signed in to change notification settings - Fork 0
Add mimir_utils TSDB metric analyzer CLI #227
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| package main | ||
|
|
||
| import ( | ||
| "flag" | ||
| "fmt" | ||
| "os" | ||
|
|
||
| "mimir_utils/internal/cli" | ||
| ) | ||
|
|
||
| func main() { | ||
| flag.Usage = cli.RootUsage | ||
|
|
||
| if err := cli.Execute(os.Args[1:]); err != nil { | ||
| fmt.Fprintln(os.Stderr, "error:", err) | ||
| os.Exit(1) | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| module mimir_utils | ||
|
|
||
| go 1.25.1 | ||
|
|
||
| require github.com/prometheus/prometheus/v3 v3.8.1 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,146 @@ | ||
| package analyzer | ||
|
|
||
| import ( | ||
| "fmt" | ||
| "os" | ||
| "path/filepath" | ||
| "sort" | ||
| "strings" | ||
|
|
||
| "github.com/prometheus/prometheus/v3/model/labels" | ||
| "github.com/prometheus/prometheus/v3/tsdb/chunkenc" | ||
| "github.com/prometheus/prometheus/v3/tsdb/chunks" | ||
| "github.com/prometheus/prometheus/v3/tsdb/index" | ||
| ) | ||
|
|
||
| // MetricStat captures byte usage information for a metric across a set of blocks. | ||
| type MetricStat struct { | ||
| Name string | ||
| Bytes int64 | ||
| Series int | ||
| Chunks int | ||
| } | ||
|
|
||
| // TopNMetrics walks the provided directory for TSDB blocks and returns the top metrics by bytes used. | ||
| func TopNMetrics(root string, limit int) ([]MetricStat, error) { | ||
| aggregate := map[string]*MetricStat{} | ||
|
|
||
| blockDirs, err := findBlockDirs(root) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
|
|
||
| for _, blockDir := range blockDirs { | ||
| if err := accumulateBlock(blockDir, aggregate); err != nil { | ||
| return nil, fmt.Errorf("block %s: %w", blockDir, err) | ||
| } | ||
| } | ||
|
|
||
| stats := make([]MetricStat, 0, len(aggregate)) | ||
| for _, stat := range aggregate { | ||
| stats = append(stats, *stat) | ||
| } | ||
|
|
||
| sort.Slice(stats, func(i, j int) bool { | ||
| if stats[i].Bytes == stats[j].Bytes { | ||
| return stats[i].Name < stats[j].Name | ||
| } | ||
| return stats[i].Bytes > stats[j].Bytes | ||
| }) | ||
|
|
||
| if limit > 0 && len(stats) > limit { | ||
| stats = stats[:limit] | ||
| } | ||
|
|
||
| return stats, nil | ||
| } | ||
|
|
||
| func findBlockDirs(root string) ([]string, error) { | ||
| entries, err := os.ReadDir(root) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
|
|
||
| var blocks []string | ||
| for _, entry := range entries { | ||
| if !entry.IsDir() { | ||
| continue | ||
| } | ||
| dirPath := filepath.Join(root, entry.Name()) | ||
| if _, err := os.Stat(filepath.Join(dirPath, "meta.json")); err == nil { | ||
| blocks = append(blocks, dirPath) | ||
| } | ||
| } | ||
|
|
||
| if len(blocks) == 0 { | ||
| return nil, fmt.Errorf("no TSDB blocks found in %s", root) | ||
| } | ||
|
|
||
| return blocks, nil | ||
| } | ||
|
|
||
| func accumulateBlock(blockDir string, aggregate map[string]*MetricStat) error { | ||
| indexPath := filepath.Join(blockDir, "index") | ||
| chunkDir := filepath.Join(blockDir, "chunks") | ||
|
|
||
| indexReader, err := index.NewFileReader(indexPath) | ||
| if err != nil { | ||
| return fmt.Errorf("open index: %w", err) | ||
| } | ||
| defer indexReader.Close() | ||
|
|
||
| pool := chunkenc.NewPool() | ||
| chunkReader, err := chunks.NewDirReader(chunkDir, pool) | ||
| if err != nil { | ||
| return fmt.Errorf("open chunks: %w", err) | ||
| } | ||
| defer chunkReader.Close() | ||
|
|
||
| name, value := index.AllPostingsKey() | ||
| postings, err := indexReader.Postings(name, value) | ||
| if err != nil { | ||
| return fmt.Errorf("load postings: %w", err) | ||
| } | ||
|
|
||
| for postings.Next() { | ||
| ref := postings.At() | ||
| var lset labels.Labels | ||
| var metas []chunks.Meta | ||
|
|
||
| if err := indexReader.Series(ref, &lset, &metas); err != nil { | ||
| return fmt.Errorf("read series %d: %w", ref, err) | ||
| } | ||
|
|
||
| metricName := lset.Get("__name__") | ||
| if metricName == "" { | ||
| metricName = "(no_metric_name)" | ||
| } | ||
|
|
||
| var seriesBytes int64 | ||
| for _, meta := range metas { | ||
| chk, err := chunkReader.Chunk(meta.Ref) | ||
| if err != nil { | ||
| if strings.Contains(err.Error(), "reference") { | ||
| return fmt.Errorf("chunk %d: %w", meta.Ref, err) | ||
| } | ||
| return fmt.Errorf("read chunk %d: %w", meta.Ref, err) | ||
| } | ||
| seriesBytes += int64(len(chk.Bytes())) | ||
| } | ||
|
|
||
| stat, ok := aggregate[metricName] | ||
| if !ok { | ||
| stat = &MetricStat{Name: metricName} | ||
| aggregate[metricName] = stat | ||
| } | ||
| stat.Bytes += seriesBytes | ||
| stat.Series++ | ||
| stat.Chunks += len(metas) | ||
| } | ||
|
|
||
| if err := postings.Err(); err != nil { | ||
| return fmt.Errorf("postings iteration: %w", err) | ||
| } | ||
|
|
||
| return nil | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,39 @@ | ||
| package cli | ||
|
|
||
| import ( | ||
| "errors" | ||
| "flag" | ||
| "fmt" | ||
| ) | ||
|
|
||
| // RootUsage prints a helpful summary of the available subcommands. | ||
| func RootUsage() { | ||
| fmt.Fprintf(flag.CommandLine.Output(), `mimir_utils is a collection of small tools. | ||
|
|
||
| Usage: | ||
| mimir_utils <subcommand> [options] | ||
|
|
||
| Available subcommands: | ||
| top-metrics Analyze TSDB blocks and print the metrics using the most bytes. | ||
|
|
||
| `) | ||
| } | ||
|
|
||
| // Execute parses the subcommand and invokes it with the provided arguments. | ||
| func Execute(args []string) error { | ||
| if len(args) == 0 { | ||
| RootUsage() | ||
| return errors.New("no subcommand specified") | ||
| } | ||
|
|
||
| switch args[0] { | ||
| case "top-metrics": | ||
| return runTopMetrics(args[1:]) | ||
| case "help", "-h", "--help": | ||
| RootUsage() | ||
| return nil | ||
| default: | ||
| RootUsage() | ||
| return fmt.Errorf("unknown subcommand %q", args[0]) | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,68 @@ | ||
| package cli | ||
|
|
||
| import ( | ||
| "flag" | ||
| "fmt" | ||
| "math" | ||
| "os" | ||
| "text/tabwriter" | ||
|
|
||
| "mimir_utils/internal/analyzer" | ||
| ) | ||
|
|
||
| func runTopMetrics(args []string) error { | ||
| fs := flag.NewFlagSet("top-metrics", flag.ContinueOnError) | ||
| dir := fs.String("dir", "", "Directory containing TSDB blocks") | ||
| limit := fs.Int("limit", 10, "Number of metrics to display (0 for all)") | ||
|
|
||
| fs.Usage = func() { | ||
| fmt.Fprintf(fs.Output(), `Usage: mimir_utils top-metrics [options] | ||
|
|
||
| Options: | ||
| `) | ||
| fs.PrintDefaults() | ||
| } | ||
|
|
||
| if err := fs.Parse(args); err != nil { | ||
| return err | ||
| } | ||
|
|
||
| if *dir == "" { | ||
| fs.Usage() | ||
| return fmt.Errorf("the -dir flag is required") | ||
| } | ||
|
|
||
| stats, err := analyzer.TopNMetrics(*dir, *limit) | ||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
||
| if len(stats) == 0 { | ||
| fmt.Println("No metrics found.") | ||
| return nil | ||
| } | ||
|
|
||
| w := tabwriter.NewWriter(os.Stdout, 0, 2, 2, ' ', 0) | ||
| fmt.Fprintln(w, "METRIC\tBYTES\tSERIES\tCHUNKS") | ||
| for _, stat := range stats { | ||
| fmt.Fprintf(w, "%s\t%s\t%d\t%d\n", stat.Name, humanReadableBytes(stat.Bytes), stat.Series, stat.Chunks) | ||
| } | ||
| return w.Flush() | ||
| } | ||
|
|
||
| func humanReadableBytes(bytes int64) string { | ||
| const unit = 1024.0 | ||
| units := []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB"} | ||
| val := float64(bytes) | ||
| exp := 0 | ||
|
|
||
| for val >= unit && exp < len(units)-1 { | ||
| val /= unit | ||
| exp++ | ||
| } | ||
|
|
||
| if val >= 10 || exp == 0 { | ||
| return fmt.Sprintf("%.0f %s", val, units[exp]) | ||
| } | ||
| return fmt.Sprintf("%.1f %s", val, units[exp]) | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,29 @@ | ||
| package cli | ||
|
|
||
| import "testing" | ||
|
|
||
| func TestHumanReadableBytes(t *testing.T) { | ||
| tests := []struct { | ||
| name string | ||
| in int64 | ||
| out string | ||
| }{ | ||
| {"zero bytes", 0, "0 B"}, | ||
| {"single byte", 1, "1 B"}, | ||
| {"just below kibibyte", 1023, "1023 B"}, | ||
| {"one kibibyte", 1024, "1.0 KiB"}, | ||
| {"fractional kibibyte", 1536, "1.5 KiB"}, | ||
| {"ten kibibytes", 10 * 1024, "10 KiB"}, | ||
| {"one mebibyte", 1024 * 1024, "1.0 MiB"}, | ||
| {"many gibibytes", 25 * 1024 * 1024 * 1024, "25 GiB"}, | ||
| {"overflow past units", 1 << 62, "4 PiB"}, | ||
|
Comment on lines
+18
to
+19
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The overflow case feeds 1<<62 bytes (≈4 EiB) but expects the formatter to return "4 PiB". humanReadableBytes actually returns 4096 PiB for that input, so this test will fail and the expectation underreports the size by 1024×. Either extend the formatter to emit EiB (e.g., "4 EiB") or adjust the test input to match the PiB expectation. Useful? React with 👍 / 👎. |
||
| } | ||
|
|
||
| for _, tt := range tests { | ||
| t.Run(tt.name, func(t *testing.T) { | ||
| if got := humanReadableBytes(tt.in); got != tt.out { | ||
| t.Fatalf("humanReadableBytes(%d) = %q, want %q", tt.in, got, tt.out) | ||
| } | ||
| }) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
Repository: kdeal/misc
Length of output: 36
🌐 Web query:
Prometheus prometheus/prometheus v3.8.1 release💡 Result:
Prometheus v3.8.1 was released on December 16, 2025. It contains a bugfix for the Remote Write receiver (fixes wrong response headers for v1 flow that could cause false partial error logs/metrics) (PR/issue #17683). Binaries and checksums are available on the official download and GitHub release pages. (github.com)
Citations:
🌐 Web query:
Go 1.25.1 version release💡 Result:
Go 1.25.1 was released on September 3, 2025. It’s a minor/patch release that includes security fixes (notably a net/http issue) and several bug fixes to the go command and packages such as net, os, os/exec, and testing/synctest. [1][2]
Sources:
Add go.sum file to the mimir_utils module.
The Prometheus v3.8.1 dependency is valid and corrects the prior issue with v0.56.0. However, the
go.sumfile is still missing from the repository. Go modules require bothgo.modandgo.sumto ensure reproducible builds and verify dependency checksums. Generate and commit thego.sumfile usinggo mod tidybefore merging.🤖 Prompt for AI Agents