Skip to content

Commit e902d2a

Browse files
committed
Add FS and Disk IO stats
Signed-off-by: Muhammad Shahzeb <mhmdshahzeb1993@gmail.com>
1 parent 38d32a3 commit e902d2a

File tree

7 files changed

+211
-0
lines changed

7 files changed

+211
-0
lines changed

collector/diskstats_linux.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"fmt"
2222
"log/slog"
2323
"os"
24+
"path/filepath"
2425
"strconv"
2526
"strings"
2627

@@ -84,6 +85,8 @@ type diskstatsCollector struct {
8485
filesystemInfoDesc typedFactorDesc
8586
deviceMapperInfoDesc typedFactorDesc
8687
ataDescs map[string]typedFactorDesc
88+
ioErrDesc typedFactorDesc
89+
ioDoneDesc typedFactorDesc
8790
logger *slog.Logger
8891
getUdevDeviceProperties func(uint32, uint32) (udevInfo, error)
8992
}
@@ -256,6 +259,20 @@ func NewDiskstatsCollector(logger *slog.Logger) (Collector, error) {
256259
), valueType: prometheus.GaugeValue,
257260
},
258261
},
262+
ioErrDesc: typedFactorDesc{
263+
desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "ioerr_total"),
264+
"Number of IO commands that completed with an error.",
265+
[]string{"device"},
266+
nil,
267+
), valueType: prometheus.CounterValue,
268+
},
269+
ioDoneDesc: typedFactorDesc{
270+
desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "iodone_total"),
271+
"Number of completed or rejected IO commands.",
272+
[]string{"device"},
273+
nil,
274+
), valueType: prometheus.CounterValue,
275+
},
259276
logger: logger,
260277
}
261278

@@ -372,6 +389,37 @@ func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error {
372389
}
373390
}
374391
}
392+
393+
// Read IO error counts if available
394+
iodoneCnt, err := os.ReadFile(filepath.Join(*sysPath, "block", dev, "device/iodone_cnt"))
395+
if err != nil {
396+
// Skip if file doesn't exist
397+
if !os.IsNotExist(err) {
398+
c.logger.Debug("Error reading IO errors count", "collector", "diskstats", "err", err)
399+
}
400+
} else {
401+
iodone, err := strconv.ParseUint(strings.TrimSpace(string(iodoneCnt)), 10, 64)
402+
if err != nil {
403+
c.logger.Debug("Error parsing iodone count", "collector", "diskstats", "err", err)
404+
} else {
405+
ch <- c.ioDoneDesc.mustNewConstMetric(float64(iodone), dev)
406+
}
407+
}
408+
409+
ioerrCnt, err := os.ReadFile(filepath.Join(*sysPath, "block", dev, "device/ioerr_cnt"))
410+
if err != nil {
411+
// Skip if file doesn't exist
412+
if !os.IsNotExist(err) {
413+
c.logger.Debug("Error reading IO errors count", "collector", "diskstats", "err", err)
414+
}
415+
} else {
416+
ioerr, err := strconv.ParseUint(strings.TrimSpace(string(ioerrCnt)), 10, 64)
417+
if err != nil {
418+
c.logger.Debug("Error parsing ioerr count", "collector", "diskstats", "err", err)
419+
} else {
420+
ch <- c.ioErrDesc.mustNewConstMetric(float64(ioerr), dev)
421+
}
422+
}
375423
}
376424
return nil
377425
}

collector/diskstats_linux_test.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001
179179
node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07
180180
node_disk_io_time_weighted_seconds_total{device="sr0"} 0
181181
node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06
182+
# HELP node_disk_iodone_total Number of completed or rejected IO commands.
183+
# TYPE node_disk_iodone_total counter
184+
node_disk_iodone_total{device="sda"} 307
185+
node_disk_iodone_total{device="sr0"} 2767
186+
# HELP node_disk_ioerr_total Number of IO commands that completed with an error.
187+
# TYPE node_disk_ioerr_total counter
188+
node_disk_ioerr_total{device="sda"} 3
189+
node_disk_ioerr_total{device="sr0"} 29
182190
# HELP node_disk_read_bytes_total The total number of bytes read successfully.
183191
# TYPE node_disk_read_bytes_total counter
184192
node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11

collector/ext4_linux.go

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
// Copyright 2017 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
//go:build !noext4
15+
// +build !noext4
16+
17+
package collector
18+
19+
import (
20+
"fmt"
21+
"log/slog"
22+
23+
"github.com/prometheus/client_golang/prometheus"
24+
"github.com/prometheus/procfs/ext4"
25+
)
26+
27+
// An ext4Collector is a Collector which gathers metrics from ext4 filesystems.
28+
type ext4Collector struct {
29+
fs ext4.FS
30+
logger *slog.Logger
31+
}
32+
33+
func init() {
34+
registerCollector("ext4", defaultEnabled, NewExt4Collector)
35+
}
36+
37+
// NewExt4Collector returns a new Collector exposing ext4 statistics.
38+
func NewExt4Collector(logger *slog.Logger) (Collector, error) {
39+
fs, err := ext4.NewFS(*procPath, *sysPath)
40+
if err != nil {
41+
return nil, fmt.Errorf("failed to open sysfs: %w", err)
42+
}
43+
44+
return &ext4Collector{
45+
fs: fs,
46+
logger: logger,
47+
}, nil
48+
}
49+
50+
// Update implements Collector.
51+
func (c *ext4Collector) Update(ch chan<- prometheus.Metric) error {
52+
stats, err := c.fs.ProcStat()
53+
if err != nil {
54+
return fmt.Errorf("failed to retrieve ext4 stats: %w", err)
55+
}
56+
57+
for _, s := range stats {
58+
c.updateExt4Stats(ch, s)
59+
}
60+
61+
return nil
62+
}
63+
64+
// updateExt4Stats collects statistics for a single ext4 filesystem.
65+
func (c *ext4Collector) updateExt4Stats(ch chan<- prometheus.Metric, s *ext4.Stats) {
66+
const (
67+
subsystem = "ext4"
68+
)
69+
var (
70+
labels = []string{"device"}
71+
)
72+
73+
metrics := []struct {
74+
name string
75+
desc string
76+
value float64
77+
}{
78+
{
79+
name: "errors",
80+
desc: "Number of ext4 filesystem errors.",
81+
value: float64(s.Errors),
82+
},
83+
{
84+
name: "warnings",
85+
desc: "Number of ext4 filesystem warnings.",
86+
value: float64(s.Warnings),
87+
},
88+
{
89+
name: "messages",
90+
desc: "Number of ext4 filesystem log messages.",
91+
value: float64(s.Messages),
92+
},
93+
}
94+
95+
for _, m := range metrics {
96+
desc := prometheus.NewDesc(
97+
prometheus.BuildFQName(namespace, subsystem, m.name),
98+
m.desc,
99+
labels,
100+
nil,
101+
)
102+
103+
ch <- prometheus.MustNewConstMetric(
104+
desc,
105+
prometheus.CounterValue,
106+
m.value,
107+
s.Name,
108+
)
109+
}
110+
}

collector/fixtures/e2e-64k-page-output.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001
554554
node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07
555555
node_disk_io_time_weighted_seconds_total{device="sr0"} 0
556556
node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06
557+
# HELP node_disk_iodone_total Number of completed or rejected IO commands.
558+
# TYPE node_disk_iodone_total counter
559+
node_disk_iodone_total{device="sda"} 307
560+
node_disk_iodone_total{device="sr0"} 2767
561+
# HELP node_disk_ioerr_total Number of IO commands that completed with an error.
562+
# TYPE node_disk_ioerr_total counter
563+
node_disk_ioerr_total{device="sda"} 3
564+
node_disk_ioerr_total{device="sr0"} 29
557565
# HELP node_disk_read_bytes_total The total number of bytes read successfully.
558566
# TYPE node_disk_read_bytes_total counter
559567
node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11
@@ -2971,6 +2979,7 @@ node_scrape_collector_success{collector="dmi"} 1
29712979
node_scrape_collector_success{collector="drbd"} 1
29722980
node_scrape_collector_success{collector="edac"} 1
29732981
node_scrape_collector_success{collector="entropy"} 1
2982+
node_scrape_collector_success{collector="ext4"} 1
29742983
node_scrape_collector_success{collector="fibrechannel"} 1
29752984
node_scrape_collector_success{collector="filefd"} 1
29762985
node_scrape_collector_success{collector="hwmon"} 1

collector/fixtures/e2e-output.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001
576576
node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07
577577
node_disk_io_time_weighted_seconds_total{device="sr0"} 0
578578
node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06
579+
# HELP node_disk_iodone_total Number of completed or rejected IO commands.
580+
# TYPE node_disk_iodone_total counter
581+
node_disk_iodone_total{device="sda"} 307
582+
node_disk_iodone_total{device="sr0"} 2767
583+
# HELP node_disk_ioerr_total Number of IO commands that completed with an error.
584+
# TYPE node_disk_ioerr_total counter
585+
node_disk_ioerr_total{device="sda"} 3
586+
node_disk_ioerr_total{device="sr0"} 29
579587
# HELP node_disk_read_bytes_total The total number of bytes read successfully.
580588
# TYPE node_disk_read_bytes_total counter
581589
node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11
@@ -2993,6 +3001,7 @@ node_scrape_collector_success{collector="dmi"} 1
29933001
node_scrape_collector_success{collector="drbd"} 1
29943002
node_scrape_collector_success{collector="edac"} 1
29953003
node_scrape_collector_success{collector="entropy"} 1
3004+
node_scrape_collector_success{collector="ext4"} 1
29963005
node_scrape_collector_success{collector="fibrechannel"} 1
29973006
node_scrape_collector_success{collector="filefd"} 1
29983007
node_scrape_collector_success{collector="hwmon"} 1

collector/fixtures/sys.ttar

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -803,6 +803,32 @@ Lines: 1
803803
in_sync
804804
Mode: 644
805805
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
806+
Directory: sys/block/sda/device
807+
Mode: 755
808+
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
809+
Path: sys/block/sda/device/iodone_cnt
810+
Lines: 1
811+
307
812+
Mode: 644
813+
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
814+
Path: sys/block/sda/device/ioerr_cnt
815+
Lines: 1
816+
3
817+
Mode: 644
818+
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
819+
Directory: sys/block/sr0/device
820+
Mode: 755
821+
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
822+
Path: sys/block/sr0/device/iodone_cnt
823+
Lines: 1
824+
2767
825+
Mode: 644
826+
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
827+
Path: sys/block/sr0/device/ioerr_cnt
828+
Lines: 1
829+
29
830+
Mode: 644
831+
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
806832
Directory: sys/block/md6/md/rd3
807833
Mode: 755
808834
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

end-to-end-test.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ enabled_collectors=$(cat << COLLECTORS
5050
drbd
5151
edac
5252
entropy
53+
ext4
5354
fibrechannel
5455
filefd
5556
hwmon

0 commit comments

Comments
 (0)