diff --git a/go.mod b/go.mod index 54ee2b1..0fe117a 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,6 @@ require ( github.com/sirupsen/logrus v1.9.3 github.com/vishvananda/netlink v1.3.0 github.com/wobcom/transceiver-exporter v1.5.1 - gopkg.in/yaml.v2 v2.4.0 ) require ( diff --git a/hwmon-configurations/hwmon_as4610-54t.yml b/hwmon-configurations/hwmon_as4610-54t.yml deleted file mode 100644 index 2c71b75..0000000 --- a/hwmon-configurations/hwmon_as4610-54t.yml +++ /dev/null @@ -1,21 +0,0 @@ -# cumulus-exporter hwmon.yml configuration file -# for EdgeCore Networks AS4610-54T -sensors: - - description: Fan 1 RPM speed - type: fan - driver_hwmon: fan1 - driver_path: /sys/class/hwmon/hwmon0/device - - description: Fan 2 RPM speed - type: fan - driver_hwmon: fan2 - driver_path: /sys/class/hwmon/hwmon0/device - - description: PSU 1 Power state - type: raw - driver_path: /sys/class/hwmon/hwmon0/device/psu_pwr1_all_ok - - description: PSU 2 Power state - type: raw - driver_path: /sys/class/hwmon/hwmon0/device/psu_pwr2_all_ok - - description: Ambient temperature - type: temp - driver_hwmon: temp1 - driver_path: /sys/class/hwmon/hwmon1 diff --git a/hwmon-configurations/hwmon_sn2410.yml b/hwmon-configurations/hwmon_sn2410.yml deleted file mode 100644 index 10d0c48..0000000 --- a/hwmon-configurations/hwmon_sn2410.yml +++ /dev/null @@ -1,82 +0,0 @@ -# cumulus-exporter hwmon.yml configuration file -# for Mellanox SN2410 CB2RC (MSN2410-CB2RC) -sensors: - - description: Fan 1 RPM speed - driver_hwmon: fan8 - type: fan - driver_path: /sys/class/hwmon/hwmon10 - - description: Fan 2 RPM speed - driver_hwmon: fan7 - type: fan - driver_path: /sys/class/hwmon/hwmon10 - - description: Fan Tray 2 Fan 3 RPM speed - driver_hwmon: fan6 - type: fan - driver_path: /sys/class/hwmon/hwmon10 - - description: Fan Tray 2 Fan 4 RPM speed - driver_hwmon: fan5 - type: fan - driver_path: /sys/class/hwmon/hwmon10 - - description: Fan Tray 3 Fan 5 RPM speed - driver_hwmon: fan4 - type: fan - driver_path: /sys/class/hwmon/hwmon10 - - description: Fan Tray 3 Fan 6 RPM speed - driver_hwmon: fan3 - type: fan - driver_path: /sys/class/hwmon/hwmon10 - - description: Fan Tray 4 Fan 7 RPM speed - driver_hwmon: fan2 - type: fan - driver_path: /sys/class/hwmon/hwmon10 - - description: Fan Tray 4 Fan 8 RPM speed - driver_hwmon: fan1 - type: fan - driver_path: /sys/class/hwmon/hwmon10 - - description: PSU 1 Power status - type: raw - driver_path: /sys/class/hwmon/hwmon0/pwr1 - - description: PSU 2 Power status - type: raw - driver_path: /sys/class/hwmon/hwmon0/pwr2 - - description: PSU 1 Fan 1 RPM speed - type: fan - driver_hwmon: fan1 - driver_path: /sys/class/hwmon/hwmon2 - - description: PSU 2 Fan 1 RPM speed - type: fan - driver_hwmon: fan1 - driver_path: /sys/class/hwmon/hwmon3 - - description: PSU 1 Temperature - type: temp - driver_hwmon: temp1 - driver_path: /sys/class/hwmon/hwmon2 - - description: PSU 2 Temperature - type: temp - driver_hwmon: temp1 - driver_path: /sys/class/hwmon/hwmon3 - - description: CPU Package temperature - type: temp - driver_hwmon: temp1 - driver_path: /sys/class/hwmon/hwmon4 - - description: CPU Core 0 temperature - type: temp - driver_hwmon: temp2 - driver_path: /sys/class/hwmon/hwmon4 - - description: CPU Core 1 temperature - type: temp - driver_hwmon: temp3 - driver_path: /sys/class/hwmon/hwmon4 - - description: Port ambient temperature - type: temp - driver_hwmon: temp1 - driver_path: /sys/class/hwmon/hwmon8 - - description: Main board ambient temperature - type: temp - driver_hwmon: temp1 - driver_path: /sys/class/hwmon/hwmon9 - - description: Asic temperature - type: temp - driver_hwmon: temp1 - driver_path: /sys/class/hwmon/hwmon10 - diff --git a/hwmon-configurations/hwmon_sn2700.yml b/hwmon-configurations/hwmon_sn2700.yml deleted file mode 100644 index ca7ed38..0000000 --- a/hwmon-configurations/hwmon_sn2700.yml +++ /dev/null @@ -1,84 +0,0 @@ -# cumulus-exporter hwmon.yml configuration file -# for Mellanox SN2700 CS2RC (MSN2700-CS2RC) -sensors: - - description: Fan Tray 1 Fan 1 RPM speed - driver_hwmon: fan7 - type: fan - driver_path: /sys/class/hwmon/hwmon12 - - description: Fan Tray 1 Fan 2 RPM speed - driver_hwmon: fan8 - type: fan - driver_path: /sys/class/hwmon/hwmon12 - - description: Fan Tray 2 Fan 1 RPM speed - driver_hwmon: fan5 - type: fan - driver_path: /sys/class/hwmon/hwmon12 - - description: Fan Tray 2 Fan 2 RPM speed - driver_hwmon: fan6 - type: fan - driver_path: /sys/class/hwmon/hwmon12 - - description: Fan Tray 3 Fan 1 RPM speed - driver_hwmon: fan3 - type: fan - driver_path: /sys/class/hwmon/hwmon12 - - description: Fan Tray 3 Fan 2 RPM speed - driver_hwmon: fan4 - type: fan - driver_path: /sys/class/hwmon/hwmon12 - - description: Fan Tray 4 Fan 1 RPM speed - driver_hwmon: fan1 - type: fan - driver_path: /sys/class/hwmon/hwmon12 - - description: Fan Tray 4 Fan 2 RPM speed - driver_hwmon: fan2 - type: fan - driver_path: /sys/class/hwmon/hwmon12 - - description: PSU 1 Power status - type: raw - driver_path: /sys/class/hwmon/hwmon1/pwr1 - - description: PSU 2 Power status - type: raw - driver_path: /sys/class/hwmon/hwmon1/pwr2 - - description: PSU 1 Fan 1 RPM speed - type: fan - driver_hwmon: fan1 - driver_path: /sys/class/hwmon/hwmon3 - - description: PSU 2 Fan 1 RPM speed - type: fan - driver_hwmon: fan1 - driver_path: /sys/class/hwmon/hwmon4 - - description: PSU 1 Temperature - type: temp - driver_hwmon: temp1 - driver_path: /sys/class/hwmon/hwmon3 - - description: PSU 2 Temperature - type: temp - driver_hwmon: temp1 - driver_path: /sys/class/hwmon/hwmon4 - - description: CPU Package temperature - type: temp - driver_hwmon: temp1 - driver_path: /sys/class/hwmon/hwmon5 - - description: CPU Core 0 temperature - type: temp - driver_hwmon: temp2 - driver_path: /sys/class/hwmon/hwmon5 - - description: CPU Core 1 temperature - type: temp - driver_hwmon: temp3 - driver_path: /sys/class/hwmon/hwmon5 - - description: Port ambient temperature - type: temp - driver_hwmon: temp1 - driver_path: /sys/class/hwmon/hwmon10 - - description: Main board ambient temperature - type: temp - driver_hwmon: temp1 - driver_path: /sys/class/hwmon/hwmon11 - - description: Asic temperature - type: temp - driver_hwmon: temp1 - driver_path: /sys/class/hwmon/hwmon12 - - - diff --git a/hwmon/collector.go b/hwmon/collector.go index 15eef45..2a1feee 100644 --- a/hwmon/collector.go +++ b/hwmon/collector.go @@ -2,8 +2,7 @@ package hwmon import ( "github.com/prometheus/client_golang/prometheus" - "gitlab.com/wobcom/cumulus-exporter/util" - "sync" + "os/exec" ) const prefix = "hwmon_" @@ -14,69 +13,58 @@ var ( voltageMaxDesc *prometheus.Desc voltageCriticalMaxDesc *prometheus.Desc voltageDesc *prometheus.Desc - voltageLabelInfoDesc *prometheus.Desc - voltageSensorEnabledDesc *prometheus.Desc fanMinDesc *prometheus.Desc fanMaxDesc *prometheus.Desc fanDesc *prometheus.Desc - fanDivisorDesc *prometheus.Desc fanPulsesDesc *prometheus.Desc fanTargetDesc *prometheus.Desc - fanLabelsDesc *prometheus.Desc - fanSensorEnabledDesc *prometheus.Desc - temperatureTypeDesc *prometheus.Desc - temperatureMaxDesc *prometheus.Desc temperatureMinDesc *prometheus.Desc - temperatureMaxHysteresisDesc *prometheus.Desc + temperatureMaxDesc *prometheus.Desc temperatureMinHysteresisDesc *prometheus.Desc + temperatureMaxHysteresisDesc *prometheus.Desc temperatureDesc *prometheus.Desc + temperatureCriticalMinDesc *prometheus.Desc temperatureCriticalMaxDesc *prometheus.Desc + temperatureCriticalMinHysteresisDesc *prometheus.Desc temperatureCriticalMaxHysteresisDesc *prometheus.Desc temperatureEmergencyMaxDesc *prometheus.Desc temperatureEmergencyMaxHysteresisDesc *prometheus.Desc - temperatureCriticalMinDesc *prometheus.Desc - temperatureCriticalMinHysteresisDesc *prometheus.Desc temperatureOffsetDesc *prometheus.Desc - temperatureLabelDesc *prometheus.Desc - temperatureSensorEnabledDesc *prometheus.Desc - currentMaxDesc *prometheus.Desc currentMinDesc *prometheus.Desc + currentMaxDesc *prometheus.Desc currentCriticalMinValue *prometheus.Desc currentCriticalMaxValue *prometheus.Desc currentDesc *prometheus.Desc - currentSensorEnabledDesc *prometheus.Desc - powerPresent *prometheus.Desc - powerAllOk *prometheus.Desc + powerWatt *prometheus.Desc + powerPresent *prometheus.Desc + powerAllOk *prometheus.Desc + powerAllOkPrev *prometheus.Desc rawValueDesc *prometheus.Desc ) func init() { - sensorLabels := []string{"driver_path", "hw_mon", "description"} - channelLabels := []string{"driver_path", "hw_mon", "description", "channel"} + // we're using these labels for legacy reasons, in order to not + // break existing dashboards. these names do not reflect the + // description of the value. FIXME copy into other labels & deprecate(?) + sensorLabels := []string{"hw_mon", "description"} voltageMinDesc = prometheus.NewDesc(prefix+"voltage_min_volts", "Voltage min value. Unit: Volts", sensorLabels, nil) voltageCriticalMinDesc = prometheus.NewDesc(prefix+"voltage_critical_min_volts", "Voltage critical min value. Unit: Volts", sensorLabels, nil) voltageMaxDesc = prometheus.NewDesc(prefix+"voltage_max_volts", "Voltage max value. Unit: Volts", sensorLabels, nil) voltageCriticalMaxDesc = prometheus.NewDesc(prefix+"voltage_critical_max_volts", "Voltage critical max value. Unit: Volts", sensorLabels, nil) voltageDesc = prometheus.NewDesc(prefix+"voltage_volts", "Voltage input value. Unit: Volts", sensorLabels, nil) - voltageLabelInfoDesc = prometheus.NewDesc(prefix+"voltage_info", "Suggested voltage channel label.", channelLabels, nil) - voltageSensorEnabledDesc = prometheus.NewDesc(prefix+"voltage_sensor_enabled_bool", "1 = sensor enabled, 0 = sensor disabled", sensorLabels, nil) fanMinDesc = prometheus.NewDesc(prefix+"fan_min_rpm", "Fan minimum value. Unit: revolution/min", sensorLabels, nil) fanMaxDesc = prometheus.NewDesc(prefix+"fan_max_rpm", "Fan maximum value. Unit: revolution/min", sensorLabels, nil) fanDesc = prometheus.NewDesc(prefix+"fan_rpm", "Fan input value. Unit: revolution/min", sensorLabels, nil) - fanDivisorDesc = prometheus.NewDesc(prefix+"fan_divisor", "Fan divisor. Integer value in powers of 2 (1, 2, 4, 8, 16, 32, 64, 128).", sensorLabels, nil) fanPulsesDesc = prometheus.NewDesc(prefix+"fan_pulses", "Number of tachometer pulses per fan revolution", sensorLabels, nil) fanTargetDesc = prometheus.NewDesc(prefix+"fan_target_rpm", "Desired fan speed. Unit: revolution/min", sensorLabels, nil) - fanLabelsDesc = prometheus.NewDesc(prefix+"fan_info", "Suggested fan channel label", channelLabels, nil) - fanSensorEnabledDesc = prometheus.NewDesc(prefix+"fan_sensor_enabled_bool", "1 = sensor enabled, 0 = sensor disabled", sensorLabels, nil) - temperatureTypeDesc = prometheus.NewDesc(prefix+"temperature_sensor_type_selection_info", "Sensor type selection.", append(sensorLabels, "sensor_type"), nil) temperatureMaxDesc = prometheus.NewDesc(prefix+"temperature_max_celsius", "Temperature max value. Unit: degree Celsius", sensorLabels, nil) temperatureMinDesc = prometheus.NewDesc(prefix+"temperature_min_celsius", "Temperature min value. Unit: degree Celsius", sensorLabels, nil) temperatureMaxHysteresisDesc = prometheus.NewDesc(prefix+"temperature_max_hysteresis_celsius", "Temperature hysteresis value for max limit. Unit: degree Celsius", sensorLabels, nil) @@ -89,32 +77,27 @@ func init() { temperatureCriticalMinDesc = prometheus.NewDesc(prefix+"temperature_critical_min_celsius", "Temperature criticial min value, typically lower than corresponding temp_min values. Unit: degree Celsius", sensorLabels, nil) temperatureCriticalMinHysteresisDesc = prometheus.NewDesc(prefix+"temperature_critical_min_hysteresis_celsius", "Temperature hysteresis value for critical min limit. Unit: degree Celsius", sensorLabels, nil) temperatureOffsetDesc = prometheus.NewDesc(prefix+"temperature_offset_celsius", "Temperature offset which is added to the temperature reading by the chip. Unit: degree Celsius", sensorLabels, nil) - temperatureLabelDesc = prometheus.NewDesc(prefix+"temperature_label_info", "Suggested temperature channel label", channelLabels, nil) - temperatureSensorEnabledDesc = prometheus.NewDesc(prefix+"temperature_sensor_enabled_bool", "1 = sensor enabled, 0 = sensor disabled", sensorLabels, nil) currentMaxDesc = prometheus.NewDesc(prefix+"current_max_ampere", "Current max value. Unit: Ampere", sensorLabels, nil) currentMinDesc = prometheus.NewDesc(prefix+"current_min_ampere", "Current min value. Unit: Ampere", sensorLabels, nil) currentCriticalMinValue = prometheus.NewDesc(prefix+"current_critical_min_ampere", "Current critical low value. Unit: Ampere", sensorLabels, nil) currentCriticalMaxValue = prometheus.NewDesc(prefix+"current_critical_max_ampere", "Current critical high value. Unit: Ampere", sensorLabels, nil) currentDesc = prometheus.NewDesc(prefix+"current_ampere", "Current input value. Unit: Ampere", sensorLabels, nil) - currentSensorEnabledDesc = prometheus.NewDesc(prefix+"current_sensor_enabled_bool", "1 = sensor enabled, 0 = sensor disabled", sensorLabels, nil) - powerPresent = prometheus.NewDesc(prefix+"power_present", "Is Power Present. 1 = present, 0 = missing", sensorLabels, nil) - powerAllOk = prometheus.NewDesc(prefix+"power_all_ok", "Is PSU Ok. 1 = OK, 0 = KO", sensorLabels, nil) + powerWatt = prometheus.NewDesc(prefix+"power_watt", "Current Usage. Unit: Watt ", sensorLabels, nil) + powerPresent = prometheus.NewDesc(prefix+"power_present", "Is Power Present. 1 = present, 0 = missing", sensorLabels, nil) + powerAllOk = prometheus.NewDesc(prefix+"power_all_ok", "Is PSU Ok. 1 = OK, 0 = BAD, -1 = POWERED OFF, -2 NOT DETECTED", sensorLabels, nil) + powerAllOkPrev = prometheus.NewDesc(prefix+"power_all_ok_prev", "Is PSU Ok (Previous State). 1 = OK, 0 = BAD, -1 = POWERED OFF, -2 NOT DETECTED", sensorLabels, nil) - rawValueDesc = prometheus.NewDesc(prefix+"raw_sensor_reading", "Arbitrary sensor reading, see labels on how to interpret this value", []string{"path", "description"}, nil) + rawValueDesc = prometheus.NewDesc(prefix+"raw_sensor_reading", "Arbitrary sensor reading, see labels on how to interpret this value", []string{"description"}, nil) } // Collector collects hwmon metrics from the /sys filesystem -type Collector struct { - config *Configuration -} +type Collector struct {} // NewCollector returns a new Collector instance -func NewCollector(config *Configuration) *Collector { - return &Collector{ - config: config, - } +func NewCollector() *Collector { + return &Collector {}; } // Describe implements collector.Collector interface Describe function @@ -124,19 +107,13 @@ func (*Collector) Describe(ch chan<- *prometheus.Desc) { ch <- voltageMaxDesc ch <- voltageCriticalMaxDesc ch <- voltageDesc - ch <- voltageLabelInfoDesc - ch <- voltageSensorEnabledDesc ch <- fanMinDesc ch <- fanMaxDesc ch <- fanDesc - ch <- fanDivisorDesc ch <- fanPulsesDesc ch <- fanTargetDesc - ch <- fanLabelsDesc - ch <- fanSensorEnabledDesc - ch <- temperatureTypeDesc ch <- temperatureMaxDesc ch <- temperatureMinDesc ch <- temperatureMaxHysteresisDesc @@ -149,123 +126,17 @@ func (*Collector) Describe(ch chan<- *prometheus.Desc) { ch <- temperatureCriticalMinDesc ch <- temperatureCriticalMinHysteresisDesc ch <- temperatureOffsetDesc - ch <- temperatureLabelDesc - ch <- temperatureSensorEnabledDesc ch <- currentMaxDesc ch <- currentMinDesc ch <- currentCriticalMinValue ch <- currentCriticalMaxValue ch <- currentDesc - ch <- currentSensorEnabledDesc + ch <- powerWatt ch <- powerPresent ch <- powerAllOk -} - -type parserFunc func(string, string, string) prometheus.Metric - -func getParsers(sensorType string) []parserFunc { - return map[string][]parserFunc{ - "voltage": { - makeDefaultParser(voltageMinDesc, "_min", 1000), - makeDefaultParser(voltageCriticalMinDesc, "_lcrit", 1000), - makeDefaultParser(voltageMaxDesc, "_max", 1000), - makeDefaultParser(voltageDesc, "_input", 1000), - makeChannelParser(voltageLabelInfoDesc, "_label"), - makeDefaultParser(voltageSensorEnabledDesc, "_enable", 1), - }, - "fan": { - makeDefaultParser(fanMinDesc, "_min", 1), - makeDefaultParser(fanMaxDesc, "_max", 1), - makeDefaultParser(fanDesc, "_input", 1), - makeDefaultParser(fanDivisorDesc, "_div", 1), - makeDefaultParser(fanPulsesDesc, "_pulses", 1), - makeDefaultParser(fanTargetDesc, "_target", 1), - makeChannelParser(fanLabelsDesc, "_label"), - makeDefaultParser(fanSensorEnabledDesc, "_enable", 1), - }, - "temp": { - makeTemperatureSensorTypeParser(temperatureTypeDesc, "_type"), - makeDefaultParser(temperatureMaxDesc, "_max", 1000), - makeDefaultParser(temperatureMinDesc, "_min", 1000), - makeDefaultParser(temperatureMaxHysteresisDesc, "_max_hyst", 1000), - makeDefaultParser(temperatureMinHysteresisDesc, "_min_hyst", 1000), - makeDefaultParser(temperatureDesc, "_input", 1000), - makeDefaultParser(temperatureCriticalMaxDesc, "_crit", 1000), - makeDefaultParser(temperatureCriticalMaxHysteresisDesc, "_crit_hyst", 1000), - makeDefaultParser(temperatureEmergencyMaxDesc, "_emergency", 1000), - makeDefaultParser(temperatureEmergencyMaxHysteresisDesc, "_emergency_hyst", 1000), - makeDefaultParser(temperatureCriticalMinDesc, "_lcrit", 1000), - makeDefaultParser(temperatureCriticalMinHysteresisDesc, "_lcrit_hyst", 1000), - makeDefaultParser(temperatureOffsetDesc, "_offset", 1000), - makeChannelParser(temperatureLabelDesc, "_label"), - makeDefaultParser(temperatureSensorEnabledDesc, "_enable", 1), - }, - "current": { - makeDefaultParser(currentMaxDesc, "_max", 1000), - makeDefaultParser(currentMinDesc, "_min", 1000), - makeDefaultParser(currentCriticalMinValue, "_lcrit", 1000), - makeDefaultParser(currentCriticalMaxValue, "_crit", 1000), - makeDefaultParser(currentDesc, "_input", 1000), - makeDefaultParser(currentSensorEnabledDesc, "_enable", 1), - }, - "power": { - makeDefaultParser(powerPresent, "_present", 1), - makeDefaultParser(powerAllOk, "_all_ok", 1), - }, - "raw": { - makeRawParser(rawValueDesc), - }, - }[sensorType] -} - -func makeDefaultParser(metricDesc *prometheus.Desc, pathSuffix string, divisor float64) parserFunc { - return func(driverPath string, hwmon string, description string) prometheus.Metric { - value, err := util.ReadFloat64FromFile(driverPath + "/" + hwmon + pathSuffix) - if err == nil { - return prometheus.MustNewConstMetric(metricDesc, prometheus.GaugeValue, value/divisor, driverPath, hwmon, description) - } - return nil - } -} - -func makeChannelParser(metricDesc *prometheus.Desc, pathSuffix string) parserFunc { - return func(driverPath string, hwmon string, description string) prometheus.Metric { - text, err := util.ReadStringFromFile(driverPath + "/" + hwmon + pathSuffix) - if err == nil { - return prometheus.MustNewConstMetric(metricDesc, prometheus.GaugeValue, 1.0, driverPath, hwmon, description, text) - } - return nil - } -} - -func makeTemperatureSensorTypeParser(metricDesc *prometheus.Desc, pathSuffix string) parserFunc { - return func(driverPath string, hwmon string, description string) prometheus.Metric { - value, err := util.ReadFloat64FromFile(driverPath + "/" + hwmon + pathSuffix) - if err != nil { - return nil - } - sensorType := map[float64]string{ - 1: "CPU embedded diode", - 2: "3904 transistor", - 3: "thermal diode", - 4: "thermistor", - 5: "AMD AMDSI", - 6: "Intel PECI", - }[value] - return prometheus.MustNewConstMetric(metricDesc, prometheus.GaugeValue, 1.0, driverPath, hwmon, description, sensorType) - } -} - -func makeRawParser(metricDesc *prometheus.Desc) parserFunc { - return func(driverPath string, hwmon string, description string) prometheus.Metric { - value, err := util.ReadFloat64FromFile(driverPath) - if err == nil { - return prometheus.MustNewConstMetric(metricDesc, prometheus.GaugeValue, value, driverPath, description) - } - return nil - } + ch <- powerAllOkPrev } func (*Collector) Name() string { @@ -277,26 +148,34 @@ func (c *Collector) Collect(metrics chan<- prometheus.Metric, errorChan chan err done <- struct{}{} }() - wg := &sync.WaitGroup{} - - for _, sensorConfiguration := range c.config.Sensors { - wg.Add(1) - go collectSensor(sensorConfiguration, metrics, wg) + smonCtlOut, err := runSmonCtl() + if err != nil { + errorChan <- err + return } - wg.Wait() + collectSensors(smonCtlOut, metrics, errorChan) } -func collectSensor(sensorConfig *SensorConfiguration, metrics chan<- prometheus.Metric, wg *sync.WaitGroup) { - defer wg.Done() +func runSmonCtl() ([]byte, error) { + cmd := exec.Command("smonctl", "--json", "-v") + + out, err := cmd.CombinedOutput() + if err != nil { + return nil, err + } - applicableParsers := getParsers(sensorConfig.Type) + return out, nil +} - for _, parser := range applicableParsers { - metric := parser(sensorConfig.DriverPath, sensorConfig.DriverHwmon, sensorConfig.Description) +func collectSensors(data []byte, metrics chan<- prometheus.Metric, errorChan chan error) { + sensors, err := UnmarshalSensors(data) + if err != nil { + errorChan <- err + } - if metric != nil { - metrics <- metric - } + for _, sensor := range sensors { + sensor.Collect(metrics) } } + diff --git a/hwmon/config.go b/hwmon/config.go deleted file mode 100644 index 8542aff..0000000 --- a/hwmon/config.go +++ /dev/null @@ -1,30 +0,0 @@ -package hwmon - -import ( - "gopkg.in/yaml.v2" - "os" -) - -// Configuration passed to NewCollector -type Configuration struct { - Sensors []*SensorConfiguration `yaml:"sensors"` -} - -// SensorConfiguration sensor to scrape -type SensorConfiguration struct { - Description string `yaml:"description"` - DriverPath string `yaml:"driver_path"` - DriverHwmon string `yaml:"driver_hwmon,omitempty"` - Type string `yaml:"type"` -} - -// LoadConfiguration loads and returns a configuration from a given filepath -func LoadConfiguration(path string) (*Configuration, error) { - file, err := os.ReadFile(path) - if err != nil { - return nil, err - } - configuration := &Configuration{} - err = yaml.Unmarshal(file, configuration) - return configuration, err -} diff --git a/hwmon/parser.go b/hwmon/parser.go new file mode 100644 index 0000000..8f7b465 --- /dev/null +++ b/hwmon/parser.go @@ -0,0 +1,358 @@ +package hwmon + +import ( + "encoding/json" + "errors" + "fmt" + "strconv" + "strings" + + "github.com/prometheus/client_golang/prometheus" +) + +func (s *Sensor) GetName() *string { + if s.NameNew != nil { + return s.NameNew + } + + if s.Name != nil && len(*s.Name) != 0 { + return &(*s.Name)[0] + } + + return nil +} + +func (s *Sensor) CollectNum(metrics chan<- prometheus.Metric, desc *prometheus.Desc, value *float64) { + if value == nil { + return + } + + name := s.GetName() + if name == nil { + return + } + + metric := prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, *value, s.Description, *name) + metrics <- metric +} + +func (s *Sensor) CollectBool(metrics chan<- prometheus.Metric, desc *prometheus.Desc, value *bool) { + if value == nil { + return + } + + name := s.GetName() + if name == nil { + return + } + + num := 0.0 + if *value { + num = 1.0 + } + + metric := prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, num, s.Description, *name) + metrics <- metric +} + +// Because of funny golang unmarshalling and prometheus metric safety this is necessary +// to unmarshal to either a float64 or to a float64 from a string or nil +type OptFloatString struct { + inner *float64 +} + +func (opt *OptFloatString) UnmarshalJSON(b []byte) error { + var inner any + + err := json.Unmarshal(b, (&inner)) + if err != nil { + return err + } + + switch v := inner.(type) { + case string: + parsed, err := strconv.ParseFloat(v, 64) + if err != nil { + return err + } + opt.inner = &parsed + case float64: + opt.inner = &v + case int: + intermediate := float64(v) + opt.inner = &intermediate + } + + return nil +} + + + +type Sensor struct { + Type string `json:"type"` + Description string `json:"description"` + Name *[]string `json:"driver_hwmon,omitempty"` + NameNew *string `json:"name,omitempty"` +} + +type ISensor interface { + Collect(metrics chan<- prometheus.Metric) +} + + + +type VoltageSensor struct { + Min OptFloatString `json:"min,omitempty"` + Max OptFloatString `json:"max,omitempty"` + CriticalMin OptFloatString `json:"lcrit,omitempty"` + CriticalMax OptFloatString `json:"crit,omitempty"` + Input OptFloatString `json:"input,omitempty"` + Sensor +} + +func (s *VoltageSensor) Collect(metrics chan<- prometheus.Metric) { + s.CollectNum(metrics, voltageMinDesc, s.Min.inner) + s.CollectNum(metrics, voltageMaxDesc, s.Max.inner) + s.CollectNum(metrics, voltageCriticalMinDesc, s.CriticalMin.inner) + s.CollectNum(metrics, voltageCriticalMaxDesc, s.CriticalMax.inner) + s.CollectNum(metrics, voltageDesc, s.Input.inner) +} + + + +type FanSensor struct { + Min OptFloatString `json:"min,omitempty"` + Max OptFloatString `json:"max,omitempty"` + Input OptFloatString `json:"input,omitempty"` + Pulses OptFloatString `json:"pulses,omitempty"` + Target OptFloatString `json:"target,omitempty"` + Sensor +} + +func (s *FanSensor) Collect(metrics chan<- prometheus.Metric) { + s.CollectNum(metrics, fanMinDesc, s.Min.inner) + s.CollectNum(metrics, fanMaxDesc, s.Max.inner) + s.CollectNum(metrics, fanDesc, s.Input.inner) + s.CollectNum(metrics, fanPulsesDesc, s.Pulses.inner) + s.CollectNum(metrics, fanTargetDesc, s.Target.inner) +} + + + +type TemperatureSensor struct { + Min OptFloatString `json:"min,omitempty"` + Max OptFloatString `json:"max,omitempty"` + MinHysteresis OptFloatString `json:"min_hyst,omitempty"` + MaxHysteresis OptFloatString `json:"max_hyst,omitempty"` + Input OptFloatString `json:"input,omitempty"` + CriticalMin OptFloatString `json:"lcrit,omitempty"` + CriticalMax OptFloatString `json:"crit,omitempty"` + CriticalMinHyst OptFloatString `json:"lcrit_hyst,omitempty"` + CriticalMaxHyst OptFloatString `json:"crit_hyst,omitempty"` + EmergencyMax OptFloatString `json:"emergency,omitempty"` + EmergencyMaxHyst OptFloatString `json:"emergency_hyst,omitempty"` + Offset OptFloatString `json:"offset,omitempty"` + Sensor +} + +func (s *TemperatureSensor) Collect(metrics chan<- prometheus.Metric) { + s.CollectNum(metrics, temperatureMinDesc, s.Min.inner) + s.CollectNum(metrics, temperatureMaxDesc, s.Max.inner) + s.CollectNum(metrics, temperatureMinHysteresisDesc, s.MinHysteresis.inner) + s.CollectNum(metrics, temperatureMaxHysteresisDesc, s.MaxHysteresis.inner) + s.CollectNum(metrics, temperatureDesc, s.Input.inner) + s.CollectNum(metrics, temperatureCriticalMinDesc, s.CriticalMin.inner) + s.CollectNum(metrics, temperatureCriticalMaxDesc, s.CriticalMax.inner) + s.CollectNum(metrics, temperatureCriticalMinHysteresisDesc, s.CriticalMinHyst.inner) + s.CollectNum(metrics, temperatureCriticalMaxHysteresisDesc, s.CriticalMaxHyst.inner) + s.CollectNum(metrics, temperatureEmergencyMaxDesc, s.EmergencyMax.inner) + s.CollectNum(metrics, temperatureEmergencyMaxHysteresisDesc, s.EmergencyMaxHyst.inner) + s.CollectNum(metrics, temperatureOffsetDesc, s.Offset.inner) +} + + + +type CurrentSensor struct { + Min OptFloatString `json:"min,omitempty"` + Max OptFloatString `json:"max,omitempty"` + CriticalMin OptFloatString `json:"lcrit,omitempty"` + CriticalMax OptFloatString `json:"crit,omitempty"` + Input OptFloatString `json:"input,omitempty"` + Sensor +} + +func (s *CurrentSensor) Collect(metrics chan<- prometheus.Metric) { + s.CollectNum(metrics, currentMinDesc, s.Min.inner) + s.CollectNum(metrics, currentMaxDesc, s.Max.inner) + s.CollectNum(metrics, currentCriticalMinValue, s.CriticalMin.inner) + s.CollectNum(metrics, currentCriticalMaxValue, s.CriticalMax.inner) + s.CollectNum(metrics, currentDesc, s.Input.inner) +} + + + +// { +// "name": "PSU1", +// "start_time": 1725878482, +// "psu1_pwr_status": 1, # Is power present +// "state": "OK", # We also check this for is all ok +// "prev_state": "OK", # And this +// "prev_msg": null, +// "msg": null, +// "psu1_power": 35, # Power value +// "log_time": 1725878482, +// "type": "power", +// "psu1_status": 1, +// "description": "PSU1" +// }, +// +// or +// +// # No power value +// { +// "driver_hwmon": [ +// "psu_pwr2" +// ], +// "name": "PSU2", +// "start_time": 1745417928, +// "psu_pwr2_all_ok": "1", # Is all ok +// "state": "OK", # But we check this instead +// "prev_state": "OK", # Last state +// "psu_pwr2_present": "1", # Is power present +// "driver_path": "/sys/bus/i2c/devices/0-0030", +// "msg": null, +// "prev_msg": null, +// "log_time": 1745417928, +// "type": "power", +// "description": "PSU2" +// }, + +type PowerSensor struct { + State string `json:"state"` + PrevState string `json:"prev_state"` + IsPresent *bool `json:"-"` + Power *float64 `json:"-"` // Not always available + Sensor +} + +func (s *PowerSensor) UnmarshalJSON(b []byte) error { + type powerSensor PowerSensor + + err := json.Unmarshal(b, (*powerSensor)(s)) + if err != nil { + return err + } + + var data map[string]json.RawMessage; + err = json.Unmarshal(b, &data) + if err != nil { + return err + } + + for key, value := range data { + var opt OptFloatString + err := json.Unmarshal(value, &opt) + if err != nil || opt.inner == nil { + continue + } + num := *opt.inner + + if strings.Contains(key, "present") || strings.Contains(key, "pwr_status") { + isPresent := num > 0 + s.IsPresent = &isPresent + continue + } + + if strings.Contains(key, "power") { + s.Power = &num + } + } + + return nil +} + +func OkValueToFloat(state string) (float64, error) { + switch state { + case "OK": + return 1, nil + case "BAD": + return 0, nil + case "POWERED OFF": + return -1, nil + case "NOT DETECTED": + return -2, nil + } + return float64(-1), fmt.Errorf("could not parse state value of %s to a float", state) +} + +func (s *PowerSensor) Collect(metrics chan<- prometheus.Metric) { + s.CollectNum(metrics, powerWatt, s.Power) + s.CollectBool(metrics, powerPresent, s.IsPresent) + + stateOkF, err := OkValueToFloat(s.State) + if err == nil { + s.CollectNum(metrics, powerAllOk, &stateOkF) + } + + stateOkPrevF, err := OkValueToFloat(s.PrevState) + if err == nil { + s.CollectNum(metrics, powerAllOkPrev, &stateOkPrevF) + } +} + +type RawSensor struct { + Raw OptFloatString `json:"raw"` + Sensor +} + +func (s *RawSensor) Collect(metrics chan<- prometheus.Metric) { + s.CollectNum(metrics, rawValueDesc, s.Raw.inner) +} + +func UnmarshalSensors(data []byte) ([]ISensor, error) { + var rawSensors []json.RawMessage + + err := json.Unmarshal(data, &rawSensors) + if err != nil { + return nil, err + } + + var sensors []ISensor + + for _, raw := range rawSensors { + var sensor Sensor + + err = json.Unmarshal(raw, &sensor) + if err != nil { + return nil, err + } + + var i ISensor + + switch sensor.Type { + case "voltage": + i = &VoltageSensor{} + case "fan": + i = &FanSensor{} + case "temp": + i = &TemperatureSensor{} + case "current": + i = &CurrentSensor{} + case "power": + i = &PowerSensor{} + case "raw": + i = &RawSensor{} + default: + return nil, errors.New("unknown sensor type") + } + + err = json.Unmarshal(raw, i) + if err != nil { + return nil, err + } + + sensors = append(sensors, i) + } + + return sensors, nil +} diff --git a/main.go b/main.go index 2f58127..a131c2a 100644 --- a/main.go +++ b/main.go @@ -34,7 +34,6 @@ var ( excludeInterfacesRegex = flag.String("collectors.transceiver.exclude-interfaces-regex", "", "Regex Expression for interfaces to exclude from scrape") includeInterfacesRegex = flag.String("collectors.transceiver.include-interfaces-regex", "", "Regex Expression for interfaces to include from scrape") hwmonCollector = flag.Bool("collectors.hwmon", false, "Enable hwmon collector") - hwmonCollectorConfig = flag.String("collectors.hwmon.config", "hwmon.yml", "hwmon collector config file") mstpdCollector = flag.Bool("collectors.mstpd", false, "Enable mstpd collector") mstpctlPath = flag.String("collectors.mstpd.mstpctl-path", "/sbin/mstpctl", "mstpctl binary path") logLevel = flag.String("log.level", "info", "The level the application logs at") @@ -115,12 +114,7 @@ func initialize() { } if *hwmonCollector { log.Info("hwmon collector enabled") - hwmonCollectorConfig, err := hwmon.LoadConfiguration(*hwmonCollectorConfig) - if err != nil { - log.Errorf("Could not load hwmon collector config file: %v. Disabling hwmon collector.", err) - } else { - enabledCollectors = append(enabledCollectors, hwmon.NewCollector(hwmonCollectorConfig)) - } + enabledCollectors = append(enabledCollectors, hwmon.NewCollector()) } if *mstpdCollector { enabledCollectors = append(enabledCollectors, mstpd.NewCollector(*mstpctlPath))