// Copyright 2016 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build !nohwmon // +build !nohwmon package collector import ( "errors" "fmt" "log/slog" "os" "path/filepath" "regexp" "strconv" "strings" "github.com/alecthomas/kingpin/v2" "github.com/prometheus/client_golang/prometheus" "golang.org/x/sys/unix" ) var ( collectorHWmonChipInclude = kingpin.Flag("collector.hwmon.chip-include", "Regexp of hwmon chip to include (mutually exclusive to device-exclude).").String() collectorHWmonChipExclude = kingpin.Flag("collector.hwmon.chip-exclude", "Regexp of hwmon chip to exclude (mutually exclusive to device-include).").String() collectorHWmonSensorInclude = kingpin.Flag("collector.hwmon.sensor-include", "Regexp of hwmon sensor to include (mutually exclusive to sensor-exclude).").String() collectorHWmonSensorExclude = kingpin.Flag("collector.hwmon.sensor-exclude", "Regexp of hwmon sensor to exclude (mutually exclusive to sensor-include).").String() hwmonInvalidMetricChars = regexp.MustCompile("[^a-z0-9:_]") hwmonFilenameFormat = regexp.MustCompile(`^(?P[^0-9]+)(?P[0-9]*)?(_(?P.+))?$`) hwmonLabelDesc = []string{"chip", "sensor"} hwmonChipNameLabelDesc = []string{"chip", "chip_name"} hwmonSensorTypes = []string{ "vrm", "beep_enable", "update_interval", "in", "cpu", "fan", "pwm", "temp", "curr", "power", "energy", "humidity", "intrusion", } ) func init() { registerCollector("hwmon", defaultEnabled, NewHwMonCollector) } type hwMonCollector struct { deviceFilter deviceFilter sensorFilter deviceFilter logger *slog.Logger } // NewHwMonCollector returns a new Collector exposing /sys/class/hwmon stats // (similar to lm-sensors). func NewHwMonCollector(logger *slog.Logger) (Collector, error) { return &hwMonCollector{ logger: logger, deviceFilter: newDeviceFilter(*collectorHWmonChipExclude, *collectorHWmonChipInclude), sensorFilter: newDeviceFilter(*collectorHWmonSensorExclude, *collectorHWmonSensorInclude), }, nil } func cleanMetricName(name string) string { lower := strings.ToLower(name) replaced := hwmonInvalidMetricChars.ReplaceAllLiteralString(lower, "_") cleaned := strings.Trim(replaced, "_") return cleaned } func addValueFile(data map[string]map[string]string, sensor string, prop string, file string) { raw, err := sysReadFile(file) if err != nil { return } value := strings.Trim(string(raw), "\n") if _, ok := data[sensor]; !ok { data[sensor] = make(map[string]string) } data[sensor][prop] = value } // sysReadFile is a simplified os.ReadFile that invokes syscall.Read directly. func sysReadFile(file string) ([]byte, error) { f, err := os.Open(file) if err != nil { return nil, err } defer f.Close() // On some machines, hwmon drivers are broken and return EAGAIN. This causes // Go's os.ReadFile implementation to poll forever. // // Since we either want to read data or bail immediately, do the simplest // possible read using system call directly. b := make([]byte, 128) n, err := unix.Read(int(f.Fd()), b) if err != nil { return nil, err } if n < 0 { return nil, fmt.Errorf("failed to read file: %q, read returned negative bytes value: %d", file, n) } return b[:n], nil } // explodeSensorFilename splits a sensor name into _. func explodeSensorFilename(filename string) (ok bool, sensorType string, sensorNum int, sensorProperty string) { matches := hwmonFilenameFormat.FindStringSubmatch(filename) if len(matches) == 0 { return false, sensorType, sensorNum, sensorProperty } for i, match := range hwmonFilenameFormat.SubexpNames() { if i >= len(matches) { return true, sensorType, sensorNum, sensorProperty } if match == "type" { sensorType = matches[i] } if match == "property" { sensorProperty = matches[i] } if match == "id" && len(matches[i]) > 0 { if num, err := strconv.Atoi(matches[i]); err == nil { sensorNum = num } else { return false, sensorType, sensorNum, sensorProperty } } } return true, sensorType, sensorNum, sensorProperty } func collectSensorData(dir string, data map[string]map[string]string) error { sensorFiles, dirError := os.ReadDir(dir) if dirError != nil { return dirError } for _, file := range sensorFiles { filename := file.Name() ok, sensorType, sensorNum, sensorProperty := explodeSensorFilename(filename) if !ok { continue } for _, t := range hwmonSensorTypes { if t == sensorType { addValueFile(data, sensorType+strconv.Itoa(sensorNum), sensorProperty, filepath.Join(dir, file.Name())) break } } } return nil } func (c *hwMonCollector) updateHwmon(ch chan<- prometheus.Metric, dir string) error { hwmonName, err := c.hwmonName(dir) if err != nil { return err } if c.deviceFilter.ignored(hwmonName) { c.logger.Debug("ignoring hwmon chip", "chip", hwmonName) return nil } data := make(map[string]map[string]string) err = collectSensorData(dir, data) if err != nil { return err } if _, err := os.Stat(filepath.Join(dir, "device")); err == nil { err := collectSensorData(filepath.Join(dir, "device"), data) if err != nil { return err } } hwmonChipName, err := c.hwmonHumanReadableChipName(dir) if err == nil { // sensor chip metadata desc := prometheus.NewDesc( "node_hwmon_chip_names", "Annotation metric for human-readable chip names", hwmonChipNameLabelDesc, nil, ) ch <- prometheus.MustNewConstMetric( desc, prometheus.GaugeValue, 1.0, hwmonName, hwmonChipName, ) } // Format all sensors. for sensor, sensorData := range data { // Filtering for sensors is done on concatenated device name and sensor name // separated by a semicolon. This allows for excluding or including of specific // sensors on specific devices. For example, to exclude the sensor "temp3" on // the device "platform_coretemp_0", use "platform_coretemp_0;temp3" if c.sensorFilter.ignored(hwmonName + ";" + sensor) { c.logger.Debug("ignoring sensor", "sensor", sensor) continue } _, sensorType, _, _ := explodeSensorFilename(sensor) labels := []string{hwmonName, sensor} if labelText, ok := sensorData["label"]; ok { label := strings.ToValidUTF8(labelText, "�") desc := prometheus.NewDesc("node_hwmon_sensor_label", "Label for given chip and sensor", []string{"chip", "sensor", "label"}, nil) ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, 1.0, hwmonName, sensor, label) } if sensorType == "beep_enable" { value := 0.0 if sensorData[""] == "1" { value = 1.0 } metricName := "node_hwmon_beep_enabled" desc := prometheus.NewDesc(metricName, "Hardware beep enabled", hwmonLabelDesc, nil) ch <- prometheus.MustNewConstMetric( desc, prometheus.GaugeValue, value, labels...) continue } if sensorType == "vrm" { parsedValue, err := strconv.ParseFloat(sensorData[""], 64) if err != nil { continue } metricName := "node_hwmon_voltage_regulator_version" desc := prometheus.NewDesc(metricName, "Hardware voltage regulator", hwmonLabelDesc, nil) ch <- prometheus.MustNewConstMetric( desc, prometheus.GaugeValue, parsedValue, labels...) continue } if sensorType == "update_interval" { parsedValue, err := strconv.ParseFloat(sensorData[""], 64) if err != nil { continue } metricName := "node_hwmon_update_interval_seconds" desc := prometheus.NewDesc(metricName, "Hardware monitor update interval", hwmonLabelDesc, nil) ch <- prometheus.MustNewConstMetric( desc, prometheus.GaugeValue, parsedValue*0.001, labels...) continue } prefix := "node_hwmon_" + sensorType for element, value := range sensorData { if element == "label" { continue } name := prefix if element == "input" { // input is actually the value if _, ok := sensorData[""]; ok { name = name + "_input" } } else if element != "" { name = name + "_" + cleanMetricName(element) } parsedValue, err := strconv.ParseFloat(value, 64) if err != nil { continue } // special elements, fault, alarm & beep should be handed out without units if element == "fault" || element == "alarm" { desc := prometheus.NewDesc(name, "Hardware sensor "+element+" status ("+sensorType+")", hwmonLabelDesc, nil) ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, parsedValue, labels...) continue } if element == "beep" { desc := prometheus.NewDesc(name+"_enabled", "Hardware monitor sensor has beeping enabled", hwmonLabelDesc, nil) ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, parsedValue, labels...) continue } // everything else should get a unit if sensorType == "in" || sensorType == "cpu" { desc := prometheus.NewDesc(name+"_volts", "Hardware monitor for voltage ("+element+")", hwmonLabelDesc, nil) ch <- prometheus.MustNewConstMetric( desc, prometheus.GaugeValue, parsedValue*0.001, labels...) continue } if sensorType == "temp" && element != "type" { if element == "" { element = "input" } desc := prometheus.NewDesc(name+"_celsius", "Hardware monitor for temperature ("+element+")", hwmonLabelDesc, nil) ch <- prometheus.MustNewConstMetric( desc, prometheus.GaugeValue, parsedValue*0.001, labels...) continue } if sensorType == "curr" { desc := prometheus.NewDesc(name+"_amps", "Hardware monitor for current ("+element+")", hwmonLabelDesc, nil) ch <- prometheus.MustNewConstMetric( desc, prometheus.GaugeValue, parsedValue*0.001, labels...) continue } if sensorType == "energy" { desc := prometheus.NewDesc(name+"_joule_total", "Hardware monitor for joules used so far ("+element+")", hwmonLabelDesc, nil) ch <- prometheus.MustNewConstMetric( desc, prometheus.CounterValue, parsedValue/1000000.0, labels...) continue } if sensorType == "power" && element == "accuracy" { desc := prometheus.NewDesc(name, "Hardware monitor power meter accuracy, as a ratio", hwmonLabelDesc, nil) ch <- prometheus.MustNewConstMetric( desc, prometheus.GaugeValue, parsedValue/1000000.0, labels...) continue } if sensorType == "power" && (element == "average_interval" || element == "average_interval_min" || element == "average_interval_max") { desc := prometheus.NewDesc(name+"_seconds", "Hardware monitor power usage update interval ("+element+")", hwmonLabelDesc, nil) ch <- prometheus.MustNewConstMetric( desc, prometheus.GaugeValue, parsedValue*0.001, labels...) continue } if sensorType == "power" { desc := prometheus.NewDesc(name+"_watt", "Hardware monitor for power usage in watts ("+element+")", hwmonLabelDesc, nil) ch <- prometheus.MustNewConstMetric( desc, prometheus.GaugeValue, parsedValue/1000000.0, labels...) continue } if sensorType == "humidity" { desc := prometheus.NewDesc(name, "Hardware monitor for humidity, as a ratio (multiply with 100.0 to get the humidity as a percentage) ("+element+")", hwmonLabelDesc, nil) ch <- prometheus.MustNewConstMetric( desc, prometheus.GaugeValue, parsedValue/1000000.0, labels...) continue } if sensorType == "fan" && (element == "input" || element == "min" || element == "max" || element == "target") { desc := prometheus.NewDesc(name+"_rpm", "Hardware monitor for fan revolutions per minute ("+element+")", hwmonLabelDesc, nil) ch <- prometheus.MustNewConstMetric( desc, prometheus.GaugeValue, parsedValue, labels...) continue } // fallback, just dump the metric as is desc := prometheus.NewDesc(name, "Hardware monitor "+sensorType+" element "+element, hwmonLabelDesc, nil) ch <- prometheus.MustNewConstMetric( desc, prometheus.GaugeValue, parsedValue, labels...) } } return nil } func (c *hwMonCollector) hwmonName(dir string) (string, error) { // generate a name for a sensor path // sensor numbering depends on the order of linux module loading and // is thus unstable. // However the path of the device has to be stable: // - /sys/devices// // Some hardware monitors have a "name" file that exports a human // readable name that can be used. // human readable names would be bat0 or coretemp, while a path string // could be platform_applesmc.768 // preference 1: construct a name based on device name, always unique devicePath, devErr := filepath.EvalSymlinks(filepath.Join(dir, "device")) if devErr == nil { devPathPrefix, devName := filepath.Split(devicePath) _, devType := filepath.Split(strings.TrimRight(devPathPrefix, "/")) cleanDevName := cleanMetricName(devName) cleanDevType := cleanMetricName(devType) if cleanDevType != "" && cleanDevName != "" { return cleanDevType + "_" + cleanDevName, nil } if cleanDevName != "" { return cleanDevName, nil } } // preference 2: is there a name file sysnameRaw, nameErr := os.ReadFile(filepath.Join(dir, "name")) if nameErr == nil && string(sysnameRaw) != "" { cleanName := cleanMetricName(string(sysnameRaw)) if cleanName != "" { return cleanName, nil } } // it looks bad, name and device don't provide enough information // return a hwmon[0-9]* name realDir, err := filepath.EvalSymlinks(dir) if err != nil { return "", err } // take the last path element, this will be hwmonX _, name := filepath.Split(realDir) cleanName := cleanMetricName(name) if cleanName != "" { return cleanName, nil } return "", errors.New("Could not derive a monitoring name for " + dir) } // hwmonHumanReadableChipName is similar to the methods in hwmonName, but with // different precedences -- we can allow duplicates here. func (c *hwMonCollector) hwmonHumanReadableChipName(dir string) (string, error) { sysnameRaw, nameErr := os.ReadFile(filepath.Join(dir, "name")) if nameErr != nil { return "", nameErr } if string(sysnameRaw) != "" { cleanName := cleanMetricName(string(sysnameRaw)) if cleanName != "" { return cleanName, nil } } return "", errors.New("Could not derive a human-readable chip type for " + dir) } func (c *hwMonCollector) Update(ch chan<- prometheus.Metric) error { // Step 1: scan /sys/class/hwmon, resolve all symlinks and call // updatesHwmon for each folder hwmonPathName := filepath.Join(sysFilePath("class"), "hwmon") hwmonFiles, err := os.ReadDir(hwmonPathName) if err != nil { if errors.Is(err, os.ErrNotExist) { c.logger.Debug("hwmon collector metrics are not available for this system") return ErrNoData } return err } var lastErr error for _, hwDir := range hwmonFiles { hwmonXPathName := filepath.Join(hwmonPathName, hwDir.Name()) fileInfo, err := os.Lstat(hwmonXPathName) if err != nil { continue } if fileInfo.Mode()&os.ModeSymlink > 0 { fileInfo, err = os.Stat(hwmonXPathName) if err != nil { continue } } if !fileInfo.IsDir() { continue } if err = c.updateHwmon(ch, hwmonXPathName); err != nil { lastErr = err } } return lastErr }