489 lines
15 KiB
Go
489 lines
15 KiB
Go
// Copyright 2016 The Prometheus Authors
|
||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||
// you may not use this file except in compliance with the License.
|
||
// You may obtain a copy of the License at
|
||
//
|
||
// http://www.apache.org/licenses/LICENSE-2.0
|
||
//
|
||
// Unless required by applicable law or agreed to in writing, software
|
||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
// See the License for the specific language governing permissions and
|
||
// limitations under the License.
|
||
|
||
//go:build !nohwmon
|
||
// +build !nohwmon
|
||
|
||
package collector
|
||
|
||
import (
|
||
"errors"
|
||
"fmt"
|
||
"log/slog"
|
||
"os"
|
||
"path/filepath"
|
||
"regexp"
|
||
"strconv"
|
||
"strings"
|
||
|
||
"github.com/alecthomas/kingpin/v2"
|
||
"github.com/prometheus/client_golang/prometheus"
|
||
"golang.org/x/sys/unix"
|
||
)
|
||
|
||
var (
|
||
collectorHWmonChipInclude = kingpin.Flag("collector.hwmon.chip-include", "Regexp of hwmon chip to include (mutually exclusive to device-exclude).").String()
|
||
collectorHWmonChipExclude = kingpin.Flag("collector.hwmon.chip-exclude", "Regexp of hwmon chip to exclude (mutually exclusive to device-include).").String()
|
||
collectorHWmonSensorInclude = kingpin.Flag("collector.hwmon.sensor-include", "Regexp of hwmon sensor to include (mutually exclusive to sensor-exclude).").String()
|
||
collectorHWmonSensorExclude = kingpin.Flag("collector.hwmon.sensor-exclude", "Regexp of hwmon sensor to exclude (mutually exclusive to sensor-include).").String()
|
||
|
||
hwmonInvalidMetricChars = regexp.MustCompile("[^a-z0-9:_]")
|
||
hwmonFilenameFormat = regexp.MustCompile(`^(?P<type>[^0-9]+)(?P<id>[0-9]*)?(_(?P<property>.+))?$`)
|
||
hwmonLabelDesc = []string{"chip", "sensor"}
|
||
hwmonChipNameLabelDesc = []string{"chip", "chip_name"}
|
||
hwmonSensorTypes = []string{
|
||
"vrm", "beep_enable", "update_interval", "in", "cpu", "fan",
|
||
"pwm", "temp", "curr", "power", "energy", "humidity",
|
||
"intrusion",
|
||
}
|
||
)
|
||
|
||
func init() {
|
||
registerCollector("hwmon", defaultEnabled, NewHwMonCollector)
|
||
}
|
||
|
||
type hwMonCollector struct {
|
||
deviceFilter deviceFilter
|
||
sensorFilter deviceFilter
|
||
logger *slog.Logger
|
||
}
|
||
|
||
// NewHwMonCollector returns a new Collector exposing /sys/class/hwmon stats
|
||
// (similar to lm-sensors).
|
||
func NewHwMonCollector(logger *slog.Logger) (Collector, error) {
|
||
|
||
return &hwMonCollector{
|
||
logger: logger,
|
||
deviceFilter: newDeviceFilter(*collectorHWmonChipExclude, *collectorHWmonChipInclude),
|
||
sensorFilter: newDeviceFilter(*collectorHWmonSensorExclude, *collectorHWmonSensorInclude),
|
||
}, nil
|
||
}
|
||
|
||
func cleanMetricName(name string) string {
|
||
lower := strings.ToLower(name)
|
||
replaced := hwmonInvalidMetricChars.ReplaceAllLiteralString(lower, "_")
|
||
cleaned := strings.Trim(replaced, "_")
|
||
return cleaned
|
||
}
|
||
|
||
func addValueFile(data map[string]map[string]string, sensor string, prop string, file string) {
|
||
raw, err := sysReadFile(file)
|
||
if err != nil {
|
||
return
|
||
}
|
||
value := strings.Trim(string(raw), "\n")
|
||
|
||
if _, ok := data[sensor]; !ok {
|
||
data[sensor] = make(map[string]string)
|
||
}
|
||
|
||
data[sensor][prop] = value
|
||
}
|
||
|
||
// sysReadFile is a simplified os.ReadFile that invokes syscall.Read directly.
|
||
func sysReadFile(file string) ([]byte, error) {
|
||
f, err := os.Open(file)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
defer f.Close()
|
||
|
||
// On some machines, hwmon drivers are broken and return EAGAIN. This causes
|
||
// Go's os.ReadFile implementation to poll forever.
|
||
//
|
||
// Since we either want to read data or bail immediately, do the simplest
|
||
// possible read using system call directly.
|
||
b := make([]byte, 128)
|
||
n, err := unix.Read(int(f.Fd()), b)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
if n < 0 {
|
||
return nil, fmt.Errorf("failed to read file: %q, read returned negative bytes value: %d", file, n)
|
||
}
|
||
|
||
return b[:n], nil
|
||
}
|
||
|
||
// explodeSensorFilename splits a sensor name into <type><num>_<property>.
|
||
func explodeSensorFilename(filename string) (ok bool, sensorType string, sensorNum int, sensorProperty string) {
|
||
matches := hwmonFilenameFormat.FindStringSubmatch(filename)
|
||
if len(matches) == 0 {
|
||
return false, sensorType, sensorNum, sensorProperty
|
||
}
|
||
for i, match := range hwmonFilenameFormat.SubexpNames() {
|
||
if i >= len(matches) {
|
||
return true, sensorType, sensorNum, sensorProperty
|
||
}
|
||
if match == "type" {
|
||
sensorType = matches[i]
|
||
}
|
||
if match == "property" {
|
||
sensorProperty = matches[i]
|
||
}
|
||
if match == "id" && len(matches[i]) > 0 {
|
||
if num, err := strconv.Atoi(matches[i]); err == nil {
|
||
sensorNum = num
|
||
} else {
|
||
return false, sensorType, sensorNum, sensorProperty
|
||
}
|
||
}
|
||
}
|
||
return true, sensorType, sensorNum, sensorProperty
|
||
}
|
||
|
||
func collectSensorData(dir string, data map[string]map[string]string) error {
|
||
sensorFiles, dirError := os.ReadDir(dir)
|
||
if dirError != nil {
|
||
return dirError
|
||
}
|
||
for _, file := range sensorFiles {
|
||
filename := file.Name()
|
||
ok, sensorType, sensorNum, sensorProperty := explodeSensorFilename(filename)
|
||
if !ok {
|
||
continue
|
||
}
|
||
|
||
for _, t := range hwmonSensorTypes {
|
||
if t == sensorType {
|
||
addValueFile(data, sensorType+strconv.Itoa(sensorNum), sensorProperty, filepath.Join(dir, file.Name()))
|
||
break
|
||
}
|
||
}
|
||
}
|
||
return nil
|
||
}
|
||
|
||
func (c *hwMonCollector) updateHwmon(ch chan<- prometheus.Metric, dir string) error {
|
||
hwmonName, err := c.hwmonName(dir)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
|
||
if c.deviceFilter.ignored(hwmonName) {
|
||
c.logger.Debug("ignoring hwmon chip", "chip", hwmonName)
|
||
return nil
|
||
}
|
||
|
||
data := make(map[string]map[string]string)
|
||
err = collectSensorData(dir, data)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
if _, err := os.Stat(filepath.Join(dir, "device")); err == nil {
|
||
err := collectSensorData(filepath.Join(dir, "device"), data)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
}
|
||
|
||
hwmonChipName, err := c.hwmonHumanReadableChipName(dir)
|
||
if err == nil {
|
||
// sensor chip metadata
|
||
desc := prometheus.NewDesc(
|
||
"node_hwmon_chip_names",
|
||
"Annotation metric for human-readable chip names",
|
||
hwmonChipNameLabelDesc,
|
||
nil,
|
||
)
|
||
|
||
ch <- prometheus.MustNewConstMetric(
|
||
desc,
|
||
prometheus.GaugeValue,
|
||
1.0,
|
||
hwmonName,
|
||
hwmonChipName,
|
||
)
|
||
}
|
||
|
||
// Format all sensors.
|
||
for sensor, sensorData := range data {
|
||
|
||
// Filtering for sensors is done on concatenated device name and sensor name
|
||
// separated by a semicolon. This allows for excluding or including of specific
|
||
// sensors on specific devices. For example, to exclude the sensor "temp3" on
|
||
// the device "platform_coretemp_0", use "platform_coretemp_0;temp3"
|
||
if c.sensorFilter.ignored(hwmonName + ";" + sensor) {
|
||
c.logger.Debug("ignoring sensor", "sensor", sensor)
|
||
continue
|
||
}
|
||
|
||
_, sensorType, _, _ := explodeSensorFilename(sensor)
|
||
|
||
labels := []string{hwmonName, sensor}
|
||
if labelText, ok := sensorData["label"]; ok {
|
||
label := strings.ToValidUTF8(labelText, "<22>")
|
||
desc := prometheus.NewDesc("node_hwmon_sensor_label", "Label for given chip and sensor",
|
||
[]string{"chip", "sensor", "label"}, nil)
|
||
ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, 1.0, hwmonName, sensor, label)
|
||
}
|
||
|
||
if sensorType == "beep_enable" {
|
||
value := 0.0
|
||
if sensorData[""] == "1" {
|
||
value = 1.0
|
||
}
|
||
metricName := "node_hwmon_beep_enabled"
|
||
desc := prometheus.NewDesc(metricName, "Hardware beep enabled", hwmonLabelDesc, nil)
|
||
ch <- prometheus.MustNewConstMetric(
|
||
desc, prometheus.GaugeValue, value, labels...)
|
||
continue
|
||
}
|
||
if sensorType == "vrm" {
|
||
parsedValue, err := strconv.ParseFloat(sensorData[""], 64)
|
||
if err != nil {
|
||
continue
|
||
}
|
||
metricName := "node_hwmon_voltage_regulator_version"
|
||
desc := prometheus.NewDesc(metricName, "Hardware voltage regulator", hwmonLabelDesc, nil)
|
||
ch <- prometheus.MustNewConstMetric(
|
||
desc, prometheus.GaugeValue, parsedValue, labels...)
|
||
continue
|
||
}
|
||
if sensorType == "update_interval" {
|
||
parsedValue, err := strconv.ParseFloat(sensorData[""], 64)
|
||
if err != nil {
|
||
continue
|
||
}
|
||
metricName := "node_hwmon_update_interval_seconds"
|
||
desc := prometheus.NewDesc(metricName, "Hardware monitor update interval", hwmonLabelDesc, nil)
|
||
ch <- prometheus.MustNewConstMetric(
|
||
desc, prometheus.GaugeValue, parsedValue*0.001, labels...)
|
||
continue
|
||
}
|
||
|
||
prefix := "node_hwmon_" + sensorType
|
||
|
||
for element, value := range sensorData {
|
||
|
||
if element == "label" {
|
||
continue
|
||
}
|
||
|
||
name := prefix
|
||
if element == "input" {
|
||
// input is actually the value
|
||
if _, ok := sensorData[""]; ok {
|
||
name = name + "_input"
|
||
}
|
||
} else if element != "" {
|
||
name = name + "_" + cleanMetricName(element)
|
||
}
|
||
parsedValue, err := strconv.ParseFloat(value, 64)
|
||
if err != nil {
|
||
continue
|
||
}
|
||
|
||
// special elements, fault, alarm & beep should be handed out without units
|
||
if element == "fault" || element == "alarm" {
|
||
desc := prometheus.NewDesc(name, "Hardware sensor "+element+" status ("+sensorType+")", hwmonLabelDesc, nil)
|
||
ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, parsedValue, labels...)
|
||
continue
|
||
}
|
||
if element == "beep" {
|
||
desc := prometheus.NewDesc(name+"_enabled", "Hardware monitor sensor has beeping enabled", hwmonLabelDesc, nil)
|
||
ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, parsedValue, labels...)
|
||
continue
|
||
}
|
||
|
||
// everything else should get a unit
|
||
if sensorType == "in" || sensorType == "cpu" {
|
||
desc := prometheus.NewDesc(name+"_volts", "Hardware monitor for voltage ("+element+")", hwmonLabelDesc, nil)
|
||
ch <- prometheus.MustNewConstMetric(
|
||
desc, prometheus.GaugeValue, parsedValue*0.001, labels...)
|
||
continue
|
||
}
|
||
if sensorType == "temp" && element != "type" {
|
||
if element == "" {
|
||
element = "input"
|
||
}
|
||
desc := prometheus.NewDesc(name+"_celsius", "Hardware monitor for temperature ("+element+")", hwmonLabelDesc, nil)
|
||
ch <- prometheus.MustNewConstMetric(
|
||
desc, prometheus.GaugeValue, parsedValue*0.001, labels...)
|
||
continue
|
||
}
|
||
if sensorType == "curr" {
|
||
desc := prometheus.NewDesc(name+"_amps", "Hardware monitor for current ("+element+")", hwmonLabelDesc, nil)
|
||
ch <- prometheus.MustNewConstMetric(
|
||
desc, prometheus.GaugeValue, parsedValue*0.001, labels...)
|
||
continue
|
||
}
|
||
if sensorType == "energy" {
|
||
desc := prometheus.NewDesc(name+"_joule_total", "Hardware monitor for joules used so far ("+element+")", hwmonLabelDesc, nil)
|
||
ch <- prometheus.MustNewConstMetric(
|
||
desc, prometheus.CounterValue, parsedValue/1000000.0, labels...)
|
||
continue
|
||
}
|
||
if sensorType == "power" && element == "accuracy" {
|
||
desc := prometheus.NewDesc(name, "Hardware monitor power meter accuracy, as a ratio", hwmonLabelDesc, nil)
|
||
ch <- prometheus.MustNewConstMetric(
|
||
desc, prometheus.GaugeValue, parsedValue/1000000.0, labels...)
|
||
continue
|
||
}
|
||
if sensorType == "power" && (element == "average_interval" || element == "average_interval_min" || element == "average_interval_max") {
|
||
desc := prometheus.NewDesc(name+"_seconds", "Hardware monitor power usage update interval ("+element+")", hwmonLabelDesc, nil)
|
||
ch <- prometheus.MustNewConstMetric(
|
||
desc, prometheus.GaugeValue, parsedValue*0.001, labels...)
|
||
continue
|
||
}
|
||
if sensorType == "power" {
|
||
desc := prometheus.NewDesc(name+"_watt", "Hardware monitor for power usage in watts ("+element+")", hwmonLabelDesc, nil)
|
||
ch <- prometheus.MustNewConstMetric(
|
||
desc, prometheus.GaugeValue, parsedValue/1000000.0, labels...)
|
||
continue
|
||
}
|
||
|
||
if sensorType == "humidity" {
|
||
desc := prometheus.NewDesc(name, "Hardware monitor for humidity, as a ratio (multiply with 100.0 to get the humidity as a percentage) ("+element+")", hwmonLabelDesc, nil)
|
||
ch <- prometheus.MustNewConstMetric(
|
||
desc, prometheus.GaugeValue, parsedValue/1000000.0, labels...)
|
||
continue
|
||
}
|
||
|
||
if sensorType == "fan" && (element == "input" || element == "min" || element == "max" || element == "target") {
|
||
desc := prometheus.NewDesc(name+"_rpm", "Hardware monitor for fan revolutions per minute ("+element+")", hwmonLabelDesc, nil)
|
||
ch <- prometheus.MustNewConstMetric(
|
||
desc, prometheus.GaugeValue, parsedValue, labels...)
|
||
continue
|
||
}
|
||
|
||
// fallback, just dump the metric as is
|
||
|
||
desc := prometheus.NewDesc(name, "Hardware monitor "+sensorType+" element "+element, hwmonLabelDesc, nil)
|
||
ch <- prometheus.MustNewConstMetric(
|
||
desc, prometheus.GaugeValue, parsedValue, labels...)
|
||
}
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
func (c *hwMonCollector) hwmonName(dir string) (string, error) {
|
||
// generate a name for a sensor path
|
||
|
||
// sensor numbering depends on the order of linux module loading and
|
||
// is thus unstable.
|
||
// However the path of the device has to be stable:
|
||
// - /sys/devices/<bus>/<device>
|
||
// Some hardware monitors have a "name" file that exports a human
|
||
// readable name that can be used.
|
||
|
||
// human readable names would be bat0 or coretemp, while a path string
|
||
// could be platform_applesmc.768
|
||
|
||
// preference 1: construct a name based on device name, always unique
|
||
|
||
devicePath, devErr := filepath.EvalSymlinks(filepath.Join(dir, "device"))
|
||
if devErr == nil {
|
||
devPathPrefix, devName := filepath.Split(devicePath)
|
||
_, devType := filepath.Split(strings.TrimRight(devPathPrefix, "/"))
|
||
|
||
cleanDevName := cleanMetricName(devName)
|
||
cleanDevType := cleanMetricName(devType)
|
||
|
||
if cleanDevType != "" && cleanDevName != "" {
|
||
return cleanDevType + "_" + cleanDevName, nil
|
||
}
|
||
|
||
if cleanDevName != "" {
|
||
return cleanDevName, nil
|
||
}
|
||
}
|
||
|
||
// preference 2: is there a name file
|
||
sysnameRaw, nameErr := os.ReadFile(filepath.Join(dir, "name"))
|
||
if nameErr == nil && string(sysnameRaw) != "" {
|
||
cleanName := cleanMetricName(string(sysnameRaw))
|
||
if cleanName != "" {
|
||
return cleanName, nil
|
||
}
|
||
}
|
||
|
||
// it looks bad, name and device don't provide enough information
|
||
// return a hwmon[0-9]* name
|
||
|
||
realDir, err := filepath.EvalSymlinks(dir)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
|
||
// take the last path element, this will be hwmonX
|
||
_, name := filepath.Split(realDir)
|
||
cleanName := cleanMetricName(name)
|
||
if cleanName != "" {
|
||
return cleanName, nil
|
||
}
|
||
return "", errors.New("Could not derive a monitoring name for " + dir)
|
||
}
|
||
|
||
// hwmonHumanReadableChipName is similar to the methods in hwmonName, but with
|
||
// different precedences -- we can allow duplicates here.
|
||
func (c *hwMonCollector) hwmonHumanReadableChipName(dir string) (string, error) {
|
||
sysnameRaw, nameErr := os.ReadFile(filepath.Join(dir, "name"))
|
||
if nameErr != nil {
|
||
return "", nameErr
|
||
}
|
||
|
||
if string(sysnameRaw) != "" {
|
||
cleanName := cleanMetricName(string(sysnameRaw))
|
||
if cleanName != "" {
|
||
return cleanName, nil
|
||
}
|
||
}
|
||
|
||
return "", errors.New("Could not derive a human-readable chip type for " + dir)
|
||
}
|
||
|
||
func (c *hwMonCollector) Update(ch chan<- prometheus.Metric) error {
|
||
// Step 1: scan /sys/class/hwmon, resolve all symlinks and call
|
||
// updatesHwmon for each folder
|
||
|
||
hwmonPathName := filepath.Join(sysFilePath("class"), "hwmon")
|
||
|
||
hwmonFiles, err := os.ReadDir(hwmonPathName)
|
||
if err != nil {
|
||
if errors.Is(err, os.ErrNotExist) {
|
||
c.logger.Debug("hwmon collector metrics are not available for this system")
|
||
return ErrNoData
|
||
}
|
||
|
||
return err
|
||
}
|
||
|
||
var lastErr error
|
||
for _, hwDir := range hwmonFiles {
|
||
hwmonXPathName := filepath.Join(hwmonPathName, hwDir.Name())
|
||
fileInfo, err := os.Lstat(hwmonXPathName)
|
||
if err != nil {
|
||
continue
|
||
}
|
||
|
||
if fileInfo.Mode()&os.ModeSymlink > 0 {
|
||
fileInfo, err = os.Stat(hwmonXPathName)
|
||
if err != nil {
|
||
continue
|
||
}
|
||
}
|
||
|
||
if !fileInfo.IsDir() {
|
||
continue
|
||
}
|
||
|
||
if err = c.updateHwmon(ch, hwmonXPathName); err != nil {
|
||
lastErr = err
|
||
}
|
||
}
|
||
|
||
return lastErr
|
||
}
|