From c481dd19dafcf463a2554ac07290cb33a50b617d Mon Sep 17 00:00:00 2001 From: Jonas Wielicki Date: Tue, 29 Nov 2016 11:53:29 +0100 Subject: [PATCH 1/2] Re-introduce human-readable chip types The chip label generation has been changed in #334 to prefer the unique device path (e.g. the location on the PCI bus) due to #333. Here, a new annotation metric ``node_hwmon_chip_names`` is introduced, which makes it possible to link the unique chip sysfs path to a human-readable chip name, which may not be unique among chip sysfs paths (for example, dual-slot systems have multiple chipType="coretemp" sensors). This mitigates the downsides of the solution to #333 (namely that the device path may not be stable across kernels and reboots) for cases where it does not matter that multiple devices may have the same human-readable name (e.g. aggregation or where at most one device with a common chip name is present). For cases where no human-readable name can be derived, the annotation metric is not emitted. --- collector/hwmon_linux.go | 42 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/collector/hwmon_linux.go b/collector/hwmon_linux.go index d879aca9..ef4b7272 100644 --- a/collector/hwmon_linux.go +++ b/collector/hwmon_linux.go @@ -36,6 +36,7 @@ var ( hwmonInvalidMetricChars = regexp.MustCompile("[^a-z0-9:_]") hwmonFilenameFormat = regexp.MustCompile(`^(?P<type>[^0-9]+)(?P<id>[0-9]*)?(_(?P<property>.+))?$`) hwmonLabelDesc = []string{"chip", "sensor"} + hwmonChipNameLabelDesc = []string{"chip", "chip_name"} hwmonSensorTypes = []string{ "vrm", "beep_enable", "update_interval", "in", "cpu", "fan", "pwm", "temp", "curr", "power", "energy", "humidity", @@ -143,6 +144,26 @@ func (c *hwMonCollector) updateHwmon(ch chan<- prometheus.Metric, dir string) (e } } + hwmonChipName, err := c.hwmonHumanReadableChipName(dir) + + if err == nil { + // sensor chip metadata + desc := prometheus.NewDesc( + "node_hwmon_chip_names", + "Annotation metric for human-readable chip names", + 
hwmonChipNameLabelDesc, + nil, + ) + + ch <- prometheus.MustNewConstMetric( + desc, + prometheus.GaugeValue, + 1.0, + hwmonName, + hwmonChipName, + ) + } + // format all sensors for sensor, sensorData := range data { @@ -351,6 +372,27 @@ func (c *hwMonCollector) hwmonName(dir string) (string, error) { return "", errors.New("Could not derive a monitoring name for " + dir) } +func (c *hwMonCollector) hwmonHumanReadableChipName(dir string) (string, error) { + // this is similar to the methods in hwmonName, but with different + // precedences -- we can allow duplicates here. + + // preference 1: is there a name file + + sysnameRaw, nameErr := ioutil.ReadFile(path.Join(dir, "name")) + if nameErr != nil { + return "", nameErr + } + + if string(sysnameRaw) != "" { + cleanName := cleanMetricName(string(sysnameRaw)) + if cleanName != "" { + return cleanName, nil + } + } + + return "", errors.New("Could not derive a human-readable chip type for " + dir) +} + func (c *hwMonCollector) Update(ch chan<- prometheus.Metric) (err error) { // Step 1: scan /sys/class/hwmon, resolve all symlinks and call // updatesHwmon for each folder From 3efaa1a6a89ad2d381b5d70ebb9fb69d198b4f49 Mon Sep 17 00:00:00 2001 From: Jonas Wielicki Date: Tue, 29 Nov 2016 11:55:08 +0100 Subject: [PATCH 2/2] Update end-to-end tests --- collector/fixtures/e2e-output.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index d3bfc6c1..ac3ca169 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -477,6 +477,11 @@ node_filefd_maximum 1.631329e+06 # HELP node_forks Total number of forks. 
# TYPE node_forks counter node_forks 26442 +# HELP node_hwmon_chip_names Annotation metric for human-readable chip names +# TYPE node_hwmon_chip_names gauge +node_hwmon_chip_names{chip="nct6779",chip_name="nct6779"} 1 +node_hwmon_chip_names{chip="platform_coretemp_0",chip_name="coretemp"} 1 +node_hwmon_chip_names{chip="platform_coretemp_1",chip_name="coretemp"} 1 # HELP node_hwmon_fan_alarm Hardware sensor alarm status (fan) # TYPE node_hwmon_fan_alarm gauge node_hwmon_fan_alarm{chip="nct6779",sensor="fan2"} 0