From b23146db3fa6900466cb37362b2f3e9e54d18adc Mon Sep 17 00:00:00 2001 From: Benjamin Drung Date: Tue, 6 Jul 2021 10:20:47 +0200 Subject: [PATCH] Add nvme collector Add a collector for NVMes to expose the firmware versions. This requires procfs >= 0.7.0. Fixes #1891 Signed-off-by: Benjamin Drung --- README.md | 1 + collector/fixtures/e2e-64k-page-output.txt | 4 ++ collector/fixtures/e2e-output.txt | 4 ++ collector/fixtures/sys.ttar | 26 ++++++++ collector/nvme_linux.go | 74 ++++++++++++++++++++++ go.mod | 2 +- go.sum | 4 +- 7 files changed, 112 insertions(+), 3 deletions(-) create mode 100644 collector/nvme_linux.go diff --git a/README.md b/README.md index f1966513..32f011e1 100644 --- a/README.md +++ b/README.md @@ -113,6 +113,7 @@ netdev | Exposes network interface statistics such as bytes transferred. | Darwi netstat | Exposes network statistics from `/proc/net/netstat`. This is the same information as `netstat -s`. | Linux nfs | Exposes NFS client statistics from `/proc/net/rpc/nfs`. This is the same information as `nfsstat -c`. | Linux nfsd | Exposes NFS kernel server statistics from `/proc/net/rpc/nfsd`. This is the same information as `nfsstat -s`. | Linux +nvme | Exposes NVMe info from `/sys/class/nvme/` | Linux powersupplyclass | Exposes Power Supply statistics from `/sys/class/power_supply` | Linux pressure | Exposes pressure stall statistics from `/proc/pressure/`. | Linux (kernel 4.20+ and/or [CONFIG\_PSI](https://www.kernel.org/doc/html/latest/accounting/psi.html)) rapl | Exposes various statistics from `/sys/class/powercap`. | Linux diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index 8ad3d96d..726c0eda 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -2439,6 +2439,9 @@ node_nfsd_server_rpcs_total 18628 # HELP node_nfsd_server_threads Total number of NFSd kernel threads that are running. 
# TYPE node_nfsd_server_threads gauge node_nfsd_server_threads 8 +# HELP node_nvme_info Non-numeric data from /sys/class/nvme/, value is always 1. +# TYPE node_nvme_info gauge +node_nvme_info{device="nvme0",firmware_revision="1B2QEXP7",model="Samsung SSD 970 PRO 512GB",serial="S680HF8N190894I",state="live"} 1 # HELP node_power_supply_capacity capacity value of /sys/class/power_supply/. # TYPE node_power_supply_capacity gauge node_power_supply_capacity{power_supply="BAT0"} 81 @@ -2586,6 +2589,7 @@ node_scrape_collector_success{collector="netdev"} 1 node_scrape_collector_success{collector="netstat"} 1 node_scrape_collector_success{collector="nfs"} 1 node_scrape_collector_success{collector="nfsd"} 1 +node_scrape_collector_success{collector="nvme"} 1 node_scrape_collector_success{collector="powersupplyclass"} 1 node_scrape_collector_success{collector="pressure"} 1 node_scrape_collector_success{collector="processes"} 1 diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 19f40d2a..0eea7744 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -2637,6 +2637,9 @@ node_nfsd_server_rpcs_total 18628 # HELP node_nfsd_server_threads Total number of NFSd kernel threads that are running. # TYPE node_nfsd_server_threads gauge node_nfsd_server_threads 8 +# HELP node_nvme_info Non-numeric data from /sys/class/nvme/, value is always 1. +# TYPE node_nvme_info gauge +node_nvme_info{device="nvme0",firmware_revision="1B2QEXP7",model="Samsung SSD 970 PRO 512GB",serial="S680HF8N190894I",state="live"} 1 # HELP node_power_supply_capacity capacity value of /sys/class/power_supply/. 
# TYPE node_power_supply_capacity gauge node_power_supply_capacity{power_supply="BAT0"} 81 @@ -2787,6 +2790,7 @@ node_scrape_collector_success{collector="netdev"} 1 node_scrape_collector_success{collector="netstat"} 1 node_scrape_collector_success{collector="nfs"} 1 node_scrape_collector_success{collector="nfsd"} 1 +node_scrape_collector_success{collector="nvme"} 1 node_scrape_collector_success{collector="powersupplyclass"} 1 node_scrape_collector_success{collector="pressure"} 1 node_scrape_collector_success{collector="processes"} 1 diff --git a/collector/fixtures/sys.ttar b/collector/fixtures/sys.ttar index acf1cec8..a8848936 100644 --- a/collector/fixtures/sys.ttar +++ b/collector/fixtures/sys.ttar @@ -1124,6 +1124,32 @@ Lines: 1 1 Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/class/nvme +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/class/nvme/nvme0 +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme/nvme0/firmware_rev +Lines: 1 +1B2QEXP7 +Mode: 444 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme/nvme0/model +Lines: 1 +Samsung SSD 970 PRO 512GB +Mode: 444 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme/nvme0/serial +Lines: 1 +S680HF8N190894I +Mode: 444 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme/nvme0/state +Lines: 1 +live +Mode: 444 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/class/power_supply Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/collector/nvme_linux.go b/collector/nvme_linux.go new file mode 100644 index 00000000..bcb63931 --- /dev/null +++ b/collector/nvme_linux.go @@ -0,0 +1,74 @@ +// Copyright 2021 The 
Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build linux
+// +build !nonvme
+
+package collector
+
+import (
+	"errors"
+	"fmt"
+	"os"
+
+	"github.com/go-kit/log"
+	"github.com/go-kit/log/level"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/procfs/sysfs"
+)
+
+type nvmeCollector struct {
+	fs     sysfs.FS   // sysfs handle opened by NewNVMeCollector; Update calls NVMeClass on it
+	logger log.Logger // receives the debug message Update logs when NVMeClass reports os.ErrNotExist
+}
+
+func init() {
+	registerCollector("nvme", defaultEnabled, NewNVMeCollector)
+}
+
+// NewNVMeCollector returns a new Collector exposing NVMe stats.
+func NewNVMeCollector(logger log.Logger) (Collector, error) {
+	fs, err := sysfs.NewFS(*sysPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to open sysfs: %w", err)
+	}
+
+	return &nvmeCollector{
+		fs:     fs,
+		logger: logger,
+	}, nil
+}
+
+// Update sends one NVMe info sample (value 1) for every device returned by
+// NVMeClass; it returns ErrNoData when NVMeClass reports os.ErrNotExist.
+func (c *nvmeCollector) Update(ch chan<- prometheus.Metric) error {
+	devices, err := c.fs.NVMeClass()
+	if err != nil {
+		if errors.Is(err, os.ErrNotExist) {
+			level.Debug(c.logger).Log("msg", "nvme statistics not found, skipping")
+			return ErrNoData
+		}
+		return fmt.Errorf("error obtaining NVMe class info: %w", err)
+	}
+
+	infoDesc := prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "nvme", "info"),
+		"Non-numeric data from /sys/class/nvme/, value is always 1.",
+		[]string{"device", "firmware_revision", "model", "serial", "state"},
+		nil,
+	)
+	for _, device := range devices {
+		ch <- prometheus.MustNewConstMetric(infoDesc, prometheus.GaugeValue, 1, device.Name, device.FirmwareRevision, device.Model, device.Serial, device.State)
+	}
+	return nil
+}
diff --git a/go.mod b/go.mod
index 70880530..3c40a562 100644
--- a/go.mod
+++ b/go.mod
@@ -16,7 +16,7 @@ require (
 	github.com/prometheus/client_model v0.2.0
 	github.com/prometheus/common v0.26.0
 	github.com/prometheus/exporter-toolkit v0.5.1
-	github.com/prometheus/procfs v0.6.0
+	github.com/prometheus/procfs v0.7.0
 	github.com/safchain/ethtool v0.0.0-20200804214954-8f958a28363a
 	github.com/siebenmann/go-kstat v0.0.0-20200303194639-4e8294f9e9d5
 	github.com/soundcloud/go-runit v0.0.0-20150630195641-06ad41a06c4a
diff --git a/go.sum b/go.sum
index 5045e570..6e4d33a5 100644
--- a/go.sum
+++ b/go.sum
@@ -281,8 +281,8 @@ github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsT
 github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A=
 github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
 github.com/prometheus/procfs v0.2.0/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
-github.com/prometheus/procfs v0.6.0 h1:mxy4L2jP6qMonqmq+aTtOx1ifVWUgG/TAmntgbh3xv4=
-github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=
+github.com/prometheus/procfs v0.7.0 h1:OQZ41sZU9XkRpzrz8/TD0EldH/Rwbddkdu5wDyUwzfE=
+github.com/prometheus/procfs v0.7.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=
 github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
 github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
 github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=