Add nvme collector

Add a collector for NVMe devices that exposes their firmware revisions.
This requires procfs >= 0.7.0.

Fixes #1891
Signed-off-by: Benjamin Drung <benjamin.drung@ionos.com>
This commit is contained in:
Benjamin Drung 2021-07-06 10:20:47 +02:00
parent 13be860e25
commit b23146db3f
7 changed files with 112 additions and 3 deletions

View File

@ -113,6 +113,7 @@ netdev | Exposes network interface statistics such as bytes transferred. | Darwi
netstat | Exposes network statistics from `/proc/net/netstat`. This is the same information as `netstat -s`. | Linux
nfs | Exposes NFS client statistics from `/proc/net/rpc/nfs`. This is the same information as `nfsstat -c`. | Linux
nfsd | Exposes NFS kernel server statistics from `/proc/net/rpc/nfsd`. This is the same information as `nfsstat -s`. | Linux
nvme | Exposes NVMe info from `/sys/class/nvme/` | Linux
powersupplyclass | Exposes Power Supply statistics from `/sys/class/power_supply` | Linux
pressure | Exposes pressure stall statistics from `/proc/pressure/`. | Linux (kernel 4.20+ and/or [CONFIG\_PSI](https://www.kernel.org/doc/html/latest/accounting/psi.html))
rapl | Exposes various statistics from `/sys/class/powercap`. | Linux

View File

@ -2439,6 +2439,9 @@ node_nfsd_server_rpcs_total 18628
# HELP node_nfsd_server_threads Total number of NFSd kernel threads that are running.
# TYPE node_nfsd_server_threads gauge
node_nfsd_server_threads 8
# HELP node_nvme_info Non-numeric data from /sys/class/nvme/<device>, value is always 1.
# TYPE node_nvme_info gauge
node_nvme_info{device="nvme0",firmware_revision="1B2QEXP7",model="Samsung SSD 970 PRO 512GB",serial="S680HF8N190894I",state="live"} 1
# HELP node_power_supply_capacity capacity value of /sys/class/power_supply/<power_supply>.
# TYPE node_power_supply_capacity gauge
node_power_supply_capacity{power_supply="BAT0"} 81
@ -2586,6 +2589,7 @@ node_scrape_collector_success{collector="netdev"} 1
node_scrape_collector_success{collector="netstat"} 1
node_scrape_collector_success{collector="nfs"} 1
node_scrape_collector_success{collector="nfsd"} 1
node_scrape_collector_success{collector="nvme"} 1
node_scrape_collector_success{collector="powersupplyclass"} 1
node_scrape_collector_success{collector="pressure"} 1
node_scrape_collector_success{collector="processes"} 1

View File

@ -2637,6 +2637,9 @@ node_nfsd_server_rpcs_total 18628
# HELP node_nfsd_server_threads Total number of NFSd kernel threads that are running.
# TYPE node_nfsd_server_threads gauge
node_nfsd_server_threads 8
# HELP node_nvme_info Non-numeric data from /sys/class/nvme/<device>, value is always 1.
# TYPE node_nvme_info gauge
node_nvme_info{device="nvme0",firmware_revision="1B2QEXP7",model="Samsung SSD 970 PRO 512GB",serial="S680HF8N190894I",state="live"} 1
# HELP node_power_supply_capacity capacity value of /sys/class/power_supply/<power_supply>.
# TYPE node_power_supply_capacity gauge
node_power_supply_capacity{power_supply="BAT0"} 81
@ -2787,6 +2790,7 @@ node_scrape_collector_success{collector="netdev"} 1
node_scrape_collector_success{collector="netstat"} 1
node_scrape_collector_success{collector="nfs"} 1
node_scrape_collector_success{collector="nfsd"} 1
node_scrape_collector_success{collector="nvme"} 1
node_scrape_collector_success{collector="powersupplyclass"} 1
node_scrape_collector_success{collector="pressure"} 1
node_scrape_collector_success{collector="processes"} 1

View File

@ -1124,6 +1124,32 @@ Lines: 1
1
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/class/nvme
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/class/nvme/nvme0
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/class/nvme/nvme0/firmware_rev
Lines: 1
1B2QEXP7
Mode: 444
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/class/nvme/nvme0/model
Lines: 1
Samsung SSD 970 PRO 512GB
Mode: 444
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/class/nvme/nvme0/serial
Lines: 1
S680HF8N190894I
Mode: 444
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/class/nvme/nvme0/state
Lines: 1
live
Mode: 444
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/class/power_supply
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

74
collector/nvme_linux.go Normal file
View File

@ -0,0 +1,74 @@
// Copyright 2021 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// +build linux
// +build !nonvme
package collector
import (
"errors"
"fmt"
"os"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/procfs/sysfs"
)
// nvmeCollector is the collector for NVMe devices; it reads device
// information through sysfs.
type nvmeCollector struct {
	// fs is the sysfs handle used to enumerate NVMe class devices.
	fs sysfs.FS
	// logger receives the collector's debug messages.
	logger log.Logger
}
// init registers the NVMe collector under the name "nvme" with the
// defaultEnabled setting, using NewNVMeCollector as its constructor.
func init() {
	const collectorName = "nvme"
	registerCollector(collectorName, defaultEnabled, NewNVMeCollector)
}
// NewNVMeCollector builds the NVMe Collector on top of the sysfs tree located
// at *sysPath. It returns a wrapped error when the sysfs handle cannot be
// opened.
func NewNVMeCollector(logger log.Logger) (Collector, error) {
	sysFS, err := sysfs.NewFS(*sysPath)
	if err != nil {
		return nil, fmt.Errorf("failed to open sysfs: %w", err)
	}

	c := &nvmeCollector{
		logger: logger,
		fs:     sysFS,
	}
	return c, nil
}
// Update reads the NVMe class devices from sysfs and sends one info metric per
// device on ch. Each metric is a gauge with the constant value 1 whose labels
// carry the device name, firmware revision, model, serial and state.
//
// It returns ErrNoData when the NVMe class information does not exist
// (os.ErrNotExist), and a wrapped error for any other failure to read it.
func (c *nvmeCollector) Update(ch chan<- prometheus.Metric) error {
	devices, err := c.fs.NVMeClass()
	if err != nil {
		if errors.Is(err, os.ErrNotExist) {
			level.Debug(c.logger).Log("msg", "nvme statistics not found, skipping")
			return ErrNoData
		}
		return fmt.Errorf("error obtaining NVMe class info: %w", err)
	}

	// The descriptor is the same for every device, so build it once instead
	// of re-creating it on each loop iteration.
	infoDesc := prometheus.NewDesc(
		prometheus.BuildFQName(namespace, "nvme", "info"),
		"Non-numeric data from /sys/class/nvme/<device>, value is always 1.",
		[]string{"device", "firmware_revision", "model", "serial", "state"},
		nil,
	)

	for _, device := range devices {
		// Label values must follow the order of the label names in infoDesc.
		ch <- prometheus.MustNewConstMetric(
			infoDesc,
			prometheus.GaugeValue,
			1,
			device.Name,
			device.FirmwareRevision,
			device.Model,
			device.Serial,
			device.State,
		)
	}

	return nil
}

2
go.mod
View File

@ -16,7 +16,7 @@ require (
github.com/prometheus/client_model v0.2.0
github.com/prometheus/common v0.26.0
github.com/prometheus/exporter-toolkit v0.5.1
github.com/prometheus/procfs v0.6.0
github.com/prometheus/procfs v0.7.0
github.com/safchain/ethtool v0.0.0-20200804214954-8f958a28363a
github.com/siebenmann/go-kstat v0.0.0-20200303194639-4e8294f9e9d5
github.com/soundcloud/go-runit v0.0.0-20150630195641-06ad41a06c4a

4
go.sum
View File

@ -281,8 +281,8 @@ github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsT
github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A=
github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
github.com/prometheus/procfs v0.2.0/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
github.com/prometheus/procfs v0.6.0 h1:mxy4L2jP6qMonqmq+aTtOx1ifVWUgG/TAmntgbh3xv4=
github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=
github.com/prometheus/procfs v0.7.0 h1:OQZ41sZU9XkRpzrz8/TD0EldH/Rwbddkdu5wDyUwzfE=
github.com/prometheus/procfs v0.7.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=
github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=