Don't count empty collection as success (#1613)

Many collectors depend on underlying features to be enabled. This causes
confusion about what "success" means. This changes the behavior of the
`node_scrape_collector_success` metric.

* When a collector is unable to find data, don't return success.
* Catch the no data error and send to Debug log level to avoid log spam.
* Update collectors to support this new functionality.
* Fix copy-paste mistake in the InfiniBand debug message.

Closes: https://github.com/prometheus/node_exporter/issues/1323

Signed-off-by: Ben Kochie <superq@gmail.com>
This commit is contained in:
Ben Kochie 2020-02-19 16:11:29 +01:00 committed by GitHub
parent 1a75bc7b50
commit 3e1b0f1bee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 26 additions and 12 deletions

View File

@ -10,12 +10,14 @@
- `node_md_is_active` is replaced by `node_md_state` with a state set of "active", "inactive", "recovering", "resync".
* Additional label `mountaddr` added to NFS device metrics to distinguish mounts from the same URL, but different IP addresses. #1417
* Metrics node_cpu_scaling_frequency_min_hrts and node_cpu_scaling_frequency_max_hrts of the cpufreq collector were renamed to node_cpu_scaling_frequency_min_hertz and node_cpu_scaling_frequency_max_hertz. #1510
* Collectors that are enabled, but are unable to find data to collect, now return 0 for `node_scrape_collector_success`.
### Changes
* [CHANGE] Add `--collector.netdev.device-whitelist`. #1279
* [CHANGE] Refactor mdadm collector #1403
* [CHANGE] Add `mountaddr` label to NFS metrics. #1417
* [CHANGE] Don't count empty collectors as success. #1613
* [FEATURE] Add new schedstat collector #1389
* [FEATURE] Add uname support for Darwin and OpenBSD #1433
* [FEATURE] Add new metric node_cpu_info #1489

View File

@ -61,7 +61,7 @@ func (c *bondingCollector) Update(ch chan<- prometheus.Metric) error {
if err != nil {
if os.IsNotExist(err) {
level.Debug(c.logger).Log("msg", "Not collecting bonding, file does not exist", "file", statusfile)
return nil
return ErrNoData
}
return err
}

View File

@ -15,6 +15,7 @@
package collector
import (
"errors"
"fmt"
"sync"
"time"
@ -131,7 +132,11 @@ func execute(name string, c Collector, ch chan<- prometheus.Metric, logger log.L
var success float64
if err != nil {
level.Error(logger).Log("msg", "collector failed", "name", name, "duration_seconds", duration.Seconds(), "err", err)
if IsNoDataError(err) {
level.Debug(logger).Log("msg", "collector returned no data", "name", name, "duration_seconds", duration.Seconds(), "err", err)
} else {
level.Error(logger).Log("msg", "collector failed", "name", name, "duration_seconds", duration.Seconds(), "err", err)
}
success = 0
} else {
level.Debug(logger).Log("msg", "collector succeeded", "name", name, "duration_seconds", duration.Seconds())
@ -155,3 +160,10 @@ type typedDesc struct {
// mustNewConstMetric builds a constant metric from the descriptor and value
// type stored on d, using the given sample value and label values. It
// delegates to prometheus.MustNewConstMetric.
func (d *typedDesc) mustNewConstMetric(value float64, labels ...string) prometheus.Metric {
	metric := prometheus.MustNewConstMetric(d.desc, d.valueType, value, labels...)
	return metric
}
// ErrNoData indicates the collector found no data to collect, but had no other error.
var ErrNoData = errors.New("collector returned no data")

// IsNoDataError reports whether err is ErrNoData or wraps it anywhere in its
// error chain. A nil err yields false.
func IsNoDataError(err error) bool {
	// errors.Is follows Unwrap, so a collector that adds context with %w is
	// still recognised; a plain == comparison would miss the wrapped sentinel.
	return errors.Is(err, ErrNoData)
}

View File

@ -188,7 +188,7 @@ func (c *drbdCollector) Update(ch chan<- prometheus.Metric) error {
if err != nil {
if os.IsNotExist(err) {
level.Debug(c.logger).Log("msg", "stats file does not exist, skipping", "file", statsFile, "err", err)
return nil
return ErrNoData
}
return err

View File

@ -426,7 +426,7 @@ func (c *hwMonCollector) Update(ch chan<- prometheus.Metric) error {
if err != nil {
if os.IsNotExist(err) {
level.Debug(c.logger).Log("msg", "hwmon collector metrics are not available for this system")
return nil
return ErrNoData
}
return err

View File

@ -109,8 +109,8 @@ func (c *infinibandCollector) Update(ch chan<- prometheus.Metric) error {
devices, err := c.fs.InfiniBandClass()
if err != nil {
if os.IsNotExist(err) {
level.Debug(c.logger).Log("msg", "IPv4 sockstat statistics not found, skipping")
return nil
level.Debug(c.logger).Log("msg", "infiniband statistics not found, skipping")
return ErrNoData
}
return fmt.Errorf("error obtaining InfiniBand class info: %s", err)
}

View File

@ -115,7 +115,7 @@ func (c *ipvsCollector) Update(ch chan<- prometheus.Metric) error {
// Cannot access ipvs metrics, report no data rather than a hard failure.
if os.IsNotExist(err) {
level.Debug(c.logger).Log("msg", "ipvs collector metrics are not available for this system")
return nil
return ErrNoData
}
return fmt.Errorf("could not get IPVS stats: %s", err)
}

View File

@ -105,7 +105,7 @@ func (c *mdadmCollector) Update(ch chan<- prometheus.Metric) error {
if err != nil {
if os.IsNotExist(err) {
level.Debug(c.logger).Log("msg", "Not collecting mdstat, file does not exist", "file", *procPath)
return nil
return ErrNoData
}
return fmt.Errorf("error parsing mdstatus: %s", err)

View File

@ -97,7 +97,7 @@ func (c *nfsCollector) Update(ch chan<- prometheus.Metric) error {
if err != nil {
if os.IsNotExist(err) {
level.Debug(c.logger).Log("msg", "Not collecting NFS metrics", "err", err)
return nil
return ErrNoData
}
return fmt.Errorf("failed to retrieve nfs stats: %w", err)
}

View File

@ -63,7 +63,7 @@ func (c *nfsdCollector) Update(ch chan<- prometheus.Metric) error {
if err != nil {
if os.IsNotExist(err) {
level.Debug(c.logger).Log("msg", "Not collecting NFSd metrics", "err", err)
return nil
return ErrNoData
}
return fmt.Errorf("failed to retrieve nfsd stats: %w", err)
}

View File

@ -167,11 +167,11 @@ func (c *wifiCollector) Update(ch chan<- prometheus.Metric) error {
// Cannot access wifi metrics, report no data rather than a hard failure.
if os.IsNotExist(err) {
level.Debug(c.logger).Log("msg", "wifi collector metrics are not available for this system")
return nil
return ErrNoData
}
if os.IsPermission(err) {
level.Debug(c.logger).Log("msg", "wifi collector got permission denied when accessing metrics")
return nil
return ErrNoData
}
return fmt.Errorf("failed to access wifi data: %w", err)