Don't count empty collection as success (#1613)
Many collectors depend on underlying features to be enabled. This causes confusion about what "success" means. This changes the behavior of the `node_scrape_collector_success` metric. * When a collector is unable to find data don't return success. * Catch the no data error and send to Debug log level to avoid log spam. * Update collectors to support this new functionality. * Fix copy-pasta mistake in infiband debug message. Closes: https://github.com/prometheus/node_exporter/issues/1323 Signed-off-by: Ben Kochie <superq@gmail.com>
This commit is contained in:
parent
1a75bc7b50
commit
3e1b0f1bee
|
@ -10,12 +10,14 @@
|
|||
- `node_md_is_active` is replaced by `node_md_state` with a state set of "active", "inactive", "recovering", "resync".
|
||||
* Additional label `mountaddr` added to NFS device metrics to distinguish mounts from the same URL, but different IP addresses. #1417
|
||||
* Metrics node_cpu_scaling_frequency_min_hrts and node_cpu_scaling_frequency_max_hrts of the cpufreq collector were renamed to node_cpu_scaling_frequency_min_hertz and node_cpu_scaling_frequency_max_hertz. #1510
|
||||
* Collectors that are enabled, but are unable to find data to collect, now return 0 for `node_scrape_collector_success`.
|
||||
|
||||
### Changes
|
||||
|
||||
* [CHANGE] Add `--collector.netdev.device-whitelist`. #1279
|
||||
* [CHANGE] Refactor mdadm collector #1403
|
||||
* [CHANGE] Add `mountaddr` label to NFS metrics. #1417
|
||||
* [CHANGE] Don't count empty collectors as success. #...
|
||||
* [FEATURE] Add new schedstat collector #1389
|
||||
* [FEATURE] Add uname support for Darwin and OpenBSD #1433
|
||||
* [FEATURE] Add new metric node_cpu_info #1489
|
||||
|
|
|
@ -61,7 +61,7 @@ func (c *bondingCollector) Update(ch chan<- prometheus.Metric) error {
|
|||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
level.Debug(c.logger).Log("msg", "Not collecting bonding, file does not exist", "file", statusfile)
|
||||
return nil
|
||||
return ErrNoData
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
@ -131,7 +132,11 @@ func execute(name string, c Collector, ch chan<- prometheus.Metric, logger log.L
|
|||
var success float64
|
||||
|
||||
if err != nil {
|
||||
level.Error(logger).Log("msg", "collector failed", "name", name, "duration_seconds", duration.Seconds(), "err", err)
|
||||
if IsNoDataError(err) {
|
||||
level.Debug(logger).Log("msg", "collector returned no data", "name", name, "duration_seconds", duration.Seconds(), "err", err)
|
||||
} else {
|
||||
level.Error(logger).Log("msg", "collector failed", "name", name, "duration_seconds", duration.Seconds(), "err", err)
|
||||
}
|
||||
success = 0
|
||||
} else {
|
||||
level.Debug(logger).Log("msg", "collector succeeded", "name", name, "duration_seconds", duration.Seconds())
|
||||
|
@ -155,3 +160,10 @@ type typedDesc struct {
|
|||
func (d *typedDesc) mustNewConstMetric(value float64, labels ...string) prometheus.Metric {
|
||||
return prometheus.MustNewConstMetric(d.desc, d.valueType, value, labels...)
|
||||
}
|
||||
|
||||
// ErrNoData indicates the collector found no data to collect, but had no other error.
|
||||
var ErrNoData = errors.New("collector returned no data")
|
||||
|
||||
func IsNoDataError(err error) bool {
|
||||
return err == ErrNoData
|
||||
}
|
||||
|
|
|
@ -188,7 +188,7 @@ func (c *drbdCollector) Update(ch chan<- prometheus.Metric) error {
|
|||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
level.Debug(c.logger).Log("msg", "stats file does not exist, skipping", "file", statsFile, "err", err)
|
||||
return nil
|
||||
return ErrNoData
|
||||
}
|
||||
|
||||
return err
|
||||
|
|
|
@ -426,7 +426,7 @@ func (c *hwMonCollector) Update(ch chan<- prometheus.Metric) error {
|
|||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
level.Debug(c.logger).Log("msg", "hwmon collector metrics are not available for this system")
|
||||
return nil
|
||||
return ErrNoData
|
||||
}
|
||||
|
||||
return err
|
||||
|
|
|
@ -109,8 +109,8 @@ func (c *infinibandCollector) Update(ch chan<- prometheus.Metric) error {
|
|||
devices, err := c.fs.InfiniBandClass()
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
level.Debug(c.logger).Log("msg", "IPv4 sockstat statistics not found, skipping")
|
||||
return nil
|
||||
level.Debug(c.logger).Log("msg", "infiniband statistics not found, skipping")
|
||||
return ErrNoData
|
||||
}
|
||||
return fmt.Errorf("error obtaining InfiniBand class info: %s", err)
|
||||
}
|
||||
|
|
|
@ -115,7 +115,7 @@ func (c *ipvsCollector) Update(ch chan<- prometheus.Metric) error {
|
|||
// Cannot access ipvs metrics, report no error.
|
||||
if os.IsNotExist(err) {
|
||||
level.Debug(c.logger).Log("msg", "ipvs collector metrics are not available for this system")
|
||||
return nil
|
||||
return ErrNoData
|
||||
}
|
||||
return fmt.Errorf("could not get IPVS stats: %s", err)
|
||||
}
|
||||
|
|
|
@ -105,7 +105,7 @@ func (c *mdadmCollector) Update(ch chan<- prometheus.Metric) error {
|
|||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
level.Debug(c.logger).Log("msg", "Not collecting mdstat, file does not exist", "file", *procPath)
|
||||
return nil
|
||||
return ErrNoData
|
||||
}
|
||||
|
||||
return fmt.Errorf("error parsing mdstatus: %s", err)
|
||||
|
|
|
@ -97,7 +97,7 @@ func (c *nfsCollector) Update(ch chan<- prometheus.Metric) error {
|
|||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
level.Debug(c.logger).Log("msg", "Not collecting NFS metrics", "err", err)
|
||||
return nil
|
||||
return ErrNoData
|
||||
}
|
||||
return fmt.Errorf("failed to retrieve nfs stats: %w", err)
|
||||
}
|
||||
|
|
|
@ -63,7 +63,7 @@ func (c *nfsdCollector) Update(ch chan<- prometheus.Metric) error {
|
|||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
level.Debug(c.logger).Log("msg", "Not collecting NFSd metrics", "err", err)
|
||||
return nil
|
||||
return ErrNoData
|
||||
}
|
||||
return fmt.Errorf("failed to retrieve nfsd stats: %w", err)
|
||||
}
|
||||
|
|
|
@ -167,11 +167,11 @@ func (c *wifiCollector) Update(ch chan<- prometheus.Metric) error {
|
|||
// Cannot access wifi metrics, report no error.
|
||||
if os.IsNotExist(err) {
|
||||
level.Debug(c.logger).Log("msg", "wifi collector metrics are not available for this system")
|
||||
return nil
|
||||
return ErrNoData
|
||||
}
|
||||
if os.IsPermission(err) {
|
||||
level.Debug(c.logger).Log("msg", "wifi collector got permission denied when accessing metrics")
|
||||
return nil
|
||||
return ErrNoData
|
||||
}
|
||||
|
||||
return fmt.Errorf("failed to access wifi data: %w", err)
|
||||
|
|
Loading…
Reference in New Issue