From d290ea94b813c68fa9778adde4dae5d8dd866ba2 Mon Sep 17 00:00:00 2001 From: Tobias Schmidt Date: Wed, 22 Mar 2017 21:48:18 -0300 Subject: [PATCH] Fix export of stale device error metrics for unmounted filesystems Instead of maintaining a counter metric for device errors in memory, this change exports a gauge and uses const metrics to avoid leaking metrics for unmounted filesystems. --- collector/filesystem_common.go | 36 ++++++++++++++++++++++------------ collector/filesystem_linux.go | 17 ++++++++++------ 2 files changed, 34 insertions(+), 19 deletions(-) diff --git a/collector/filesystem_common.go b/collector/filesystem_common.go index 3ce12725..a5554fa7 100644 --- a/collector/filesystem_common.go +++ b/collector/filesystem_common.go @@ -44,11 +44,11 @@ var ( ) type filesystemCollector struct { - ignoredMountPointsPattern *regexp.Regexp - ignoredFSTypesPattern *regexp.Regexp - sizeDesc, freeDesc, availDesc, - filesDesc, filesFreeDesc, roDesc *prometheus.Desc - devErrors *prometheus.CounterVec + ignoredMountPointsPattern *regexp.Regexp + ignoredFSTypesPattern *regexp.Regexp + sizeDesc, freeDesc, availDesc *prometheus.Desc + filesDesc, filesFreeDesc *prometheus.Desc + roDesc, deviceErrorDesc *prometheus.Desc } type filesystemLabels struct { @@ -56,8 +56,10 @@ type filesystemLabels struct { } type filesystemStats struct { - labels filesystemLabels - size, free, avail, files, filesFree, ro float64 + labels filesystemLabels + size, free, avail float64 + files, filesFree float64 + ro, deviceError float64 } func init() { @@ -106,10 +108,11 @@ func NewFilesystemCollector() (Collector, error) { filesystemLabelNames, nil, ) - devErrors := prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: prometheus.BuildFQName(Namespace, subsystem, "device_errors_total"), - Help: "Total number of errors occurred when getting stats for device", - }, filesystemLabelNames) + deviceErrorDesc := prometheus.NewDesc( + prometheus.BuildFQName(Namespace, subsystem, "device_error"), + "Whether an error occured while getting statistics for the given device.", + filesystemLabelNames, nil, + ) return &filesystemCollector{ ignoredMountPointsPattern: mountPointPattern, @@ -120,7 +123,7 @@ func NewFilesystemCollector() (Collector, error) { filesDesc: filesDesc, filesFreeDesc: filesFreeDesc, roDesc: roDesc, - devErrors: devErrors, + deviceErrorDesc: deviceErrorDesc, }, nil } @@ -137,6 +140,14 @@ func (c *filesystemCollector) Update(ch chan<- prometheus.Metric) error { } seen[s.labels] = true + ch <- prometheus.MustNewConstMetric( + c.deviceErrorDesc, prometheus.GaugeValue, + s.deviceError, s.labels.device, s.labels.mountPoint, s.labels.fsType, + ) + if s.deviceError > 0 { + continue + } + ch <- prometheus.MustNewConstMetric( c.sizeDesc, prometheus.GaugeValue, s.size, s.labels.device, s.labels.mountPoint, s.labels.fsType, @@ -162,6 +173,5 @@ func (c *filesystemCollector) Update(ch chan<- prometheus.Metric) error { s.ro, s.labels.device, s.labels.mountPoint, s.labels.fsType, ) } - c.devErrors.Collect(ch) return nil } diff --git a/collector/filesystem_linux.go b/collector/filesystem_linux.go index 7d875016..d9a44320 100644 --- a/collector/filesystem_linux.go +++ b/collector/filesystem_linux.go @@ -46,13 +46,15 @@ func (c *filesystemCollector) GetStats() ([]filesystemStats, error) { log.Debugf("Ignoring fs type: %s", labels.fsType) continue } - labelValues := []string{labels.device, labels.mountPoint, labels.fsType} + buf := new(syscall.Statfs_t) err := syscall.Statfs(labels.mountPoint, buf) if err != nil { - c.devErrors.WithLabelValues(labelValues...).Inc() - log.Debugf("Statfs on %s returned %s", - labels.mountPoint, err) + stats = append(stats, filesystemStats{ + labels: labels, + deviceError: 1, + }) + log.Errorf("Error on statfs() system call for %q: %s", labels.mountPoint, err) continue } @@ -82,11 +84,14 @@ func mountPointDetails() ([]filesystemLabels, error) { defer file.Close() filesystems := []filesystemLabels{} - scanner := bufio.NewScanner(file) for scanner.Scan() { parts := strings.Fields(scanner.Text()) - filesystems = append(filesystems, filesystemLabels{parts[0], parts[1], parts[2]}) + filesystems = append(filesystems, filesystemLabels{ + device: parts[0], + mountPoint: parts[1], + fsType: parts[2], + }) } return filesystems, scanner.Err() }