Handle vanishing PIDs (#1043)
PIDs can vanish (exit) from /proc/ between gathering the list of PIDs and getting all of their stats.

* Ignore file-not-found errors.
* Explicitly count the PIDs we find.
* Clean up some error style issues.

Signed-off-by: Ben Kochie <superq@gmail.com>
This commit is contained in:
parent
099c1527f1
commit
fe5a117831
|
@ -14,9 +14,9 @@ The wifi collector is disabled by default due to suspected caching issues and go
|
|||
* [FEATURE] Add socket unit stats to systemd collector #968
|
||||
* [FEATURE] Collect start time for systemd units
|
||||
* [ENHANCEMENT]
|
||||
* [BUGFIX]
|
||||
|
||||
* [BUGFIX] Fix goroutine leak in supervisord collector
|
||||
* [BUGFIX] Handle vanishing PIDs #1043
|
||||
|
||||
## 0.16.0 / 2018-05-15
|
||||
|
||||
|
|
|
@ -17,7 +17,10 @@ package collector
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/common/log"
|
||||
"github.com/prometheus/procfs"
|
||||
)
|
||||
|
||||
|
@ -62,13 +65,13 @@ func NewProcessStatCollector() (Collector, error) {
|
|||
func (t *processCollector) Update(ch chan<- prometheus.Metric) error {
|
||||
pids, states, threads, err := getAllocatedThreads()
|
||||
if err != nil {
|
||||
return fmt.Errorf("Unable to retrieve number of allocated threads %v\n", err)
|
||||
return fmt.Errorf("unable to retrieve number of allocated threads: %q", err)
|
||||
}
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(t.threadAlloc, prometheus.GaugeValue, float64(threads))
|
||||
maxThreads, err := readUintFromFile(procFilePath("sys/kernel/threads-max"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("Unable to retrieve limit number of threads %v\n", err)
|
||||
return fmt.Errorf("unable to retrieve limit number of threads: %q", err)
|
||||
}
|
||||
ch <- prometheus.MustNewConstMetric(t.threadLimit, prometheus.GaugeValue, float64(maxThreads))
|
||||
|
||||
|
@ -78,7 +81,7 @@ func (t *processCollector) Update(ch chan<- prometheus.Metric) error {
|
|||
|
||||
pidM, err := readUintFromFile(procFilePath("sys/kernel/pid_max"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("Unable to retrieve limit number of maximum pids alloved %v\n", err)
|
||||
return fmt.Errorf("unable to retrieve limit number of maximum pids alloved: %q", err)
|
||||
}
|
||||
ch <- prometheus.MustNewConstMetric(t.pidUsed, prometheus.GaugeValue, float64(pids))
|
||||
ch <- prometheus.MustNewConstMetric(t.pidMax, prometheus.GaugeValue, float64(pidM))
|
||||
|
@ -95,15 +98,22 @@ func getAllocatedThreads() (int, map[string]int32, int, error) {
|
|||
if err != nil {
|
||||
return 0, nil, 0, err
|
||||
}
|
||||
pids := 0
|
||||
thread := 0
|
||||
procStates := make(map[string]int32)
|
||||
for _, pid := range p {
|
||||
stat, err := pid.NewStat()
|
||||
// PIDs can vanish between getting the list and getting stats.
|
||||
if os.IsNotExist(err) {
|
||||
log.Debugf("file not found when retrieving stats: %q", err)
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
return 0, nil, 0, err
|
||||
}
|
||||
pids += 1
|
||||
procStates[stat.State] += 1
|
||||
thread += stat.NumThreads
|
||||
}
|
||||
return len(p), procStates, thread, nil
|
||||
return pids, procStates, thread, nil
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue