From 182810056fd3db63441975db385fcd387e1d1ddd Mon Sep 17 00:00:00 2001 From: Ben Kochie Date: Tue, 20 Jun 2017 07:51:26 +0200 Subject: [PATCH] Fix Linux cpu errors (#606) Make the Linux cpu collector soft-error on missing `cpufreq` and `thermal_throttle` features. --- collector/cpu_linux.go | 46 +++++++++++-------- collector/fixtures/e2e-output.txt | 5 ++ .../cpu2/thermal_throttle/core_throttle_count | 1 + .../thermal_throttle/package_throttle_count | 1 + .../cpu/devices/cpu3/cpufreq/scaling_cur_freq | 1 + .../cpu/devices/cpu3/cpufreq/scaling_max_freq | 1 + .../cpu/devices/cpu3/cpufreq/scaling_min_freq | 1 + 7 files changed, 38 insertions(+), 18 deletions(-) create mode 100644 collector/fixtures/sys/bus/cpu/devices/cpu2/thermal_throttle/core_throttle_count create mode 100644 collector/fixtures/sys/bus/cpu/devices/cpu2/thermal_throttle/package_throttle_count create mode 100644 collector/fixtures/sys/bus/cpu/devices/cpu3/cpufreq/scaling_cur_freq create mode 100644 collector/fixtures/sys/bus/cpu/devices/cpu3/cpufreq/scaling_max_freq create mode 100644 collector/fixtures/sys/bus/cpu/devices/cpu3/cpufreq/scaling_min_freq diff --git a/collector/cpu_linux.go b/collector/cpu_linux.go index abe12e72..39db3bb1 100644 --- a/collector/cpu_linux.go +++ b/collector/cpu_linux.go @@ -17,9 +17,11 @@ package collector import ( "fmt" + "os" "path/filepath" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/log" "github.com/prometheus/procfs" ) @@ -99,30 +101,38 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error { for _, cpu := range cpus { _, cpuname := filepath.Split(cpu) - if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq/scaling_cur_freq")); err != nil { - return err - } - ch <- prometheus.MustNewConstMetric(c.cpuFreq, prometheus.GaugeValue, float64(value), cpuname) + if _, err := os.Stat(filepath.Join(cpu, "cpufreq")); os.IsNotExist(err) { + log.Debugf("CPU %q is missing cpufreq", cpu) + } else { + if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq/scaling_cur_freq")); err != nil { + return err + } + ch <- prometheus.MustNewConstMetric(c.cpuFreq, prometheus.GaugeValue, float64(value), cpuname) - if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq/scaling_min_freq")); err != nil { - return err - } - ch <- prometheus.MustNewConstMetric(c.cpuFreqMin, prometheus.GaugeValue, float64(value), cpuname) + if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq/scaling_min_freq")); err != nil { + return err + } + ch <- prometheus.MustNewConstMetric(c.cpuFreqMin, prometheus.GaugeValue, float64(value), cpuname) - if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq/scaling_max_freq")); err != nil { - return err + if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq/scaling_max_freq")); err != nil { + return err + } + ch <- prometheus.MustNewConstMetric(c.cpuFreqMax, prometheus.GaugeValue, float64(value), cpuname) } - ch <- prometheus.MustNewConstMetric(c.cpuFreqMax, prometheus.GaugeValue, float64(value), cpuname) - if value, err = readUintFromFile(filepath.Join(cpu, "thermal_throttle/core_throttle_count")); err != nil { - return err - } - ch <- prometheus.MustNewConstMetric(c.cpuCoreThrottle, prometheus.CounterValue, float64(value), cpuname) + if _, err := os.Stat(filepath.Join(cpu, "thermal_throttle")); os.IsNotExist(err) { + log.Debugf("CPU %q is missing thermal_throttle", cpu) + } else { + if value, err = readUintFromFile(filepath.Join(cpu, "thermal_throttle/core_throttle_count")); err != nil { + return err + } + ch <- prometheus.MustNewConstMetric(c.cpuCoreThrottle, prometheus.CounterValue, float64(value), cpuname) - if value, err = readUintFromFile(filepath.Join(cpu, "thermal_throttle/package_throttle_count")); err != nil { - return err + if value, err = readUintFromFile(filepath.Join(cpu, "thermal_throttle/package_throttle_count")); err != nil { + return err + } + ch <- prometheus.MustNewConstMetric(c.cpuPackageThrottle, prometheus.CounterValue, float64(value), cpuname) } - ch <- prometheus.MustNewConstMetric(c.cpuPackageThrottle, prometheus.CounterValue, float64(value), cpuname) } return nil diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 7d81c14e..c8211903 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -212,22 +212,27 @@ node_cpu{cpu="cpu7",mode="user"} 290.98 # TYPE node_cpu_core_throttles_total counter node_cpu_core_throttles_total{cpu="cpu0"} 5 node_cpu_core_throttles_total{cpu="cpu1"} 0 +node_cpu_core_throttles_total{cpu="cpu2"} 40 # HELP node_cpu_frequency_hertz Current cpu thread frequency in hertz. # TYPE node_cpu_frequency_hertz gauge node_cpu_frequency_hertz{cpu="cpu0"} 1.699981e+06 node_cpu_frequency_hertz{cpu="cpu1"} 1.699981e+06 +node_cpu_frequency_hertz{cpu="cpu3"} 8000 # HELP node_cpu_frequency_max_hertz Maximum cpu thread frequency in hertz. # TYPE node_cpu_frequency_max_hertz gauge node_cpu_frequency_max_hertz{cpu="cpu0"} 3.7e+06 node_cpu_frequency_max_hertz{cpu="cpu1"} 3.7e+06 +node_cpu_frequency_max_hertz{cpu="cpu3"} 4.2e+06 # HELP node_cpu_frequency_min_hertz Minimum cpu thread frequency in hertz. # TYPE node_cpu_frequency_min_hertz gauge node_cpu_frequency_min_hertz{cpu="cpu0"} 800000 node_cpu_frequency_min_hertz{cpu="cpu1"} 800000 +node_cpu_frequency_min_hertz{cpu="cpu3"} 1000 # HELP node_cpu_package_throttles_total Number of times this cpu package has been throttled. # TYPE node_cpu_package_throttles_total counter node_cpu_package_throttles_total{cpu="cpu0"} 30 node_cpu_package_throttles_total{cpu="cpu1"} 30 +node_cpu_package_throttles_total{cpu="cpu2"} 6 # HELP node_disk_bytes_read The total number of bytes read successfully. # TYPE node_disk_bytes_read counter node_disk_bytes_read{device="dm-0"} 5.13708655616e+11 diff --git a/collector/fixtures/sys/bus/cpu/devices/cpu2/thermal_throttle/core_throttle_count b/collector/fixtures/sys/bus/cpu/devices/cpu2/thermal_throttle/core_throttle_count new file mode 100644 index 00000000..425151f3 --- /dev/null +++ b/collector/fixtures/sys/bus/cpu/devices/cpu2/thermal_throttle/core_throttle_count @@ -0,0 +1 @@ +40 diff --git a/collector/fixtures/sys/bus/cpu/devices/cpu2/thermal_throttle/package_throttle_count b/collector/fixtures/sys/bus/cpu/devices/cpu2/thermal_throttle/package_throttle_count new file mode 100644 index 00000000..1e8b3149 --- /dev/null +++ b/collector/fixtures/sys/bus/cpu/devices/cpu2/thermal_throttle/package_throttle_count @@ -0,0 +1 @@ +6 diff --git a/collector/fixtures/sys/bus/cpu/devices/cpu3/cpufreq/scaling_cur_freq b/collector/fixtures/sys/bus/cpu/devices/cpu3/cpufreq/scaling_cur_freq new file mode 100644 index 00000000..e002b362 --- /dev/null +++ b/collector/fixtures/sys/bus/cpu/devices/cpu3/cpufreq/scaling_cur_freq @@ -0,0 +1 @@ +8000 diff --git a/collector/fixtures/sys/bus/cpu/devices/cpu3/cpufreq/scaling_max_freq b/collector/fixtures/sys/bus/cpu/devices/cpu3/cpufreq/scaling_max_freq new file mode 100644 index 00000000..745c6380 --- /dev/null +++ b/collector/fixtures/sys/bus/cpu/devices/cpu3/cpufreq/scaling_max_freq @@ -0,0 +1 @@ +4200000 diff --git a/collector/fixtures/sys/bus/cpu/devices/cpu3/cpufreq/scaling_min_freq b/collector/fixtures/sys/bus/cpu/devices/cpu3/cpufreq/scaling_min_freq new file mode 100644 index 00000000..83b33d23 --- /dev/null +++ b/collector/fixtures/sys/bus/cpu/devices/cpu3/cpufreq/scaling_min_freq @@ -0,0 +1 @@ +1000