Move stat_linux to cpu_linux and add cpufreq stats (#548)

This commit is contained in:
Rene Treffer 2017-06-13 11:21:53 +02:00 committed by Johannes 'fish' Ziemke
parent 798950d25b
commit 2e9f1913b8
15 changed files with 210 additions and 81 deletions

View File

@ -23,7 +23,7 @@ Name | Description | OS
---------|-------------|----
arp | Exposes ARP statistics from `/proc/net/arp`. | Linux
conntrack | Shows conntrack statistics (does nothing if no `/proc/sys/net/netfilter/` present). | Linux
cpu | Exposes CPU statistics | Darwin, Dragonfly, FreeBSD
cpu | Exposes CPU statistics. | Darwin, Dragonfly, FreeBSD, Linux
diskstats | Exposes disk I/O statistics from `/proc/diskstats`. | Linux
edac | Exposes error detection and correction statistics. | Linux
entropy | Exposes available entropy. | Linux
@ -38,7 +38,7 @@ meminfo | Exposes memory statistics. | Darwin, Dragonfly, FreeBSD, Linux
netdev | Exposes network interface statistics such as bytes transferred. | Darwin, Dragonfly, FreeBSD, Linux, OpenBSD
netstat | Exposes network statistics from `/proc/net/netstat`. This is the same information as `netstat -s`. | Linux
sockstat | Exposes various statistics from `/proc/net/sockstat`. | Linux
stat | Exposes various statistics from `/proc/stat`. This includes CPU usage, boot time, forks and interrupts. | Linux
stat | Exposes various statistics from `/proc/stat`. This includes boot time, forks and interrupts. | Linux
textfile | Exposes statistics read from local disk. The `--collector.textfile.directory` flag must be set. | _any_
time | Exposes the current system time. | _any_
uname | Exposes system information as provided by the uname system call. | Linux

157
collector/cpu_linux.go Normal file
View File

@ -0,0 +1,157 @@
// Copyright 2015 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// +build !nocpu
package collector
import (
"fmt"
"path/filepath"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/procfs"
)
// cpuCollectorNamespace is the name component handed to
// prometheus.BuildFQName when building the cpu collector's metric names.
const cpuCollectorNamespace = "cpu"
// cpuCollector bundles the metric descriptors emitted by the cpu collector.
type cpuCollector struct {
	// cpu describes the per-cpu, per-mode time counter ("cpu" and "mode" labels).
	cpu *prometheus.Desc
	// Current, minimum and maximum frequency descriptors ("cpu" label).
	cpuFreq, cpuFreqMin, cpuFreqMax *prometheus.Desc
	// Core and package throttle counter descriptors ("cpu" label).
	cpuCoreThrottle, cpuPackageThrottle *prometheus.Desc
}
// init registers NewCPUCollector in Factories under the collector name "cpu".
func init() {
Factories["cpu"] = NewCPUCollector
}
// NewCPUCollector returns a new Collector exposing per-cpu mode times,
// frequencies and thermal throttle counts. It always returns a nil error.
func NewCPUCollector() (Collector, error) {
	// perCPUDesc builds a descriptor in the cpu subsystem that carries only
	// the "cpu" label; each call gets its own label slice.
	perCPUDesc := func(name, help string) *prometheus.Desc {
		fqName := prometheus.BuildFQName(Namespace, cpuCollectorNamespace, name)
		return prometheus.NewDesc(fqName, help, []string{"cpu"}, nil)
	}

	collector := new(cpuCollector)

	// The mode counter is the odd one out: it has no subsystem segment and
	// carries an additional "mode" label.
	collector.cpu = prometheus.NewDesc(
		prometheus.BuildFQName(Namespace, "", cpuCollectorNamespace),
		"Seconds the cpus spent in each mode.",
		[]string{"cpu", "mode"}, nil,
	)
	collector.cpuFreq = perCPUDesc("frequency_hertz", "Current cpu thread frequency in hertz.")
	collector.cpuFreqMin = perCPUDesc("frequency_min_hertz", "Minimum cpu thread frequency in hertz.")
	collector.cpuFreqMax = perCPUDesc("frequency_max_hertz", "Maximum cpu thread frequency in hertz.")
	collector.cpuCoreThrottle = perCPUDesc("core_throttles_total", "Number of times this cpu core has been throttled.")
	collector.cpuPackageThrottle = perCPUDesc("package_throttles_total", "Number of times this cpu package has been throttled.")

	return collector, nil
}
// Update implements Collector. It first emits the /proc/stat derived cpu
// metrics and then the per-cpu metrics read from /sys; the first failing step
// ends the update and its error is returned.
func (c *cpuCollector) Update(ch chan<- prometheus.Metric) error {
	statErr := c.updateStat(ch)
	if statErr != nil {
		return statErr
	}
	return c.updateCPUfreq(ch)
}
// updateCPUfreq reads /sys/bus/cpu/devices/cpu* and exposes cpu frequency and
// thermal throttle statistics, labelled with the cpu directory name.
//
// The cpufreq scaling_*_freq files report kilohertz, whereas the metrics are
// named and described in hertz, so frequency readings are scaled by 1000
// before being emitted. Throttle counts are emitted unchanged.
//
// The first file that cannot be read aborts the update and its error is
// returned; metrics already sent on ch are not withdrawn.
func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
	// Conversion factor from the kHz values in sysfs to the exported Hz.
	const hertzPerKilohertz = 1000.0

	cpus, err := filepath.Glob(sysFilePath("bus/cpu/devices/cpu[0-9]*"))
	if err != nil {
		return err
	}

	// One entry per exported metric, in emission order. scale converts the
	// raw file value into the unit promised by the metric name.
	sources := []struct {
		file      string
		desc      *prometheus.Desc
		valueType prometheus.ValueType
		scale     float64
	}{
		{"cpufreq/scaling_cur_freq", c.cpuFreq, prometheus.GaugeValue, hertzPerKilohertz},
		{"cpufreq/scaling_min_freq", c.cpuFreqMin, prometheus.GaugeValue, hertzPerKilohertz},
		{"cpufreq/scaling_max_freq", c.cpuFreqMax, prometheus.GaugeValue, hertzPerKilohertz},
		{"thermal_throttle/core_throttle_count", c.cpuCoreThrottle, prometheus.CounterValue, 1},
		{"thermal_throttle/package_throttle_count", c.cpuPackageThrottle, prometheus.CounterValue, 1},
	}

	for _, cpu := range cpus {
		// The last path element (e.g. "cpu0") becomes the "cpu" label value.
		_, cpuname := filepath.Split(cpu)

		for _, src := range sources {
			value, err := readUintFromFile(filepath.Join(cpu, src.file))
			if err != nil {
				return err
			}
			ch <- prometheus.MustNewConstMetric(src.desc, src.valueType, float64(value)*src.scale, cpuname)
		}
	}
	return nil
}
// updateStat loads /proc/stat via the procfs package and emits one counter
// per cpu and mode. The cpu label is "cpu" followed by the index of the entry
// in the parsed per-cpu list.
func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error {
	procFS, err := procfs.NewFS(*procPath)
	if err != nil {
		return fmt.Errorf("failed to open procfs: %v", err)
	}

	snapshot, err := procFS.NewStat()
	if err != nil {
		return err
	}

	for idx, times := range snapshot.CPU {
		label := fmt.Sprintf("cpu%d", idx)

		// Mode label paired with its value, listed in emission order.
		perMode := []struct {
			mode  string
			value float64
		}{
			{"user", times.User},
			{"nice", times.Nice},
			{"system", times.System},
			{"idle", times.Idle},
			{"iowait", times.Iowait},
			{"irq", times.IRQ},
			{"softirq", times.SoftIRQ},
			{"steal", times.Steal},
			{"guest", times.Guest},
			{"guest_nice", times.GuestNice},
		}
		for _, entry := range perMode {
			ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, entry.value, label, entry.mode)
		}
	}
	return nil
}

View File

@ -208,6 +208,26 @@ node_cpu{cpu="cpu7",mode="softirq"} 0.31
node_cpu{cpu="cpu7",mode="steal"} 0
node_cpu{cpu="cpu7",mode="system"} 101.64
node_cpu{cpu="cpu7",mode="user"} 290.98
# HELP node_cpu_core_throttles_total Number of times this cpu core has been throttled.
# TYPE node_cpu_core_throttles_total counter
node_cpu_core_throttles_total{cpu="cpu0"} 5
node_cpu_core_throttles_total{cpu="cpu1"} 0
# HELP node_cpu_frequency_hertz Current cpu thread frequency in hertz.
# TYPE node_cpu_frequency_hertz gauge
node_cpu_frequency_hertz{cpu="cpu0"} 1.699981e+06
node_cpu_frequency_hertz{cpu="cpu1"} 1.699981e+06
# HELP node_cpu_frequency_max_hertz Maximum cpu thread frequency in hertz.
# TYPE node_cpu_frequency_max_hertz gauge
node_cpu_frequency_max_hertz{cpu="cpu0"} 3.7e+06
node_cpu_frequency_max_hertz{cpu="cpu1"} 3.7e+06
# HELP node_cpu_frequency_min_hertz Minimum cpu thread frequency in hertz.
# TYPE node_cpu_frequency_min_hertz gauge
node_cpu_frequency_min_hertz{cpu="cpu0"} 800000
node_cpu_frequency_min_hertz{cpu="cpu1"} 800000
# HELP node_cpu_package_throttles_total Number of times this cpu package has been throttled.
# TYPE node_cpu_package_throttles_total counter
node_cpu_package_throttles_total{cpu="cpu0"} 30
node_cpu_package_throttles_total{cpu="cpu1"} 30
# HELP node_disk_bytes_read The total number of bytes read successfully.
# TYPE node_disk_bytes_read counter
node_disk_bytes_read{device="dm-0"} 5.13708655616e+11
@ -2142,6 +2162,7 @@ node_scrape_collector_success{collector="arp"} 1
node_scrape_collector_success{collector="bonding"} 1
node_scrape_collector_success{collector="buddyinfo"} 1
node_scrape_collector_success{collector="conntrack"} 1
node_scrape_collector_success{collector="cpu"} 1
node_scrape_collector_success{collector="diskstats"} 1
node_scrape_collector_success{collector="drbd"} 1
node_scrape_collector_success{collector="edac"} 1

View File

@ -0,0 +1 @@
1699981

View File

@ -0,0 +1 @@
3700000

View File

@ -0,0 +1 @@
800000

View File

@ -0,0 +1 @@
1699981

View File

@ -0,0 +1 @@
3700000

View File

@ -0,0 +1 @@
800000

View File

@ -16,18 +16,13 @@
package collector
import (
"bufio"
"os"
"strconv"
"strings"
"fmt"
"github.com/prometheus/procfs"
"github.com/prometheus/client_golang/prometheus"
)
const (
userHz = 100
)
type statCollector struct {
cpu *prometheus.Desc
intr *prometheus.Desc
@ -83,80 +78,25 @@ func NewStatCollector() (Collector, error) {
}, nil
}
// Expose kernel and system statistics.
// Update implements Collector and exposes kernel and system statistics.
func (c *statCollector) Update(ch chan<- prometheus.Metric) error {
file, err := os.Open(procFilePath("stat"))
fs, err := procfs.NewFS(*procPath)
if err != nil {
return fmt.Errorf("failed to open procfs: %v", err)
}
stats, err := fs.NewStat()
if err != nil {
return err
}
defer file.Close()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
parts := strings.Fields(scanner.Text())
if len(parts) == 0 {
continue
}
switch {
case strings.HasPrefix(parts[0], "cpu"):
// Export only per-cpu stats, it can be aggregated up in prometheus.
if parts[0] == "cpu" {
break
}
// Only some of these may be present, depending on kernel version.
cpuFields := []string{"user", "nice", "system", "idle", "iowait", "irq", "softirq", "steal", "guest", "guest_nice"}
// OpenVZ guests lack the "guest" CPU field, which needs to be ignored.
expectedFieldNum := len(cpuFields) + 1
if expectedFieldNum > len(parts) {
expectedFieldNum = len(parts)
}
for i, v := range parts[1:expectedFieldNum] {
value, err := strconv.ParseFloat(v, 64)
if err != nil {
return err
}
// Convert from ticks to seconds
value /= userHz
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, value, parts[0], cpuFields[i])
}
case parts[0] == "intr":
// Only expose the overall number, use the 'interrupts' collector for more detail.
value, err := strconv.ParseFloat(parts[1], 64)
if err != nil {
return err
}
ch <- prometheus.MustNewConstMetric(c.intr, prometheus.CounterValue, value)
case parts[0] == "ctxt":
value, err := strconv.ParseFloat(parts[1], 64)
if err != nil {
return err
}
ch <- prometheus.MustNewConstMetric(c.ctxt, prometheus.CounterValue, value)
case parts[0] == "processes":
value, err := strconv.ParseFloat(parts[1], 64)
if err != nil {
return err
}
ch <- prometheus.MustNewConstMetric(c.forks, prometheus.CounterValue, value)
case parts[0] == "btime":
value, err := strconv.ParseFloat(parts[1], 64)
if err != nil {
return err
}
ch <- prometheus.MustNewConstMetric(c.btime, prometheus.GaugeValue, value)
case parts[0] == "procs_running":
value, err := strconv.ParseFloat(parts[1], 64)
if err != nil {
return err
}
ch <- prometheus.MustNewConstMetric(c.procsRunning, prometheus.GaugeValue, value)
case parts[0] == "procs_blocked":
value, err := strconv.ParseFloat(parts[1], 64)
if err != nil {
return err
}
ch <- prometheus.MustNewConstMetric(c.procsBlocked, prometheus.GaugeValue, value)
}
}
return scanner.Err()
ch <- prometheus.MustNewConstMetric(c.intr, prometheus.CounterValue, float64(stats.IRQTotal))
ch <- prometheus.MustNewConstMetric(c.ctxt, prometheus.CounterValue, float64(stats.ContextSwitches))
ch <- prometheus.MustNewConstMetric(c.forks, prometheus.CounterValue, float64(stats.ProcessCreated))
ch <- prometheus.MustNewConstMetric(c.btime, prometheus.GaugeValue, float64(stats.BootTime))
ch <- prometheus.MustNewConstMetric(c.procsRunning, prometheus.GaugeValue, float64(stats.ProcessesRunning))
ch <- prometheus.MustNewConstMetric(c.procsBlocked, prometheus.GaugeValue, float64(stats.ProcessesBlocked))
return nil
}

View File

@ -6,6 +6,7 @@ collectors=$(cat << COLLECTORS
arp
buddyinfo
conntrack
cpu
diskstats
drbd
edac