2015-09-26 15:36:40 +00:00
|
|
|
// Copyright 2015 The Prometheus Authors
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2015-06-24 12:45:40 +00:00
|
|
|
// +build !nocpu
|
2015-05-12 07:13:08 +00:00
|
|
|
|
|
|
|
package collector
|
|
|
|
|
|
|
|
import (
|
2016-12-30 06:31:08 +00:00
|
|
|
"fmt"
|
2017-01-05 05:47:13 +00:00
|
|
|
"math"
|
2015-05-12 07:13:08 +00:00
|
|
|
"strconv"
|
|
|
|
"unsafe"
|
|
|
|
|
2019-12-31 16:19:37 +00:00
|
|
|
"github.com/go-kit/kit/log"
|
|
|
|
"github.com/go-kit/kit/log/level"
|
2015-05-12 07:13:08 +00:00
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
2016-12-29 03:26:41 +00:00
|
|
|
"golang.org/x/sys/unix"
|
2015-05-12 07:13:08 +00:00
|
|
|
)
|
|
|
|
|
2016-12-29 03:26:41 +00:00
|
|
|
// clockinfo is the in-memory layout that the raw bytes returned by the
// kern.clockrate sysctl are reinterpreted as. The field order and widths must
// not change, because the struct is overlaid directly on that byte buffer.
type clockinfo struct {
	// hz is the clock frequency.
	hz int32
	// tick is the number of micro-seconds per hz tick.
	tick int32
	// spare is not read by this file; it keeps the following fields at
	// their expected offsets.
	spare int32
	// stathz is the statistics clock frequency.
	stathz int32
	// profhz is the profiling clock frequency.
	profhz int32
}
|
2016-02-17 02:20:55 +00:00
|
|
|
|
2016-12-29 03:26:41 +00:00
|
|
|
// cputime holds the accumulated time one CPU has spent in each of the five
// CPU states: user, nice, system, interrupt and idle.
type cputime struct {
	user, nice, sys, intr, idle float64
}
|
|
|
|
|
2016-12-29 03:26:41 +00:00
|
|
|
func getCPUTimes() ([]cputime, error) {
|
|
|
|
const states = 5
|
2016-02-17 02:20:55 +00:00
|
|
|
|
2016-12-29 03:26:41 +00:00
|
|
|
clockb, err := unix.SysctlRaw("kern.clockrate")
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2016-02-17 02:20:55 +00:00
|
|
|
}
|
2016-12-29 03:26:41 +00:00
|
|
|
clock := *(*clockinfo)(unsafe.Pointer(&clockb[0]))
|
|
|
|
cpb, err := unix.SysctlRaw("kern.cp_times")
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2016-02-17 02:20:55 +00:00
|
|
|
}
|
|
|
|
|
2016-12-29 03:26:41 +00:00
|
|
|
var cpufreq float64
|
|
|
|
if clock.stathz > 0 {
|
|
|
|
cpufreq = float64(clock.stathz)
|
|
|
|
} else {
|
|
|
|
cpufreq = float64(clock.hz)
|
2016-02-17 02:20:55 +00:00
|
|
|
}
|
2016-12-29 03:26:41 +00:00
|
|
|
var times []float64
|
|
|
|
for len(cpb) >= int(unsafe.Sizeof(int(0))) {
|
|
|
|
t := *(*int)(unsafe.Pointer(&cpb[0]))
|
|
|
|
times = append(times, float64(t)/cpufreq)
|
|
|
|
cpb = cpb[unsafe.Sizeof(int(0)):]
|
2015-09-26 22:08:18 +00:00
|
|
|
}
|
2016-02-17 02:20:55 +00:00
|
|
|
|
2016-12-29 03:26:41 +00:00
|
|
|
cpus := make([]cputime, len(times)/states)
|
|
|
|
for i := 0; i < len(times); i += states {
|
|
|
|
cpu := &cpus[i/states]
|
|
|
|
cpu.user = times[i]
|
|
|
|
cpu.nice = times[i+1]
|
|
|
|
cpu.sys = times[i+2]
|
|
|
|
cpu.intr = times[i+3]
|
|
|
|
cpu.idle = times[i+4]
|
2015-09-26 22:08:18 +00:00
|
|
|
}
|
2016-12-29 03:26:41 +00:00
|
|
|
return cpus, nil
|
2016-02-17 02:20:55 +00:00
|
|
|
}
|
|
|
|
|
2015-05-12 07:13:08 +00:00
|
|
|
type statCollector struct {
|
2019-12-31 16:19:37 +00:00
|
|
|
cpu typedDesc
|
|
|
|
temp typedDesc
|
|
|
|
logger log.Logger
|
2015-05-12 07:13:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func init() {
|
2017-09-28 13:06:26 +00:00
|
|
|
registerCollector("cpu", defaultEnabled, NewStatCollector)
|
2015-05-12 07:13:08 +00:00
|
|
|
}
|
|
|
|
|
2017-05-14 11:32:35 +00:00
|
|
|
// NewStatCollector returns a new Collector exposing CPU stats.
|
2019-12-31 16:19:37 +00:00
|
|
|
func NewStatCollector(logger log.Logger) (Collector, error) {
|
2015-05-12 07:13:08 +00:00
|
|
|
return &statCollector{
|
2018-04-29 12:34:47 +00:00
|
|
|
cpu: typedDesc{nodeCPUSecondsDesc, prometheus.CounterValue},
|
2016-12-30 06:31:08 +00:00
|
|
|
temp: typedDesc{prometheus.NewDesc(
|
2018-02-01 17:42:20 +00:00
|
|
|
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "temperature_celsius"),
|
2016-12-30 06:31:08 +00:00
|
|
|
"CPU temperature",
|
|
|
|
[]string{"cpu"}, nil,
|
|
|
|
), prometheus.GaugeValue},
|
2019-12-31 16:19:37 +00:00
|
|
|
logger: logger,
|
2015-05-12 07:13:08 +00:00
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
2016-02-17 02:20:55 +00:00
|
|
|
// Expose CPU stats using sysctl.
|
2017-02-28 18:47:20 +00:00
|
|
|
func (c *statCollector) Update(ch chan<- prometheus.Metric) error {
|
2016-02-17 02:20:55 +00:00
|
|
|
// We want time spent per-cpu per CPUSTATE.
|
|
|
|
// CPUSTATES (number of CPUSTATES) is defined as 5U.
|
|
|
|
// Order: CP_USER | CP_NICE | CP_SYS | CP_IDLE | CP_INTR
|
|
|
|
// sysctl kern.cp_times provides hw.ncpu * CPUSTATES long integers:
|
|
|
|
// hw.ncpu * (space-separated list of the above variables)
|
|
|
|
//
|
|
|
|
// Each value is a counter incremented at frequency
|
|
|
|
// kern.clockrate.(stathz | hz)
|
|
|
|
//
|
|
|
|
// Look into sys/kern/kern_clock.c for details.
|
|
|
|
|
2016-12-29 03:26:41 +00:00
|
|
|
cpuTimes, err := getCPUTimes()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2015-09-26 22:08:18 +00:00
|
|
|
}
|
2016-12-29 03:26:41 +00:00
|
|
|
for cpu, t := range cpuTimes {
|
2016-12-30 06:31:08 +00:00
|
|
|
lcpu := strconv.Itoa(cpu)
|
|
|
|
ch <- c.cpu.mustNewConstMetric(float64(t.user), lcpu, "user")
|
|
|
|
ch <- c.cpu.mustNewConstMetric(float64(t.nice), lcpu, "nice")
|
|
|
|
ch <- c.cpu.mustNewConstMetric(float64(t.sys), lcpu, "system")
|
|
|
|
ch <- c.cpu.mustNewConstMetric(float64(t.intr), lcpu, "interrupt")
|
|
|
|
ch <- c.cpu.mustNewConstMetric(float64(t.idle), lcpu, "idle")
|
|
|
|
|
|
|
|
temp, err := unix.SysctlUint32(fmt.Sprintf("dev.cpu.%d.temperature", cpu))
|
2017-01-05 05:47:13 +00:00
|
|
|
if err != nil {
|
|
|
|
if err == unix.ENOENT {
|
|
|
|
// No temperature information for this CPU
|
2019-12-31 16:19:37 +00:00
|
|
|
level.Debug(c.logger).Log("msg", "no temperature information for CPU", "cpu", cpu)
|
2017-01-05 05:47:13 +00:00
|
|
|
} else {
|
|
|
|
// Unexpected error
|
|
|
|
ch <- c.temp.mustNewConstMetric(math.NaN(), lcpu)
|
2019-12-31 16:19:37 +00:00
|
|
|
level.Error(c.logger).Log("msg", "failed to query CPU temperature for CPU", "cpu", cpu, "err", err)
|
2017-01-05 05:47:13 +00:00
|
|
|
}
|
|
|
|
continue
|
2016-12-30 06:31:08 +00:00
|
|
|
}
|
2018-06-07 13:01:25 +00:00
|
|
|
|
|
|
|
// Temp is a signed integer in deci-degrees Kelvin.
|
|
|
|
// Cast uint32 to int32 and convert to float64 degrees Celsius.
|
|
|
|
//
|
|
|
|
// 2732 is used as the conversion constant for deci-degrees
|
|
|
|
// Kelvin, in multiple places in the kernel that feed into this
|
|
|
|
// sysctl, so we want to maintain consistency:
|
|
|
|
//
|
|
|
|
// sys/dev/amdtemp/amdtemp.c
|
|
|
|
// #define AMDTEMP_ZERO_C_TO_K 2732
|
|
|
|
//
|
|
|
|
// sys/dev/acpica/acpi_thermal.c
|
|
|
|
// #define TZ_ZEROC 2732
|
|
|
|
//
|
|
|
|
// sys/dev/coretemp/coretemp.c
|
|
|
|
// #define TZ_ZEROC 2732
|
|
|
|
ch <- c.temp.mustNewConstMetric(float64(int32(temp)-2732)/10, lcpu)
|
2016-04-19 19:59:53 +00:00
|
|
|
}
|
2015-05-12 07:13:08 +00:00
|
|
|
return err
|
|
|
|
}
|