mirror of
https://github.com/prometheus-community/windows_exporter
synced 2025-02-11 17:20:28 +00:00
Merge pull request #1088 from higels/add_mperf_metric
Add cpu metrics based on newer and more accurate perflib sources
This commit is contained in:
commit
1493a20262
@ -37,6 +37,10 @@ type cpuCollectorFull struct {
|
||||
ProcessorFrequencyMHz *prometheus.Desc
|
||||
ProcessorMaxFrequencyMHz *prometheus.Desc
|
||||
ProcessorPerformance *prometheus.Desc
|
||||
ProcessorMPerf *prometheus.Desc
|
||||
ProcessorRTC *prometheus.Desc
|
||||
ProcessorUtility *prometheus.Desc
|
||||
ProcessorPrivUtility *prometheus.Desc
|
||||
}
|
||||
|
||||
// newCPUCollector constructs a new cpuCollector, appropriate for the running OS
|
||||
@ -129,11 +133,35 @@ func newCPUCollector() (Collector, error) {
|
||||
nil,
|
||||
),
|
||||
ProcessorPerformance: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "processor_performance"),
|
||||
prometheus.BuildFQName(Namespace, subsystem, "processor_performance_total"),
|
||||
"Processor Performance is the average performance of the processor while it is executing instructions, as a percentage of the nominal performance of the processor. On some processors, Processor Performance may exceed 100%",
|
||||
[]string{"core"},
|
||||
nil,
|
||||
),
|
||||
ProcessorMPerf: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "processor_mperf_total"),
|
||||
"Processor MPerf is the number of TSC ticks incremented while executing instructions",
|
||||
[]string{"core"},
|
||||
nil,
|
||||
),
|
||||
ProcessorRTC: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "processor_rtc_total"),
|
||||
"Processor RTC represents the number of RTC ticks made since the system booted. It should consistently be 64e6, and can be used to properly derive Processor Utility Rate",
|
||||
[]string{"core"},
|
||||
nil,
|
||||
),
|
||||
ProcessorUtility: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "processor_utility_total"),
|
||||
"Processor Utility represents is the amount of time the core spends executing instructions",
|
||||
[]string{"core"},
|
||||
nil,
|
||||
),
|
||||
ProcessorPrivUtility: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "processor_privileged_utility_total"),
|
||||
"Processor Privilieged Utility represents is the amount of time the core has spent executing instructions inside the kernel",
|
||||
[]string{"core"},
|
||||
nil,
|
||||
),
|
||||
}, nil
|
||||
}
|
||||
|
||||
@ -258,8 +286,10 @@ type perflibProcessorInformation struct {
|
||||
PrivilegedUtilitySeconds float64 `perflib:"% Privileged Utility"`
|
||||
ProcessorFrequencyMHz float64 `perflib:"Processor Frequency"`
|
||||
ProcessorPerformance float64 `perflib:"% Processor Performance"`
|
||||
ProcessorMPerf float64 `perflib:"% Processor Performance,secondvalue"`
|
||||
ProcessorTimeSeconds float64 `perflib:"% Processor Time"`
|
||||
ProcessorUtilityRate float64 `perflib:"% Processor Utility"`
|
||||
ProcessorRTC float64 `perflib:"% Processor Utility,secondvalue"`
|
||||
UserTimeSeconds float64 `perflib:"% User Time"`
|
||||
}
|
||||
|
||||
@ -366,10 +396,34 @@ func (c *cpuCollectorFull) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metr
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.ProcessorPerformance,
|
||||
prometheus.GaugeValue,
|
||||
prometheus.CounterValue,
|
||||
cpu.ProcessorPerformance,
|
||||
core,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.ProcessorMPerf,
|
||||
prometheus.CounterValue,
|
||||
cpu.ProcessorMPerf,
|
||||
core,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.ProcessorRTC,
|
||||
prometheus.CounterValue,
|
||||
cpu.ProcessorRTC,
|
||||
core,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.ProcessorUtility,
|
||||
prometheus.CounterValue,
|
||||
cpu.ProcessorUtilityRate,
|
||||
core,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.ProcessorPrivUtility,
|
||||
prometheus.CounterValue,
|
||||
cpu.PrivilegedUtilitySeconds,
|
||||
core,
|
||||
)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
@ -4,6 +4,7 @@ import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
perflibCollector "github.com/leoluk/perflib_exporter/collector"
|
||||
"github.com/leoluk/perflib_exporter/perflib"
|
||||
@ -67,6 +68,16 @@ func unmarshalObject(obj *perflib.PerfObject, vs interface{}) error {
|
||||
if tag == "" {
|
||||
continue
|
||||
}
|
||||
secondValue := false
|
||||
|
||||
st := strings.Split(tag, ",")
|
||||
tag = st[0]
|
||||
|
||||
for _, t := range st {
|
||||
if t == "secondvalue" {
|
||||
secondValue = true
|
||||
}
|
||||
}
|
||||
|
||||
ctr, found := counters[tag]
|
||||
if !found {
|
||||
@ -80,6 +91,14 @@ func unmarshalObject(obj *perflib.PerfObject, vs interface{}) error {
|
||||
return fmt.Errorf("tagged field %v has wrong type %v, must be float64", f.Name, fieldType)
|
||||
}
|
||||
|
||||
if secondValue {
|
||||
if !ctr.Def.HasSecondValue {
|
||||
return fmt.Errorf("tagged field %v expected a SecondValue, which was not present", f.Name)
|
||||
}
|
||||
target.Field(i).SetFloat(float64(ctr.SecondValue))
|
||||
continue
|
||||
}
|
||||
|
||||
switch ctr.Def.CounterType {
|
||||
case perflibCollector.PERF_ELAPSED_TIME:
|
||||
target.Field(i).SetFloat(float64(ctr.Value-windowsEpoch) / float64(obj.Frequency))
|
||||
|
@ -11,6 +11,7 @@ import (
|
||||
type simple struct {
|
||||
ValA float64 `perflib:"Something"`
|
||||
ValB float64 `perflib:"Something Else"`
|
||||
ValC float64 `perflib:"Something Else,secondvalue"`
|
||||
}
|
||||
|
||||
func TestUnmarshalPerflib(t *testing.T) {
|
||||
@ -62,16 +63,18 @@ func TestUnmarshalPerflib(t *testing.T) {
|
||||
},
|
||||
{
|
||||
Def: &perflib.PerfCounterDef{
|
||||
Name: "Something Else",
|
||||
CounterType: perflibCollector.PERF_COUNTER_COUNTER,
|
||||
Name: "Something Else",
|
||||
CounterType: perflibCollector.PERF_COUNTER_COUNTER,
|
||||
HasSecondValue: true,
|
||||
},
|
||||
Value: 256,
|
||||
Value: 256,
|
||||
SecondValue: 222,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedOutput: []simple{{ValA: 123, ValB: 256}},
|
||||
expectedOutput: []simple{{ValA: 123, ValB: 256, ValC: 222}},
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
|
@ -31,7 +31,11 @@ Name | Description | Type | Labels
|
||||
`windows_cpu_idle_break_events_total` | Total number of times the processor was woken from idle | counter | `core`
|
||||
`windows_cpu_parking_status` | Parking Status represents whether a processor is parked or not | gauge | `core`
|
||||
`windows_cpu_core_frequency_mhz` | Core frequency in megahertz | gauge | `core`
|
||||
`windows_cpu_processor_performance` | Processor Performance is the average performance of the processor while it is executing instructions, as a percentage of the nominal performance of the processor. On some processors, Processor Performance may exceed 100% | gauge | `core`
|
||||
`windows_cpu_processor_performance_total` | Processor Performance is the number of CPU cycles spent executing instructions by each core; it is believed to be similar to the value that the APERF MSR would show, were it exposed | counter | `core`
|
||||
`windows_cpu_processor_mperf_total` | Processor MPerf Total is proportional to the number of TSC ticks each core has accumulated while executing instructions. Due to the manner in which it is presented, it should be scaled by 1e2 to properly line up with Processor Performance Total. As above, it is believed to be closely related to the MPERF MSR. | counter | `core`
|
||||
`windows_cpu_processor_rtc_total` | RTC total is assumed to represent the 64Hz tick rate in Windows. It is not by itself useful, but can be used with `windows_cpu_processor_utility_total` to more accurately measure CPU utilisation than with `windows_cpu_time_total` | counter | `core`
|
||||
`windows_cpu_processor_utility_total` | Processor Utility Total is a newer, more accurate measure of CPU utilization, in particular handling modern CPUs with variable CPU frequencies. The rate of this counter divided by the rate of `windows_cpu_processor_rtc_total` should provide an accurate view of CPU utilisation on modern systems, as observed in Task Manager. | counter | `core`
|
||||
`windows_cpu_processor_privileged_utility_total` | Processor Privileged Utility Total, when used in a similar fashion to `windows_cpu_processor_utility_total`, will show the portion of CPU utilization which is happening in privileged mode. | counter | `core`
|
||||
|
||||
### Example metric
|
||||
Show frequency of host CPU cores
|
||||
@ -44,6 +48,19 @@ Show cpu usage by mode.
|
||||
```
|
||||
sum by (mode) (irate(windows_cpu_time_total{instance="localhost"}[5m]))
|
||||
```
|
||||
Show per-cpu utilisation using the processor utility metrics
|
||||
```
|
||||
rate(windows_cpu_processor_utility_total{instance="localhost"}[5m]) / rate(windows_cpu_processor_rtc_total{instance="localhost"}[5m])
|
||||
```
|
||||
Show actual average CPU frequency in Hz
|
||||
```
|
||||
avg by(instance) (
|
||||
1e4 * windows_cpu_core_frequency_mhz{}
|
||||
* rate(windows_cpu_processor_performance_total{}[5m])
|
||||
/ rate(windows_cpu_processor_mperf_total{}[5m])
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
## Alerting examples
|
||||
**prometheus.rules**
|
||||
@ -57,4 +74,18 @@ sum by (mode) (irate(windows_cpu_time_total{instance="localhost"}[5m]))
|
||||
annotations:
|
||||
summary: "CPU Usage (instance {{ $labels.instance }})"
|
||||
description: "CPU Usage is more than 80%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
|
||||
# Alert on hosts which are not boosting their CPU frequencies
|
||||
- alert: NoCpuTurbo
|
||||
expr: |
|
||||
avg by(instance) (
|
||||
1e4 * windows_cpu_core_frequency_mhz{}
|
||||
* rate(windows_cpu_processor_performance_total{}[5m])
|
||||
/ rate(windows_cpu_processor_mperf_total{}[5m])
|
||||
)
|
||||
/
|
||||
(1e6 * avg by (instance) (windows_cpu_core_frequency_mhz))
|
||||
< 1.1
|
||||
for: 1h
|
||||
annotations:
|
||||
summary: "CPU Frequency on {{ $labels.instance }} is less than 110% of base frequency, suggesting it is not able to boost."
|
||||
```
|
||||
|
@ -81,10 +81,18 @@ test_alpha_total 42
|
||||
# TYPE windows_cpu_interrupts_total counter
|
||||
# HELP windows_cpu_parking_status Parking Status represents whether a processor is parked or not
|
||||
# TYPE windows_cpu_parking_status gauge
|
||||
# HELP windows_cpu_processor_performance Processor Performance is the average performance of the processor while it is executing instructions, as a percentage of the nominal performance of the processor. On some processors, Processor Performance may exceed 100%
|
||||
# TYPE windows_cpu_processor_performance gauge
|
||||
# HELP windows_cpu_processor_performance_total Processor Performance is the average performance of the processor while it is executing instructions, as a percentage of the nominal performance of the processor. On some processors, Processor Performance may exceed 100%
|
||||
# TYPE windows_cpu_processor_performance_total counter
|
||||
# HELP windows_cpu_time_total Time that processor spent in different modes (dpc, idle, interrupt, privileged, user)
|
||||
# TYPE windows_cpu_time_total counter
|
||||
# HELP windows_cpu_processor_mperf_total Processor MPerf is the number of TSC ticks incremented while executing instructions
|
||||
# TYPE windows_cpu_processor_mperf_total counter
|
||||
# HELP windows_cpu_processor_privileged_utility_total Processor Privilieged Utility represents is the amount of time the core has spent executing instructions inside the kernel
|
||||
# TYPE windows_cpu_processor_privileged_utility_total counter
|
||||
# HELP windows_cpu_processor_rtc_total Processor RTC represents the number of RTC ticks made since the system booted. It should consistently be 64e6, and can be used to properly derive Processor Utility Rate
|
||||
# TYPE windows_cpu_processor_rtc_total counter
|
||||
# HELP windows_cpu_processor_utility_total Processor Utility represents is the amount of time the core spends executing instructions
|
||||
# TYPE windows_cpu_processor_utility_total counter
|
||||
# HELP windows_cs_hostname Labeled system hostname information as provided by ComputerSystem.DNSHostName and ComputerSystem.Domain
|
||||
# TYPE windows_cs_hostname gauge
|
||||
# HELP windows_cs_logical_processors ComputerSystem.NumberOfLogicalProcessors
|
||||
|
Loading…
Reference in New Issue
Block a user