system: refactor collector (#1730)

Signed-off-by: Jan-Otto Kröpke <mail@jkroepke.de>
Jan-Otto Kröpke 2024-11-14 00:06:22 +01:00 committed by GitHub
parent f332361723
commit 31bcf42473
9 changed files with 138 additions and 125 deletions

View File

@ -179,6 +179,12 @@ func run() int {
logger.Debug("Logging has Started")
if v, ok := os.LookupEnv("WINDOWS_EXPORTER_PERF_COUNTERS_ENGINE"); ok && v == "pdh" || *togglePDH == "pdh" {
logger.Info("Using performance data helper from PHD.dll for performance counter collection. This is in experimental state.")
toggle.PHDEnabled = true
}
if *printCollectors {
printCollectorsToStdout()
@ -221,12 +227,6 @@ func run() int {
logger.Info("Enabled collectors: " + strings.Join(enabledCollectorList, ", "))
if v, ok := os.LookupEnv("WINDOWS_EXPORTER_PERF_COUNTERS_ENGINE"); ok && v == "pdh" || *togglePDH == "pdh" {
logger.Info("Using performance data helper from PHD.dll for performance counter collection. This is in experimental state.")
toggle.PHDEnabled = true
}
mux := http.NewServeMux()
mux.Handle("GET /health", httphandler.NewHealthHandler())
mux.Handle("GET /version", httphandler.NewVersionHandler())

View File

@ -109,7 +109,7 @@ func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error {
var err error
c.perfDataCollector, err = perfdata.NewCollector(perfdata.V1, "Processor Information", perfdata.AllInstances, counters)
c.perfDataCollector, err = perfdata.NewCollector(perfdata.V2, "Processor Information", perfdata.AllInstances, counters)
if err != nil {
return fmt.Errorf("failed to create Processor Information collector: %w", err)
}

View File

@ -0,0 +1,11 @@
package system
const (
ContextSwitchesPersec = "Context Switches/sec"
ExceptionDispatchesPersec = "Exception Dispatches/sec"
ProcessorQueueLength = "Processor Queue Length"
SystemCallsPersec = "System Calls/sec"
SystemUpTime = "System Up Time"
Processes = "Processes"
Threads = "Threads"
)

View File

@ -4,11 +4,13 @@ package system
import (
"errors"
"fmt"
"log/slog"
"github.com/alecthomas/kingpin/v2"
"github.com/prometheus-community/windows_exporter/internal/mi"
v1 "github.com/prometheus-community/windows_exporter/internal/perfdata/v1"
"github.com/prometheus-community/windows_exporter/internal/perfdata"
"github.com/prometheus-community/windows_exporter/internal/perfdata/perftypes"
"github.com/prometheus-community/windows_exporter/internal/types"
"github.com/prometheus/client_golang/prometheus"
)
@ -23,6 +25,8 @@ var ConfigDefaults = Config{}
type Collector struct {
config Config
perfDataCollector perfdata.Collector
contextSwitchesTotal *prometheus.Desc
exceptionDispatchesTotal *prometheus.Desc
processorQueueLength *prometheus.Desc
@ -54,7 +58,7 @@ func (c *Collector) GetName() string {
}
func (c *Collector) GetPerfCounter(_ *slog.Logger) ([]string, error) {
return []string{"System"}, nil
return []string{}, nil
}
func (c *Collector) Close(_ *slog.Logger) error {
@ -62,6 +66,23 @@ func (c *Collector) Close(_ *slog.Logger) error {
}
func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error {
counters := []string{
ContextSwitchesPersec,
ExceptionDispatchesPersec,
ProcessorQueueLength,
SystemCallsPersec,
SystemUpTime,
Processes,
Threads,
}
var err error
c.perfDataCollector, err = perfdata.NewCollector(perfdata.V2, "System", nil, counters)
if err != nil {
return fmt.Errorf("failed to create System collector: %w", err)
}
c.contextSwitchesTotal = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "context_switches_total"),
"Total number of context switches (WMI source: PerfOS_System.ContextSwitchesPersec)",
@ -117,78 +138,59 @@ func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error {
// Collect sends the metric values for each metric
// to the provided prometheus Metric channel.
func (c *Collector) Collect(ctx *types.ScrapeContext, logger *slog.Logger, ch chan<- prometheus.Metric) error {
logger = logger.With(slog.String("collector", Name))
if err := c.collect(ctx, logger, ch); err != nil {
logger.Error("failed collecting system metrics",
slog.Any("err", err),
)
return err
func (c *Collector) Collect(_ *types.ScrapeContext, _ *slog.Logger, ch chan<- prometheus.Metric) error {
if err := c.collect(ch); err != nil {
return fmt.Errorf("failed collecting system metrics: %w", err)
}
return nil
}
// Win32_PerfRawData_PerfOS_System docs:
// - https://web.archive.org/web/20050830140516/http://msdn.microsoft.com/library/en-us/wmisdk/wmi/win32_perfrawdata_perfos_system.asp
type system struct {
ContextSwitchesPersec float64 `perflib:"Context Switches/sec"`
ExceptionDispatchesPersec float64 `perflib:"Exception Dispatches/sec"`
ProcessorQueueLength float64 `perflib:"Processor Queue Length"`
SystemCallsPersec float64 `perflib:"System Calls/sec"`
SystemUpTime float64 `perflib:"System Up Time"`
Processes float64 `perflib:"Processes"`
Threads float64 `perflib:"Threads"`
}
func (c *Collector) collect(ctx *types.ScrapeContext, logger *slog.Logger, ch chan<- prometheus.Metric) error {
logger = logger.With(slog.String("collector", Name))
var dst []system
if err := v1.UnmarshalObject(ctx.PerfObjects["System"], &dst, logger); err != nil {
return err
func (c *Collector) collect(ch chan<- prometheus.Metric) error {
perfData, err := c.perfDataCollector.Collect()
if err != nil {
return fmt.Errorf("failed to collect System metrics: %w", err)
}
if len(dst) == 0 {
return errors.New("no data returned from Performance Counter")
data, ok := perfData[perftypes.EmptyInstance]
if !ok {
return errors.New("query for System returned empty result set")
}
ch <- prometheus.MustNewConstMetric(
c.contextSwitchesTotal,
prometheus.CounterValue,
dst[0].ContextSwitchesPersec,
data[ContextSwitchesPersec].FirstValue,
)
ch <- prometheus.MustNewConstMetric(
c.exceptionDispatchesTotal,
prometheus.CounterValue,
dst[0].ExceptionDispatchesPersec,
data[ExceptionDispatchesPersec].FirstValue,
)
ch <- prometheus.MustNewConstMetric(
c.processorQueueLength,
prometheus.GaugeValue,
dst[0].ProcessorQueueLength,
data[ProcessorQueueLength].FirstValue,
)
ch <- prometheus.MustNewConstMetric(
c.processes,
prometheus.GaugeValue,
dst[0].Processes,
data[Processes].FirstValue,
)
ch <- prometheus.MustNewConstMetric(
c.systemCallsTotal,
prometheus.CounterValue,
dst[0].SystemCallsPersec,
data[SystemCallsPersec].FirstValue,
)
ch <- prometheus.MustNewConstMetric(
c.systemUpTime,
prometheus.GaugeValue,
dst[0].SystemUpTime,
data[SystemUpTime].FirstValue,
)
ch <- prometheus.MustNewConstMetric(
c.threads,
prometheus.GaugeValue,
dst[0].Threads,
data[Threads].FirstValue,
)
// Windows has no defined limit, and is based off available resources. This currently isn't calculated by WMI and is set to default value.
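
The file above swaps the perflib struct-tag path (`v1.UnmarshalObject` into a `system` struct) for the map returned by the perfdata v2 collector, keyed first by instance and then by counter name. A minimal sketch of consuming that shape, with stand-in types since the internal packages are not shown in full here; the concrete value of `perftypes.EmptyInstance` does not appear in this diff, so the empty string below is only a placeholder:

```go
package main

import (
	"errors"
	"fmt"
)

// counterValues stands in for perftypes.CounterValues: each counter carries
// a FirstValue (and, for some counter types, a SecondValue).
type counterValues struct{ FirstValue, SecondValue float64 }

// Placeholder for perftypes.EmptyInstance; the real value is not in this diff.
const emptyInstance = ""

// contextSwitches shows the lookup order used by collect(): instance first,
// then counter name, then FirstValue.
func contextSwitches(perfData map[string]map[string]counterValues) (float64, error) {
	data, ok := perfData[emptyInstance]
	if !ok {
		return 0, errors.New("query for System returned empty result set")
	}
	return data["Context Switches/sec"].FirstValue, nil
}

func main() {
	sample := map[string]map[string]counterValues{
		emptyInstance: {"Context Switches/sec": {FirstValue: 12345}},
	}
	fmt.Println(contextSwitches(sample)) // 12345 <nil>
}
```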

View File

@ -1,23 +1,23 @@
package terminal_services
const (
HandleCount = "Handle Count"
PageFaultsPersec = "Page Faults/sec"
PageFileBytes = "Page File Bytes"
PageFileBytesPeak = "Page File Bytes Peak"
PercentPrivilegedTime = "% Privileged Time"
PercentProcessorTime = "% Processor Time"
PercentUserTime = "% User Time"
PoolNonpagedBytes = "Pool Nonpaged Bytes"
PoolPagedBytes = "Pool Paged Bytes"
PrivateBytes = "Private Bytes"
ThreadCount = "Thread Count"
VirtualBytes = "Virtual Bytes"
VirtualBytesPeak = "Virtual Bytes Peak"
WorkingSet = "Working Set"
WorkingSetPeak = "Working Set Peak"
handleCount = "Handle Count"
pageFaultsPersec = "Page Faults/sec"
pageFileBytes = "Page File Bytes"
pageFileBytesPeak = "Page File Bytes Peak"
percentPrivilegedTime = "% Privileged Time"
percentProcessorTime = "% Processor Time"
percentUserTime = "% User Time"
poolNonpagedBytes = "Pool Nonpaged Bytes"
poolPagedBytes = "Pool Paged Bytes"
privateBytes = "Private Bytes"
threadCount = "Thread Count"
virtualBytes = "Virtual Bytes"
virtualBytesPeak = "Virtual Bytes Peak"
workingSet = "Working Set"
workingSetPeak = "Working Set Peak"
SuccessfulConnections = "Successful Connections"
PendingConnections = "Pending Connections"
FailedConnections = "Failed Connections"
successfulConnections = "Successful Connections"
pendingConnections = "Pending Connections"
failedConnections = "Failed Connections"
)
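
The rename only drops the leading capital, but in Go that changes visibility: identifiers starting with an upper-case letter are exported, lower-case ones stay package-private, so the counter names are no longer part of the package's API. A tiny illustrative sketch (not part of the exporter):

```go
package visibility

// Go derives visibility from the identifier's first letter, so after the
// rename the counter-name constants above are visible only inside their
// own package.
const (
	handleCount = "Handle Count" // unexported: usable only within this package
	HandleCount = "Handle Count" // exported: usable as visibility.HandleCount elsewhere
)
```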

View File

@ -126,21 +126,21 @@ func (c *Collector) Build(logger *slog.Logger, miSession *mi.Session) error {
logger = logger.With(slog.String("collector", Name))
counters := []string{
HandleCount,
PageFaultsPersec,
PageFileBytes,
PageFileBytesPeak,
PercentPrivilegedTime,
PercentProcessorTime,
PercentUserTime,
PoolNonpagedBytes,
PoolPagedBytes,
PrivateBytes,
ThreadCount,
VirtualBytes,
VirtualBytesPeak,
WorkingSet,
WorkingSetPeak,
handleCount,
pageFaultsPersec,
pageFileBytes,
pageFileBytesPeak,
percentPrivilegedTime,
percentProcessorTime,
percentUserTime,
poolNonpagedBytes,
poolPagedBytes,
privateBytes,
threadCount,
virtualBytes,
virtualBytesPeak,
workingSet,
workingSetPeak,
}
var err error
@ -154,9 +154,9 @@ func (c *Collector) Build(logger *slog.Logger, miSession *mi.Session) error {
if c.connectionBrokerEnabled {
counters = []string{
SuccessfulConnections,
PendingConnections,
FailedConnections,
successfulConnections,
pendingConnections,
failedConnections,
}
var err error
@ -317,94 +317,94 @@ func (c *Collector) collectTSSessionCounters(ch chan<- prometheus.Metric) error
ch <- prometheus.MustNewConstMetric(
c.handleCount,
prometheus.GaugeValue,
data[HandleCount].FirstValue,
data[handleCount].FirstValue,
name,
)
ch <- prometheus.MustNewConstMetric(
c.pageFaultsPerSec,
prometheus.CounterValue,
data[PageFaultsPersec].FirstValue,
data[pageFaultsPersec].FirstValue,
name,
)
ch <- prometheus.MustNewConstMetric(
c.pageFileBytes,
prometheus.GaugeValue,
data[PageFileBytes].FirstValue,
data[pageFileBytes].FirstValue,
name,
)
ch <- prometheus.MustNewConstMetric(
c.pageFileBytesPeak,
prometheus.GaugeValue,
data[PageFileBytesPeak].FirstValue,
data[pageFileBytesPeak].FirstValue,
name,
)
ch <- prometheus.MustNewConstMetric(
c.percentCPUTime,
prometheus.CounterValue,
data[PercentPrivilegedTime].FirstValue,
data[percentPrivilegedTime].FirstValue,
name,
"privileged",
)
ch <- prometheus.MustNewConstMetric(
c.percentCPUTime,
prometheus.CounterValue,
data[PercentProcessorTime].FirstValue,
data[percentProcessorTime].FirstValue,
name,
"processor",
)
ch <- prometheus.MustNewConstMetric(
c.percentCPUTime,
prometheus.CounterValue,
data[PercentUserTime].FirstValue,
data[percentUserTime].FirstValue,
name,
"user",
)
ch <- prometheus.MustNewConstMetric(
c.poolNonPagedBytes,
prometheus.GaugeValue,
data[PoolNonpagedBytes].FirstValue,
data[poolNonpagedBytes].FirstValue,
name,
)
ch <- prometheus.MustNewConstMetric(
c.poolPagedBytes,
prometheus.GaugeValue,
data[PoolPagedBytes].FirstValue,
data[poolPagedBytes].FirstValue,
name,
)
ch <- prometheus.MustNewConstMetric(
c.privateBytes,
prometheus.GaugeValue,
data[PrivateBytes].FirstValue,
data[privateBytes].FirstValue,
name,
)
ch <- prometheus.MustNewConstMetric(
c.threadCount,
prometheus.GaugeValue,
data[ThreadCount].FirstValue,
data[threadCount].FirstValue,
name,
)
ch <- prometheus.MustNewConstMetric(
c.virtualBytes,
prometheus.GaugeValue,
data[VirtualBytes].FirstValue,
data[virtualBytes].FirstValue,
name,
)
ch <- prometheus.MustNewConstMetric(
c.virtualBytesPeak,
prometheus.GaugeValue,
data[VirtualBytesPeak].FirstValue,
data[virtualBytesPeak].FirstValue,
name,
)
ch <- prometheus.MustNewConstMetric(
c.workingSet,
prometheus.GaugeValue,
data[WorkingSet].FirstValue,
data[workingSet].FirstValue,
name,
)
ch <- prometheus.MustNewConstMetric(
c.workingSetPeak,
prometheus.GaugeValue,
data[WorkingSetPeak].FirstValue,
data[workingSetPeak].FirstValue,
name,
)
}
@ -426,21 +426,21 @@ func (c *Collector) collectCollectionBrokerPerformanceCounter(ch chan<- promethe
ch <- prometheus.MustNewConstMetric(
c.connectionBrokerPerformance,
prometheus.CounterValue,
data[SuccessfulConnections].FirstValue,
data[successfulConnections].FirstValue,
"Successful",
)
ch <- prometheus.MustNewConstMetric(
c.connectionBrokerPerformance,
prometheus.CounterValue,
data[PendingConnections].FirstValue,
data[pendingConnections].FirstValue,
"Pending",
)
ch <- prometheus.MustNewConstMetric(
c.connectionBrokerPerformance,
prometheus.CounterValue,
data[FailedConnections].FirstValue,
data[failedConnections].FirstValue,
"Failed",
)
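
The broker metrics reuse a single `*prometheus.Desc` with one variable label and emit it three times with different label values. A self-contained sketch of that pattern; the metric and label names below are illustrative, not the exporter's real ones:

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

// One Desc with a variable label ("connection") shared by several samples.
var connectionAttempts = prometheus.NewDesc(
	"windows_example_connection_broker_attempts_total",
	"Connection attempts seen by the connection broker (illustrative).",
	[]string{"connection"}, nil,
)

// emit sends one counter sample per label value, mirroring the pattern above.
func emit(ch chan<- prometheus.Metric, successful, pending, failed float64) {
	ch <- prometheus.MustNewConstMetric(connectionAttempts, prometheus.CounterValue, successful, "Successful")
	ch <- prometheus.MustNewConstMetric(connectionAttempts, prometheus.CounterValue, pending, "Pending")
	ch <- prometheus.MustNewConstMetric(connectionAttempts, prometheus.CounterValue, failed, "Failed")
}

func main() {
	ch := make(chan prometheus.Metric, 3)
	emit(ch, 10, 1, 2)
	close(ch)
	for m := range ch {
		fmt.Println(m.Desc())
	}
}
```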

View File

@ -4,8 +4,8 @@ import "github.com/prometheus/client_golang/prometheus"
// Conversion factors.
const (
TicksToSecondScaleFactor = 1 / 1e7
WindowsEpoch = 116444736000000000
TicksToSecondScaleFactor = 1 / 1e7
WindowsEpoch int64 = 116444736000000000
)
// Based on https://github.com/leoluk/perflib_exporter/blob/master/collector/mapper.go
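
Typing `WindowsEpoch` as `int64` lets it be subtracted from raw PDH tick values without a conversion. How the two constants combine, as a worked sketch using the values from the hunk: a FILETIME-style timestamp counts 100 ns ticks since 1601, `WindowsEpoch` is the tick count of the Unix epoch, and `TicksToSecondScaleFactor` turns ticks into seconds.

```go
package main

import (
	"fmt"
	"time"
)

// Values copied from the hunk above.
const (
	ticksToSecondScaleFactor       = 1 / 1e7
	windowsEpoch             int64 = 116444736000000000
)

// filetimeToUnixSeconds converts a 100 ns tick count since 1601-01-01
// into seconds since the Unix epoch.
func filetimeToUnixSeconds(ticks int64) float64 {
	return float64(ticks-windowsEpoch) * ticksToSecondScaleFactor
}

func main() {
	// 2024-01-01T00:00:00Z expressed as a FILETIME tick count.
	ticks := windowsEpoch + time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC).Unix()*1e7
	fmt.Println(filetimeToUnixSeconds(ticks)) // 1.7040672e+09
}
```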

View File

@ -24,7 +24,7 @@ type Counter struct {
Desc string
Instances map[string]pdhCounterHandle
Type uint32
Frequency float64
Frequency int64
}
func NewCollector(object string, instances []string, counters []string) (*Collector, error) {
@ -67,30 +67,30 @@ func NewCollector(object string, instances []string, counters []string) (*Collector, error) {
counter.Instances[instance] = counterHandle
if counter.Type == 0 {
// Get the info with the current buffer size
bufLen := uint32(0)
if counter.Type != 0 {
continue
}
if ret := PdhGetCounterInfo(counterHandle, 1, &bufLen, nil); ret != PdhMoreData {
return nil, fmt.Errorf("PdhGetCounterInfo: %w", NewPdhError(ret))
}
// Get the info with the current buffer size
bufLen := uint32(0)
buf := make([]byte, bufLen)
if ret := PdhGetCounterInfo(counterHandle, 1, &bufLen, &buf[0]); ret != ErrorSuccess {
return nil, fmt.Errorf("PdhGetCounterInfo: %w", NewPdhError(ret))
}
if ret := PdhGetCounterInfo(counterHandle, 1, &bufLen, nil); ret != PdhMoreData {
return nil, fmt.Errorf("PdhGetCounterInfo: %w", NewPdhError(ret))
}
ci := (*PdhCounterInfo)(unsafe.Pointer(&buf[0]))
counter.Type = ci.DwType
counter.Desc = windows.UTF16PtrToString(ci.SzExplainText)
buf := make([]byte, bufLen)
if ret := PdhGetCounterInfo(counterHandle, 1, &bufLen, &buf[0]); ret != ErrorSuccess {
return nil, fmt.Errorf("PdhGetCounterInfo: %w", NewPdhError(ret))
}
frequency := float64(0)
ci := (*PdhCounterInfo)(unsafe.Pointer(&buf[0]))
counter.Type = ci.DwType
counter.Desc = windows.UTF16PtrToString(ci.SzExplainText)
if ret := PdhGetCounterTimeBase(counterHandle, &frequency); ret != ErrorSuccess {
if counter.Type == perftypes.PERF_ELAPSED_TIME {
if ret := PdhGetCounterTimeBase(counterHandle, &counter.Frequency); ret != ErrorSuccess {
return nil, fmt.Errorf("PdhGetCounterTimeBase: %w", NewPdhError(ret))
}
counter.Frequency = frequency
}
}
@ -153,7 +153,7 @@ func (c *Collector) Collect() (map[string]map[string]perftypes.CounterValues, er
continue
}
items := (*[1 << 20]PdhRawCounterItem)(unsafe.Pointer(&buf[0]))[:itemCount]
items := unsafe.Slice((*PdhRawCounterItem)(unsafe.Pointer(&buf[0])), itemCount)
if data == nil {
data = make(map[string]map[string]perftypes.CounterValues, itemCount)
@ -193,14 +193,14 @@ func (c *Collector) Collect() (map[string]map[string]perftypes.CounterValues, er
switch counter.Type {
case perftypes.PERF_ELAPSED_TIME:
values.FirstValue = float64(item.RawValue.FirstValue-perftypes.WindowsEpoch) / counter.Frequency
values.SecondValue = float64(item.RawValue.SecondValue-perftypes.WindowsEpoch) / counter.Frequency
values.FirstValue = float64((item.RawValue.FirstValue - perftypes.WindowsEpoch) / counter.Frequency)
case perftypes.PERF_100NSEC_TIMER, perftypes.PERF_PRECISION_100NS_TIMER:
values.FirstValue = float64(item.RawValue.FirstValue) * perftypes.TicksToSecondScaleFactor
values.SecondValue = float64(item.RawValue.SecondValue) * perftypes.TicksToSecondScaleFactor
default:
case perftypes.PERF_AVERAGE_BULK:
values.FirstValue = float64(item.RawValue.FirstValue)
values.SecondValue = float64(item.RawValue.SecondValue)
default:
values.FirstValue = float64(item.RawValue.FirstValue)
}
data[instanceName][counter.Name] = values

View File

@ -622,7 +622,7 @@ func PdhGetRawCounterArray(hCounter pdhCounterHandle, lpdwBufferSize *uint32, lp
//
// lpdwItemCount
// Time base that specifies the number of performance values a counter samples per second.
func PdhGetCounterTimeBase(hCounter pdhCounterHandle, pTimeBase *float64) uint32 {
func PdhGetCounterTimeBase(hCounter pdhCounterHandle, pTimeBase *int64) uint32 {
ret, _, _ := pdhPdhGetCounterTimeBase.Call(
uintptr(hCounter),
uintptr(unsafe.Pointer(pTimeBase)))
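
The Windows PdhGetCounterTimeBase API writes a 64-bit integer (a LONGLONG) through the pointer it is given, so typing the parameter as `*int64` matches what the syscall stores; with the previous `*float64`, the raw write amounted to a bit reinterpretation rather than a numeric conversion. A small standalone sketch of that difference:

```go
package main

import (
	"fmt"
	"math"
	"unsafe"
)

func main() {
	// A plausible time base value (100 ns resolution); illustrative only.
	var timeBase int64 = 10_000_000

	misread := *(*float64)(unsafe.Pointer(&timeBase)) // reinterprets the raw bits
	converted := float64(timeBase)                    // numeric conversion

	fmt.Println(misread == converted)                              // false
	fmt.Println(math.Float64frombits(uint64(timeBase)) == misread) // true
}
```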