From eeb7955f5ebe38bf65123324bdd28b360a6f6663 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan-Otto=20Kr=C3=B6pke?= Date: Mon, 11 Nov 2024 17:17:19 +0100 Subject: [PATCH] udp: Added UDP collector (#1725) --- Makefile | 2 +- README.md | 1 + docs/README.md | 1 + docs/collector.process.md | 4 +- docs/collector.tcp.md | 27 +++-- docs/collector.udp.md | 31 ++++++ internal/collector/tcp/tcp.go | 28 +++-- internal/collector/time/time.go | 2 +- internal/collector/udp/const.go | 15 +++ internal/collector/udp/udp.go | 168 +++++++++++++++++++++++++++++ internal/collector/udp/udp_test.go | 16 +++ pkg/collector/collector.go | 2 + pkg/collector/config.go | 3 + pkg/collector/map.go | 2 + tools/e2e-output.txt | 98 ++++++++++++++--- tools/end-to-end-test.ps1 | 6 +- tools/promtool.ps1 | 28 +++-- 17 files changed, 376 insertions(+), 58 deletions(-) create mode 100644 docs/collector.udp.md create mode 100644 internal/collector/udp/const.go create mode 100644 internal/collector/udp/udp.go create mode 100644 internal/collector/udp/udp_test.go diff --git a/Makefile b/Makefile index ba218620..5221d51e 100644 --- a/Makefile +++ b/Makefile @@ -36,7 +36,7 @@ lint: .PHONY: e2e-test e2e-test: windows_exporter.exe - pwsh -NonInteractive -ExecutionPolicy Bypass -File .\tools\end-to-end-test.ps1 + powershell -NonInteractive -ExecutionPolicy Bypass -File .\tools\end-to-end-test.ps1 .PHONY: promtool promtool: windows_exporter.exe diff --git a/README.md b/README.md index 6df53f07..085ecbb0 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,7 @@ Name | Description | Enabled by default [textfile](docs/collector.textfile.md) | Read prometheus metrics from a text file | [thermalzone](docs/collector.thermalzone.md) | Thermal information | [time](docs/collector.time.md) | Windows Time Service | +[udp](docs/collector.udp.md) | UDP connections | [update](docs/collector.update.md) | Windows Update Service | [vmware](docs/collector.vmware.md) | Performance counters installed by the Vmware Guest agent | diff 
--git a/docs/README.md b/docs/README.md index 5f000c80..879c8ad1 100644 --- a/docs/README.md +++ b/docs/README.md @@ -41,5 +41,6 @@ This directory contains documentation of the collectors in the windows_exporter, - [`textfile`](collector.textfile.md) - [`thermalzone`](collector.thermalzone.md) - [`time`](collector.time.md) +- [`udp`](collector.udp.md) - [`update`](collector.update.md) - [`vmware`](collector.vmware.md) diff --git a/docs/collector.process.md b/docs/collector.process.md index 79345534..6d361db9 100644 --- a/docs/collector.process.md +++ b/docs/collector.process.md @@ -35,7 +35,7 @@ metrics. Enables IIS process name queries. IIS process names are combined with their app pool name to form the `process` label. -Disabled by default, and can be enabled with `--collector.process.iis=true`. +Disabled by default, and can be enabled with `--collector.process.iis`. NOTE: Just plain parameter without `true`. ### Example @@ -44,7 +44,7 @@ Note that multiple processes with the same name will be disambiguated by Windows by adding a number suffix, such as `firefox#2`. Your [regexp](https://en.wikipedia.org/wiki/Regular_expression) must take these suffixes into consideration. -:warning: The regexp is case-sensitive, so `--collector.process.include="FIREFOX.*"` will **NOT** match a process named `firefox` . +:warning: The regexp is case-sensitive, so `--collector.process.include="FIREFOX.*"` will **NOT** match a process named `firefox` . To specify multiple names, use the pipe `|` character: ``` diff --git a/docs/collector.tcp.md b/docs/collector.tcp.md index a3f46c36..d6e36957 100644 --- a/docs/collector.tcp.md +++ b/docs/collector.tcp.md @@ -1,12 +1,11 @@ # tcp collector -The tcp collector exposes metrics about the TCP/IPv4 network stack. +The tcp collector exposes metrics about the TCP network stack. 
||| -|- Metric name prefix | `tcp` Data source | Perflib -Classes | [`Win32_PerfRawData_Tcpip_TCPv4`](https://msdn.microsoft.com/en-us/library/aa394341(v=vs.85).aspx), Win32_PerfRawData_Tcpip_TCPv6 Enabled by default? | No ## Flags @@ -15,18 +14,18 @@ None ## Metrics -Name | Description | Type | Labels ------|-------------|------|------- -`windows_tcp_connection_failures_total` | Number of times TCP connections have made a direct transition to the CLOSED state from the SYN-SENT state or the SYN-RCVD state, plus the number of times TCP connections have made a direct transition from the SYN-RCVD state to the LISTEN state | counter | af -`windows_tcp_connections_active_total` | Number of times TCP connections have made a direct transition from the CLOSED state to the SYN-SENT state.| counter | af -`windows_tcp_connections_established` | Number of TCP connections for which the current state is either ESTABLISHED or CLOSE-WAIT. | gauge | af -`windows_tcp_connections_passive_total` | Number of times TCP connections have made a direct transition from the LISTEN state to the SYN-RCVD state. | counter | af -`windows_tcp_connections_reset_total` | Connections Reset is the number of times TCP connections have made a direct transition to the CLOSED state from either the ESTABLISHED state or the CLOSE-WAIT state. | counter | af -`windows_tcp_segments_total` | Total segments sent or received using the TCP protocol | counter | af -`windows_tcp_segments_received_total` | Total segments received, including those received in error. This count includes segments received on currently established connections | counter | af -`windows_tcp_segments_retransmitted_total` | Total segments retransmitted. 
That is, segments transmitted that contain one or more previously transmitted bytes | counter | af -`windows_tcp_segments_sent_total` | Total segments sent, including those on current connections, but excluding those containing *only* retransmitted bytes | counter | af -`windows_tcp_connections_state_count` | Number of TCP connections by state among: CLOSED, LISTENING, SYN_SENT, SYN_RECEIVED, ESTABLISHED, FIN_WAIT1, FIN_WAIT2, CLOSE_WAIT, CLOSING, LAST_ACK, TIME_WAIT, DELETE_TCB | gauge | af +| Name | Description | Type | Labels | +|--------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------|--------| +| `windows_tcp_connection_failures_total` | Number of times TCP connections have made a direct transition to the CLOSED state from the SYN-SENT state or the SYN-RCVD state, plus the number of times TCP connections have made a direct transition from the SYN-RCVD state to the LISTEN state | counter | af | +| `windows_tcp_connections_active_total` | Number of times TCP connections have made a direct transition from the CLOSED state to the SYN-SENT state. | counter | af | +| `windows_tcp_connections_established` | Number of TCP connections for which the current state is either ESTABLISHED or CLOSE-WAIT. | gauge | af | +| `windows_tcp_connections_passive_total` | Number of times TCP connections have made a direct transition from the LISTEN state to the SYN-RCVD state. | counter | af | +| `windows_tcp_connections_reset_total` | Connections Reset is the number of times TCP connections have made a direct transition to the CLOSED state from either the ESTABLISHED state or the CLOSE-WAIT state. 
| counter | af | +| `windows_tcp_segments_total` | Total segments sent or received using the TCP protocol | counter | af | +| `windows_tcp_segments_received_total` | Total segments received, including those received in error. This count includes segments received on currently established connections | counter | af | +| `windows_tcp_segments_retransmitted_total` | Total segments retransmitted. That is, segments transmitted that contain one or more previously transmitted bytes | counter | af | +| `windows_tcp_segments_sent_total` | Total segments sent, including those on current connections, but excluding those containing *only* retransmitted bytes | counter | af | +| `windows_tcp_connections_state_count` | Number of TCP connections by state among: CLOSED, LISTENING, SYN_SENT, SYN_RECEIVED, ESTABLISHED, FIN_WAIT1, FIN_WAIT2, CLOSE_WAIT, CLOSING, LAST_ACK, TIME_WAIT, DELETE_TCB | gauge | af | ### Example metric _This collector does not yet have explained examples, we would appreciate your help adding them!_ diff --git a/docs/collector.udp.md b/docs/collector.udp.md new file mode 100644 index 00000000..5eedb010 --- /dev/null +++ b/docs/collector.udp.md @@ -0,0 +1,31 @@ +# udp collector + +The udp collector exposes metrics about the UDP network stack. + +||| +-|- +Metric name prefix | `udp` +Data source | Perflib +Enabled by default? 
| No + +## Flags + +None + +## Metrics + +| Name | Description | Type | Labels | +|-----------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------|---------|--------| +| `windows_udp_datagram_no_port_total` | Number of received UDP datagrams for which there was no application at the destination port | counter | af | +| `windows_udp_datagram_received_errors_total` | Number of received UDP datagrams that could not be delivered for reasons other than the lack of an application at the destination port | counter | af | +| `windows_udp_datagram_received_total` | Number of UDP datagrams received | counter | af | +| `windows_udp_datagram_sent_total` | Number of UDP datagrams sent | counter | af | + +### Example metric +_This collector does not yet have explained examples, we would appreciate your help adding them!_ + +## Useful queries +_This collector does not yet have any useful queries added, we would appreciate your help adding them!_ + +## Alerting examples +_This collector does not yet have alerting examples, we would appreciate your help adding them!_ diff --git a/internal/collector/tcp/tcp.go b/internal/collector/tcp/tcp.go index ef96d3d1..f53b3dc2 100644 --- a/internal/collector/tcp/tcp.go +++ b/internal/collector/tcp/tcp.go @@ -3,6 +3,7 @@ package tcp import ( + "errors" "fmt" "log/slog" "slices" @@ -97,6 +98,11 @@ func (c *Collector) GetPerfCounter(_ *slog.Logger) ([]string, error) { } func (c *Collector) Close(_ *slog.Logger) error { + if slices.Contains(c.config.CollectorsEnabled, "metrics") { + c.perfDataCollector4.Close() + c.perfDataCollector6.Close() + } + return nil } @@ -115,12 +121,12 @@ func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error { var err error - c.perfDataCollector4, err = perfdata.NewCollector(perfdata.V1, "TCPv4", nil, counters) + c.perfDataCollector4, err = 
perfdata.NewCollector(perfdata.V2, "TCPv4", nil, counters) if err != nil { return fmt.Errorf("failed to create TCPv4 collector: %w", err) } - c.perfDataCollector6, err = perfdata.NewCollector(perfdata.V1, "TCPv6", nil, counters) + c.perfDataCollector6, err = perfdata.NewCollector(perfdata.V2, "TCPv6", nil, counters) if err != nil { return fmt.Errorf("failed to create TCPv6 collector: %w", err) } @@ -190,30 +196,22 @@ func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error { // Collect sends the metric values for each metric // to the provided prometheus Metric channel. -func (c *Collector) Collect(_ *types.ScrapeContext, logger *slog.Logger, ch chan<- prometheus.Metric) error { - logger = logger.With(slog.String("collector", Name)) +func (c *Collector) Collect(_ *types.ScrapeContext, _ *slog.Logger, ch chan<- prometheus.Metric) error { + errs := make([]error, 0, 2) if slices.Contains(c.config.CollectorsEnabled, "metrics") { if err := c.collect(ch); err != nil { - logger.Error("failed collecting tcp metrics", - slog.Any("err", err), - ) - - return err + errs = append(errs, fmt.Errorf("failed collecting tcp metrics: %w", err)) } } if slices.Contains(c.config.CollectorsEnabled, "connections_state") { if err := c.collectConnectionsState(ch); err != nil { - logger.Error("failed collecting tcp connection state metrics", - slog.Any("err", err), - ) - - return err + errs = append(errs, fmt.Errorf("failed collecting tcp connection state metrics: %w", err)) } } - return nil + return errors.Join(errs...) } func (c *Collector) collect(ch chan<- prometheus.Metric) error { diff --git a/internal/collector/time/time.go b/internal/collector/time/time.go index c955bbc8..8c0ee5b3 100644 --- a/internal/collector/time/time.go +++ b/internal/collector/time/time.go @@ -145,7 +145,7 @@ func (c *Collector) Collect(ctx *types.ScrapeContext, logger *slog.Logger, ch ch // Perflib "Windows Time Service". 
type windowsTime struct { - ClockFrequencyAdjustmentPPBTotal float64 `perflib:"Clock Frequency Adjustment (ppb)"` + ClockFrequencyAdjustmentPPBTotal float64 `perflib:"Clock Frequency Adjustment (PPB)"` ComputedTimeOffset float64 `perflib:"Computed Time Offset"` NTPClientTimeSourceCount float64 `perflib:"NTP Client Time Source Count"` NTPRoundTripDelay float64 `perflib:"NTP Roundtrip Delay"` diff --git a/internal/collector/udp/const.go b/internal/collector/udp/const.go new file mode 100644 index 00000000..b2989a01 --- /dev/null +++ b/internal/collector/udp/const.go @@ -0,0 +1,15 @@ +package udp + +// Perflib counter names of the UDPv4 performance object. +// The UDPv6 performance object uses the same counter names. +const ( + datagramsNoPortPerSec = "Datagrams No Port/sec" + datagramsReceivedPerSec = "Datagrams Received/sec" + datagramsReceivedErrors = "Datagrams Received Errors" + datagramsSentPerSec = "Datagrams Sent/sec" +) + +// Datagrams No Port/sec is the rate of received UDP datagrams for which there was no application at the destination port. +// Datagrams Received Errors is the number of received UDP datagrams that could not be delivered for reasons other than the lack of an application at the destination port. +// Datagrams Received/sec is the rate at which UDP datagrams are delivered to UDP users. +// Datagrams Sent/sec is the rate at which UDP datagrams are sent from the entity. 
diff --git a/internal/collector/udp/udp.go b/internal/collector/udp/udp.go new file mode 100644 index 00000000..0fa7be75 --- /dev/null +++ b/internal/collector/udp/udp.go @@ -0,0 +1,168 @@ +//go:build windows + +package udp + +import ( + "fmt" + "log/slog" + + "github.com/alecthomas/kingpin/v2" + "github.com/prometheus-community/windows_exporter/internal/mi" + "github.com/prometheus-community/windows_exporter/internal/perfdata" + "github.com/prometheus-community/windows_exporter/internal/perfdata/perftypes" + "github.com/prometheus-community/windows_exporter/internal/types" + "github.com/prometheus/client_golang/prometheus" +) + +const Name = "udp" + +type Config struct{} + +var ConfigDefaults = Config{} + +// A Collector is a Prometheus Collector for the Perflib UDPv4 and UDPv6 performance objects. +type Collector struct { + config Config + + perfDataCollector4 perfdata.Collector + perfDataCollector6 perfdata.Collector + + datagramsNoPortTotal *prometheus.Desc + datagramsReceivedTotal *prometheus.Desc + datagramsReceivedErrorsTotal *prometheus.Desc + datagramsSentTotal *prometheus.Desc +} + +func New(config *Config) *Collector { + if config == nil { + config = &ConfigDefaults + } + + c := &Collector{ + config: *config, + } + + return c +} + +func NewWithFlags(_ *kingpin.Application) *Collector { + c := &Collector{ + config: ConfigDefaults, + } + + return c +} + +func (c *Collector) GetName() string { + return Name +} + +func (c *Collector) GetPerfCounter(_ *slog.Logger) ([]string, error) { + return []string{}, nil +} + +func (c *Collector) Close(_ *slog.Logger) error { + c.perfDataCollector4.Close() + c.perfDataCollector6.Close() + + return nil +} + +func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error { + counters := []string{ + datagramsNoPortPerSec, + datagramsReceivedPerSec, + datagramsReceivedErrors, + datagramsSentPerSec, + } + + var err error + + c.perfDataCollector4, err = perfdata.NewCollector(perfdata.V2, "UDPv4", nil, counters) + if err != nil { 
+ return fmt.Errorf("failed to create UDPv4 collector: %w", err) + } + + c.perfDataCollector6, err = perfdata.NewCollector(perfdata.V2, "UDPv6", nil, counters) + if err != nil { + return fmt.Errorf("failed to create UDPv6 collector: %w", err) + } + + c.datagramsNoPortTotal = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, Name, "datagram_no_port_total"), + "Number of received UDP datagrams for which there was no application at the destination port", + []string{"af"}, + nil, + ) + c.datagramsReceivedTotal = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, Name, "datagram_received_total"), + "UDP datagrams are delivered to UDP users", + []string{"af"}, + nil, + ) + c.datagramsReceivedErrorsTotal = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, Name, "datagram_received_errors_total"), + "Number of received UDP datagrams that could not be delivered for reasons other than the lack of an application at the destination port", + []string{"af"}, + nil, + ) + c.datagramsSentTotal = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, Name, "datagram_sent_total"), + "UDP datagrams are sent from the entity", + []string{"af"}, + nil, + ) + + return nil +} + +// Collect sends the metric values for each metric +// to the provided prometheus Metric channel. 
+func (c *Collector) Collect(_ *types.ScrapeContext, _ *slog.Logger, ch chan<- prometheus.Metric) error { + return c.collect(ch) +} + +func (c *Collector) collect(ch chan<- prometheus.Metric) error { + data, err := c.perfDataCollector4.Collect() + if err != nil { + return fmt.Errorf("failed to collect UDPv4 metrics: %w", err) + } + + c.writeUDPCounters(ch, data[perftypes.EmptyInstance], []string{"ipv4"}) + + data, err = c.perfDataCollector6.Collect() + if err != nil { + return fmt.Errorf("failed to collect UDPv6 metrics: %w", err) + } + + c.writeUDPCounters(ch, data[perftypes.EmptyInstance], []string{"ipv6"}) + + return nil +} + +func (c *Collector) writeUDPCounters(ch chan<- prometheus.Metric, metrics map[string]perftypes.CounterValues, labels []string) { + ch <- prometheus.MustNewConstMetric( + c.datagramsNoPortTotal, + prometheus.CounterValue, + metrics[datagramsNoPortPerSec].FirstValue, + labels..., + ) + ch <- prometheus.MustNewConstMetric( + c.datagramsReceivedErrorsTotal, + prometheus.CounterValue, + metrics[datagramsReceivedErrors].FirstValue, + labels..., + ) + ch <- prometheus.MustNewConstMetric( + c.datagramsReceivedTotal, + prometheus.CounterValue, + metrics[datagramsReceivedPerSec].FirstValue, + labels..., + ) + ch <- prometheus.MustNewConstMetric( + c.datagramsSentTotal, + prometheus.CounterValue, + metrics[datagramsSentPerSec].FirstValue, + labels..., + ) +} diff --git a/internal/collector/udp/udp_test.go b/internal/collector/udp/udp_test.go new file mode 100644 index 00000000..92f1e03b --- /dev/null +++ b/internal/collector/udp/udp_test.go @@ -0,0 +1,16 @@ +package udp_test + +import ( + "testing" + + "github.com/prometheus-community/windows_exporter/internal/collector/udp" + "github.com/prometheus-community/windows_exporter/internal/testutils" +) + +func BenchmarkCollector(b *testing.B) { + testutils.FuncBenchmarkCollector(b, udp.Name, udp.NewWithFlags) +} + +func TestCollector(t *testing.T) { + testutils.TestCollector(t, udp.New, nil) +} diff --git 
a/pkg/collector/collector.go b/pkg/collector/collector.go index 006b5bb2..014b2937 100644 --- a/pkg/collector/collector.go +++ b/pkg/collector/collector.go @@ -55,6 +55,7 @@ import ( "github.com/prometheus-community/windows_exporter/internal/collector/textfile" "github.com/prometheus-community/windows_exporter/internal/collector/thermalzone" "github.com/prometheus-community/windows_exporter/internal/collector/time" + "github.com/prometheus-community/windows_exporter/internal/collector/udp" "github.com/prometheus-community/windows_exporter/internal/collector/update" "github.com/prometheus-community/windows_exporter/internal/collector/vmware" "github.com/prometheus-community/windows_exporter/internal/mi" @@ -122,6 +123,7 @@ func NewWithConfig(config Config) *MetricCollectors { collectors[textfile.Name] = textfile.New(&config.Textfile) collectors[thermalzone.Name] = thermalzone.New(&config.ThermalZone) collectors[time.Name] = time.New(&config.Time) + collectors[udp.Name] = udp.New(&config.UDP) collectors[update.Name] = update.New(&config.Update) collectors[vmware.Name] = vmware.New(&config.Vmware) diff --git a/pkg/collector/config.go b/pkg/collector/config.go index f9e67276..879345b1 100644 --- a/pkg/collector/config.go +++ b/pkg/collector/config.go @@ -45,6 +45,7 @@ import ( "github.com/prometheus-community/windows_exporter/internal/collector/textfile" "github.com/prometheus-community/windows_exporter/internal/collector/thermalzone" "github.com/prometheus-community/windows_exporter/internal/collector/time" + "github.com/prometheus-community/windows_exporter/internal/collector/udp" "github.com/prometheus-community/windows_exporter/internal/collector/update" "github.com/prometheus-community/windows_exporter/internal/collector/vmware" ) @@ -94,6 +95,7 @@ type Config struct { Textfile textfile.Config `yaml:"textfile"` ThermalZone thermalzone.Config `yaml:"thermal_zone"` Time time.Config `yaml:"time"` + UDP udp.Config `yaml:"udp"` Update update.Config `yaml:"update"` 
Vmware vmware.Config `yaml:"vmware"` } @@ -146,6 +148,7 @@ var ConfigDefaults = Config{ Textfile: textfile.ConfigDefaults, ThermalZone: thermalzone.ConfigDefaults, Time: time.ConfigDefaults, + UDP: udp.ConfigDefaults, Update: update.ConfigDefaults, Vmware: vmware.ConfigDefaults, } diff --git a/pkg/collector/map.go b/pkg/collector/map.go index 3fcdf25c..471f2449 100644 --- a/pkg/collector/map.go +++ b/pkg/collector/map.go @@ -49,6 +49,7 @@ import ( "github.com/prometheus-community/windows_exporter/internal/collector/textfile" "github.com/prometheus-community/windows_exporter/internal/collector/thermalzone" "github.com/prometheus-community/windows_exporter/internal/collector/time" + "github.com/prometheus-community/windows_exporter/internal/collector/udp" "github.com/prometheus-community/windows_exporter/internal/collector/update" "github.com/prometheus-community/windows_exporter/internal/collector/vmware" ) @@ -104,6 +105,7 @@ var BuildersWithFlags = map[string]BuilderWithFlags[Collector]{ textfile.Name: NewBuilderWithFlags(textfile.NewWithFlags), thermalzone.Name: NewBuilderWithFlags(thermalzone.NewWithFlags), time.Name: NewBuilderWithFlags(time.NewWithFlags), + udp.Name: NewBuilderWithFlags(udp.NewWithFlags), update.Name: NewBuilderWithFlags(update.NewWithFlags), vmware.Name: NewBuilderWithFlags(vmware.NewWithFlags), } diff --git a/tools/e2e-output.txt b/tools/e2e-output.txt index e95a9e6a..bd87324d 100644 --- a/tools/e2e-output.txt +++ b/tools/e2e-output.txt @@ -87,8 +87,6 @@ test_alpha_total 42 # TYPE windows_cpu_interrupts_total counter # HELP windows_cpu_logical_processor Total number of logical processors # TYPE windows_cpu_logical_processor gauge -# HELP windows_net_nic_address_info A metric with a constant '1' value labeled with the network interface's address information. 
-# TYPE windows_net_nic_address_info gauge # HELP windows_cpu_parking_status Parking Status represents whether a processor is parked or not # TYPE windows_cpu_parking_status gauge # HELP windows_cpu_processor_mperf_total Processor MPerf is the number of TSC ticks incremented while executing instructions @@ -120,16 +118,21 @@ windows_exporter_collector_success{collector="cpu"} 1 windows_exporter_collector_success{collector="cpu_info"} 1 windows_exporter_collector_success{collector="cs"} 1 windows_exporter_collector_success{collector="logical_disk"} 1 +windows_exporter_collector_success{collector="logon"} 1 windows_exporter_collector_success{collector="memory"} 1 windows_exporter_collector_success{collector="net"} 1 windows_exporter_collector_success{collector="os"} 1 windows_exporter_collector_success{collector="perfdata"} 1 windows_exporter_collector_success{collector="physical_disk"} 1 +windows_exporter_collector_success{collector="printer"} 1 windows_exporter_collector_success{collector="process"} 1 windows_exporter_collector_success{collector="scheduled_task"} 1 windows_exporter_collector_success{collector="service"} 1 windows_exporter_collector_success{collector="system"} 1 +windows_exporter_collector_success{collector="tcp"} 1 windows_exporter_collector_success{collector="textfile"} 1 +windows_exporter_collector_success{collector="time"} 1 +windows_exporter_collector_success{collector="udp"} 1 # HELP windows_exporter_collector_timeout windows_exporter: Whether the collector timed out. 
# TYPE windows_exporter_collector_timeout gauge windows_exporter_collector_timeout{collector="cache"} 0 @@ -137,16 +140,21 @@ windows_exporter_collector_timeout{collector="cpu"} 0 windows_exporter_collector_timeout{collector="cpu_info"} 0 windows_exporter_collector_timeout{collector="cs"} 0 windows_exporter_collector_timeout{collector="logical_disk"} 0 +windows_exporter_collector_timeout{collector="logon"} 0 windows_exporter_collector_timeout{collector="memory"} 0 windows_exporter_collector_timeout{collector="net"} 0 windows_exporter_collector_timeout{collector="os"} 0 windows_exporter_collector_timeout{collector="perfdata"} 0 windows_exporter_collector_timeout{collector="physical_disk"} 0 +windows_exporter_collector_timeout{collector="printer"} 0 windows_exporter_collector_timeout{collector="process"} 0 windows_exporter_collector_timeout{collector="scheduled_task"} 0 windows_exporter_collector_timeout{collector="service"} 0 windows_exporter_collector_timeout{collector="system"} 0 +windows_exporter_collector_timeout{collector="tcp"} 0 windows_exporter_collector_timeout{collector="textfile"} 0 +windows_exporter_collector_timeout{collector="time"} 0 +windows_exporter_collector_timeout{collector="udp"} 0 # HELP windows_exporter_perflib_snapshot_duration_seconds Duration of perflib snapshot capture # TYPE windows_exporter_perflib_snapshot_duration_seconds gauge # HELP windows_exporter_scrape_duration_seconds windows_exporter: Total scrape duration. @@ -185,6 +193,8 @@ windows_exporter_collector_timeout{collector="textfile"} 0 # TYPE windows_logical_disk_write_seconds_total counter # HELP windows_logical_disk_writes_total The number of write operations on the disk (LogicalDisk.DiskWritesPerSec) # TYPE windows_logical_disk_writes_total counter +# HELP windows_logon_session_logon_timestamp_seconds timestamp of the logon session in seconds. 
+# TYPE windows_logon_session_logon_timestamp_seconds gauge # HELP windows_memory_available_bytes The amount of physical memory immediately available for allocation to a process or for system use. It is equal to the sum of memory assigned to the standby (cached), free and zero page lists (AvailableBytes) # TYPE windows_memory_available_bytes gauge # HELP windows_memory_cache_bytes (CacheBytes) @@ -229,15 +239,15 @@ windows_exporter_collector_timeout{collector="textfile"} 0 # TYPE windows_memory_standby_cache_normal_priority_bytes gauge # HELP windows_memory_standby_cache_reserve_bytes The amount of physical memory, in bytes, that is assigned to the reserve standby cache page lists. This memory contains cached data and code that is not actively in use by processes, the system and the system cache (StandbyCacheReserveBytes) # TYPE windows_memory_standby_cache_reserve_bytes gauge -# HELP windows_memory_swap_page_operations_total Total number of swap page read and writes (PagesPersec) +# HELP windows_memory_swap_page_operations_total Total number of swap page read and writes (PagesPerSec) # TYPE windows_memory_swap_page_operations_total counter -# HELP windows_memory_swap_page_reads_total Number of disk page reads (a single read operation reading several pages is still only counted once) (PageReadsPersec) +# HELP windows_memory_swap_page_reads_total Number of disk page reads (a single read operation reading several pages is still only counted once) (PageReadsPerSec) # TYPE windows_memory_swap_page_reads_total counter -# HELP windows_memory_swap_page_writes_total Number of disk page writes (a single write operation writing several pages is still only counted once) (PageWritesPersec) +# HELP windows_memory_swap_page_writes_total Number of disk page writes (a single write operation writing several pages is still only counted once) (PageWritesPerSec) # TYPE windows_memory_swap_page_writes_total counter -# HELP windows_memory_swap_pages_read_total Number of pages read 
across all page reads (ie counting all pages read even if they are read in a single operation) (PagesInputPersec) +# HELP windows_memory_swap_pages_read_total Number of pages read across all page reads (ie counting all pages read even if they are read in a single operation) (PagesInputPerSec) # TYPE windows_memory_swap_pages_read_total counter -# HELP windows_memory_swap_pages_written_total Number of pages written across all page writes (ie counting all pages written even if they are written in a single operation) (PagesOutputPersec) +# HELP windows_memory_swap_pages_written_total Number of pages written across all page writes (ie counting all pages written even if they are written in a single operation) (PagesOutputPerSec) # TYPE windows_memory_swap_pages_written_total counter # HELP windows_memory_system_cache_resident_bytes The size, in bytes, of the portion of the system file cache which is currently resident and active in physical memory (SystemCacheResidentBytes) # TYPE windows_memory_system_cache_resident_bytes gauge @@ -249,11 +259,11 @@ windows_exporter_collector_timeout{collector="textfile"} 0 # TYPE windows_memory_system_driver_resident_bytes gauge # HELP windows_memory_system_driver_total_bytes The size, in bytes, of the pageable virtual memory currently being used by device drivers. 
Pageable memory can be written to disk when it is not being used (SystemDriverTotalBytes) # TYPE windows_memory_system_driver_total_bytes gauge -# HELP windows_memory_transition_faults_total Number of faults rate at which page faults are resolved by recovering pages that were being used by another process sharing the page, or were on the modified page list or the standby list, or were being written to disk at the time of the page fault (TransitionFaultsPersec) +# HELP windows_memory_transition_faults_total Number of faults rate at which page faults are resolved by recovering pages that were being used by another process sharing the page, or were on the modified page list or the standby list, or were being written to disk at the time of the page fault (TransitionFaultsPerSec) # TYPE windows_memory_transition_faults_total counter -# HELP windows_memory_transition_pages_repurposed_total Transition Pages RePurposed is the rate at which the number of transition cache pages were reused for a different purpose (TransitionPagesRePurposedPersec) +# HELP windows_memory_transition_pages_repurposed_total Transition Pages RePurposed is the rate at which the number of transition cache pages were reused for a different purpose (TransitionPagesRePurposedPerSec) # TYPE windows_memory_transition_pages_repurposed_total counter -# HELP windows_memory_write_copies_total The number of page faults caused by attempting to write that were satisfied by copying the page from elsewhere in physical memory (WriteCopiesPersec) +# HELP windows_memory_write_copies_total The number of page faults caused by attempting to write that were satisfied by copying the page from elsewhere in physical memory (WriteCopiesPerSec) # TYPE windows_memory_write_copies_total counter # HELP windows_net_bytes_received_total (Network.BytesReceivedPerSec) # TYPE windows_net_bytes_received_total counter @@ -263,6 +273,8 @@ windows_exporter_collector_timeout{collector="textfile"} 0 # TYPE windows_net_bytes_total counter 
# HELP windows_net_current_bandwidth_bytes (Network.CurrentBandwidth) # TYPE windows_net_current_bandwidth_bytes gauge +# HELP windows_net_nic_address_info A metric with a constant '1' value labeled with the network interface's address information. +# TYPE windows_net_nic_address_info gauge # HELP windows_net_output_queue_length_packets (Network.OutputQueueLength) # TYPE windows_net_output_queue_length_packets gauge # HELP windows_net_packets_outbound_discarded_total (Network.PacketsOutboundDiscarded) @@ -339,16 +351,26 @@ windows_exporter_collector_timeout{collector="textfile"} 0 # TYPE windows_physical_disk_write_seconds_total counter # HELP windows_physical_disk_writes_total The number of write operations on the disk (PhysicalDisk.DiskWritesPerSec) # TYPE windows_physical_disk_writes_total counter +# HELP windows_printer_job_count Number of jobs processed by the printer since the last reset +# TYPE windows_printer_job_count counter +# HELP windows_printer_status Printer status +# TYPE windows_printer_status gauge +# HELP windows_scheduled_task_last_result The result that was returned the last time the registered task was run +# TYPE windows_scheduled_task_last_result gauge +windows_scheduled_task_last_result{task="/Microsoft/Windows/PLA/GAEvents"} 0 +# HELP windows_scheduled_task_missed_runs The number of times the registered task missed a scheduled run +# TYPE windows_scheduled_task_missed_runs gauge +windows_scheduled_task_missed_runs{task="/Microsoft/Windows/PLA/GAEvents"} 0 # HELP windows_scheduled_task_state The current state of a scheduled task # TYPE windows_scheduled_task_state gauge -windows_scheduled_task_state{state="disabled",task="/Microsoft/Windows/Maintenance/WinSAT"} 1 -windows_scheduled_task_state{state="queued",task="/Microsoft/Windows/Maintenance/WinSAT"} 0 -windows_scheduled_task_state{state="ready",task="/Microsoft/Windows/Maintenance/WinSAT"} 0 -windows_scheduled_task_state{state="running",task="/Microsoft/Windows/Maintenance/WinSAT"} 0 
-windows_scheduled_task_state{state="unknown",task="/Microsoft/Windows/Maintenance/WinSAT"} 0 +windows_scheduled_task_state{state="disabled",task="/Microsoft/Windows/PLA/GAEvents"} 0 +windows_scheduled_task_state{state="queued",task="/Microsoft/Windows/PLA/GAEvents"} 0 +windows_scheduled_task_state{state="ready",task="/Microsoft/Windows/PLA/GAEvents"} 0 +windows_scheduled_task_state{state="running",task="/Microsoft/Windows/PLA/GAEvents"} 1 +windows_scheduled_task_state{state="unknown",task="/Microsoft/Windows/PLA/GAEvents"} 0 # HELP windows_service_info A metric with a constant '1' value labeled with service information # TYPE windows_service_info gauge -windows_service_info{display_name="Themes",name="Themes",path_name="C:\\WINDOWS\\System32\\svchost.exe -k netsvcs -p",run_as="LocalSystem"} 1 +windows_service_info{display_name="Themes",name="Themes",path_name="C:\\Windows\\System32\\svchost.exe -k netsvcs -p",run_as="LocalSystem"} 1 # HELP windows_service_process Process of started service. The value is the creation time of the process as a unix timestamp. 
# TYPE windows_service_process gauge # HELP windows_service_start_mode The start mode of the service (StartMode) @@ -383,9 +405,53 @@ windows_service_state{name="Themes",state="stopped"} 0 # TYPE windows_system_system_up_time gauge # HELP windows_system_threads Current number of threads (WMI source: PerfOS_System.Threads) # TYPE windows_system_threads gauge +# HELP windows_tcp_connection_failures_total (TCP.ConnectionFailures) +# TYPE windows_tcp_connection_failures_total counter +# HELP windows_tcp_connections_active_total (TCP.ConnectionsActive) +# TYPE windows_tcp_connections_active_total counter +# HELP windows_tcp_connections_established (TCP.ConnectionsEstablished) +# TYPE windows_tcp_connections_established gauge +# HELP windows_tcp_connections_passive_total (TCP.ConnectionsPassive) +# TYPE windows_tcp_connections_passive_total counter +# HELP windows_tcp_connections_reset_total (TCP.ConnectionsReset) +# TYPE windows_tcp_connections_reset_total counter +# HELP windows_tcp_connections_state_count Number of TCP connections by state and address family +# TYPE windows_tcp_connections_state_count gauge +# HELP windows_tcp_segments_received_total (TCP.SegmentsReceivedTotal) +# TYPE windows_tcp_segments_received_total counter +# HELP windows_tcp_segments_retransmitted_total (TCP.SegmentsRetransmittedTotal) +# TYPE windows_tcp_segments_retransmitted_total counter +# HELP windows_tcp_segments_sent_total (TCP.SegmentsSentTotal) +# TYPE windows_tcp_segments_sent_total counter +# HELP windows_tcp_segments_total (TCP.SegmentsTotal) +# TYPE windows_tcp_segments_total counter # HELP windows_textfile_mtime_seconds Unixtime mtime of textfiles successfully read. 
# TYPE windows_textfile_mtime_seconds gauge # HELP windows_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise # TYPE windows_textfile_scrape_error gauge windows_textfile_scrape_error 0 +# HELP windows_time_clock_frequency_adjustment_ppb_total Total adjustment made to the local system clock frequency by W32Time in Parts Per Billion (PPB) units. +# TYPE windows_time_clock_frequency_adjustment_ppb_total counter +# HELP windows_time_computed_time_offset_seconds Absolute time offset between the system clock and the chosen time source, in seconds +# TYPE windows_time_computed_time_offset_seconds gauge +# HELP windows_time_current_timestamp_seconds OperatingSystem.LocalDateTime +# TYPE windows_time_current_timestamp_seconds gauge +# HELP windows_time_ntp_client_time_sources Active number of NTP Time sources being used by the client +# TYPE windows_time_ntp_client_time_sources gauge +# HELP windows_time_ntp_round_trip_delay_seconds Roundtrip delay experienced by the NTP client in receiving a response from the server for the most recent request, in seconds +# TYPE windows_time_ntp_round_trip_delay_seconds gauge +# HELP windows_time_ntp_server_incoming_requests_total Total number of requests received by NTP server +# TYPE windows_time_ntp_server_incoming_requests_total counter +# HELP windows_time_ntp_server_outgoing_responses_total Total number of requests responded to by NTP server +# TYPE windows_time_ntp_server_outgoing_responses_total counter +# HELP windows_time_timezone OperatingSystem.LocalDateTime +# TYPE windows_time_timezone gauge +# HELP windows_udp_datagram_no_port_total Number of received UDP datagrams for which there was no application at the destination port +# TYPE windows_udp_datagram_no_port_total counter +# HELP windows_udp_datagram_received_errors_total Number of received UDP datagrams that could not be delivered for reasons other than the lack of an application at the destination port +# TYPE 
windows_udp_datagram_received_errors_total counter +# HELP windows_udp_datagram_received_total UDP datagrams are delivered to UDP users +# TYPE windows_udp_datagram_received_total gauge +# HELP windows_udp_datagram_sent_total UDP datagrams are sent from the entity +# TYPE windows_udp_datagram_sent_total counter diff --git a/tools/end-to-end-test.ps1 b/tools/end-to-end-test.ps1 index 0beee530..5bc81fbe 100644 --- a/tools/end-to-end-test.ps1 +++ b/tools/end-to-end-test.ps1 @@ -10,7 +10,7 @@ if (-not (Test-Path -Path '..\windows_exporter.exe')) { Write-Output "..\windows_exporter.exe not found. Consider running \`go build\` first" } -$temp_dir = Join-Path $env:TEMP $(New-Guid) | ForEach-Object { mkdir $_ } +$temp_dir = Join-Path $env:TEMP $([guid]::newguid()) | ForEach-Object { mkdir $_ } # Create temporary directory for textfile collector $textfile_dir = "$($temp_dir)/textfile" @@ -18,14 +18,14 @@ mkdir $textfile_dir | Out-Null Copy-Item 'e2e-textfile.prom' -Destination "$($textfile_dir)/e2e-textfile.prom" # Omit dynamic collector information that will change after each run -$skip_re = "^(go_|windows_exporter_build_info|windows_exporter_collector_duration_seconds|windows_exporter_perflib_snapshot_duration_seconds|windows_exporter_scrape_duration_seconds|process_|windows_textfile_mtime_seconds|windows_cpu|windows_cs|windows_cache|windows_logical_disk|windows_physical_disk|windows_memory|windows_net|windows_os|windows_process|windows_service_process|windows_system|windows_perfdata|windows_textfile_mtime_seconds)" +$skip_re = 
"^(go_|windows_exporter_build_info|windows_exporter_collector_duration_seconds|windows_exporter_perflib_snapshot_duration_seconds|windows_exporter_scrape_duration_seconds|process_|windows_textfile_mtime_seconds|windows_cpu|windows_cs|windows_cache|windows_logon|windows_logical_disk|windows_physical_disk|windows_memory|windows_net|windows_os|windows_process|windows_service_process|windows_printer|windows_udp|windows_tcp|windows_system|windows_time|windows_session|windows_perfdata|windows_textfile_mtime_seconds)" # Start process in background, awaiting HTTP requests. # Use default collectors, port and address: http://localhost:9182/metrics $exporter_proc = Start-Process ` -PassThru ` -FilePath ..\windows_exporter.exe ` - -ArgumentList "--log.level=debug","--web.disable-exporter-metrics","--collectors.enabled=[defaults],cache,cpu_info,textfile,process,perfdata,scheduled_task","--collector.process.include=explorer.exe","--collector.scheduled_task.include=.*WinSAT","--collector.service.include=Themes","--collector.textfile.directories=$($textfile_dir)",@" + -ArgumentList "--log.level=debug","--web.disable-exporter-metrics","--collectors.enabled=[defaults],cpu_info,textfile,process,perfdata,scheduled_task,tcp,udp,time,system,service,logical_disk,printer,os,net,memory,logon,cache","--collector.process.include=explorer.exe","--collector.scheduled_task.include=.*GAEvents","--collector.service.include=Themes","--collector.textfile.directories=$($textfile_dir)",@" --collector.perfdata.objects="[{\"object\":\"Processor Information\",\"instance_label\":\"core\",\"instances\":[\"*\"],\"counters\":{\"% Processor Time\":{},\"% Privileged Time\":{}}},{\"object\":\"Memory\",\"counters\":{\"Cache Faults/sec\":{\"type\":\"counter\"}}}]" "@ ` -WindowStyle Hidden ` diff --git a/tools/promtool.ps1 b/tools/promtool.ps1 index 948153d9..c4657f52 100644 --- a/tools/promtool.ps1 +++ b/tools/promtool.ps1 @@ -85,7 +85,7 @@ $script_path = $MyInvocation.MyCommand.Path $working_dir = Split-Path 
$script_path Push-Location $working_dir -$temp_dir = Join-Path $env:TEMP $(New-Guid) | ForEach-Object { mkdir $_ } +$temp_dir = Join-Path $env:TEMP $([guid]::newguid()) | ForEach-Object { mkdir $_ } # Start process in background, awaiting HTTP requests. # Listen on 9183/TCP, preventing conflicts with 9182/TCP used by end-to-end-test.ps1 @@ -113,13 +113,29 @@ for ($i=1; $i -le 5; $i++) { # windows_memory_pool_nonpaged_allocs_total is wrong for years. It's not a gauge, but a counter. $skip_re = "^([#]?\s*(HELP|TYPE)?\s*go_|windows_memory_pool_nonpaged_allocs_total)" -# Need to remove carriage returns, as promtool expects LF line endings -$output = ((Invoke-WebRequest -UseBasicParsing -URI http://127.0.0.1:9183/metrics).Content) -Split "`r?`n" | Select-String -NotMatch $skip_re | Join-String -Separator "`n" -# Join the split lines back to a single String (with LF line endings!) -$output = $output -Join "`n" -Stop-Process -Id $exporter_proc.Id +try { + # Need to remove carriage returns, as promtool expects LF line endings + $output = ((Invoke-WebRequest -UseBasicParsing -URI http://127.0.0.1:9183/metrics).Content) -Split "`r?`n" | Select-String -NotMatch $skip_re | Join-String -Separator "`n" + # $output = (((Invoke-WebRequest -UseBasicParsing -URI http://127.0.0.1:9183/metrics).Content) -Split "`r?`n" | Select-String -NotMatch $skip_re) -join "`n" + # Join the split lines back to a single String (with LF line endings!) + $output = $output -Join "`n" + + Stop-Process -Id $exporter_proc.Id +} catch { + Write-Host "STDOUT" + Get-Content "$($temp_dir)/windows_exporter.log" + Write-Host "STDERR" + Get-Content "$($temp_dir)/windows_exporter_error.log" + + throw $_ +} + $ExitCode = Start-RawProcess -InputVar $output -CommandName promtool.exe -CommandArgs @("check metrics") if ($ExitCode -ne 0) { + Write-Host "OUTPUT" + + Write-Host $output + Write-Host "Promtool command returned exit code $($ExitCode). See output for details." EXIT 1 }