From d1517d839840dae0752106e23dda4aee75f23627 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan-Otto=20Kr=C3=B6pke?= Date: Sun, 13 Oct 2024 10:19:41 +0200 Subject: [PATCH] logon: BREAKING: replace wmi query by Win32 API calls and expose detailed logon sessions. (click PR for more information) (#1687) --- README.md | 1 + docs/collector.logon.md | 68 ++++++-- internal/collector/logon/logon.go | 188 +++-------------------- internal/headers/secur32/secur32.go | 119 ++++++++++++++ internal/headers/secur32/secur32_test.go | 16 ++ internal/headers/secur32/types.go | 112 ++++++++++++++ 6 files changed, 323 insertions(+), 181 deletions(-) create mode 100644 internal/headers/secur32/secur32.go create mode 100644 internal/headers/secur32/secur32_test.go create mode 100644 internal/headers/secur32/types.go diff --git a/README.md b/README.md index b2a39477..a8715f77 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,7 @@ Name | Description | Enabled by default [netframework](docs/collector.netframework.md) | .NET Framework metrics | [net](docs/collector.net.md) | Network interface I/O | ✓ [os](docs/collector.os.md) | OS metrics (memory, processes, users) | ✓ +[perfdata](docs/collector.perfdata.md) | Custom perfdata metrics | [physical_disk](docs/collector.physical_disk.md) | physical disk metrics | ✓ [printer](docs/collector.printer.md) | Printer metrics | [process](docs/collector.process.md) | Per-process metrics | diff --git a/docs/collector.logon.md b/docs/collector.logon.md index 9d956e19..93d7088f 100644 --- a/docs/collector.logon.md +++ b/docs/collector.logon.md @@ -2,13 +2,11 @@ The logon collector exposes metrics detailing the active user logon sessions. -||| --|- -Metric name prefix | `logon` -Classes | [`Win32_LogonSession`](https://docs.microsoft.com/en-us/windows/win32/cimwin32prov/win32-logonsession) -Enabled by default? 
| No - -> :warning: **On some deployments, this collector seems to have some memory/timeout issues**: See [#583](https://github.com/prometheus-community/windows_exporter/issues/583) +| | | +|---------------------|-----------| +| Metric name prefix | `logon` | +| Source | Win32 API | +| Enabled by default? | No | ## Flags @@ -16,21 +14,65 @@ None ## Metrics -Name | Description | Type | Labels ------|-------------|------|------- -`windows_logon_logon_type` | Number of active user logon sessions | gauge | status +| Name | Description | Type | Labels | +|-------------------------------------------|--------------------------------------------|-------|------------------------------------| +| `windows_logon_session_logon_timestamp_seconds` | timestamp of the logon session in seconds. | gauge | `domain`, `id`, `type`, `username` | ### Example metric Query the total number of interactive logon sessions ``` -windows_logon_logon_type{status="interactive"} +# HELP windows_logon_session_logon_timestamp_seconds timestamp of the logon session in seconds. 
+# TYPE windows_logon_session_logon_timestamp_seconds gauge +windows_logon_session_logon_timestamp_seconds{domain="",id="0x0:0x8c54",type="System",username=""} 1.72876928e+09 +windows_logon_session_logon_timestamp_seconds{domain="Font Driver Host",id="0x0:0x991a",type="Interactive",username="UMFD-1"} 1.728769282e+09 +windows_logon_session_logon_timestamp_seconds{domain="Font Driver Host",id="0x0:0x9933",type="Interactive",username="UMFD-0"} 1.728769282e+09 +windows_logon_session_logon_timestamp_seconds{domain="Font Driver Host",id="0x0:0x994a",type="Interactive",username="UMFD-0"} 1.728769282e+09 +windows_logon_session_logon_timestamp_seconds{domain="Font Driver Host",id="0x0:0x999d",type="Interactive",username="UMFD-1"} 1.728769282e+09 +windows_logon_session_logon_timestamp_seconds{domain="Font Driver Host",id="0x0:0xbf25a",type="Interactive",username="UMFD-2"} 1.728769532e+09 +windows_logon_session_logon_timestamp_seconds{domain="Font Driver Host",id="0x0:0xbf290",type="Interactive",username="UMFD-2"} 1.728769532e+09 +windows_logon_session_logon_timestamp_seconds{domain="JKROEPKE",id="0x0:0x130241",type="Network",username="vm-jok-dev$"} 1.728769625e+09 +windows_logon_session_logon_timestamp_seconds{domain="JKROEPKE",id="0x0:0x24f7c9",type="Network",username="vm-jok-dev$"} 1.728770121e+09 +windows_logon_session_logon_timestamp_seconds{domain="JKROEPKE",id="0x0:0x276846",type="Network",username="vm-jok-dev$"} 1.728770195e+09 +windows_logon_session_logon_timestamp_seconds{domain="JKROEPKE",id="0x0:0x3e4",type="Service",username="vm-jok-dev$"} 1.728769283e+09 +windows_logon_session_logon_timestamp_seconds{domain="JKROEPKE",id="0x0:0x3e7",type="System",username="vm-jok-dev$"} 1.728769279e+09 +windows_logon_session_logon_timestamp_seconds{domain="JKROEPKE",id="0x0:0x71d0f",type="Network",username="vm-jok-dev$"} 1.728769324e+09 +windows_logon_session_logon_timestamp_seconds{domain="JKROEPKE",id="0x0:0x720a3",type="Network",username="vm-jok-dev$"} 1.728769324e+09 
+windows_logon_session_logon_timestamp_seconds{domain="JKROEPKE",id="0x0:0x725cb",type="Network",username="vm-jok-dev$"} 1.728769324e+09 +windows_logon_session_logon_timestamp_seconds{domain="JKROEPKE",id="0x0:0x753d8",type="Network",username="vm-jok-dev$"} 1.728769325e+09 +windows_logon_session_logon_timestamp_seconds{domain="JKROEPKE",id="0x0:0xa3913",type="Network",username="vm-jok-dev$"} 1.728769385e+09 +windows_logon_session_logon_timestamp_seconds{domain="JKROEPKE",id="0x0:0xbe7f2",type="Network",username="jok"} 1.728769531e+09 +windows_logon_session_logon_timestamp_seconds{domain="JKROEPKE",id="0x0:0xc76c4",type="RemoteInteractive",username="jok"} 1.728769533e+09 +windows_logon_session_logon_timestamp_seconds{domain="NT AUTHORITY",id="0x0:0x3e3",type="Service",username="IUSR"} 1.728769295e+09 +windows_logon_session_logon_timestamp_seconds{domain="NT AUTHORITY",id="0x0:0x3e5",type="Service",username="LOCAL SERVICE"} 1.728769283e+09 +windows_logon_session_logon_timestamp_seconds{domain="NT Service",id="0x0:0xae4c7",type="Service",username="MSSQLSERVER"} 1.728769425e+09 +windows_logon_session_logon_timestamp_seconds{domain="NT Service",id="0x0:0xb42f1",type="Service",username="SQLTELEMETRY"} 1.728769431e+09 +windows_logon_session_logon_timestamp_seconds{domain="Window Manager",id="0x0:0xbfbac",type="Interactive",username="DWM-2"} 1.728769532e+09 +windows_logon_session_logon_timestamp_seconds{domain="Window Manager",id="0x0:0xbfc72",type="Interactive",username="DWM-2"} 1.728769532e+09 +windows_logon_session_logon_timestamp_seconds{domain="Window Manager",id="0x0:0xdedd",type="Interactive",username="DWM-1"} 1.728769283e+09 +windows_logon_session_logon_timestamp_seconds{domain="Window Manager",id="0x0:0xdefd",type="Interactive",username="DWM-1"} 1.728769283e+09 ``` +### Possible values for `type` + +- System +- Interactive +- Network +- Batch +- Service +- Proxy +- Unlock +- NetworkCleartext +- NewCredentials +- RemoteInteractive +- CachedInteractive +- 
CachedRemoteInteractive +- CachedUnlock + ## Useful queries Query the total number of local and remote (I.E. Terminal Services) interactive sessions. ``` -windows_logon_logon_type{status=~"interactive|remote_interactive"} +count(windows_logon_session_logon_timestamp_seconds{type=~"Interactive|RemoteInteractive"}) by (type) ``` ## Alerting examples -_This collector does not yet have alerting examples, we would appreciate your help adding them!_ +_This collector doesn’t yet have alerting examples, we would appreciate your help adding them!_ diff --git a/internal/collector/logon/logon.go b/internal/collector/logon/logon.go index b3f9a201..3c40410d 100644 --- a/internal/collector/logon/logon.go +++ b/internal/collector/logon/logon.go @@ -3,10 +3,11 @@ package logon import ( - "errors" + "fmt" "log/slog" "github.com/alecthomas/kingpin/v2" + "github.com/prometheus-community/windows_exporter/internal/headers/secur32" "github.com/prometheus-community/windows_exporter/internal/types" "github.com/prometheus/client_golang/prometheus" "github.com/yusufpapurcu/wmi" @@ -20,10 +21,9 @@ var ConfigDefaults = Config{} // A Collector is a Prometheus Collector for WMI metrics. 
type Collector struct { - config Config - wmiClient *wmi.Client + config Config - logonType *prometheus.Desc + sessionInfo *prometheus.Desc } func New(config *Config) *Collector { @@ -54,16 +54,11 @@ func (c *Collector) Close(_ *slog.Logger) error { return nil } -func (c *Collector) Build(_ *slog.Logger, wmiClient *wmi.Client) error { - if wmiClient == nil || wmiClient.SWbemServicesClient == nil { - return errors.New("wmiClient or SWbemServicesClient is nil") - } - - c.wmiClient = wmiClient - c.logonType = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "logon_type"), - "Number of active logon sessions (LogonSession.LogonType)", - []string{"status"}, +func (c *Collector) Build(_ *slog.Logger, _ *wmi.Client) error { + c.sessionInfo = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, Name, "session_logon_timestamp_seconds"), + "timestamp of the logon session in seconds.", + []string{"id", "username", "domain", "type"}, nil, ) @@ -72,171 +67,28 @@ func (c *Collector) Build(_ *slog.Logger, wmiClient *wmi.Client) error { // Collect sends the metric values for each metric // to the provided prometheus Metric channel. 
-func (c *Collector) Collect(_ *types.ScrapeContext, logger *slog.Logger, ch chan<- prometheus.Metric) error { - logger = logger.With(slog.String("collector", Name)) +func (c *Collector) Collect(_ *types.ScrapeContext, _ *slog.Logger, ch chan<- prometheus.Metric) error { if err := c.collect(ch); err != nil { - logger.Error("failed collecting user metrics", - slog.Any("err", err), - ) - return err } return nil } -// Win32_LogonSession docs: -// - https://docs.microsoft.com/en-us/windows/win32/cimwin32prov/win32-logonsession -type Win32_LogonSession struct { - LogonType uint32 -} - func (c *Collector) collect(ch chan<- prometheus.Metric) error { - var dst []Win32_LogonSession - if err := c.wmiClient.Query("SELECT * FROM Win32_LogonSession", &dst); err != nil { - return err + logonSessions, err := secur32.GetLogonSessions() + if err != nil { + return fmt.Errorf("failed to get logon sessions: %w", err) } - if len(dst) == 0 { - return errors.New("WMI query returned empty result set") + for _, session := range logonSessions { + ch <- prometheus.MustNewConstMetric( + c.sessionInfo, + prometheus.GaugeValue, + float64(session.LogonTime.Unix()), + session.LogonId.String(), session.UserName, session.LogonDomain, session.LogonType.String(), + ) } - // Init counters - system := 0 - interactive := 0 - network := 0 - batch := 0 - service := 0 - proxy := 0 - unlock := 0 - networkcleartext := 0 - newcredentials := 0 - remoteinteractive := 0 - cachedinteractive := 0 - cachedremoteinteractive := 0 - cachedunlock := 0 - - for _, entry := range dst { - switch entry.LogonType { - case 0: - system++ - case 2: - interactive++ - case 3: - network++ - case 4: - batch++ - case 5: - service++ - case 6: - proxy++ - case 7: - unlock++ - case 8: - networkcleartext++ - case 9: - newcredentials++ - case 10: - remoteinteractive++ - case 11: - cachedinteractive++ - case 12: - cachedremoteinteractive++ - case 13: - cachedunlock++ - } - } - - ch <- prometheus.MustNewConstMetric( - c.logonType, - 
prometheus.GaugeValue, - float64(system), - "system", - ) - - ch <- prometheus.MustNewConstMetric( - c.logonType, - prometheus.GaugeValue, - float64(interactive), - "interactive", - ) - - ch <- prometheus.MustNewConstMetric( - c.logonType, - prometheus.GaugeValue, - float64(network), - "network", - ) - - ch <- prometheus.MustNewConstMetric( - c.logonType, - prometheus.GaugeValue, - float64(batch), - "batch", - ) - - ch <- prometheus.MustNewConstMetric( - c.logonType, - prometheus.GaugeValue, - float64(service), - "service", - ) - - ch <- prometheus.MustNewConstMetric( - c.logonType, - prometheus.GaugeValue, - float64(proxy), - "proxy", - ) - - ch <- prometheus.MustNewConstMetric( - c.logonType, - prometheus.GaugeValue, - float64(unlock), - "unlock", - ) - - ch <- prometheus.MustNewConstMetric( - c.logonType, - prometheus.GaugeValue, - float64(networkcleartext), - "network_clear_text", - ) - - ch <- prometheus.MustNewConstMetric( - c.logonType, - prometheus.GaugeValue, - float64(newcredentials), - "new_credentials", - ) - - ch <- prometheus.MustNewConstMetric( - c.logonType, - prometheus.GaugeValue, - float64(remoteinteractive), - "remote_interactive", - ) - - ch <- prometheus.MustNewConstMetric( - c.logonType, - prometheus.GaugeValue, - float64(cachedinteractive), - "cached_interactive", - ) - - ch <- prometheus.MustNewConstMetric( - c.logonType, - prometheus.GaugeValue, - float64(remoteinteractive), - "cached_remote_interactive", - ) - - ch <- prometheus.MustNewConstMetric( - c.logonType, - prometheus.GaugeValue, - float64(cachedunlock), - "cached_unlock", - ) - return nil } diff --git a/internal/headers/secur32/secur32.go b/internal/headers/secur32/secur32.go new file mode 100644 index 00000000..fe5049ae --- /dev/null +++ b/internal/headers/secur32/secur32.go @@ -0,0 +1,119 @@ +package secur32 + +import ( + "errors" + "fmt" + "time" + "unsafe" + + "golang.org/x/sys/windows" +) + +// based on https://github.com/carlpett/winlsa/blob/master/winlsa.go + +var ( + 
secur32 = windows.NewLazySystemDLL("Secur32.dll") + advapi32 = windows.NewLazySystemDLL("advapi32.dll") + + procLsaEnumerateLogonSessions = secur32.NewProc("LsaEnumerateLogonSessions") + procLsaGetLogonSessionData = secur32.NewProc("LsaGetLogonSessionData") + procLsaFreeReturnBuffer = secur32.NewProc("LsaFreeReturnBuffer") + procLsaNtStatusToWinError = advapi32.NewProc("LsaNtStatusToWinError") +) + +func GetLogonSessions() ([]*LogonSessionData, error) { + var ( + buffer uintptr + sessionCount uint32 + ) + + err := LsaEnumerateLogonSessions(&sessionCount, &buffer) + if err != nil { + return nil, err + } + + if buffer != 0 { + defer func(buffer uintptr) { + _ = LsaFreeReturnBuffer(buffer) + }(buffer) + } + + sizeLUID := unsafe.Sizeof(windows.LUID{}) + + sessionDataSlice := make([]*LogonSessionData, 0, sessionCount) + + for i := range sessionCount { + curPtr := unsafe.Pointer(buffer + (uintptr(i) * sizeLUID)) + luid := (*windows.LUID)(curPtr) + + sessionData, err := GetLogonSessionData(luid) + if err != nil { + if errors.Is(err, windows.ERROR_ACCESS_DENIED) { + // Skip logon sessions that we don't have access to + continue + } + + return nil, err + } + + sessionDataSlice = append(sessionDataSlice, sessionData) + } + + return sessionDataSlice, nil +} + +func GetLogonSessionData(luid *windows.LUID) (*LogonSessionData, error) { + var dataBuffer *SECURITY_LOGON_SESSION_DATA + if err := LsaGetLogonSessionData(luid, &dataBuffer); err != nil { + return nil, fmt.Errorf("failed to get logon session data: %w", err) + } + + defer func(buffer uintptr) { + _ = LsaFreeReturnBuffer(buffer) + }(uintptr(unsafe.Pointer(dataBuffer))) + + return newLogonSessionData(dataBuffer), nil +} + +func LsaEnumerateLogonSessions(sessionCount *uint32, sessions *uintptr) error { + r0, _, _ := procLsaEnumerateLogonSessions.Call(uintptr(unsafe.Pointer(sessionCount)), uintptr(unsafe.Pointer(sessions))) + + return LsaNtStatusToWinError(r0) +} + +func LsaGetLogonSessionData(luid *windows.LUID, 
ppLogonSessionData **SECURITY_LOGON_SESSION_DATA) error { + r0, _, _ := procLsaGetLogonSessionData.Call(uintptr(unsafe.Pointer(luid)), uintptr(unsafe.Pointer(ppLogonSessionData))) + + return LsaNtStatusToWinError(r0) +} + +func LsaFreeReturnBuffer(buffer uintptr) error { + r0, _, _ := procLsaFreeReturnBuffer.Call(buffer) + + return LsaNtStatusToWinError(r0) +} + +func LsaNtStatusToWinError(ntstatus uintptr) error { + r0, _, err := procLsaNtStatusToWinError.Call(ntstatus) + + switch { + case errors.Is(err, windows.ERROR_SUCCESS): + if r0 == 0 { + return nil + } + case errors.Is(err, windows.ERROR_MR_MID_NOT_FOUND): + return fmt.Errorf("unknown LSA NTSTATUS code %x", ntstatus) + } + + return windows.Errno(r0) +} + +func newLogonSessionData(data *SECURITY_LOGON_SESSION_DATA) *LogonSessionData { + return &LogonSessionData{ + LogonId: data.LogonId, + UserName: data.UserName.String(), + LogonDomain: data.LogonDomain.String(), + LogonType: data.LogonType, + LogonTime: time.Unix(0, data.LogonTime.Nanoseconds()), + } +} diff --git a/internal/headers/secur32/secur32_test.go b/internal/headers/secur32/secur32_test.go new file mode 100644 index 00000000..c6a0ed51 --- /dev/null +++ b/internal/headers/secur32/secur32_test.go @@ -0,0 +1,16 @@ +package secur32_test + +import ( + "testing" + + "github.com/prometheus-community/windows_exporter/internal/headers/secur32" + "github.com/stretchr/testify/require" +) + +func TestGetLogonSessions(t *testing.T) { + t.Parallel() + + sessionData, err := secur32.GetLogonSessions() + require.NoError(t, err) + require.NotEmpty(t, sessionData) +} diff --git a/internal/headers/secur32/types.go b/internal/headers/secur32/types.go new file mode 100644 index 00000000..ae4e8929 --- /dev/null +++ b/internal/headers/secur32/types.go @@ -0,0 +1,112 @@ +package secur32 + +import ( + "fmt" + "time" + + "golang.org/x/sys/windows" +) + +type LogonType uint32 + +type LSA_LAST_INTER_LOGON_INFO struct { + LastSuccessfulLogon windows.Filetime + LastFailedLogon 
windows.Filetime + FailedAttemptCountSinceLastSuccessfulLogon uint32 +} + +type SECURITY_LOGON_SESSION_DATA struct { + Size uint32 + LogonId LUID + UserName windows.NTUnicodeString + LogonDomain windows.NTUnicodeString + AuthenticationPackage windows.NTUnicodeString + LogonType LogonType + Session uint32 + Sid *windows.SID + LogonTime windows.Filetime + LogonServer windows.NTUnicodeString + DnsDomainName windows.NTUnicodeString + Upn windows.NTUnicodeString + UserFlags uint32 + LastLogonInfo LSA_LAST_INTER_LOGON_INFO + LogonScript windows.NTUnicodeString + ProfilePath windows.NTUnicodeString + HomeDirectory windows.NTUnicodeString + HomeDirectoryDrive windows.NTUnicodeString + LogoffTime windows.Filetime + KickOffTime windows.Filetime + PasswordLastSet windows.Filetime + PasswordCanChange windows.Filetime + PasswordMustChange windows.Filetime +} + +const ( + // LogonTypeSystem Not explicitly defined in LSA, but according to + // https://docs.microsoft.com/en-us/windows/win32/cimwin32prov/win32-logonsession, + // LogonType=0 is "Used only by the System account." 
+ LogonTypeSystem LogonType = iota + _ // LogonType=1 is not used + LogonTypeInteractive + LogonTypeNetwork + LogonTypeBatch + LogonTypeService + LogonTypeProxy + LogonTypeUnlock + LogonTypeNetworkCleartext + LogonTypeNewCredentials + LogonTypeRemoteInteractive + LogonTypeCachedInteractive + LogonTypeCachedRemoteInteractive + LogonTypeCachedUnlock +) + +func (lt LogonType) String() string { + switch lt { + case LogonTypeSystem: + return "System" + case LogonTypeInteractive: + return "Interactive" + case LogonTypeNetwork: + return "Network" + case LogonTypeBatch: + return "Batch" + case LogonTypeService: + return "Service" + case LogonTypeProxy: + return "Proxy" + case LogonTypeUnlock: + return "Unlock" + case LogonTypeNetworkCleartext: + return "NetworkCleartext" + case LogonTypeNewCredentials: + return "NewCredentials" + case LogonTypeRemoteInteractive: + return "RemoteInteractive" + case LogonTypeCachedInteractive: + return "CachedInteractive" + case LogonTypeCachedRemoteInteractive: + return "CachedRemoteInteractive" + case LogonTypeCachedUnlock: + return "CachedUnlock" + default: + return fmt.Sprintf("Undefined LogonType(%d)", lt) + } +} + +type LogonSessionData struct { + LogonId LUID + UserName string + LogonDomain string + AuthenticationPackage string + LogonType LogonType + Session uint32 + Sid *windows.SID + LogonTime time.Time +} + +type LUID windows.LUID + +func (l LUID) String() string { + return fmt.Sprintf("0x%x:0x%x", l.HighPart, l.LowPart) +}