From 1b96bb6d083f1bfd454ffc6a98a697cf970dfc24 Mon Sep 17 00:00:00 2001 From: Benjamin Blattberg Date: Tue, 29 Jun 2021 14:32:08 -0500 Subject: [PATCH] Add MSSQL Wait Statistics (#793) Signed-off-by: benjaminjb --- collector/mssql.go | 221 +++++++++++++++++++++++++++++++++++++++- docs/collector.mssql.md | 18 +++- 2 files changed, 235 insertions(+), 4 deletions(-) diff --git a/collector/mssql.go b/collector/mssql.go index e243589c..9975b230 100644 --- a/collector/mssql.go +++ b/collector/mssql.go @@ -70,7 +70,7 @@ func getMSSQLInstances() mssqlInstancesType { type mssqlCollectorsMap map[string]mssqlCollectorFunc func mssqlAvailableClassCollectors() string { - return "accessmethods,availreplica,bufman,databases,dbreplica,genstats,locks,memmgr,sqlstats,sqlerrors,transactions" + return "accessmethods,availreplica,bufman,databases,dbreplica,genstats,locks,memmgr,sqlstats,sqlerrors,transactions,waitstats" } func (c *MSSQLCollector) getMSSQLCollectors() mssqlCollectorsMap { @@ -86,6 +86,7 @@ func (c *MSSQLCollector) getMSSQLCollectors() mssqlCollectorsMap { mssqlCollectors["sqlstats"] = c.collectSQLStats mssqlCollectors["sqlerrors"] = c.collectSQLErrors mssqlCollectors["transactions"] = c.collectTransactions + mssqlCollectors["waitstats"] = c.collectWaitStats return mssqlCollectors } @@ -121,6 +122,8 @@ func mssqlGetPerfObjectName(sqlInstance string, collector string) string { suffix = "SQL Statistics" case "transactions": suffix = "Transactions" + case "waitstats": + suffix = "Wait Statistics" } return (prefix + suffix) } @@ -382,6 +385,20 @@ type MSSQLCollector struct { TransactionsVersionStoreCreationUnits *prometheus.Desc TransactionsVersionStoreTruncationUnits *prometheus.Desc + // Win32_PerfRawData_{instance}_SQLServerWaitStatistics + WaitStatsLockWaits *prometheus.Desc + WaitStatsMemoryGrantQueueWaits *prometheus.Desc + WaitStatsThreadSafeMemoryObjectsWaits *prometheus.Desc + WaitStatsLogWriteWaits *prometheus.Desc + WaitStatsLogBufferWaits *prometheus.Desc + 
WaitStatsNetworkIOWaits *prometheus.Desc + WaitStatsPageIOLatchWaits *prometheus.Desc + WaitStatsPageLatchWaits *prometheus.Desc + WaitStatsNonpageLatchWaits *prometheus.Desc + WaitStatsWaitForTheWorkerWaits *prometheus.Desc + WaitStatsWorkspaceSynchronizationWaits *prometheus.Desc + WaitStatsTransactionOwnershipWaits *prometheus.Desc + mssqlInstances mssqlInstancesType mssqlCollectors mssqlCollectorsMap mssqlChildCollectorFailure int @@ -1789,6 +1806,91 @@ func NewMSSQLCollector() (Collector, error) { nil, ), + // Win32_PerfRawData_{instance}_SQLServerWaitStatistics + WaitStatsLockWaits: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, subsystem, "waitstats_lock_waits"), + "(WaitStats.LockWaits)", + []string{"mssql_instance", "item"}, + nil, + ), + + WaitStatsMemoryGrantQueueWaits: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, subsystem, "waitstats_memory_grant_queue_waits"), + "(WaitStats.MemoryGrantQueueWaits)", + []string{"mssql_instance", "item"}, + nil, + ), + + WaitStatsThreadSafeMemoryObjectsWaits: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, subsystem, "waitstats_thread_safe_memory_objects_waits"), + "(WaitStats.ThreadSafeMemoryObjectsWaits)", + []string{"mssql_instance", "item"}, + nil, + ), + + WaitStatsLogWriteWaits: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, subsystem, "waitstats_log_write_waits"), + "(WaitStats.LogWriteWaits)", + []string{"mssql_instance", "item"}, + nil, + ), + + WaitStatsLogBufferWaits: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, subsystem, "waitstats_log_buffer_waits"), + "(WaitStats.LogBufferWaits)", + []string{"mssql_instance", "item"}, + nil, + ), + + WaitStatsNetworkIOWaits: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, subsystem, "waitstats_network_io_waits"), + "(WaitStats.NetworkIOWaits)", + []string{"mssql_instance", "item"}, + nil, + ), + + WaitStatsPageIOLatchWaits: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, subsystem, 
"waitstats_page_io_latch_waits"), + "(WaitStats.PageIOLatchWaits)", + []string{"mssql_instance", "item"}, + nil, + ), + + WaitStatsPageLatchWaits: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, subsystem, "waitstats_page_latch_waits"), + "(WaitStats.PageLatchWaits)", + []string{"mssql_instance", "item"}, + nil, + ), + + WaitStatsNonpageLatchWaits: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, subsystem, "waitstats_nonpage_latch_waits"), + "(WaitStats.NonpageLatchWaits)", + []string{"mssql_instance", "item"}, + nil, + ), + + WaitStatsWaitForTheWorkerWaits: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, subsystem, "waitstats_wait_for_the_worker_waits"), + "(WaitStats.WaitForTheWorkerWaits)", + []string{"mssql_instance", "item"}, + nil, + ), + + WaitStatsWorkspaceSynchronizationWaits: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, subsystem, "waitstats_workspace_synchronization_waits"), + "(WaitStats.WorkspaceSynchronizationWaits)", + []string{"mssql_instance", "item"}, + nil, + ), + + WaitStatsTransactionOwnershipWaits: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, subsystem, "waitstats_transaction_ownership_waits"), + "(WaitStats.TransactionOwnershipWaits)", + []string{"mssql_instance", "item"}, + nil, + ), + mssqlInstances: mssqlInstances, } @@ -3731,6 +3833,123 @@ func (c *MSSQLCollector) collectSQLStats(ctx *ScrapeContext, ch chan<- prometheu return nil, nil } +// Win32_PerfRawData_MSSQLSERVER_SQLServerWaitStatistics docs: +// - https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-wait-statistics-object +type mssqlWaitStatistics struct { + Name string + WaitStatsLockWaits float64 `perflib:"Lock waits"` + WaitStatsMemoryGrantQueueWaits float64 `perflib:"Memory grant queue waits"` + WaitStatsThreadSafeMemoryObjectsWaits float64 `perflib:"Thread-safe memory objects waits"` + WaitStatsLogWriteWaits float64 `perflib:"Log write waits"` + WaitStatsLogBufferWaits float64 `perflib:"Log 
buffer waits"` + WaitStatsNetworkIOWaits float64 `perflib:"Network IO waits"` + WaitStatsPageIOLatchWaits float64 `perflib:"Page IO latch waits"` + WaitStatsPageLatchWaits float64 `perflib:"Page latch waits"` + WaitStatsNonpageLatchWaits float64 `perflib:"Non-Page latch waits"` + WaitStatsWaitForTheWorkerWaits float64 `perflib:"Wait for the worker"` + WaitStatsWorkspaceSynchronizationWaits float64 `perflib:"Workspace synchronization waits"` + WaitStatsTransactionOwnershipWaits float64 `perflib:"Transaction ownership waits"` +} + +func (c *MSSQLCollector) collectWaitStats(ctx *ScrapeContext, ch chan<- prometheus.Metric, sqlInstance string) (*prometheus.Desc, error) { + var dst []mssqlWaitStatistics + log.Debugf("mssql_waitstats collector iterating sql instance %s.", sqlInstance) + + if err := unmarshalObject(ctx.perfObjects[mssqlGetPerfObjectName(sqlInstance, "waitstats")], &dst); err != nil { + return nil, err + } + + for _, v := range dst { + item := v.Name + + ch <- prometheus.MustNewConstMetric( + c.WaitStatsLockWaits, + prometheus.CounterValue, + v.WaitStatsLockWaits, + sqlInstance, item, + ) + + ch <- prometheus.MustNewConstMetric( + c.WaitStatsMemoryGrantQueueWaits, + prometheus.CounterValue, + v.WaitStatsMemoryGrantQueueWaits, + sqlInstance, item, + ) + + ch <- prometheus.MustNewConstMetric( + c.WaitStatsThreadSafeMemoryObjectsWaits, + prometheus.CounterValue, + v.WaitStatsThreadSafeMemoryObjectsWaits, + sqlInstance, item, + ) + + ch <- prometheus.MustNewConstMetric( + c.WaitStatsLogWriteWaits, + prometheus.CounterValue, + v.WaitStatsLogWriteWaits, + sqlInstance, item, + ) + + ch <- prometheus.MustNewConstMetric( + c.WaitStatsLogBufferWaits, + prometheus.CounterValue, + v.WaitStatsLogBufferWaits, + sqlInstance, item, + ) + + ch <- prometheus.MustNewConstMetric( + c.WaitStatsNetworkIOWaits, + prometheus.CounterValue, + v.WaitStatsNetworkIOWaits, + sqlInstance, item, + ) + + ch <- prometheus.MustNewConstMetric( + c.WaitStatsPageIOLatchWaits, + 
prometheus.CounterValue, + v.WaitStatsPageIOLatchWaits, + sqlInstance, item, + ) + + ch <- prometheus.MustNewConstMetric( + c.WaitStatsPageLatchWaits, + prometheus.CounterValue, + v.WaitStatsPageLatchWaits, + sqlInstance, item, + ) + + ch <- prometheus.MustNewConstMetric( + c.WaitStatsNonpageLatchWaits, + prometheus.CounterValue, + v.WaitStatsNonpageLatchWaits, + sqlInstance, item, + ) + + ch <- prometheus.MustNewConstMetric( + c.WaitStatsWaitForTheWorkerWaits, + prometheus.CounterValue, + v.WaitStatsWaitForTheWorkerWaits, + sqlInstance, item, + ) + + ch <- prometheus.MustNewConstMetric( + c.WaitStatsWorkspaceSynchronizationWaits, + prometheus.CounterValue, + v.WaitStatsWorkspaceSynchronizationWaits, + sqlInstance, item, + ) + + ch <- prometheus.MustNewConstMetric( + c.WaitStatsTransactionOwnershipWaits, + prometheus.CounterValue, + v.WaitStatsTransactionOwnershipWaits, + sqlInstance, item, + ) + } + + return nil, nil +} + type mssqlSQLErrors struct { Name string ErrorsPersec float64 `perflib:"Errors/sec"` diff --git a/docs/collector.mssql.md b/docs/collector.mssql.md index 884115e4..4754be64 100644 --- a/docs/collector.mssql.md +++ b/docs/collector.mssql.md @@ -5,14 +5,14 @@ The mssql collector exposes metrics about the MSSQL server ||| -|- Metric name prefix | `mssql` -Classes | [`Win32_PerfRawData_MSSQLSERVER_SQLServerAccessMethods`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-access-methods-object)
[`Win32_PerfRawData_MSSQLSERVER_SQLServerAvailabilityReplica`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-availability-replica)
[`Win32_PerfRawData_MSSQLSERVER_SQLServerBufferManager`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-buffer-manager-object)
[`Win32_PerfRawData_MSSQLSERVER_SQLServerDatabaseReplica`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-database-replica)
[`Win32_PerfRawData_MSSQLSERVER_SQLServerDatabases`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-databases-object?view=sql-server-2017)
[`Win32_PerfRawData_MSSQLSERVER_SQLServerGeneralStatistics`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-general-statistics-object)
[`Win32_PerfRawData_MSSQLSERVER_SQLServerLocks`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-locks-object)
[`Win32_PerfRawData_MSSQLSERVER_SQLServerMemoryManager`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-memory-manager-object)
[`Win32_PerfRawData_MSSQLSERVER_SQLServerSQLStatistics`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-sql-statistics-object)
[`Win32_PerfRawData_MSSQLSERVER_SQLServerSQLErrors`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-sql-errors-object)
[`Win32_PerfRawData_MSSQLSERVER_SQLServerTransactions`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-transactions-object) +Classes | [`Win32_PerfRawData_MSSQLSERVER_SQLServerAccessMethods`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-access-methods-object)
[`Win32_PerfRawData_MSSQLSERVER_SQLServerAvailabilityReplica`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-availability-replica)
[`Win32_PerfRawData_MSSQLSERVER_SQLServerBufferManager`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-buffer-manager-object)
[`Win32_PerfRawData_MSSQLSERVER_SQLServerDatabaseReplica`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-database-replica)
[`Win32_PerfRawData_MSSQLSERVER_SQLServerDatabases`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-databases-object?view=sql-server-2017)
[`Win32_PerfRawData_MSSQLSERVER_SQLServerGeneralStatistics`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-general-statistics-object)
[`Win32_PerfRawData_MSSQLSERVER_SQLServerLocks`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-locks-object)
[`Win32_PerfRawData_MSSQLSERVER_SQLServerMemoryManager`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-memory-manager-object)
[`Win32_PerfRawData_MSSQLSERVER_SQLServerSQLStatistics`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-sql-statistics-object)
[`Win32_PerfRawData_MSSQLSERVER_SQLServerSQLErrors`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-sql-errors-object)
[`Win32_PerfRawData_MSSQLSERVER_SQLServerTransactions`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-transactions-object)
[`Win32_PerfRawData_MSSQLSERVER_SQLServerWaitStatistics`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-wait-statistics-object) Enabled by default? | No ## Flags ### `--collectors.mssql.classes-enabled` -Comma-separated list of MSSQL WMI classes to use. Supported values are `accessmethods`, `availreplica`, `bufman`, `databases`, `dbreplica`, `genstats`, `locks`, `memmgr`, `sqlstats`, `sqlerrors` and `transactions`. +Comma-separated list of MSSQL WMI classes to use. Supported values are `accessmethods`, `availreplica`, `bufman`, `databases`, `dbreplica`, `genstats`, `locks`, `memmgr`, `sqlstats`, `sqlerrors`, `transactions`, and `waitstats`. ### `--collectors.mssql.class-print` @@ -127,7 +127,7 @@ Name | Description | Type | Labels `windows_mssql_databases_bulk_copy_rows` | Number of rows bulk copied per second | counter | `mssql_instance`, `database` `windows_mssql_databases_bulk_copy_bytes` | Amount of data bulk copied (in kilobytes) per second | counter | `mssql_instance`, `database` `windows_mssql_databases_commit_table_entries` | he size (row count) of the in-memory portion of the commit table for the database | counter | `mssql_instance`, `database` -`windows_mssql_databases_data_files_size_bytes` | Cumulative size (in kilobytes) of all the data files in the database including any automatic growth. Monitoring this counter is useful, for example, for determining the correct size of tempdb | counter | `mssql_instance`, `database` +`windows_mssql_databases_data_files_size_bytes` | Cumulative size (in kilobytes) of all the data files in the database including any automatic growth. 
Monitoring this counter is useful, for example, for determining the correct size of tempdb | gauge | `mssql_instance`, `database` `windows_mssql_databases_dbcc_logical_scan_bytes` | Number of logical read scan bytes per second for database console commands (DBCC) | counter | `mssql_instance`, `database` `windows_mssql_databases_group_commit_stall_seconds` | Group stall time (microseconds) per second | counter | `mssql_instance`, `database` `windows_mssql_databases_log_flushed_bytes` | Total number of log bytes flushed | counter | `mssql_instance`, `database` @@ -244,6 +244,18 @@ Name | Description | Type | Labels `windows_mssql_transactions_version_store_units` | The number of active allocation units in the snapshot isolation version store in tempdb | counter | `mssql_instance` `windows_mssql_transactions_version_store_creation_units` | The number of allocation units that have been created in the snapshot isolation store since the instance of the Database Engine was started | counter | `mssql_instance` `windows_mssql_transactions_version_store_truncation_units` | The number of allocation units that have been removed from the snapshot isolation store since the instance of the Database Engine was started | counter | `mssql_instance` +`windows_mssql_waitstats_lock_waits` | Statistics for processes waiting on a lock | counter | `mssql_instance`, `item` +`windows_mssql_waitstats_memory_grant_queue_waits` | Statistics for processes waiting for memory grant to become available | counter | `mssql_instance`, `item` +`windows_mssql_waitstats_thread_safe_memory_objects_waits` | Statistics for processes waiting on thread-safe memory allocators | counter | `mssql_instance`, `item` +`windows_mssql_waitstats_log_write_waits` | Statistics for processes waiting for log buffer to be written | counter | `mssql_instance`, `item` +`windows_mssql_waitstats_log_buffer_waits` | Statistics for processes waiting for log buffer to be available | counter | `mssql_instance`, `item`
+`windows_mssql_waitstats_network_io_waits` | Statistics relevant to wait on network I/O | counter | `mssql_instance`, `item` +`windows_mssql_waitstats_page_io_latch_waits` | Statistics relevant to page I/O latches | counter | `mssql_instance`, `item` +`windows_mssql_waitstats_page_latch_waits` | Statistics relevant to page latches, not including I/O latches | counter | `mssql_instance`, `item` +`windows_mssql_waitstats_nonpage_latch_waits` | Statistics relevant to non-page latches | counter | `mssql_instance`, `item` +`windows_mssql_waitstats_wait_for_the_worker_waits` | Statistics relevant to processes waiting for worker to become available | counter | `mssql_instance`, `item` +`windows_mssql_waitstats_workspace_synchronization_waits` | Statistics relevant to processes synchronizing access to workspace | counter | `mssql_instance`, `item` +`windows_mssql_waitstats_transaction_ownership_waits` | Statistics relevant to processes synchronizing access to transaction | counter | `mssql_instance`, `item` ### Example metric _This collector does not yet have explained examples, we would appreciate your help adding them!_