From 3ce25ff1efcce203b38500eed7bc51eda418b4cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan-Otto=20Kr=C3=B6pke?= Date: Thu, 29 Aug 2024 22:03:05 +0200 Subject: [PATCH] mscluster: merge multiple collector into one (Click here for more information) (#1585) --- README.md | 6 +- docs/README.md | 6 +- docs/collector.mscluster.md | 186 +++ docs/collector.mscluster_cluster.md | 104 -- docs/collector.mscluster_network.md | 32 - docs/collector.mscluster_node.md | 41 - docs/collector.mscluster_resource.md | 50 - docs/collector.mscluster_resourcegroup.md | 48 - pkg/collector/collector.go | 12 +- pkg/collector/config.go | 18 +- pkg/collector/exchange/exchange.go | 17 +- pkg/collector/map.go | 12 +- pkg/collector/mscluster/mscluster.go | 299 ++++ .../mscluster_cluster.go | 1218 ++++++++--------- .../mscluster_network.go | 137 +- .../mscluster_node.go | 283 ++-- .../mscluster_resource.go | 350 ++--- .../mscluster_resourcegroup.go | 305 ++--- pkg/collector/prometheus.go | 1 + 19 files changed, 1473 insertions(+), 1652 deletions(-) create mode 100644 docs/collector.mscluster.md delete mode 100644 docs/collector.mscluster_cluster.md delete mode 100644 docs/collector.mscluster_network.md delete mode 100644 docs/collector.mscluster_node.md delete mode 100644 docs/collector.mscluster_resource.md delete mode 100644 docs/collector.mscluster_resourcegroup.md create mode 100644 pkg/collector/mscluster/mscluster.go rename pkg/collector/{mscluster_cluster => mscluster}/mscluster_cluster.go (55%) rename pkg/collector/{mscluster_network => mscluster}/mscluster_network.go (51%) rename pkg/collector/{mscluster_node => mscluster}/mscluster_node.go (53%) rename pkg/collector/{mscluster_resource => mscluster}/mscluster_resource.go (61%) rename pkg/collector/{mscluster_resourcegroup => mscluster}/mscluster_resourcegroup.go (51%) diff --git a/README.md b/README.md index 655abaa4..6426ff6e 100644 --- a/README.md +++ b/README.md @@ -28,11 +28,7 @@ Name | Description | Enabled by default 
[logical_disk](docs/collector.logical_disk.md) | Logical disks, disk I/O | ✓ [logon](docs/collector.logon.md) | User logon sessions | [memory](docs/collector.memory.md) | Memory usage metrics | -[mscluster_cluster](docs/collector.mscluster_cluster.md) | MSCluster cluster metrics | -[mscluster_network](docs/collector.mscluster_network.md) | MSCluster network metrics | -[mscluster_node](docs/collector.mscluster_node.md) | MSCluster Node metrics | -[mscluster_resource](docs/collector.mscluster_resource.md) | MSCluster Resource metrics | -[mscluster_resourcegroup](docs/collector.mscluster_resourcegroup.md) | MSCluster ResourceGroup metrics | +[mscluster](docs/collector.mscluster.md) | MSCluster metrics | [msmq](docs/collector.msmq.md) | MSMQ queues | [mssql](docs/collector.mssql.md) | [SQL Server Performance Objects](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/use-sql-server-objects#SQLServerPOs) metrics | [netframework_clrexceptions](docs/collector.netframework_clrexceptions.md) | .NET Framework CLR Exceptions | diff --git a/docs/README.md b/docs/README.md index 015ee19d..6d4af6b7 100644 --- a/docs/README.md +++ b/docs/README.md @@ -21,11 +21,7 @@ This directory contains documentation of the collectors in the windows_exporter, - [`logical_disk`](collector.logical_disk.md) - [`logon`](collector.logon.md) - [`memory`](collector.memory.md) -- [`mscluster_cluster`](collector.mscluster_cluster.md) -- [`mscluster_network`](collector.mscluster_network.md) -- [`mscluster_node`](collector.mscluster_node.md) -- [`mscluster_resource`](collector.mscluster_resource.md) -- [`mscluster_resourcegroup`](collector.mscluster_resourcegroup.md) +- [`mscluster`](collector.mscluster.md) - [`msmq`](collector.msmq.md) - [`mssql`](collector.mssql.md) - [`net`](collector.net.md) diff --git a/docs/collector.mscluster.md b/docs/collector.mscluster.md new file mode 100644 index 00000000..af577ae9 --- /dev/null +++ b/docs/collector.mscluster.md @@ -0,0 +1,186 @@ +# 
mscluster collector + +The MSCluster_Cluster class is a dynamic WMI class that represents a cluster. + +||| +-|- +Metric name prefix | `mscluster` +Classes | `MSCluster_Cluster`,`MSCluster_Network`,`MSCluster_Node`,`MSCluster_Resource`,`MSCluster_ResourceGroup` +Enabled by default? | No + +## Flags + +### `--collectors.mscluster.enabled` +Comma-separated list of collectors to use, for example: +`--collectors.mscluster.enabled=cluster,network,node,resource,resourcegroup`. +Matching is case-sensitive. + +## Metrics + +### Cluster + +| Name | Description | Type | Labels | +|-------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------|--------| +| `mscluster_cluster_AddEvictDelay` | Provides access to the cluster's AddEvictDelay property, which is the number of seconds that a new node is delayed after an eviction of another node. | gauge | `name` | +| `mscluster_cluster_AdminAccessPoint` | The type of the cluster administrative access point. | gauge | `name` | +| `mscluster_cluster_AutoAssignNodeSite` | Determines whether or not the cluster will attempt to automatically assign nodes to sites based on networks and Active Directory Site information. | gauge | `name` | +| `mscluster_cluster_AutoBalancerLevel` | Determines the level of aggressiveness of AutoBalancer. | gauge | `name` | +| `mscluster_cluster_AutoBalancerMode` | Determines whether or not the auto balancer is enabled. | gauge | `name` | +| `mscluster_cluster_BackupInProgress` | Indicates whether a backup is in progress. | gauge | `name` | +| `mscluster_cluster_BlockCacheSize` | CSV BlockCache Size in MB. 
| gauge | `name` | +| `mscluster_cluster_ClusSvcHangTimeout` | Controls how long the cluster network driver waits between Failover Cluster Service heartbeats before it determines that the Failover Cluster Service has stopped responding. | gauge | `name` | +| `mscluster_cluster_ClusSvcRegroupOpeningTimeout` | Controls how long a node will wait on other nodes in the opening stage before deciding that they failed. | gauge | `name` | +| `mscluster_cluster_ClusSvcRegroupPruningTimeout` | Controls how long the membership leader will wait to reach full connectivity between cluster nodes. | gauge | `name` | +| `mscluster_cluster_ClusSvcRegroupStageTimeout` | Controls how long a node will wait on other nodes in a membership stage before deciding that they failed. | gauge | `name` | +| `mscluster_cluster_ClusSvcRegroupTickInMilliseconds` | Controls how frequently the membership algorithm is sending periodic membership messages. | gauge | `name` | +| `mscluster_cluster_ClusterEnforcedAntiAffinity` | Enables or disables hard enforcement of group anti-affinity classes. | gauge | `name` | +| `mscluster_cluster_ClusterFunctionalLevel` | The functional level the cluster is currently running in. | gauge | `name` | +| `mscluster_cluster_ClusterGroupWaitDelay` | Maximum time in seconds that a group waits for its preferred node to come online during cluster startup before coming online on a different node. | gauge | `name` | +| `mscluster_cluster_ClusterLogLevel` | Controls the level of cluster logging. | gauge | `name` | +| `mscluster_cluster_ClusterLogSize` | Controls the maximum size of the cluster log files on each of the nodes. | gauge | `name` | +| `mscluster_cluster_ClusterUpgradeVersion` | Specifies the upgrade version the cluster is currently running in. | gauge | `name` | +| `mscluster_cluster_CrossSiteDelay` | Controls how long the cluster network driver waits in milliseconds between sending Cluster Service heartbeats across sites. 
| gauge | `name` | +| `mscluster_cluster_CrossSiteThreshold` | Controls how many Cluster Service heartbeats can be missed across sites before it determines that Cluster Service has stopped responding. | gauge | `name` | +| `mscluster_cluster_CrossSubnetDelay` | Controls how long the cluster network driver waits in milliseconds between sending Cluster Service heartbeats across subnets. | gauge | `name` | +| `mscluster_cluster_CrossSubnetThreshold` | Controls how many Cluster Service heartbeats can be missed across subnets before it determines that Cluster Service has stopped responding. | gauge | `name` | +| `mscluster_cluster_CsvBalancer` | Whether automatic balancing for CSV is enabled. | gauge | `name` | +| `mscluster_cluster_DatabaseReadWriteMode` | Sets the database read and write mode. | gauge | `name` | +| `mscluster_cluster_DefaultNetworkRole` | Provides access to the cluster's DefaultNetworkRole property. | gauge | `name` | +| `mscluster_cluster_DetectedCloudPlatform` | | gauge | `name` | +| `mscluster_cluster_DetectManagedEvents` | | gauge | `name` | +| `mscluster_cluster_DetectManagedEventsThreshold` | | gauge | `name` | +| `mscluster_cluster_DisableGroupPreferredOwnerRandomization` | | gauge | `name` | +| `mscluster_cluster_DrainOnShutdown` | Whether to drain the node when cluster service is being stopped. | gauge | `name` | +| `mscluster_cluster_DynamicQuorumEnabled` | Allows cluster service to adjust node weights as needed to increase availability. | gauge | `name` | +| `mscluster_cluster_EnableSharedVolumes` | Enables or disables cluster shared volumes on this cluster. | gauge | `name` | +| `mscluster_cluster_FixQuorum` | Provides access to the cluster's FixQuorum property, which specifies if the cluster is in a fix quorum state. | gauge | `name` | +| `mscluster_cluster_GracePeriodEnabled` | Whether the node grace period feature of this cluster is enabled. 
| gauge | `name` | +| `mscluster_cluster_GracePeriodTimeout` | The grace period timeout in milliseconds. | gauge | `name` | +| `mscluster_cluster_GroupDependencyTimeout` | The timeout after which a group will be brought online despite unsatisfied dependencies | gauge | `name` | +| `mscluster_cluster_HangRecoveryAction` | Controls the action to take if the user-mode processes have stopped responding. | gauge | `name` | +| `mscluster_cluster_IgnorePersistentStateOnStartup` | Provides access to the cluster's IgnorePersistentStateOnStartup property, which specifies whether the cluster will bring online groups that were online when the cluster was shut down. | gauge | `name` | +| `mscluster_cluster_LogResourceControls` | Controls the logging of resource controls. | gauge | `name` | +| `mscluster_cluster_LowerQuorumPriorityNodeId` | Specifies the Node ID that has a lower priority when voting for quorum is performed. If the quorum vote is split 50/50%, the specified node's vote would be ignored to break the tie. If this is not set then the cluster will pick a node at random to break the tie. | gauge | `name` | +| `mscluster_cluster_MaxNumberOfNodes` | Indicates the maximum number of nodes that may participate in the Cluster. | gauge | `name` | +| `mscluster_cluster_MessageBufferLength` | The maximum unacknowledged message count for GEM. | gauge | `name` | +| `mscluster_cluster_MinimumNeverPreemptPriority` | Groups with this priority or higher cannot be preempted. | gauge | `name` | +| `mscluster_cluster_MinimumPreemptorPriority` | Minimum priority a cluster group must have to be able to preempt another group. | gauge | `name` | +| `mscluster_cluster_NetftIPSecEnabled` | Whether IPSec is enabled for cluster internal traffic. | gauge | `name` | +| `mscluster_cluster_PlacementOptions` | Various option flags to modify default placement behavior. | gauge | `name` | +| `mscluster_cluster_PlumbAllCrossSubnetRoutes` | Plumbs all possible cross subnet routes to all nodes. 
| gauge | `name` | +| `mscluster_cluster_PreventQuorum` | Whether the cluster will ignore group persistent state on startup. | gauge | `name` | +| `mscluster_cluster_QuarantineDuration` | The quarantine period timeout in milliseconds. | gauge | `name` | +| `mscluster_cluster_QuarantineThreshold` | Number of node failures before it will be quarantined. | gauge | `name` | +| `mscluster_cluster_QuorumArbitrationTimeMax` | Controls the maximum time necessary to decide the Quorum owner node. | gauge | `name` | +| `mscluster_cluster_QuorumArbitrationTimeMin` | Controls the minimum time necessary to decide the Quorum owner node. | gauge | `name` | +| `mscluster_cluster_QuorumLogFileSize` | This property is obsolete. | gauge | `name` | +| `mscluster_cluster_QuorumTypeValue` | Get the current quorum type value. -1: Unknown; 1: Node; 2: FileShareWitness; 3: Storage; 4: None | gauge | `name` | +| `mscluster_cluster_RequestReplyTimeout` | Controls the request reply time-out period. | gauge | `name` | +| `mscluster_cluster_ResiliencyDefaultPeriod` | The default resiliency period, in seconds, for the cluster. | gauge | `name` | +| `mscluster_cluster_ResiliencyLevel` | The resiliency level for the cluster. | gauge | `name` | +| `mscluster_cluster_ResourceDllDeadlockPeriod` | This property is obsolete. | gauge | `name` | +| `mscluster_cluster_RootMemoryReserved` | Controls the amount of memory reserved for the parent partition on all cluster nodes. | gauge | `name` | +| `mscluster_cluster_RouteHistoryLength` | The history length for routes to help finding network issues. | gauge | `name` | +| `mscluster_cluster_S2DBusTypes` | Bus types for storage spaces direct. | gauge | `name` | +| `mscluster_cluster_S2DCacheDesiredState` | Desired state of the storage spaces direct cache. | gauge | `name` | +| `mscluster_cluster_S2DCacheFlashReservePercent` | Percentage of allocated flash space to utilize when caching. 
| gauge | `name` | +| `mscluster_cluster_S2DCachePageSizeKBytes` | Page size in KB used by S2D cache. | gauge | `name` | +| `mscluster_cluster_S2DEnabled` | Whether direct attached storage (DAS) is enabled. | gauge | `name` | +| `mscluster_cluster_S2DIOLatencyThreshold` | The I/O latency threshold for storage spaces direct. | gauge | `name` | +| `mscluster_cluster_S2DOptimizations` | Optimization flags for storage spaces direct. | gauge | `name` | +| `mscluster_cluster_SameSubnetDelay` | Controls how long the cluster network driver waits in milliseconds between sending Cluster Service heartbeats on the same subnet. | gauge | `name` | +| `mscluster_cluster_SameSubnetThreshold` | Controls how many Cluster Service heartbeats can be missed on the same subnet before it determines that Cluster Service has stopped responding. | gauge | `name` | +| `mscluster_cluster_SecurityLevel` | Controls the level of security that should apply to intracluster messages. 0: Clear Text; 1: Sign; 2: Encrypt | gauge | `name` | +| `mscluster_cluster_SecurityLevelForStorage` | | gauge | `name` | +| `mscluster_cluster_SharedVolumeVssWriterOperationTimeout` | CSV VSS Writer operation timeout in seconds. | gauge | `name` | +| `mscluster_cluster_ShutdownTimeoutInMinutes` | The maximum time in minutes allowed for cluster resources to come offline during cluster service shutdown. | gauge | `name` | +| `mscluster_cluster_UseClientAccessNetworksForSharedVolumes` | Whether the use of client access networks for cluster shared volumes feature of this cluster is enabled. 0: Disabled; 1: Enabled; 2: Auto | gauge | `name` | +| `mscluster_cluster_WitnessDatabaseWriteTimeout` | Controls the maximum time in seconds that a cluster database write to a witness can take before the write is abandoned. | gauge | `name` | +| `mscluster_cluster_WitnessDynamicWeight` | The weight of the configured witness. | gauge | `name` | +| `mscluster_cluster_WitnessRestartInterval` | Controls the witness restart interval. 
| gauge | `name` | + +### Network + +| Name | Description | Type | Labels | +|-------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------|--------| +| `mscluster_network_Characteristics` | Provides the characteristics of the network. The cluster defines characteristics only for resources. For a description of these characteristics, see [CLUSCTL_RESOURCE_GET_CHARACTERISTICS](https://msdn.microsoft.com/library/aa367466). | gauge | `name` | +| `mscluster_network_Flags` | Provides access to the flags set for the network. The cluster defines flags only for resources. For a description of these flags, see [CLUSCTL_RESOURCE_GET_FLAGS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-flags). | gauge | `name` | +| `mscluster_network_Metric` | The metric of a cluster network (networks with lower values are used first). If this value is set, then the AutoMetric property is set to false. | gauge | `name` | +| `mscluster_network_Role` | Provides access to the network's Role property. The Role property describes the role of the network in the cluster. 0: None; 1: Cluster; 2: Client; 3: Both | gauge | `name` | +| `mscluster_network_State` | Provides the current state of the network. 
-1: Unknown; 0: Unavailable; 1: Down; 2: Partitioned; 3: Up | gauge | `name` | + +### Node + +| Name | Description | Type | Labels | +|----------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------|--------| +| `mscluster_node_BuildNumber` | Provides access to the node's BuildNumber property. | gauge | `name` | +| `mscluster_node_Characteristics` | Provides access to the characteristics set for the node. For a list of possible characteristics, see [CLUSCTL_NODE_GET_CHARACTERISTICS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-node-get-characteristics). | gauge | `name` | +| `mscluster_node_DetectedCloudPlatform` | The dynamic vote weight of the node adjusted by dynamic quorum feature. | gauge | `name` | +| `mscluster_node_DynamicWeight` | The dynamic vote weight of the node adjusted by dynamic quorum feature. | gauge | `name` | +| `mscluster_node_Flags` | Provides access to the flags set for the node. For a list of possible flags, see [CLUSCTL_NODE_GET_FLAGS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-node-get-flags). | gauge | `name` | +| `mscluster_node_MajorVersion` | Provides access to the node's MajorVersion property, which specifies the major portion of the Windows version installed. | gauge | `name` | +| `mscluster_node_MinorVersion` | Provides access to the node's MinorVersion property, which specifies the minor portion of the Windows version installed. | gauge | `name` | +| `mscluster_node_NeedsPreventQuorum` | Whether the cluster service on that node should be started with prevent quorum flag. | gauge | `name` | +| `mscluster_node_NodeDrainStatus` | The current node drain status of a node. 
0: Not Initiated; 1: In Progress; 2: Completed; 3: Failed | gauge | `name` | +| `mscluster_node_NodeHighestVersion` | Provides access to the node's NodeHighestVersion property, which specifies the highest possible version of the cluster service with which the node can join or communicate. | gauge | `name` | +| `mscluster_node_NodeLowestVersion` | Provides access to the node's NodeLowestVersion property, which specifies the lowest possible version of the cluster service with which the node can join or communicate. | gauge | `name` | +| `mscluster_node_NodeWeight` | The vote weight of the node. | gauge | `name` | +| `mscluster_node_State` | Returns the current state of a node. -1: Unknown; 0: Up; 1: Down; 2: Paused; 3: Joining | gauge | `name` | +| `mscluster_node_StatusInformation` | The isolation or quarantine status of the node. | gauge | `name` | + +### Resource + +| Name | Description | Type | Labels | +|---------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------|--------------------------------------------| +| `mscluster_resource_Characteristics` | Provides the characteristics of the object. The cluster defines characteristics only for resources. For a description of these characteristics, see [CLUSCTL_RESOURCE_GET_CHARACTERISTICS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-characteristics). | gauge | `type`, `owner_group`, `name` | +| `mscluster_resource_DeadlockTimeout` | Indicates the length of time to wait, in milliseconds, before declaring a deadlock in any call into a resource. 
| gauge | `type`, `owner_group`, `name` | +| `mscluster_resource_EmbeddedFailureAction` | The time, in milliseconds, that a resource should remain in a failed state before the Cluster service attempts to restart it. | gauge | `type`, `owner_group`, `name` | +| `mscluster_resource_Flags` | Provides access to the flags set for the object. The cluster defines flags only for resources. For a description of these flags, see [CLUSCTL_RESOURCE_GET_FLAGS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-flags). | gauge | `type`, `owner_group`, `name` | +| `mscluster_resource_IsAlivePollInterval` | Provides access to the resource's IsAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it is operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the IsAlivePollInterval property for the resource type associated with the resource. | gauge | `type`, `owner_group`, `name` | +| `mscluster_resource_LooksAlivePollInterval` | Provides access to the resource's LooksAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it appears operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the LooksAlivePollInterval property for the resource type associated with the resource. | gauge | `type`, `owner_group`, `name` | +| `mscluster_resource_MonitorProcessId` | Provides the process ID of the resource host service that is currently hosting the resource. | gauge | `type`, `owner_group`, `name` | +| `mscluster_resource_OwnerNode` | The node hosting the resource. | gauge | `type`, `owner_group`, `node_name`, `name` | +| `mscluster_resource_PendingTimeout` | Provides access to the resource's PendingTimeout property. 
If a resource cannot be brought online or taken offline in the number of milliseconds specified by the PendingTimeout property, the resource is forcibly terminated. | gauge | `type`, `owner_group`, `name` | +| `mscluster_resource_ResourceClass` | Gets or sets the resource class of a resource. 0: Unknown; 1: Storage; 2: Network; 32768: Unknown | gauge | `type`, `owner_group`, `name` | +| `mscluster_resource_RestartAction` | Provides access to the resource's RestartAction property, which is the action to be taken by the Cluster Service if the resource fails. | gauge | `type`, `owner_group`, `name` | +| `mscluster_resource_RestartDelay` | Indicates the time delay before a failed resource is restarted. | gauge | `type`, `owner_group`, `name` | +| `mscluster_resource_RestartPeriod` | Provides access to the resource's RestartPeriod property, which is interval of time, in milliseconds, during which a specified number of restart attempts can be made on a nonresponsive resource. | gauge | `type`, `owner_group`, `name` | +| `mscluster_resource_RestartThreshold` | Provides access to the resource's RestartThreshold property which is the maximum number of restart attempts that can be made on a resource within an interval defined by the RestartPeriod property before the Cluster Service initiates the action specified by the RestartAction property. | gauge | `type`, `owner_group`, `name` | +| `mscluster_resource_RetryPeriodOnFailure` | Provides access to the resource's RetryPeriodOnFailure property, which is the interval of time (in milliseconds) that a resource should remain in a failed state before the Cluster service attempts to restart it. | gauge | `type`, `owner_group`, `name` | +| `mscluster_resource_State` | The current state of the resource. 
-1: Unknown; 0: Inherited; 1: Initializing; 2: Online; 3: Offline; 4: Failed; 128: Pending; 129: Online Pending; 130: Offline Pending | gauge | `type`, `owner_group`, `name` | +| `mscluster_resource_Subclass` | Provides the list of references to nodes that can be the owner of this resource. | gauge | `type`, `owner_group`, `name` | + +### ResourceGroup + +| Name | Description | Type | Labels | +|-----------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------|---------------------| +| `mscluster_resourcegroup_AutoFailbackType` | Provides access to the group's AutoFailbackType property. | gauge | `name` | +| `mscluster_resourcegroup_Characteristics` | Provides the characteristics of the group. The cluster defines characteristics only for resources. For a description of these characteristics, see [CLUSCTL_RESOURCE_GET_CHARACTERISTICS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-characteristics). | gauge | `name` | +| `mscluster_resourcegroup_ColdStartSetting` | Indicates whether a group can start after a cluster cold start. | gauge | `name` | +| `mscluster_resourcegroup_DefaultOwner` | Number of the last node the resource group was activated on or explicitly moved to. | gauge | `name` | +| `mscluster_resourcegroup_FailbackWindowEnd` | The FailbackWindowEnd property provides the latest time that the group can be moved back to the node identified as its preferred node. | gauge | `name` | +| `mscluster_resourcegroup_FailbackWindowStart` | The FailbackWindowStart property provides the earliest time (that is, local time as kept by the cluster) that the group can be moved back to the node identified as its preferred node. 
| gauge | `name` | +| `mscluster_resourcegroup_FailoverPeriod` | The FailoverPeriod property specifies a number of hours during which a maximum number of failover attempts, specified by the FailoverThreshold property, can occur. | gauge | `name` | +| `mscluster_resourcegroup_FailoverThreshold` | The FailoverThreshold property specifies the maximum number of failover attempts. | gauge | `name` | +| `mscluster_resourcegroup_Flags` | Provides access to the flags set for the group. The cluster defines flags only for resources. For a description of these flags, see [CLUSCTL_RESOURCE_GET_FLAGS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-flags). | gauge | `name` | +| `mscluster_resourcegroup_GroupType` | The Type of the resource group. | gauge | `name` | +| `mscluster_resourcegroup_OwnerNode` | The node hosting the resource group. | gauge | `node_name`, `name` | +| `mscluster_resourcegroup_Priority` | Priority value of the resource group | gauge | `name` | +| `mscluster_resourcegroup_ResiliencyPeriod` | The resiliency period for this group, in seconds. | gauge | `name` | +| `mscluster_resourcegroup_State` | The current state of the resource group. 
-1: Unknown; 0: Online; 1: Offline; 2: Failed; 3: Partial Online; 4: Pending | gauge | `name` | +| `mscluster_resourcegroup_UpdateDomain` | | gauge | `name` | + +### Example metric +Query the state of all cluster resources owned by node1 +``` +windows_mscluster_resource_owner_node{node_name="node1"} +``` + +## Useful queries +Counts the number of Network Name cluster resources +``` +count(windows_mscluster_resource_state{type="Network Name"}) +``` + +## Alerting examples +_This collector does not yet have alerting examples, we would appreciate your help adding them!_ diff --git a/docs/collector.mscluster_cluster.md b/docs/collector.mscluster_cluster.md deleted file mode 100644 index e9b80052..00000000 --- a/docs/collector.mscluster_cluster.md +++ /dev/null @@ -1,104 +0,0 @@ -# mscluster_cluster collector - -The MSCluster_Cluster class is a dynamic WMI class that represents a cluster. - -||| --|- -Metric name prefix | `mscluster_cluster` -Classes | `MSCluster_Cluster` -Enabled by default? | No - -## Flags - -None - -## Metrics - -Name | Description | Type | Labels ------|-------------|------|------- -`AddEvictDelay` | Provides access to the cluster's AddEvictDelay property, which is the number a seconds that a new node is delayed after an eviction of another node. | gauge | `name` -`AdminAccessPoint` | The type of the cluster administrative access point. | gauge | `name` -`AutoAssignNodeSite` | Determines whether or not the cluster will attempt to automatically assign nodes to sites based on networks and Active Directory Site information. | gauge | `name` -`AutoBalancerLevel` | Determines the level of aggressiveness of AutoBalancer. | gauge | `name` -`AutoBalancerMode` | Determines whether or not the auto balancer is enabled. | gauge | `name` -`BackupInProgress` | Indicates whether a backup is in progress. | gauge | `name` -`BlockCacheSize` | CSV BlockCache Size in MB. 
| gauge | `name` -`ClusSvcHangTimeout` | Controls how long the cluster network driver waits between Failover Cluster Service heartbeats before it determines that the Failover Cluster Service has stopped responding. | gauge | `name` -`ClusSvcRegroupOpeningTimeout` | Controls how long a node will wait on other nodes in the opening stage before deciding that they failed. | gauge | `name` -`ClusSvcRegroupPruningTimeout` | Controls how long the membership leader will wait to reach full connectivity between cluster nodes. | gauge | `name` -`ClusSvcRegroupStageTimeout` | Controls how long a node will wait on other nodes in a membership stage before deciding that they failed. | gauge | `name` -`ClusSvcRegroupTickInMilliseconds` | Controls how frequently the membership algorithm is sending periodic membership messages. | gauge | `name` -`ClusterEnforcedAntiAffinity` | Enables or disables hard enforcement of group anti-affinity classes. | gauge | `name` -`ClusterFunctionalLevel` | The functional level the cluster is currently running in. | gauge | `name` -`ClusterGroupWaitDelay` | Maximum time in seconds that a group waits for its preferred node to come online during cluster startup before coming online on a different node. | gauge | `name` -`ClusterLogLevel` | Controls the level of cluster logging. | gauge | `name` -`ClusterLogSize` | Controls the maximum size of the cluster log files on each of the nodes. | gauge | `name` -`ClusterUpgradeVersion` | Specifies the upgrade version the cluster is currently running in. | gauge | `name` -`CrossSiteDelay` | Controls how long the cluster network driver waits in milliseconds between sending Cluster Service heartbeats across sites. | gauge | `name` -`CrossSiteThreshold` | Controls how many Cluster Service heartbeats can be missed across sites before it determines that Cluster Service has stopped responding. 
| gauge | `name` -`CrossSubnetDelay` | Controls how long the cluster network driver waits in milliseconds between sending Cluster Service heartbeats across subnets. | gauge | `name` -`CrossSubnetThreshold` | Controls how many Cluster Service heartbeats can be missed across subnets before it determines that Cluster Service has stopped responding. | gauge | `name` -`CsvBalancer` | Whether automatic balancing for CSV is enabled. | gauge | `name` -`DatabaseReadWriteMode` | Sets the database read and write mode. | gauge | `name` -`DefaultNetworkRole` | Provides access to the cluster's DefaultNetworkRole property. | gauge | `name` -`DetectedCloudPlatform` | | gauge | `name` -`DetectManagedEvents` | | gauge | `name` -`DetectManagedEventsThreshold` | | gauge | `name` -`DisableGroupPreferredOwnerRandomization` | | gauge | `name` -`DrainOnShutdown` | Whether to drain the node when cluster service is being stopped. | gauge | `name` -`DynamicQuorumEnabled` | Allows cluster service to adjust node weights as needed to increase availability. | gauge | `name` -`EnableSharedVolumes` | Enables or disables cluster shared volumes on this cluster. | gauge | `name` -`FixQuorum` | Provides access to the cluster's FixQuorum property, which specifies if the cluster is in a fix quorum state. | gauge | `name` -`GracePeriodEnabled` | Whether the node grace period feature of this cluster is enabled. | gauge | `name` -`GracePeriodTimeout` | The grace period timeout in milliseconds. | gauge | `name` -`GroupDependencyTimeout` | The timeout after which a group will be brought online despite unsatisfied dependencies | gauge | `name` -`HangRecoveryAction` | Controls the action to take if the user-mode processes have stopped responding. | gauge | `name` -`IgnorePersistentStateOnStartup` | Provides access to the cluster's IgnorePersistentStateOnStartup property, which specifies whether the cluster will bring online groups that were online when the cluster was shut down. 
| gauge | `name` -`LogResourceControls` | Controls the logging of resource controls. | gauge | `name` -`LowerQuorumPriorityNodeId` | Specifies the Node ID that has a lower priority when voting for quorum is performed. If the quorum vote is split 50/50%, the specified node's vote would be ignored to break the tie. If this is not set then the cluster will pick a node at random to break the tie. | gauge | `name` -`MaxNumberOfNodes` | Indicates the maximum number of nodes that may participate in the Cluster. | gauge | `name` -`MessageBufferLength` | The maximum unacknowledged message count for GEM. | gauge | `name` -`MinimumNeverPreemptPriority` | Groups with this priority or higher cannot be preempted. | gauge | `name` -`MinimumPreemptorPriority` | Minimum priority a cluster group must have to be able to preempt another group. | gauge | `name` -`NetftIPSecEnabled` | Whether IPSec is enabled for cluster internal traffic. | gauge | `name` -`PlacementOptions` | Various option flags to modify default placement behavior. | gauge | `name` -`PlumbAllCrossSubnetRoutes` | Plumbs all possible cross subnet routes to all nodes. | gauge | `name` -`PreventQuorum` | Whether the cluster will ignore group persistent state on startup. | gauge | `name` -`QuarantineDuration` | The quarantine period timeout in milliseconds. | gauge | `name` -`QuarantineThreshold` | Number of node failures before it will be quarantined. | gauge | `name` -`QuorumArbitrationTimeMax` | Controls the maximum time necessary to decide the Quorum owner node. | gauge | `name` -`QuorumArbitrationTimeMin` | Controls the minimum time necessary to decide the Quorum owner node. | gauge | `name` -`QuorumLogFileSize` | This property is obsolete. | gauge | `name` -`QuorumTypeValue` | Get the current quorum type value. -1: Unknown; 1: Node; 2: FileShareWitness; 3: Storage; 4: None | gauge | `name` -`RequestReplyTimeout` | Controls the request reply time-out period. 
| gauge | `name` -`ResiliencyDefaultPeriod` | The default resiliency period, in seconds, for the cluster. | gauge | `name` -`ResiliencyLevel` | The resiliency level for the cluster. | gauge | `name` -`ResourceDllDeadlockPeriod` | This property is obsolete. | gauge | `name` -`RootMemoryReserved` | Controls the amount of memory reserved for the parent partition on all cluster nodes. | gauge | `name` -`RouteHistoryLength` | The history length for routes to help finding network issues. | gauge | `name` -`S2DBusTypes` | Bus types for storage spaces direct. | gauge | `name` -`S2DCacheDesiredState` | Desired state of the storage spaces direct cache. | gauge | `name` -`S2DCacheFlashReservePercent` | Percentage of allocated flash space to utilize when caching. | gauge | `name` -`S2DCachePageSizeKBytes` | Page size in KB used by S2D cache. | gauge | `name` -`S2DEnabled` | Whether direct attached storage (DAS) is enabled. | gauge | `name` -`S2DIOLatencyThreshold` | The I/O latency threshold for storage spaces direct. | gauge | `name` -`S2DOptimizations` | Optimization flags for storage spaces direct. | gauge | `name` -`SameSubnetDelay` | Controls how long the cluster network driver waits in milliseconds between sending Cluster Service heartbeats on the same subnet. | gauge | `name` -`SameSubnetThreshold` | Controls how many Cluster Service heartbeats can be missed on the same subnet before it determines that Cluster Service has stopped responding. | gauge | `name` -`SecurityLevel` | Controls the level of security that should apply to intracluster messages. 0: Clear Text; 1: Sign; 2: Encrypt | gauge | `name` -`SecurityLevelForStorage` | | gauge | `name` -`SharedVolumeVssWriterOperationTimeout` | CSV VSS Writer operation timeout in seconds. | gauge | `name` -`ShutdownTimeoutInMinutes` | The maximum time in minutes allowed for cluster resources to come offline during cluster service shutdown. 
| gauge | `name` -`UseClientAccessNetworksForSharedVolumes` | Whether the use of client access networks for cluster shared volumes feature of this cluster is enabled. 0: Disabled; 1: Enabled; 2: Auto | gauge | `name` -`WitnessDatabaseWriteTimeout` | Controls the maximum time in seconds that a cluster database write to a witness can take before the write is abandoned. | gauge | `name` -`WitnessDynamicWeight` | The weight of the configured witness. | gauge | `name` -`WitnessRestartInterval` | Controls the witness restart interval. | gauge | `name` - -### Example metric -_This collector does not yet have explained examples, we would appreciate your help adding them!_ - -## Useful queries -_This collector does not yet have any useful queries added, we would appreciate your help adding them!_ - -## Alerting examples -_This collector does not yet have alerting examples, we would appreciate your help adding them!_ diff --git a/docs/collector.mscluster_network.md b/docs/collector.mscluster_network.md deleted file mode 100644 index 0219a92f..00000000 --- a/docs/collector.mscluster_network.md +++ /dev/null @@ -1,32 +0,0 @@ -# mscluster_network collector - -The MSCluster_Network class is a dynamic WMI class that represents cluster networks. - -||| --|- -Metric name prefix | `mscluster_network` -Classes | `MSCluster_Network` -Enabled by default? | No - -## Flags - -None - -## Metrics - -Name | Description | Type | Labels ------|-------------|------|------- -`Characteristics` | Provides the characteristics of the network. The cluster defines characteristics only for resources. For a description of these characteristics, see [CLUSCTL_RESOURCE_GET_CHARACTERISTICS](https://msdn.microsoft.com/library/aa367466). | gauge | `name` -`Flags` | Provides access to the flags set for the network. The cluster defines flags only for resources. 
For a description of these flags, see [CLUSCTL_RESOURCE_GET_FLAGS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-flags). | gauge | `name` -`Metric` | The metric of a cluster network (networks with lower values are used first). If this value is set, then the AutoMetric property is set to false. | gauge | `name` -`Role` | Provides access to the network's Role property. The Role property describes the role of the network in the cluster. 0: None; 1: Cluster; 2: Client; 3: Both | gauge | `name` -`State` | Provides the current state of the network. 1-1: Unknown; 0: Unavailable; 1: Down; 2: Partitioned; 3: Up | gauge | `name` - -### Example metric -_This collector does not yet have explained examples, we would appreciate your help adding them!_ - -## Useful queries -_This collector does not yet have any useful queries added, we would appreciate your help adding them!_ - -## Alerting examples -_This collector does not yet have alerting examples, we would appreciate your help adding them!_ diff --git a/docs/collector.mscluster_node.md b/docs/collector.mscluster_node.md deleted file mode 100644 index 3daf542c..00000000 --- a/docs/collector.mscluster_node.md +++ /dev/null @@ -1,41 +0,0 @@ -# mscluster_node collector - -The MSCluster_Node class is a dynamic WMI class that represents a cluster node. - -||| --|- -Metric name prefix | `mscluster_node` -Classes | `MSCluster_Node` -Enabled by default? | No - -## Flags - -None - -## Metrics - -Name | Description | Type | Labels ------|-------------|------|------- -`BuildNumber` | Provides access to the node's BuildNumber property. | gauge | `name` -`Characteristics` | Provides access to the characteristics set for the node. For a list of possible characteristics, see [CLUSCTL_NODE_GET_CHARACTERISTICS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-node-get-characteristics). 
| gauge | `name` -`DetectedCloudPlatform` | The dynamic vote weight of the node adjusted by dynamic quorum feature. | gauge | `name` -`DynamicWeight` | The dynamic vote weight of the node adjusted by dynamic quorum feature. | gauge | `name` -`Flags` | Provides access to the flags set for the node. For a list of possible characteristics, see [CLUSCTL_NODE_GET_FLAGS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-node-get-flags). | gauge | `name` -`MajorVersion` | Provides access to the node's MajorVersion property, which specifies the major portion of the Windows version installed. | gauge | `name` -`MinorVersion` | Provides access to the node's MinorVersion property, which specifies the minor portion of the Windows version installed. | gauge | `name` -`NeedsPreventQuorum` | Whether the cluster service on that node should be started with prevent quorum flag. | gauge | `name` -`NodeDrainStatus` | The current node drain status of a node. 0: Not Initiated; 1: In Progress; 2: Completed; 3: Failed | gauge | `name` -`NodeHighestVersion` | Provides access to the node's NodeHighestVersion property, which specifies the highest possible version of the cluster service with which the node can join or communicate. | gauge | `name` -`NodeLowestVersion` | Provides access to the node's NodeLowestVersion property, which specifies the lowest possible version of the cluster service with which the node can join or communicate. | gauge | `name` -`NodeWeight` | The vote weight of the node. | gauge | `name` -`State` | Returns the current state of a node. -1: Unknown; 0: Up; 1: Down; 2: Paused; 3: Joining | gauge | `name` -`StatusInformation` | The isolation or quarantine status of the node. 
| gauge | `name` - -### Example metric -_This collector does not yet have explained examples, we would appreciate your help adding them!_ - -## Useful queries -_This collector does not yet have any useful queries added, we would appreciate your help adding them!_ - -## Alerting examples -_This collector does not yet have alerting examples, we would appreciate your help adding them!_ diff --git a/docs/collector.mscluster_resource.md b/docs/collector.mscluster_resource.md deleted file mode 100644 index c7ad995a..00000000 --- a/docs/collector.mscluster_resource.md +++ /dev/null @@ -1,50 +0,0 @@ -# mscluster_resource collector - -The MSCluster_resource class is a dynamic WMI class that represents a cluster resource. - -||| --|- -Metric name prefix | `mscluster_resource` -Classes | `MSCluster_Resource` -Enabled by default? | No - -## Flags - -None - -## Metrics - -Name | Description | Type | Labels ------|-------------|------|------- -`Characteristics` | Provides the characteristics of the object. The cluster defines characteristics only for resources. For a description of these characteristics, see [CLUSCTL_RESOURCE_GET_CHARACTERISTICS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-characteristics). | gauge | `type`, `owner_group`, `name` -`DeadlockTimeout` | Indicates the length of time to wait, in milliseconds, before declaring a deadlock in any call into a resource. | gauge | `type`, `owner_group`, `name` -`EmbeddedFailureAction` | The time, in milliseconds, that a resource should remain in a failed state before the Cluster service attempts to restart it. | gauge | `type`, `owner_group`, `name` -`Flags` | Provides access to the flags set for the object. The cluster defines flags only for resources. For a description of these flags, see [CLUSCTL_RESOURCE_GET_FLAGS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-flags). 
| gauge | `type`, `owner_group`, `name` -`IsAlivePollInterval` | Provides access to the resource's IsAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it is operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the IsAlivePollInterval property for the resource type associated with the resource. | gauge | `type`, `owner_group`, `name` -`LooksAlivePollInterval` | Provides access to the resource's LooksAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it appears operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the LooksAlivePollInterval property for the resource type associated with the resource. | gauge | `type`, `owner_group`, `name` -`MonitorProcessId` | Provides the process ID of the resource host service that is currently hosting the resource. | gauge | `type`, `owner_group`, `name` -`OwnerNode` | The node hosting the resource. | gauge | `type`, `owner_group`, `node_name`, `name` -`PendingTimeout` | Provides access to the resource's PendingTimeout property. If a resource cannot be brought online or taken offline in the number of milliseconds specified by the PendingTimeout property, the resource is forcibly terminated. | gauge | `type`, `owner_group`, `name` -`ResourceClass` | Gets or sets the resource class of a resource. 0: Unknown; 1: Storage; 2: Network; 32768: Unknown | gauge | `type`, `owner_group`, `name` -`RestartAction` | Provides access to the resource's RestartAction property, which is the action to be taken by the Cluster Service if the resource fails. | gauge | `type`, `owner_group`, `name` -`RestartDelay` | Indicates the time delay before a failed resource is restarted. 
| gauge | `type`, `owner_group`, `name` -`RestartPeriod` | Provides access to the resource's RestartPeriod property, which is interval of time, in milliseconds, during which a specified number of restart attempts can be made on a nonresponsive resource. | gauge | `type`, `owner_group`, `name` -`RestartThreshold` | Provides access to the resource's RestartThreshold property which is the maximum number of restart attempts that can be made on a resource within an interval defined by the RestartPeriod property before the Cluster Service initiates the action specified by the RestartAction property. | gauge | `type`, `owner_group`, `name` -`RetryPeriodOnFailure` | Provides access to the resource's RetryPeriodOnFailure property, which is the interval of time (in milliseconds) that a resource should remain in a failed state before the Cluster service attempts to restart it. | gauge | `type`, `owner_group`, `name` -`State` | The current state of the resource. -1: Unknown; 0: Inherited; 1: Initializing; 2: Online; 3: Offline; 4: Failed; 128: Pending; 129: Online Pending; 130: Offline Pending | gauge | `type`, `owner_group`, `name` -`Subclass` | Provides the list of references to nodes that can be the owner of this resource. 
| gauge | `type`, `owner_group`, `name` - -### Example metric -Query the state of all cluster resource owned by node1 -``` -windows_mscluster_resource_owner_node{node_name="node1"} -``` - -## Useful queries -Counts the number of Network Name cluster resource -``` -count(windows_mscluster_resource_state{type="Network Name"}) -``` - -## Alerting examples -_This collector does not yet have alerting examples, we would appreciate your help adding them!_ diff --git a/docs/collector.mscluster_resourcegroup.md b/docs/collector.mscluster_resourcegroup.md deleted file mode 100644 index 88c07b6e..00000000 --- a/docs/collector.mscluster_resourcegroup.md +++ /dev/null @@ -1,48 +0,0 @@ -# mscluster_resourcegroup collector - -The MSCluster_ResourceGroup class is a dynamic WMI class that represents a cluster group. - -||| --|- -Metric name prefix | `mscluster_resourcegroup` -Classes | `MSCluster_ResourceGroup` -Enabled by default? | No - -## Flags - -None - -## Metrics - -Name | Description | Type | Labels ------|-------------|------|------- -`AutoFailbackType` | Provides access to the group's AutoFailbackType property. | gauge | `name` -`Characteristics` | Provides the characteristics of the group. The cluster defines characteristics only for resources. For a description of these characteristics, see [CLUSCTL_RESOURCE_GET_CHARACTERISTICS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-characteristics). | gauge | `name` -`ColdStartSetting` | Indicates whether a group can start after a cluster cold start. | gauge | `name` -`DefaultOwner` | Number of the last node the resource group was activated on or explicitly moved to. | gauge | `name` -`FailbackWindowEnd` | The FailbackWindowEnd property provides the latest time that the group can be moved back to the node identified as its preferred node. 
| gauge | `name` -`FailbackWindowStart` | The FailbackWindowStart property provides the earliest time (that is, local time as kept by the cluster) that the group can be moved back to the node identified as its preferred node. | gauge | `name` -`FailoverPeriod` | The FailoverPeriod property specifies a number of hours during which a maximum number of failover attempts, specified by the FailoverThreshold property, can occur. | gauge | `name` -`FailoverThreshold` | The FailoverThreshold property specifies the maximum number of failover attempts. | gauge | `name` -`Flags` | Provides access to the flags set for the group. The cluster defines flags only for resources. For a description of these flags, see [CLUSCTL_RESOURCE_GET_FLAGS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-flags). | gauge | `name` -`GroupType` | The Type of the resource group. | gauge | `name` -`OwnerNode` | The node hosting the resource group. | gauge | `node_name`, `name` -`Priority` | Priority value of the resource group | gauge | `name` -`ResiliencyPeriod` | The resiliency period for this group, in seconds. | gauge | `name` -`State` | The current state of the resource group. 
-1: Unknown; 0: Online; 1: Offline; 2: Failed; 3: Partial Online; 4: Pending | gauge | `name` -`UpdateDomain` | | gauge | `name` - -### Example metric -Query the state of all cluster group owned by node1 -``` -windows_mscluster_resourcegroup_owner_node{node_name="node1"} -``` - -## Useful queries -Counts the number of cluster group by type -``` -count_values("count", windows_mscluster_resourcegroup_group_type) -``` - -## Alerting examples -_This collector does not yet have alerting examples, we would appreciate your help adding them!_ diff --git a/pkg/collector/collector.go b/pkg/collector/collector.go index e2972b00..5269b484 100644 --- a/pkg/collector/collector.go +++ b/pkg/collector/collector.go @@ -29,11 +29,7 @@ import ( "github.com/prometheus-community/windows_exporter/pkg/collector/logical_disk" "github.com/prometheus-community/windows_exporter/pkg/collector/logon" "github.com/prometheus-community/windows_exporter/pkg/collector/memory" - "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_cluster" - "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_network" - "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_node" - "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_resource" - "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_resourcegroup" + "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster" "github.com/prometheus-community/windows_exporter/pkg/collector/msmq" "github.com/prometheus-community/windows_exporter/pkg/collector/mssql" "github.com/prometheus-community/windows_exporter/pkg/collector/net" @@ -111,11 +107,7 @@ func NewWithConfig(config Config) Collectors { collectors[logical_disk.Name] = logical_disk.New(&config.LogicalDisk) collectors[logon.Name] = logon.New(&config.Logon) collectors[memory.Name] = memory.New(&config.Memory) - collectors[mscluster_cluster.Name] = 
mscluster_cluster.New(&config.MsclusterCluster) - collectors[mscluster_network.Name] = mscluster_network.New(&config.MsclusterNetwork) - collectors[mscluster_node.Name] = mscluster_node.New(&config.MsclusterNode) - collectors[mscluster_resource.Name] = mscluster_resource.New(&config.MsclusterResource) - collectors[mscluster_resourcegroup.Name] = mscluster_resourcegroup.New(&config.MsclusterResourceGroup) + collectors[mscluster.Name] = mscluster.New(&config.Mscluster) collectors[msmq.Name] = msmq.New(&config.Msmq) collectors[mssql.Name] = mssql.New(&config.Mssql) collectors[net.Name] = net.New(&config.Net) diff --git a/pkg/collector/config.go b/pkg/collector/config.go index 4ebf3cdf..ea34bd78 100644 --- a/pkg/collector/config.go +++ b/pkg/collector/config.go @@ -21,11 +21,7 @@ import ( "github.com/prometheus-community/windows_exporter/pkg/collector/logical_disk" "github.com/prometheus-community/windows_exporter/pkg/collector/logon" "github.com/prometheus-community/windows_exporter/pkg/collector/memory" - "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_cluster" - "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_network" - "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_node" - "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_resource" - "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_resourcegroup" + "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster" "github.com/prometheus-community/windows_exporter/pkg/collector/msmq" "github.com/prometheus-community/windows_exporter/pkg/collector/mssql" "github.com/prometheus-community/windows_exporter/pkg/collector/net" @@ -80,11 +76,7 @@ type Config struct { LogicalDisk logical_disk.Config `yaml:"logical_disk"` Logon logon.Config `yaml:"logon"` Memory memory.Config `yaml:"memory"` - MsclusterCluster mscluster_cluster.Config `yaml:"mscluster_cluster"` - MsclusterNetwork 
mscluster_network.Config `yaml:"mscluster_network"` - MsclusterNode mscluster_node.Config `yaml:"mscluster_node"` - MsclusterResource mscluster_resource.Config `yaml:"mscluster_resource"` - MsclusterResourceGroup mscluster_resourcegroup.Config `yaml:"mscluster_resourcegroup"` //nolint:tagliatelle + Mscluster mscluster.Config `yaml:"mscluster"` Msmq msmq.Config `yaml:"msmq"` Mssql mssql.Config `yaml:"mssql"` Net net.Config `yaml:"net"` @@ -142,11 +134,7 @@ var ConfigDefaults = Config{ LogicalDisk: logical_disk.ConfigDefaults, Logon: logon.ConfigDefaults, Memory: memory.ConfigDefaults, - MsclusterCluster: mscluster_cluster.ConfigDefaults, - MsclusterNetwork: mscluster_network.ConfigDefaults, - MsclusterNode: mscluster_node.ConfigDefaults, - MsclusterResource: mscluster_resource.ConfigDefaults, - MsclusterResourceGroup: mscluster_resourcegroup.ConfigDefaults, + Mscluster: mscluster.ConfigDefaults, Msmq: msmq.ConfigDefaults, Mssql: mssql.ConfigDefaults, Net: net.ConfigDefaults, diff --git a/pkg/collector/exchange/exchange.go b/pkg/collector/exchange/exchange.go index d89d9e14..02ec1859 100644 --- a/pkg/collector/exchange/exchange.go +++ b/pkg/collector/exchange/exchange.go @@ -5,7 +5,6 @@ package exchange import ( "fmt" "os" - "slices" "strings" "github.com/alecthomas/kingpin/v2" @@ -78,8 +77,6 @@ type Collector struct { unreachableQueueLength *prometheus.Desc userCount *prometheus.Desc yieldedTasks *prometheus.Desc - - enabledCollectors []string } func New(config *Config) *Collector { @@ -229,18 +226,6 @@ func (c *Collector) Build(_ log.Logger) error { c.syncCommandsPerSec = desc("activesync_sync_cmds_total", "Number of sync commands processed per second. 
Clients use this command to synchronize items within a folder") c.activeUserCountMapiHttpEmsMDB = desc("mapihttp_emsmdb_active_user_count", "Number of unique outlook users that have shown some kind of activity in the last 2 minutes") - c.enabledCollectors = make([]string, 0, len(ConfigDefaults.CollectorsEnabled)) - - for _, collectorName := range c.config.CollectorsEnabled { - if !slices.Contains(ConfigDefaults.CollectorsEnabled, collectorName) { - return fmt.Errorf("unknown exchange collector: %s", collectorName) - } - - c.enabledCollectors = append(c.enabledCollectors, collectorName) - } - - c.enabledCollectors = slices.Clip(c.enabledCollectors) - return nil } @@ -260,7 +245,7 @@ func (c *Collector) Collect(ctx *types.ScrapeContext, logger log.Logger, ch chan "MapiHttpEmsmdb": c.collectMapiHttpEmsmdb, } - for _, collectorName := range c.enabledCollectors { + for _, collectorName := range c.config.CollectorsEnabled { if err := collectorFuncs[collectorName](ctx, logger, ch); err != nil { _ = level.Error(logger).Log("msg", "Error in "+collectorName, "err", err) return err diff --git a/pkg/collector/map.go b/pkg/collector/map.go index 70abfe1d..2a6d08a1 100644 --- a/pkg/collector/map.go +++ b/pkg/collector/map.go @@ -21,11 +21,7 @@ import ( "github.com/prometheus-community/windows_exporter/pkg/collector/logical_disk" "github.com/prometheus-community/windows_exporter/pkg/collector/logon" "github.com/prometheus-community/windows_exporter/pkg/collector/memory" - "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_cluster" - "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_network" - "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_node" - "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_resource" - "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_resourcegroup" + "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster" 
"github.com/prometheus-community/windows_exporter/pkg/collector/msmq" "github.com/prometheus-community/windows_exporter/pkg/collector/mssql" "github.com/prometheus-community/windows_exporter/pkg/collector/net" @@ -81,11 +77,7 @@ var BuildersWithFlags = map[string]BuilderWithFlags[Collector]{ logical_disk.Name: NewBuilderWithFlags(logical_disk.NewWithFlags), logon.Name: NewBuilderWithFlags(logon.NewWithFlags), memory.Name: NewBuilderWithFlags(memory.NewWithFlags), - mscluster_cluster.Name: NewBuilderWithFlags(mscluster_cluster.NewWithFlags), - mscluster_network.Name: NewBuilderWithFlags(mscluster_network.NewWithFlags), - mscluster_node.Name: NewBuilderWithFlags(mscluster_node.NewWithFlags), - mscluster_resource.Name: NewBuilderWithFlags(mscluster_resource.NewWithFlags), - mscluster_resourcegroup.Name: NewBuilderWithFlags(mscluster_resourcegroup.NewWithFlags), + mscluster.Name: NewBuilderWithFlags(mscluster.NewWithFlags), msmq.Name: NewBuilderWithFlags(msmq.NewWithFlags), mssql.Name: NewBuilderWithFlags(mssql.NewWithFlags), net.Name: NewBuilderWithFlags(net.NewWithFlags), diff --git a/pkg/collector/mscluster/mscluster.go b/pkg/collector/mscluster/mscluster.go new file mode 100644 index 00000000..50e5580a --- /dev/null +++ b/pkg/collector/mscluster/mscluster.go @@ -0,0 +1,299 @@ +package mscluster + +import ( + "errors" + "fmt" + "slices" + "strings" + + "github.com/alecthomas/kingpin/v2" + "github.com/go-kit/log" + "github.com/prometheus-community/windows_exporter/pkg/types" + "github.com/prometheus/client_golang/prometheus" +) + +const Name = "mscluster" + +type Config struct { + CollectorsEnabled []string `yaml:"collectors_enabled"` +} + +var ConfigDefaults = Config{ + CollectorsEnabled: []string{ + "cluster", + "network", + "node", + "resource", + "resourcegroup", + }, +} + +// A Collector is a Prometheus Collector for WMI MSCluster metrics (cluster, network, node, resource and resource group). 
+type Collector struct { + config Config + + // cluster + clusterAddEvictDelay *prometheus.Desc + clusterAdminAccessPoint *prometheus.Desc + clusterAutoAssignNodeSite *prometheus.Desc + clusterAutoBalancerLevel *prometheus.Desc + clusterAutoBalancerMode *prometheus.Desc + clusterBackupInProgress *prometheus.Desc + clusterBlockCacheSize *prometheus.Desc + clusterClusSvcHangTimeout *prometheus.Desc + clusterClusSvcRegroupOpeningTimeout *prometheus.Desc + clusterClusSvcRegroupPruningTimeout *prometheus.Desc + clusterClusSvcRegroupStageTimeout *prometheus.Desc + clusterClusSvcRegroupTickInMilliseconds *prometheus.Desc + clusterClusterEnforcedAntiAffinity *prometheus.Desc + clusterClusterFunctionalLevel *prometheus.Desc + clusterClusterGroupWaitDelay *prometheus.Desc + clusterClusterLogLevel *prometheus.Desc + clusterClusterLogSize *prometheus.Desc + clusterClusterUpgradeVersion *prometheus.Desc + clusterCrossSiteDelay *prometheus.Desc + clusterCrossSiteThreshold *prometheus.Desc + clusterCrossSubnetDelay *prometheus.Desc + clusterCrossSubnetThreshold *prometheus.Desc + clusterCsvBalancer *prometheus.Desc + clusterDatabaseReadWriteMode *prometheus.Desc + clusterDefaultNetworkRole *prometheus.Desc + clusterDetectedCloudPlatform *prometheus.Desc + clusterDetectManagedEvents *prometheus.Desc + clusterDetectManagedEventsThreshold *prometheus.Desc + clusterDisableGroupPreferredOwnerRandomization *prometheus.Desc + clusterDrainOnShutdown *prometheus.Desc + clusterDynamicQuorumEnabled *prometheus.Desc + clusterEnableSharedVolumes *prometheus.Desc + clusterFixQuorum *prometheus.Desc + clusterGracePeriodEnabled *prometheus.Desc + clusterGracePeriodTimeout *prometheus.Desc + clusterGroupDependencyTimeout *prometheus.Desc + clusterHangRecoveryAction *prometheus.Desc + clusterIgnorePersistentStateOnStartup *prometheus.Desc + clusterLogResourceControls *prometheus.Desc + clusterLowerQuorumPriorityNodeId *prometheus.Desc + clusterMaxNumberOfNodes *prometheus.Desc + 
clusterMessageBufferLength *prometheus.Desc + clusterMinimumNeverPreemptPriority *prometheus.Desc + clusterMinimumPreemptorPriority *prometheus.Desc + clusterNetftIPSecEnabled *prometheus.Desc + clusterPlacementOptions *prometheus.Desc + clusterPlumbAllCrossSubnetRoutes *prometheus.Desc + clusterPreventQuorum *prometheus.Desc + clusterQuarantineDuration *prometheus.Desc + clusterQuarantineThreshold *prometheus.Desc + clusterQuorumArbitrationTimeMax *prometheus.Desc + clusterQuorumArbitrationTimeMin *prometheus.Desc + clusterQuorumLogFileSize *prometheus.Desc + clusterQuorumTypeValue *prometheus.Desc + clusterRequestReplyTimeout *prometheus.Desc + clusterResiliencyDefaultPeriod *prometheus.Desc + clusterResiliencyLevel *prometheus.Desc + clusterResourceDllDeadlockPeriod *prometheus.Desc + clusterRootMemoryReserved *prometheus.Desc + clusterRouteHistoryLength *prometheus.Desc + clusterS2DBusTypes *prometheus.Desc + clusterS2DCacheDesiredState *prometheus.Desc + clusterS2DCacheFlashReservePercent *prometheus.Desc + clusterS2DCachePageSizeKBytes *prometheus.Desc + clusterS2DEnabled *prometheus.Desc + clusterS2DIOLatencyThreshold *prometheus.Desc + clusterS2DOptimizations *prometheus.Desc + clusterSameSubnetDelay *prometheus.Desc + clusterSameSubnetThreshold *prometheus.Desc + clusterSecurityLevel *prometheus.Desc + clusterSecurityLevelForStorage *prometheus.Desc + clusterSharedVolumeVssWriterOperationTimeout *prometheus.Desc + clusterShutdownTimeoutInMinutes *prometheus.Desc + clusterUseClientAccessNetworksForSharedVolumes *prometheus.Desc + clusterWitnessDatabaseWriteTimeout *prometheus.Desc + clusterWitnessDynamicWeight *prometheus.Desc + clusterWitnessRestartInterval *prometheus.Desc + + // network + networkCharacteristics *prometheus.Desc + networkFlags *prometheus.Desc + networkMetric *prometheus.Desc + networkRole *prometheus.Desc + networkState *prometheus.Desc + + // node + nodeBuildNumber *prometheus.Desc + nodeCharacteristics *prometheus.Desc + 
nodeDetectedCloudPlatform *prometheus.Desc + nodeDynamicWeight *prometheus.Desc + nodeFlags *prometheus.Desc + nodeMajorVersion *prometheus.Desc + nodeMinorVersion *prometheus.Desc + nodeNeedsPreventQuorum *prometheus.Desc + nodeNodeDrainStatus *prometheus.Desc + nodeNodeHighestVersion *prometheus.Desc + nodeNodeLowestVersion *prometheus.Desc + nodeNodeWeight *prometheus.Desc + nodeState *prometheus.Desc + nodeStatusInformation *prometheus.Desc + + resourceCharacteristics *prometheus.Desc + resourceDeadlockTimeout *prometheus.Desc + resourceEmbeddedFailureAction *prometheus.Desc + resourceFlags *prometheus.Desc + resourceIsAlivePollInterval *prometheus.Desc + resourceLooksAlivePollInterval *prometheus.Desc + resourceMonitorProcessId *prometheus.Desc + resourceOwnerNode *prometheus.Desc + resourcePendingTimeout *prometheus.Desc + resourceResourceClass *prometheus.Desc + resourceRestartAction *prometheus.Desc + resourceRestartDelay *prometheus.Desc + resourceRestartPeriod *prometheus.Desc + resourceRestartThreshold *prometheus.Desc + resourceRetryPeriodOnFailure *prometheus.Desc + resourceState *prometheus.Desc + resourceSubClass *prometheus.Desc + + // ResourceGroup + resourceGroupAutoFailbackType *prometheus.Desc + resourceGroupCharacteristics *prometheus.Desc + resourceGroupColdStartSetting *prometheus.Desc + resourceGroupDefaultOwner *prometheus.Desc + resourceGroupFailbackWindowEnd *prometheus.Desc + resourceGroupFailbackWindowStart *prometheus.Desc + resourceGroupFailOverPeriod *prometheus.Desc + resourceGroupFailOverThreshold *prometheus.Desc + resourceGroupFlags *prometheus.Desc + resourceGroupGroupType *prometheus.Desc + resourceGroupOwnerNode *prometheus.Desc + resourceGroupPriority *prometheus.Desc + resourceGroupResiliencyPeriod *prometheus.Desc + resourceGroupState *prometheus.Desc +} +
+// New returns a Collector configured from config.
+//
+// A nil config selects ConfigDefaults, and a nil CollectorsEnabled list falls
+// back to ConfigDefaults.CollectorsEnabled. Defaults are applied to the
+// Collector's own copy of the configuration, so the Config passed in by the
+// caller is never written to.
+func New(config *Config) *Collector {
+	if config == nil {
+		config = &ConfigDefaults
+	}
+
+	// Copy first, then default the copy: assigning through the pointer would
+	// silently modify the caller's Config.
+	c := &Collector{
+		config: *config,
+	}
+
+	if c.config.CollectorsEnabled == nil {
+		c.config.CollectorsEnabled = ConfigDefaults.CollectorsEnabled
+	}
+
+	return c
+}
+
+// NewWithFlags returns a Collector whose list of enabled sub-collectors is
+// taken from the "collectors.mscluster.enabled" flag registered on app. The
+// flag defaults to ConfigDefaults.CollectorsEnabled joined with commas.
+//
+// The flag value is only turned into a list inside the registered app.Action
+// callback; until that callback has run, CollectorsEnabled is an empty slice.
+func NewWithFlags(app *kingpin.Application) *Collector {
+	c := &Collector{
+		config: ConfigDefaults,
+	}
+	c.config.CollectorsEnabled = make([]string, 0)
+
+	var collectorsEnabled string
+
+	app.Flag(
+		"collectors.mscluster.enabled",
+		"Comma-separated list of collectors to use.",
+	).Default(strings.Join(ConfigDefaults.CollectorsEnabled, ",")).StringVar(&collectorsEnabled)
+
+	app.Action(func(*kingpin.ParseContext) error {
+		// strings.Split("", ",") returns [""] rather than an empty slice, which
+		// would defeat the len(...) == 0 checks in Build and Collect. Entries
+		// are also trimmed so that "cluster, node" still matches "node".
+		enabled := make([]string, 0, strings.Count(collectorsEnabled, ",")+1)
+
+		for _, name := range strings.Split(collectorsEnabled, ",") {
+			if name = strings.TrimSpace(name); name != "" {
+				enabled = append(enabled, name)
+			}
+		}
+
+		c.config.CollectorsEnabled = enabled
+
+		return nil
+	})
+
+	return c
+}
+
+// GetName returns the collector name, the package-level constant Name.
+func (c *Collector) GetName() string {
+	return Name
+}
+
+// GetPerfCounter returns the fixed list {"Memory"}; the logger is unused and
+// the error is always nil.
+func (c *Collector) GetPerfCounter(_ log.Logger) ([]string, error) {
+	return []string{"Memory"}, nil
+}
+
+// Close is a no-op and always returns nil.
+func (c *Collector) Close() error {
+	return nil
+}
+
+// Build calls the build method of every sub-collector named in
+// CollectorsEnabled ("cluster", "network", "node", "resource",
+// "resourcegroup"). Names outside that set are ignored. It always returns nil.
+func (c *Collector) Build(_ log.Logger) error {
+	if len(c.config.CollectorsEnabled) == 0 {
+		return nil
+	}
+
+	if slices.Contains(c.config.CollectorsEnabled, "cluster") {
+		c.buildCluster()
+	}
+
+	if slices.Contains(c.config.CollectorsEnabled, "network") {
+		c.buildNetwork()
+	}
+
+	if slices.Contains(c.config.CollectorsEnabled, "node") {
+		c.buildNode()
+	}
+
+	if slices.Contains(c.config.CollectorsEnabled, "resource") {
+		c.buildResource()
+	}
+
+	if slices.Contains(c.config.CollectorsEnabled, "resourcegroup") {
+		c.buildResourceGroup()
+	}
+
+	return nil
+}
+
+// Collect sends the metric values for each metric
+// to the provided prometheus Metric channel.
+func (c *Collector) Collect(_ *types.ScrapeContext, logger log.Logger, ch chan<- prometheus.Metric) error {
+	logger = log.With(logger, "collector", Name)
+	if len(c.config.CollectorsEnabled) == 0 {
+		return nil
+	}
+
+	// enabled reports whether the named sub-collector was selected.
+	enabled := func(name string) bool {
+		return slices.Contains(c.config.CollectorsEnabled, name)
+	}
+
+	// A failing sub-collector does not stop the others; every failure is
+	// recorded and reported together at the end.
+	var errs []error
+
+	if enabled("cluster") {
+		if err := c.collectCluster(logger, ch); err != nil {
+			errs = append(errs, fmt.Errorf("failed to collect cluster metrics: %w", err))
+		}
+	}
+
+	if enabled("network") {
+		if err := c.collectNetwork(logger, ch); err != nil {
+			errs = append(errs, fmt.Errorf("failed to collect network metrics: %w", err))
+		}
+	}
+
+	// nodeNames is produced by the node sub-collector and handed to the
+	// resource and resource group sub-collectors below. It stays nil when
+	// "node" is not enabled.
+	// NOTE(review): confirm collectResource/collectResourceGroup cope with a
+	// nil list.
+	var nodeNames []string
+
+	if enabled("node") {
+		var err error
+
+		nodeNames, err = c.collectNode(logger, ch)
+		if err != nil {
+			errs = append(errs, fmt.Errorf("failed to collect node metrics: %w", err))
+		}
+	}
+
+	if enabled("resource") {
+		if err := c.collectResource(logger, ch, nodeNames); err != nil {
+			errs = append(errs, fmt.Errorf("failed to collect resource metrics: %w", err))
+		}
+	}
+
+	if enabled("resourcegroup") {
+		if err := c.collectResourceGroup(logger, ch, nodeNames); err != nil {
+			errs = append(errs, fmt.Errorf("failed to collect resource group metrics: %w", err))
+		}
+	}
+
+	// errors.Join returns nil when errs is empty.
+	return errors.Join(errs...)
+}
diff --git a/pkg/collector/mscluster_cluster/mscluster_cluster.go b/pkg/collector/mscluster/mscluster_cluster.go similarity index 55% rename from pkg/collector/mscluster_cluster/mscluster_cluster.go rename to pkg/collector/mscluster/mscluster_cluster.go index 159b9b7e..38ea1575 100644 --- a/pkg/collector/mscluster_cluster/mscluster_cluster.go +++ b/pkg/collector/mscluster/mscluster_cluster.go @@ -1,599 +1,17 @@ -package mscluster_cluster +package mscluster import ( - "github.com/alecthomas/kingpin/v2" "github.com/go-kit/log" "github.com/prometheus-community/windows_exporter/pkg/types" "github.com/prometheus-community/windows_exporter/pkg/wmi" "github.com/prometheus/client_golang/prometheus" ) -const Name = "mscluster_cluster" +const nameCluster = Name + "_cluster" -type Config struct{} - -var ConfigDefaults = Config{} - -// A Collector is a Prometheus Collector for WMI MSCluster_Cluster metrics. -type Collector struct { - config Config - - addEvictDelay *prometheus.Desc - adminAccessPoint *prometheus.Desc - autoAssignNodeSite *prometheus.Desc - autoBalancerLevel *prometheus.Desc - autoBalancerMode *prometheus.Desc - backupInProgress *prometheus.Desc - blockCacheSize *prometheus.Desc - clusSvcHangTimeout *prometheus.Desc - clusSvcRegroupOpeningTimeout *prometheus.Desc - clusSvcRegroupPruningTimeout *prometheus.Desc - clusSvcRegroupStageTimeout *prometheus.Desc - clusSvcRegroupTickInMilliseconds *prometheus.Desc - clusterEnforcedAntiAffinity *prometheus.Desc - clusterFunctionalLevel *prometheus.Desc - clusterGroupWaitDelay *prometheus.Desc - clusterLogLevel *prometheus.Desc - clusterLogSize *prometheus.Desc - clusterUpgradeVersion *prometheus.Desc - crossSiteDelay *prometheus.Desc - crossSiteThreshold *prometheus.Desc - crossSubnetDelay *prometheus.Desc - crossSubnetThreshold *prometheus.Desc - csvBalancer *prometheus.Desc - databaseReadWriteMode *prometheus.Desc - defaultNetworkRole *prometheus.Desc - detectedCloudPlatform *prometheus.Desc -
detectManagedEvents *prometheus.Desc - detectManagedEventsThreshold *prometheus.Desc - disableGroupPreferredOwnerRandomization *prometheus.Desc - drainOnShutdown *prometheus.Desc - dynamicQuorumEnabled *prometheus.Desc - enableSharedVolumes *prometheus.Desc - fixQuorum *prometheus.Desc - gracePeriodEnabled *prometheus.Desc - gracePeriodTimeout *prometheus.Desc - groupDependencyTimeout *prometheus.Desc - hangRecoveryAction *prometheus.Desc - ignorePersistentStateOnStartup *prometheus.Desc - logResourceControls *prometheus.Desc - lowerQuorumPriorityNodeId *prometheus.Desc - maxNumberOfNodes *prometheus.Desc - messageBufferLength *prometheus.Desc - minimumNeverPreemptPriority *prometheus.Desc - minimumPreemptorPriority *prometheus.Desc - netftIPSecEnabled *prometheus.Desc - placementOptions *prometheus.Desc - plumbAllCrossSubnetRoutes *prometheus.Desc - preventQuorum *prometheus.Desc - quarantineDuration *prometheus.Desc - quarantineThreshold *prometheus.Desc - quorumArbitrationTimeMax *prometheus.Desc - quorumArbitrationTimeMin *prometheus.Desc - quorumLogFileSize *prometheus.Desc - quorumTypeValue *prometheus.Desc - requestReplyTimeout *prometheus.Desc - resiliencyDefaultPeriod *prometheus.Desc - resiliencyLevel *prometheus.Desc - resourceDllDeadlockPeriod *prometheus.Desc - rootMemoryReserved *prometheus.Desc - routeHistoryLength *prometheus.Desc - s2DBusTypes *prometheus.Desc - s2DCacheDesiredState *prometheus.Desc - s2DCacheFlashReservePercent *prometheus.Desc - s2DCachePageSizeKBytes *prometheus.Desc - s2DEnabled *prometheus.Desc - s2DIOLatencyThreshold *prometheus.Desc - s2DOptimizations *prometheus.Desc - sameSubnetDelay *prometheus.Desc - sameSubnetThreshold *prometheus.Desc - securityLevel *prometheus.Desc - securityLevelForStorage *prometheus.Desc - sharedVolumeVssWriterOperationTimeout *prometheus.Desc - shutdownTimeoutInMinutes *prometheus.Desc - useClientAccessNetworksForSharedVolumes *prometheus.Desc - witnessDatabaseWriteTimeout *prometheus.Desc - 
witnessDynamicWeight *prometheus.Desc - witnessRestartInterval *prometheus.Desc -} - -func New(config *Config) *Collector { - if config == nil { - config = &ConfigDefaults - } - - c := &Collector{ - config: *config, - } - - return c -} - -func NewWithFlags(_ *kingpin.Application) *Collector { - return &Collector{} -} - -func (c *Collector) GetName() string { - return Name -} - -func (c *Collector) GetPerfCounter(_ log.Logger) ([]string, error) { - return []string{"Memory"}, nil -} - -func (c *Collector) Close() error { - return nil -} - -func (c *Collector) Build(_ log.Logger) error { - c.addEvictDelay = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "add_evict_delay"), - "Provides access to the cluster's AddEvictDelay property, which is the number a seconds that a new node is delayed after an eviction of another node.", - []string{"name"}, - nil, - ) - c.adminAccessPoint = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "admin_access_point"), - "The type of the cluster administrative access point.", - []string{"name"}, - nil, - ) - c.autoAssignNodeSite = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "auto_assign_node_site"), - "Determines whether or not the cluster will attempt to automatically assign nodes to sites based on networks and Active Directory Site information.", - []string{"name"}, - nil, - ) - c.autoBalancerLevel = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "auto_balancer_level"), - "Determines the level of aggressiveness of AutoBalancer.", - []string{"name"}, - nil, - ) - c.autoBalancerMode = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "auto_balancer_mode"), - "Determines whether or not the auto balancer is enabled.", - []string{"name"}, - nil, - ) - c.backupInProgress = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "backup_in_progress"), - "Indicates whether a backup is in progress.", - []string{"name"}, - nil, - ) - 
c.blockCacheSize = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "block_cache_size"), - "CSV BlockCache Size in MB.", - []string{"name"}, - nil, - ) - c.clusSvcHangTimeout = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "clus_svc_hang_timeout"), - "Controls how long the cluster network driver waits between Failover Cluster Service heartbeats before it determines that the Failover Cluster Service has stopped responding.", - []string{"name"}, - nil, - ) - c.clusSvcRegroupOpeningTimeout = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "clus_svc_regroup_opening_timeout"), - "Controls how long a node will wait on other nodes in the opening stage before deciding that they failed.", - []string{"name"}, - nil, - ) - c.clusSvcRegroupPruningTimeout = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "clus_svc_regroup_pruning_timeout"), - "Controls how long the membership leader will wait to reach full connectivity between cluster nodes.", - []string{"name"}, - nil, - ) - c.clusSvcRegroupStageTimeout = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "clus_svc_regroup_stage_timeout"), - "Controls how long a node will wait on other nodes in a membership stage before deciding that they failed.", - []string{"name"}, - nil, - ) - c.clusSvcRegroupTickInMilliseconds = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "clus_svc_regroup_tick_in_milliseconds"), - "Controls how frequently the membership algorithm is sending periodic membership messages.", - []string{"name"}, - nil, - ) - c.clusterEnforcedAntiAffinity = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "cluster_enforced_anti_affinity"), - "Enables or disables hard enforcement of group anti-affinity classes.", - []string{"name"}, - nil, - ) - c.clusterFunctionalLevel = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "cluster_functional_level"), - "The functional 
level the cluster is currently running in.", - []string{"name"}, - nil, - ) - c.clusterGroupWaitDelay = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "cluster_group_wait_delay"), - "Maximum time in seconds that a group waits for its preferred node to come online during cluster startup before coming online on a different node.", - []string{"name"}, - nil, - ) - c.clusterLogLevel = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "cluster_log_level"), - "Controls the level of cluster logging.", - []string{"name"}, - nil, - ) - c.clusterLogSize = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "cluster_log_size"), - "Controls the maximum size of the cluster log files on each of the nodes.", - []string{"name"}, - nil, - ) - c.clusterUpgradeVersion = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "cluster_upgrade_version"), - "Specifies the upgrade version the cluster is currently running in.", - []string{"name"}, - nil, - ) - c.crossSiteDelay = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "cross_site_delay"), - "Controls how long the cluster network driver waits in milliseconds between sending Cluster Service heartbeats across sites.", - []string{"name"}, - nil, - ) - c.crossSiteThreshold = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "cross_site_threshold"), - "Controls how many Cluster Service heartbeats can be missed across sites before it determines that Cluster Service has stopped responding.", - []string{"name"}, - nil, - ) - c.crossSubnetDelay = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "cross_subnet_delay"), - "Controls how long the cluster network driver waits in milliseconds between sending Cluster Service heartbeats across subnets.", - []string{"name"}, - nil, - ) - c.crossSubnetThreshold = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "cross_subnet_threshold"), - "Controls how many 
Cluster Service heartbeats can be missed across subnets before it determines that Cluster Service has stopped responding.", - []string{"name"}, - nil, - ) - c.csvBalancer = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "csv_balancer"), - "Whether automatic balancing for CSV is enabled.", - []string{"name"}, - nil, - ) - c.databaseReadWriteMode = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "database_read_write_mode"), - "Sets the database read and write mode.", - []string{"name"}, - nil, - ) - c.defaultNetworkRole = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "default_network_role"), - "Provides access to the cluster's DefaultNetworkRole property.", - []string{"name"}, - nil, - ) - c.detectedCloudPlatform = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "detected_cloud_platform"), - "(DetectedCloudPlatform)", - []string{"name"}, - nil, - ) - c.detectManagedEvents = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "detect_managed_events"), - "(DetectManagedEvents)", - []string{"name"}, - nil, - ) - c.detectManagedEventsThreshold = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "detect_managed_events_threshold"), - "(DetectManagedEventsThreshold)", - []string{"name"}, - nil, - ) - c.disableGroupPreferredOwnerRandomization = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "disable_group_preferred_owner_randomization"), - "(DisableGroupPreferredOwnerRandomization)", - []string{"name"}, - nil, - ) - c.drainOnShutdown = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "drain_on_shutdown"), - "Whether to drain the node when cluster service is being stopped.", - []string{"name"}, - nil, - ) - c.dynamicQuorumEnabled = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "dynamic_quorum_enabled"), - "Allows cluster service to adjust node weights as needed to increase availability.", - 
[]string{"name"}, - nil, - ) - c.enableSharedVolumes = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "enable_shared_volumes"), - "Enables or disables cluster shared volumes on this cluster.", - []string{"name"}, - nil, - ) - c.fixQuorum = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "fix_quorum"), - "Provides access to the cluster's FixQuorum property, which specifies if the cluster is in a fix quorum state.", - []string{"name"}, - nil, - ) - c.gracePeriodEnabled = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "grace_period_enabled"), - "Whether the node grace period feature of this cluster is enabled.", - []string{"name"}, - nil, - ) - c.gracePeriodTimeout = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "grace_period_timeout"), - "The grace period timeout in milliseconds.", - []string{"name"}, - nil, - ) - c.groupDependencyTimeout = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "group_dependency_timeout"), - "The timeout after which a group will be brought online despite unsatisfied dependencies", - []string{"name"}, - nil, - ) - c.hangRecoveryAction = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "hang_recovery_action"), - "Controls the action to take if the user-mode processes have stopped responding.", - []string{"name"}, - nil, - ) - c.ignorePersistentStateOnStartup = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "ignore_persistent_state_on_startup"), - "Provides access to the cluster's IgnorePersistentStateOnStartup property, which specifies whether the cluster will bring online groups that were online when the cluster was shut down.", - []string{"name"}, - nil, - ) - c.logResourceControls = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "log_resource_controls"), - "Controls the logging of resource controls.", - []string{"name"}, - nil, - ) - c.lowerQuorumPriorityNodeId = 
prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "lower_quorum_priority_node_id"), - "Specifies the Node ID that has a lower priority when voting for quorum is performed. If the quorum vote is split 50/50%, the specified node's vote would be ignored to break the tie. If this is not set then the cluster will pick a node at random to break the tie.", - []string{"name"}, - nil, - ) - c.maxNumberOfNodes = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "max_number_of_nodes"), - "Indicates the maximum number of nodes that may participate in the Cluster.", - []string{"name"}, - nil, - ) - c.messageBufferLength = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "message_buffer_length"), - "The maximum unacknowledged message count for GEM.", - []string{"name"}, - nil, - ) - c.minimumNeverPreemptPriority = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "minimum_never_preempt_priority"), - "Groups with this priority or higher cannot be preempted.", - []string{"name"}, - nil, - ) - c.minimumPreemptorPriority = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "minimum_preemptor_priority"), - "Minimum priority a cluster group must have to be able to preempt another group.", - []string{"name"}, - nil, - ) - c.netftIPSecEnabled = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "netft_ip_sec_enabled"), - "Whether IPSec is enabled for cluster internal traffic.", - []string{"name"}, - nil, - ) - c.placementOptions = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "placement_options"), - "Various option flags to modify default placement behavior.", - []string{"name"}, - nil, - ) - c.plumbAllCrossSubnetRoutes = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "plumb_all_cross_subnet_routes"), - "Plumbs all possible cross subnet routes to all nodes.", - []string{"name"}, - nil, - ) - c.preventQuorum = prometheus.NewDesc( - 
prometheus.BuildFQName(types.Namespace, Name, "prevent_quorum"), - "Whether the cluster will ignore group persistent state on startup.", - []string{"name"}, - nil, - ) - c.quarantineDuration = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "quarantine_duration"), - "The quarantine period timeout in milliseconds.", - []string{"name"}, - nil, - ) - c.quarantineThreshold = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "quarantine_threshold"), - "Number of node failures before it will be quarantined.", - []string{"name"}, - nil, - ) - c.quorumArbitrationTimeMax = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "quorum_arbitration_time_max"), - "Controls the maximum time necessary to decide the Quorum owner node.", - []string{"name"}, - nil, - ) - c.quorumArbitrationTimeMin = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "quorum_arbitration_time_min"), - "Controls the minimum time necessary to decide the Quorum owner node.", - []string{"name"}, - nil, - ) - c.quorumLogFileSize = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "quorum_log_file_size"), - "This property is obsolete.", - []string{"name"}, - nil, - ) - c.quorumTypeValue = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "quorum_type_value"), - "Get the current quorum type value. 
-1: Unknown; 1: Node; 2: FileShareWitness; 3: Storage; 4: None", - []string{"name"}, - nil, - ) - c.requestReplyTimeout = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "request_reply_timeout"), - "Controls the request reply time-out period.", - []string{"name"}, - nil, - ) - c.resiliencyDefaultPeriod = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "resiliency_default_period"), - "The default resiliency period, in seconds, for the cluster.", - []string{"name"}, - nil, - ) - c.resiliencyLevel = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "resiliency_level"), - "The resiliency level for the cluster.", - []string{"name"}, - nil, - ) - c.resourceDllDeadlockPeriod = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "resource_dll_deadlock_period"), - "This property is obsolete.", - []string{"name"}, - nil, - ) - c.rootMemoryReserved = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "root_memory_reserved"), - "Controls the amount of memory reserved for the parent partition on all cluster nodes.", - []string{"name"}, - nil, - ) - c.routeHistoryLength = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "route_history_length"), - "The history length for routes to help finding network issues.", - []string{"name"}, - nil, - ) - c.s2DBusTypes = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "s2d_bus_types"), - "Bus types for storage spaces direct.", - []string{"name"}, - nil, - ) - c.s2DCacheDesiredState = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "s2d_cache_desired_state"), - "Desired state of the storage spaces direct cache.", - []string{"name"}, - nil, - ) - c.s2DCacheFlashReservePercent = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "s2d_cache_flash_reserve_percent"), - "Percentage of allocated flash space to utilize when caching.", - []string{"name"}, - nil, - ) - 
c.s2DCachePageSizeKBytes = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "s2d_cache_page_size_k_bytes"), - "Page size in KB used by S2D cache.", - []string{"name"}, - nil, - ) - c.s2DEnabled = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "s2d_enabled"), - "Whether direct attached storage (DAS) is enabled.", - []string{"name"}, - nil, - ) - c.s2DIOLatencyThreshold = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "s2dio_latency_threshold"), - "The I/O latency threshold for storage spaces direct.", - []string{"name"}, - nil, - ) - c.s2DOptimizations = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "s2d_optimizations"), - "Optimization flags for storage spaces direct.", - []string{"name"}, - nil, - ) - c.sameSubnetDelay = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "same_subnet_delay"), - "Controls how long the cluster network driver waits in milliseconds between sending Cluster Service heartbeats on the same subnet.", - []string{"name"}, - nil, - ) - c.sameSubnetThreshold = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "same_subnet_threshold"), - "Controls how many Cluster Service heartbeats can be missed on the same subnet before it determines that Cluster Service has stopped responding.", - []string{"name"}, - nil, - ) - c.securityLevel = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "security_level"), - "Controls the level of security that should apply to intracluster messages. 
0: Clear Text; 1: Sign; 2: Encrypt ", - []string{"name"}, - nil, - ) - c.securityLevelForStorage = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "security_level_for_storage"), - "(SecurityLevelForStorage)", - []string{"name"}, - nil, - ) - c.sharedVolumeVssWriterOperationTimeout = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "shared_volume_vss_writer_operation_timeout"), - "CSV VSS Writer operation timeout in seconds.", - []string{"name"}, - nil, - ) - c.shutdownTimeoutInMinutes = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "shutdown_timeout_in_minutes"), - "The maximum time in minutes allowed for cluster resources to come offline during cluster service shutdown.", - []string{"name"}, - nil, - ) - c.useClientAccessNetworksForSharedVolumes = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "use_client_access_networks_for_shared_volumes"), - "Whether the use of client access networks for cluster shared volumes feature of this cluster is enabled. 
0: Disabled; 1: Enabled; 2: Auto", - []string{"name"}, - nil, - ) - c.witnessDatabaseWriteTimeout = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "witness_database_write_timeout"), - "Controls the maximum time in seconds that a cluster database write to a witness can take before the write is abandoned.", - []string{"name"}, - nil, - ) - c.witnessDynamicWeight = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "witness_dynamic_weight"), - "The weight of the configured witness.", - []string{"name"}, - nil, - ) - c.witnessRestartInterval = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "witness_restart_interval"), - "Controls the witness restart interval.", - []string{"name"}, - nil, - ) - return nil -} - -// MSCluster_Cluster docs: +// msClusterCluster represents the MSCluster_Cluster WMI class // - https://docs.microsoft.com/en-us/previous-versions/windows/desktop/cluswmi/mscluster-cluster -type MSCluster_Cluster struct { +type msClusterCluster struct { Name string AddEvictDelay uint @@ -675,551 +93,1013 @@ type MSCluster_Cluster struct { WitnessRestartInterval uint } -// Collect sends the metric values for each metric -// to the provided prometheus Metric channel. 
-func (c *Collector) Collect(_ *types.ScrapeContext, logger log.Logger, ch chan<- prometheus.Metric) error { - logger = log.With(logger, "collector", Name) - var dst []MSCluster_Cluster - q := wmi.QueryAll(&dst, logger) +func (c *Collector) buildCluster() { + c.clusterAddEvictDelay = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "add_evict_delay"), + "Provides access to the cluster's AddEvictDelay property, which is the number a seconds that a new node is delayed after an eviction of another node.", + []string{"name"}, + nil, + ) + c.clusterAdminAccessPoint = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "admin_access_point"), + "The type of the cluster administrative access point.", + []string{"name"}, + nil, + ) + c.clusterAutoAssignNodeSite = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "auto_assign_node_site"), + "Determines whether or not the cluster will attempt to automatically assign nodes to sites based on networks and Active Directory Site information.", + []string{"name"}, + nil, + ) + c.clusterAutoBalancerLevel = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "auto_balancer_level"), + "Determines the level of aggressiveness of AutoBalancer.", + []string{"name"}, + nil, + ) + c.clusterAutoBalancerMode = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "auto_balancer_mode"), + "Determines whether or not the auto balancer is enabled.", + []string{"name"}, + nil, + ) + c.clusterBackupInProgress = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "backup_in_progress"), + "Indicates whether a backup is in progress.", + []string{"name"}, + nil, + ) + c.clusterBlockCacheSize = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "block_cache_size"), + "CSV BlockCache Size in MB.", + []string{"name"}, + nil, + ) + c.clusterClusSvcHangTimeout = prometheus.NewDesc( + 
prometheus.BuildFQName(types.Namespace, nameCluster, "clus_svc_hang_timeout"), + "Controls how long the cluster network driver waits between Failover Cluster Service heartbeats before it determines that the Failover Cluster Service has stopped responding.", + []string{"name"}, + nil, + ) + c.clusterClusSvcRegroupOpeningTimeout = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "clus_svc_regroup_opening_timeout"), + "Controls how long a node will wait on other nodes in the opening stage before deciding that they failed.", + []string{"name"}, + nil, + ) + c.clusterClusSvcRegroupPruningTimeout = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "clus_svc_regroup_pruning_timeout"), + "Controls how long the membership leader will wait to reach full connectivity between cluster nodes.", + []string{"name"}, + nil, + ) + c.clusterClusSvcRegroupStageTimeout = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "clus_svc_regroup_stage_timeout"), + "Controls how long a node will wait on other nodes in a membership stage before deciding that they failed.", + []string{"name"}, + nil, + ) + c.clusterClusSvcRegroupTickInMilliseconds = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "clus_svc_regroup_tick_in_milliseconds"), + "Controls how frequently the membership algorithm is sending periodic membership messages.", + []string{"name"}, + nil, + ) + c.clusterClusterEnforcedAntiAffinity = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "cluster_enforced_anti_affinity"), + "Enables or disables hard enforcement of group anti-affinity classes.", + []string{"name"}, + nil, + ) + c.clusterClusterFunctionalLevel = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "cluster_functional_level"), + "The functional level the cluster is currently running in.", + []string{"name"}, + nil, + ) + c.clusterClusterGroupWaitDelay = prometheus.NewDesc( 
+ prometheus.BuildFQName(types.Namespace, nameCluster, "cluster_group_wait_delay"), + "Maximum time in seconds that a group waits for its preferred node to come online during cluster startup before coming online on a different node.", + []string{"name"}, + nil, + ) + c.clusterClusterLogLevel = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "cluster_log_level"), + "Controls the level of cluster logging.", + []string{"name"}, + nil, + ) + c.clusterClusterLogSize = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "cluster_log_size"), + "Controls the maximum size of the cluster log files on each of the nodes.", + []string{"name"}, + nil, + ) + c.clusterClusterUpgradeVersion = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "cluster_upgrade_version"), + "Specifies the upgrade version the cluster is currently running in.", + []string{"name"}, + nil, + ) + c.clusterCrossSiteDelay = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "cross_site_delay"), + "Controls how long the cluster network driver waits in milliseconds between sending Cluster Service heartbeats across sites.", + []string{"name"}, + nil, + ) + c.clusterCrossSiteThreshold = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "cross_site_threshold"), + "Controls how many Cluster Service heartbeats can be missed across sites before it determines that Cluster Service has stopped responding.", + []string{"name"}, + nil, + ) + c.clusterCrossSubnetDelay = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "cross_subnet_delay"), + "Controls how long the cluster network driver waits in milliseconds between sending Cluster Service heartbeats across subnets.", + []string{"name"}, + nil, + ) + c.clusterCrossSubnetThreshold = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "cross_subnet_threshold"), + "Controls how many Cluster Service 
heartbeats can be missed across subnets before it determines that Cluster Service has stopped responding.", + []string{"name"}, + nil, + ) + c.clusterCsvBalancer = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "csv_balancer"), + "Whether automatic balancing for CSV is enabled.", + []string{"name"}, + nil, + ) + c.clusterDatabaseReadWriteMode = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "database_read_write_mode"), + "Sets the database read and write mode.", + []string{"name"}, + nil, + ) + c.clusterDefaultNetworkRole = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "default_network_role"), + "Provides access to the cluster's DefaultNetworkRole property.", + []string{"name"}, + nil, + ) + c.clusterDetectedCloudPlatform = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "detected_cloud_platform"), + "(DetectedCloudPlatform)", + []string{"name"}, + nil, + ) + c.clusterDetectManagedEvents = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "detect_managed_events"), + "(DetectManagedEvents)", + []string{"name"}, + nil, + ) + c.clusterDetectManagedEventsThreshold = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "detect_managed_events_threshold"), + "(DetectManagedEventsThreshold)", + []string{"name"}, + nil, + ) + c.clusterDisableGroupPreferredOwnerRandomization = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "disable_group_preferred_owner_randomization"), + "(DisableGroupPreferredOwnerRandomization)", + []string{"name"}, + nil, + ) + c.clusterDrainOnShutdown = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "drain_on_shutdown"), + "Whether to drain the node when cluster service is being stopped.", + []string{"name"}, + nil, + ) + c.clusterDynamicQuorumEnabled = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, 
"dynamic_quorum_enabled"), + "Allows cluster service to adjust node weights as needed to increase availability.", + []string{"name"}, + nil, + ) + c.clusterEnableSharedVolumes = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "enable_shared_volumes"), + "Enables or disables cluster shared volumes on this cluster.", + []string{"name"}, + nil, + ) + c.clusterFixQuorum = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "fix_quorum"), + "Provides access to the cluster's FixQuorum property, which specifies if the cluster is in a fix quorum state.", + []string{"name"}, + nil, + ) + c.clusterGracePeriodEnabled = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "grace_period_enabled"), + "Whether the node grace period feature of this cluster is enabled.", + []string{"name"}, + nil, + ) + c.clusterGracePeriodTimeout = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "grace_period_timeout"), + "The grace period timeout in milliseconds.", + []string{"name"}, + nil, + ) + c.clusterGroupDependencyTimeout = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "group_dependency_timeout"), + "The timeout after which a group will be brought online despite unsatisfied dependencies", + []string{"name"}, + nil, + ) + c.clusterHangRecoveryAction = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "hang_recovery_action"), + "Controls the action to take if the user-mode processes have stopped responding.", + []string{"name"}, + nil, + ) + c.clusterIgnorePersistentStateOnStartup = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "ignore_persistent_state_on_startup"), + "Provides access to the cluster's IgnorePersistentStateOnStartup property, which specifies whether the cluster will bring online groups that were online when the cluster was shut down.", + []string{"name"}, + nil, + ) + c.clusterLogResourceControls 
= prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "log_resource_controls"), + "Controls the logging of resource controls.", + []string{"name"}, + nil, + ) + c.clusterLowerQuorumPriorityNodeId = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "lower_quorum_priority_node_id"), + "Specifies the Node ID that has a lower priority when voting for quorum is performed. If the quorum vote is split 50/50%, the specified node's vote would be ignored to break the tie. If this is not set then the cluster will pick a node at random to break the tie.", + []string{"name"}, + nil, + ) + c.clusterMaxNumberOfNodes = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "max_number_of_nodes"), + "Indicates the maximum number of nodes that may participate in the Cluster.", + []string{"name"}, + nil, + ) + c.clusterMessageBufferLength = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "message_buffer_length"), + "The maximum unacknowledged message count for GEM.", + []string{"name"}, + nil, + ) + c.clusterMinimumNeverPreemptPriority = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "minimum_never_preempt_priority"), + "Groups with this priority or higher cannot be preempted.", + []string{"name"}, + nil, + ) + c.clusterMinimumPreemptorPriority = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "minimum_preemptor_priority"), + "Minimum priority a cluster group must have to be able to preempt another group.", + []string{"name"}, + nil, + ) + c.clusterNetftIPSecEnabled = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "netft_ip_sec_enabled"), + "Whether IPSec is enabled for cluster internal traffic.cluster", + []string{"name"}, + nil, + ) + c.clusterPlacementOptions = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "placement_options"), + "Various option flags to modify default 
placement behavior.", + []string{"name"}, + nil, + ) + c.clusterPlumbAllCrossSubnetRoutes = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "plumb_all_cross_subnet_routes"), + "Plumbs all possible cross subnet routes to all nodes.", + []string{"name"}, + nil, + ) + c.clusterPreventQuorum = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "prevent_quorum"), + "Whether the cluster will ignore group persistent state on startup.", + []string{"name"}, + nil, + ) + c.clusterQuarantineDuration = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "quarantine_duration"), + "The quarantine period timeout in milliseconds.", + []string{"name"}, + nil, + ) + c.clusterQuarantineThreshold = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "quarantine_threshold"), + "Number of node failures before it will be quarantined.", + []string{"name"}, + nil, + ) + c.clusterQuorumArbitrationTimeMax = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "quorum_arbitration_time_max"), + "Controls the maximum time necessary to decide the Quorum owner node.", + []string{"name"}, + nil, + ) + c.clusterQuorumArbitrationTimeMin = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "quorum_arbitration_time_min"), + "Controls the minimum time necessary to decide the Quorum owner node.", + []string{"name"}, + nil, + ) + c.clusterQuorumLogFileSize = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "quorum_log_file_size"), + "This property is obsolete.", + []string{"name"}, + nil, + ) + c.clusterQuorumTypeValue = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "quorum_type_value"), + "Get the current quorum type value. 
-1: Unknown; 1: Node; 2: FileShareWitness; 3: Storage; 4: None", + []string{"name"}, + nil, + ) + c.clusterRequestReplyTimeout = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "request_reply_timeout"), + "Controls the request reply time-out period.", + []string{"name"}, + nil, + ) + c.clusterResiliencyDefaultPeriod = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "resiliency_default_period"), + "The default resiliency period, in seconds, for the cluster.", + []string{"name"}, + nil, + ) + c.clusterResiliencyLevel = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "resiliency_level"), + "The resiliency level for the cluster.", + []string{"name"}, + nil, + ) + c.clusterResourceDllDeadlockPeriod = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "resource_dll_deadlock_period"), + "This property is obsolete.", + []string{"name"}, + nil, + ) + c.clusterRootMemoryReserved = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "root_memory_reserved"), + "Controls the amount of memory reserved for the parent partition on all cluster nodes.", + []string{"name"}, + nil, + ) + c.clusterRouteHistoryLength = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "route_history_length"), + "The history length for routes to help finding network issues.", + []string{"name"}, + nil, + ) + c.clusterS2DBusTypes = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "s2d_bus_types"), + "Bus types for storage spaces direct.", + []string{"name"}, + nil, + ) + c.clusterS2DCacheDesiredState = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "s2d_cache_desired_state"), + "Desired state of the storage spaces direct cache.", + []string{"name"}, + nil, + ) + c.clusterS2DCacheFlashReservePercent = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, 
"s2d_cache_flash_reserve_percent"), + "Percentage of allocated flash space to utilize when caching.", + []string{"name"}, + nil, + ) + c.clusterS2DCachePageSizeKBytes = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "s2d_cache_page_size_k_bytes"), + "Page size in KB used by S2D cache.", + []string{"name"}, + nil, + ) + c.clusterS2DEnabled = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "s2d_enabled"), + "Whether direct attached storage (DAS) is enabled.", + []string{"name"}, + nil, + ) + c.clusterS2DIOLatencyThreshold = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "s2dio_latency_threshold"), + "The I/O latency threshold for storage spaces direct.", + []string{"name"}, + nil, + ) + c.clusterS2DOptimizations = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "s2d_optimizations"), + "Optimization flags for storage spaces direct.", + []string{"name"}, + nil, + ) + c.clusterSameSubnetDelay = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "same_subnet_delay"), + "Controls how long the cluster network driver waits in milliseconds between sending Cluster Service heartbeats on the same subnet.", + []string{"name"}, + nil, + ) + c.clusterSameSubnetThreshold = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "same_subnet_threshold"), + "Controls how many Cluster Service heartbeats can be missed on the same subnet before it determines that Cluster Service has stopped responding.", + []string{"name"}, + nil, + ) + c.clusterSecurityLevel = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "security_level"), + "Controls the level of security that should apply to intracluster messages. 
0: Clear Text; 1: Sign; 2: Encrypt ", + []string{"name"}, + nil, + ) + c.clusterSecurityLevelForStorage = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "security_level_for_storage"), + "(SecurityLevelForStorage)", + []string{"name"}, + nil, + ) + c.clusterSharedVolumeVssWriterOperationTimeout = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "shared_volume_vss_writer_operation_timeout"), + "CSV VSS Writer operation timeout in seconds.", + []string{"name"}, + nil, + ) + c.clusterShutdownTimeoutInMinutes = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "shutdown_timeout_in_minutes"), + "The maximum time in minutes allowed for cluster resources to come offline during cluster service shutdown.", + []string{"name"}, + nil, + ) + c.clusterUseClientAccessNetworksForSharedVolumes = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "use_client_access_networks_for_shared_volumes"), + "Whether the use of client access networks for cluster shared volumes feature of this cluster is enabled. 
0: Disabled; 1: Enabled; 2: Auto", + []string{"name"}, + nil, + ) + c.clusterWitnessDatabaseWriteTimeout = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "witness_database_write_timeout"), + "Controls the maximum time in seconds that a cluster database write to a witness can take before the write is abandoned.", + []string{"name"}, + nil, + ) + c.clusterWitnessDynamicWeight = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "witness_dynamic_weight"), + "The weight of the configured witness.", + []string{"name"}, + nil, + ) + c.clusterWitnessRestartInterval = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameCluster, "witness_restart_interval"), + "Controls the witness restart interval.", + []string{"name"}, + nil, + ) +} + +func (c *Collector) collectCluster(logger log.Logger, ch chan<- prometheus.Metric) error { + var dst []msClusterCluster + q := wmi.QueryAllForClass(&dst, "MSCluster_Cluster", logger) if err := wmi.QueryNamespace(q, &dst, "root/MSCluster"); err != nil { return err } for _, v := range dst { ch <- prometheus.MustNewConstMetric( - c.addEvictDelay, + c.clusterAddEvictDelay, prometheus.GaugeValue, float64(v.AddEvictDelay), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.adminAccessPoint, + c.clusterAdminAccessPoint, prometheus.GaugeValue, float64(v.AdminAccessPoint), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.autoAssignNodeSite, + c.clusterAutoAssignNodeSite, prometheus.GaugeValue, float64(v.AutoAssignNodeSite), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.autoBalancerLevel, + c.clusterAutoBalancerLevel, prometheus.GaugeValue, float64(v.AutoBalancerLevel), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.autoBalancerMode, + c.clusterAutoBalancerMode, prometheus.GaugeValue, float64(v.AutoBalancerMode), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.backupInProgress, + c.clusterBackupInProgress, prometheus.GaugeValue, float64(v.BackupInProgress), v.Name, ) ch <- 
prometheus.MustNewConstMetric( - c.blockCacheSize, + c.clusterBlockCacheSize, prometheus.GaugeValue, float64(v.BlockCacheSize), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.clusSvcHangTimeout, + c.clusterClusSvcHangTimeout, prometheus.GaugeValue, float64(v.ClusSvcHangTimeout), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.clusSvcRegroupOpeningTimeout, + c.clusterClusSvcRegroupOpeningTimeout, prometheus.GaugeValue, float64(v.ClusSvcRegroupOpeningTimeout), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.clusSvcRegroupPruningTimeout, + c.clusterClusSvcRegroupPruningTimeout, prometheus.GaugeValue, float64(v.ClusSvcRegroupPruningTimeout), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.clusSvcRegroupStageTimeout, + c.clusterClusSvcRegroupStageTimeout, prometheus.GaugeValue, float64(v.ClusSvcRegroupStageTimeout), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.clusSvcRegroupTickInMilliseconds, + c.clusterClusSvcRegroupTickInMilliseconds, prometheus.GaugeValue, float64(v.ClusSvcRegroupTickInMilliseconds), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.clusterEnforcedAntiAffinity, + c.clusterClusterEnforcedAntiAffinity, prometheus.GaugeValue, float64(v.ClusterEnforcedAntiAffinity), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.clusterFunctionalLevel, + c.clusterClusterFunctionalLevel, prometheus.GaugeValue, float64(v.ClusterFunctionalLevel), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.clusterGroupWaitDelay, + c.clusterClusterGroupWaitDelay, prometheus.GaugeValue, float64(v.ClusterGroupWaitDelay), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.clusterLogLevel, + c.clusterClusterLogLevel, prometheus.GaugeValue, float64(v.ClusterLogLevel), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.clusterLogSize, + c.clusterClusterLogSize, prometheus.GaugeValue, float64(v.ClusterLogSize), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.clusterUpgradeVersion, + c.clusterClusterUpgradeVersion, prometheus.GaugeValue, 
float64(v.ClusterUpgradeVersion), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.crossSiteDelay, + c.clusterCrossSiteDelay, prometheus.GaugeValue, float64(v.CrossSiteDelay), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.crossSiteThreshold, + c.clusterCrossSiteThreshold, prometheus.GaugeValue, float64(v.CrossSiteThreshold), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.crossSubnetDelay, + c.clusterCrossSubnetDelay, prometheus.GaugeValue, float64(v.CrossSubnetDelay), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.crossSubnetThreshold, + c.clusterCrossSubnetThreshold, prometheus.GaugeValue, float64(v.CrossSubnetThreshold), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.csvBalancer, + c.clusterCsvBalancer, prometheus.GaugeValue, float64(v.CsvBalancer), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.databaseReadWriteMode, + c.clusterDatabaseReadWriteMode, prometheus.GaugeValue, float64(v.DatabaseReadWriteMode), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.defaultNetworkRole, + c.clusterDefaultNetworkRole, prometheus.GaugeValue, float64(v.DefaultNetworkRole), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.detectedCloudPlatform, + c.clusterDetectedCloudPlatform, prometheus.GaugeValue, float64(v.DetectedCloudPlatform), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.detectManagedEvents, + c.clusterDetectManagedEvents, prometheus.GaugeValue, float64(v.DetectManagedEvents), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.detectManagedEventsThreshold, + c.clusterDetectManagedEventsThreshold, prometheus.GaugeValue, float64(v.DetectManagedEventsThreshold), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.disableGroupPreferredOwnerRandomization, + c.clusterDisableGroupPreferredOwnerRandomization, prometheus.GaugeValue, float64(v.DisableGroupPreferredOwnerRandomization), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.drainOnShutdown, + c.clusterDrainOnShutdown, prometheus.GaugeValue, float64(v.DrainOnShutdown), v.Name, ) ch <- 
prometheus.MustNewConstMetric( - c.dynamicQuorumEnabled, + c.clusterDynamicQuorumEnabled, prometheus.GaugeValue, float64(v.DynamicQuorumEnabled), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.enableSharedVolumes, + c.clusterEnableSharedVolumes, prometheus.GaugeValue, float64(v.EnableSharedVolumes), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.fixQuorum, + c.clusterFixQuorum, prometheus.GaugeValue, float64(v.FixQuorum), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.gracePeriodEnabled, + c.clusterGracePeriodEnabled, prometheus.GaugeValue, float64(v.GracePeriodEnabled), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.gracePeriodTimeout, + c.clusterGracePeriodTimeout, prometheus.GaugeValue, float64(v.GracePeriodTimeout), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.groupDependencyTimeout, + c.clusterGroupDependencyTimeout, prometheus.GaugeValue, float64(v.GroupDependencyTimeout), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.hangRecoveryAction, + c.clusterHangRecoveryAction, prometheus.GaugeValue, float64(v.HangRecoveryAction), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.ignorePersistentStateOnStartup, + c.clusterIgnorePersistentStateOnStartup, prometheus.GaugeValue, float64(v.IgnorePersistentStateOnStartup), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.logResourceControls, + c.clusterLogResourceControls, prometheus.GaugeValue, float64(v.LogResourceControls), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.lowerQuorumPriorityNodeId, + c.clusterLowerQuorumPriorityNodeId, prometheus.GaugeValue, float64(v.LowerQuorumPriorityNodeId), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.maxNumberOfNodes, + c.clusterMaxNumberOfNodes, prometheus.GaugeValue, float64(v.MaxNumberOfNodes), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.messageBufferLength, + c.clusterMessageBufferLength, prometheus.GaugeValue, float64(v.MessageBufferLength), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.minimumNeverPreemptPriority, + 
c.clusterMinimumNeverPreemptPriority, prometheus.GaugeValue, float64(v.MinimumNeverPreemptPriority), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.minimumPreemptorPriority, + c.clusterMinimumPreemptorPriority, prometheus.GaugeValue, float64(v.MinimumPreemptorPriority), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.netftIPSecEnabled, + c.clusterNetftIPSecEnabled, prometheus.GaugeValue, float64(v.NetftIPSecEnabled), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.placementOptions, + c.clusterPlacementOptions, prometheus.GaugeValue, float64(v.PlacementOptions), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.plumbAllCrossSubnetRoutes, + c.clusterPlumbAllCrossSubnetRoutes, prometheus.GaugeValue, float64(v.PlumbAllCrossSubnetRoutes), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.preventQuorum, + c.clusterPreventQuorum, prometheus.GaugeValue, float64(v.PreventQuorum), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.quarantineDuration, + c.clusterQuarantineDuration, prometheus.GaugeValue, float64(v.QuarantineDuration), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.quarantineThreshold, + c.clusterQuarantineThreshold, prometheus.GaugeValue, float64(v.QuarantineThreshold), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.quorumArbitrationTimeMax, + c.clusterQuorumArbitrationTimeMax, prometheus.GaugeValue, float64(v.QuorumArbitrationTimeMax), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.quorumArbitrationTimeMin, + c.clusterQuorumArbitrationTimeMin, prometheus.GaugeValue, float64(v.QuorumArbitrationTimeMin), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.quorumLogFileSize, + c.clusterQuorumLogFileSize, prometheus.GaugeValue, float64(v.QuorumLogFileSize), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.quorumTypeValue, + c.clusterQuorumTypeValue, prometheus.GaugeValue, float64(v.QuorumTypeValue), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.requestReplyTimeout, + c.clusterRequestReplyTimeout, prometheus.GaugeValue, 
float64(v.RequestReplyTimeout), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.resiliencyDefaultPeriod, + c.clusterResiliencyDefaultPeriod, prometheus.GaugeValue, float64(v.ResiliencyDefaultPeriod), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.resiliencyLevel, + c.clusterResiliencyLevel, prometheus.GaugeValue, float64(v.ResiliencyLevel), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.resourceDllDeadlockPeriod, + c.clusterResourceDllDeadlockPeriod, prometheus.GaugeValue, float64(v.ResourceDllDeadlockPeriod), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.rootMemoryReserved, + c.clusterRootMemoryReserved, prometheus.GaugeValue, float64(v.RootMemoryReserved), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.routeHistoryLength, + c.clusterRouteHistoryLength, prometheus.GaugeValue, float64(v.RouteHistoryLength), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.s2DBusTypes, + c.clusterS2DBusTypes, prometheus.GaugeValue, float64(v.S2DBusTypes), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.s2DCacheDesiredState, + c.clusterS2DCacheDesiredState, prometheus.GaugeValue, float64(v.S2DCacheDesiredState), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.s2DCacheFlashReservePercent, + c.clusterS2DCacheFlashReservePercent, prometheus.GaugeValue, float64(v.S2DCacheFlashReservePercent), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.s2DCachePageSizeKBytes, + c.clusterS2DCachePageSizeKBytes, prometheus.GaugeValue, float64(v.S2DCachePageSizeKBytes), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.s2DEnabled, + c.clusterS2DEnabled, prometheus.GaugeValue, float64(v.S2DEnabled), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.s2DIOLatencyThreshold, + c.clusterS2DIOLatencyThreshold, prometheus.GaugeValue, float64(v.S2DIOLatencyThreshold), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.s2DOptimizations, + c.clusterS2DOptimizations, prometheus.GaugeValue, float64(v.S2DOptimizations), v.Name, ) ch <- prometheus.MustNewConstMetric( - 
c.sameSubnetDelay, + c.clusterSameSubnetDelay, prometheus.GaugeValue, float64(v.SameSubnetDelay), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.sameSubnetThreshold, + c.clusterSameSubnetThreshold, prometheus.GaugeValue, float64(v.SameSubnetThreshold), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.securityLevel, + c.clusterSecurityLevel, prometheus.GaugeValue, float64(v.SecurityLevel), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.securityLevelForStorage, + c.clusterSecurityLevelForStorage, prometheus.GaugeValue, float64(v.SecurityLevelForStorage), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.sharedVolumeVssWriterOperationTimeout, + c.clusterSharedVolumeVssWriterOperationTimeout, prometheus.GaugeValue, float64(v.SharedVolumeVssWriterOperationTimeout), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.shutdownTimeoutInMinutes, + c.clusterShutdownTimeoutInMinutes, prometheus.GaugeValue, float64(v.ShutdownTimeoutInMinutes), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.useClientAccessNetworksForSharedVolumes, + c.clusterUseClientAccessNetworksForSharedVolumes, prometheus.GaugeValue, float64(v.UseClientAccessNetworksForSharedVolumes), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.witnessDatabaseWriteTimeout, + c.clusterWitnessDatabaseWriteTimeout, prometheus.GaugeValue, float64(v.WitnessDatabaseWriteTimeout), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.witnessDynamicWeight, + c.clusterWitnessDynamicWeight, prometheus.GaugeValue, float64(v.WitnessDynamicWeight), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.witnessRestartInterval, + c.clusterWitnessRestartInterval, prometheus.GaugeValue, float64(v.WitnessRestartInterval), v.Name, diff --git a/pkg/collector/mscluster_network/mscluster_network.go b/pkg/collector/mscluster/mscluster_network.go similarity index 51% rename from pkg/collector/mscluster_network/mscluster_network.go rename to pkg/collector/mscluster/mscluster_network.go index e9cbae7a..024b1a72 100644 --- 
a/pkg/collector/mscluster_network/mscluster_network.go +++ b/pkg/collector/mscluster/mscluster_network.go @@ -1,95 +1,17 @@ -package mscluster_network +package mscluster import ( - "github.com/alecthomas/kingpin/v2" "github.com/go-kit/log" "github.com/prometheus-community/windows_exporter/pkg/types" "github.com/prometheus-community/windows_exporter/pkg/wmi" "github.com/prometheus/client_golang/prometheus" ) -const Name = "mscluster_network" +const nameNetwork = Name + "_network" -type Config struct{} - -var ConfigDefaults = Config{} - -// A Collector is a Prometheus Collector for WMI MSCluster_Network metrics. -type Collector struct { - config Config - - characteristics *prometheus.Desc - flags *prometheus.Desc - metric *prometheus.Desc - role *prometheus.Desc - state *prometheus.Desc -} - -func New(config *Config) *Collector { - if config == nil { - config = &ConfigDefaults - } - - c := &Collector{ - config: *config, - } - - return c -} - -func NewWithFlags(_ *kingpin.Application) *Collector { - return &Collector{} -} - -func (c *Collector) GetName() string { - return Name -} - -func (c *Collector) GetPerfCounter(_ log.Logger) ([]string, error) { - return []string{"Memory"}, nil -} - -func (c *Collector) Close() error { - return nil -} - -func (c *Collector) Build(_ log.Logger) error { - c.characteristics = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "characteristics"), - "Provides the characteristics of the network.", - []string{"name"}, - nil, - ) - c.flags = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "flags"), - "Provides access to the flags set for the node. ", - []string{"name"}, - nil, - ) - c.metric = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "metric"), - "The metric of a cluster network (networks with lower values are used first). 
If this value is set, then the AutoMetric property is set to false.", - []string{"name"}, - nil, - ) - c.role = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "role"), - "Provides access to the network's Role property. The Role property describes the role of the network in the cluster. 0: None; 1: Cluster; 2: Client; 3: Both ", - []string{"name"}, - nil, - ) - c.state = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "state"), - "Provides the current state of the network. 1-1: Unknown; 0: Unavailable; 1: Down; 2: Partitioned; 3: Up", - []string{"name"}, - nil, - ) - return nil -} - -// MSCluster_Network docs: +// msClusterNetwork represents the MSCluster_Network WMI class // - https://docs.microsoft.com/en-us/previous-versions/windows/desktop/cluswmi/mscluster-network -type MSCluster_Network struct { +type msClusterNetwork struct { Name string Characteristics uint @@ -99,47 +21,80 @@ type MSCluster_Network struct { State uint } +func (c *Collector) buildNetwork() { + c.networkCharacteristics = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameNetwork, "characteristics"), + "Provides the characteristics of the network.", + []string{"name"}, + nil, + ) + c.networkFlags = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameNetwork, "flags"), + "Provides access to the flags set for the node. ", + []string{"name"}, + nil, + ) + c.networkMetric = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameNetwork, "metric"), + "The metric of a cluster network (networks with lower values are used first). If this value is set, then the AutoMetric property is set to false.", + []string{"name"}, + nil, + ) + c.networkRole = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameNetwork, "role"), + "Provides access to the network's Role property. The Role property describes the role of the network in the cluster. 
0: None; 1: Cluster; 2: Client; 3: Both ", + []string{"name"}, + nil, + ) + c.networkState = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameNetwork, "state"), + "Provides the current state of the network. 1-1: Unknown; 0: Unavailable; 1: Down; 2: Partitioned; 3: Up", + []string{"name"}, + nil, + ) +} + // Collect sends the metric values for each metric // to the provided prometheus metric channel. -func (c *Collector) Collect(_ *types.ScrapeContext, logger log.Logger, ch chan<- prometheus.Metric) error { - logger = log.With(logger, "collector", Name) - var dst []MSCluster_Network - q := wmi.QueryAll(&dst, logger) +func (c *Collector) collectNetwork(logger log.Logger, ch chan<- prometheus.Metric) error { + var dst []msClusterNetwork + + q := wmi.QueryAllForClass(&dst, "MSCluster_Network", logger) if err := wmi.QueryNamespace(q, &dst, "root/MSCluster"); err != nil { return err } for _, v := range dst { ch <- prometheus.MustNewConstMetric( - c.characteristics, + c.networkCharacteristics, prometheus.GaugeValue, float64(v.Characteristics), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.flags, + c.networkFlags, prometheus.GaugeValue, float64(v.Flags), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.metric, + c.networkMetric, prometheus.GaugeValue, float64(v.Metric), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.role, + c.networkRole, prometheus.GaugeValue, float64(v.Role), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.state, + c.networkState, prometheus.GaugeValue, float64(v.State), v.Name, diff --git a/pkg/collector/mscluster_node/mscluster_node.go b/pkg/collector/mscluster/mscluster_node.go similarity index 53% rename from pkg/collector/mscluster_node/mscluster_node.go rename to pkg/collector/mscluster/mscluster_node.go index 8a7827ed..ec7942ef 100644 --- a/pkg/collector/mscluster_node/mscluster_node.go +++ b/pkg/collector/mscluster/mscluster_node.go @@ -1,161 +1,17 @@ -package mscluster_node +package mscluster import ( - 
"github.com/alecthomas/kingpin/v2" "github.com/go-kit/log" "github.com/prometheus-community/windows_exporter/pkg/types" "github.com/prometheus-community/windows_exporter/pkg/wmi" "github.com/prometheus/client_golang/prometheus" ) -const Name = "mscluster_node" +const nameNode = Name + "_node" -type Config struct{} - -var ConfigDefaults = Config{} - -// Variable used by mscluster_resource and mscluster_resourcegroup. -var NodeName []string - -// A Collector is a Prometheus Collector for WMI MSCluster_Node metrics. -type Collector struct { - config Config - - buildNumber *prometheus.Desc - characteristics *prometheus.Desc - detectedCloudPlatform *prometheus.Desc - dynamicWeight *prometheus.Desc - flags *prometheus.Desc - majorVersion *prometheus.Desc - minorVersion *prometheus.Desc - needsPreventQuorum *prometheus.Desc - nodeDrainStatus *prometheus.Desc - nodeHighestVersion *prometheus.Desc - nodeLowestVersion *prometheus.Desc - nodeWeight *prometheus.Desc - state *prometheus.Desc - statusInformation *prometheus.Desc -} - -func New(config *Config) *Collector { - if config == nil { - config = &ConfigDefaults - } - - c := &Collector{ - config: *config, - } - - return c -} - -func NewWithFlags(_ *kingpin.Application) *Collector { - return &Collector{} -} - -func (c *Collector) GetName() string { - return Name -} - -func (c *Collector) GetPerfCounter(_ log.Logger) ([]string, error) { - return []string{"Memory"}, nil -} - -func (c *Collector) Close() error { - return nil -} - -func (c *Collector) Build(_ log.Logger) error { - c.buildNumber = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "build_number"), - "Provides access to the node's BuildNumber property.", - []string{"name"}, - nil, - ) - c.characteristics = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "characteristics"), - "Provides access to the characteristics set for the node.", - []string{"name"}, - nil, - ) - c.detectedCloudPlatform = prometheus.NewDesc( - 
prometheus.BuildFQName(types.Namespace, Name, "detected_cloud_platform"), - "(DetectedCloudPlatform)", - []string{"name"}, - nil, - ) - c.dynamicWeight = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "dynamic_weight"), - "The dynamic vote weight of the node adjusted by dynamic quorum feature.", - []string{"name"}, - nil, - ) - c.flags = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "flags"), - "Provides access to the flags set for the node.", - []string{"name"}, - nil, - ) - c.majorVersion = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "major_version"), - "Provides access to the node's MajorVersion property, which specifies the major portion of the Windows version installed.", - []string{"name"}, - nil, - ) - c.minorVersion = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "minor_version"), - "Provides access to the node's MinorVersion property, which specifies the minor portion of the Windows version installed.", - []string{"name"}, - nil, - ) - c.needsPreventQuorum = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "needs_prevent_quorum"), - "Whether the cluster service on that node should be started with prevent quorum flag.", - []string{"name"}, - nil, - ) - c.nodeDrainStatus = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "node_drain_status"), - "The current node drain status of a node. 
0: Not Initiated; 1: In Progress; 2: Completed; 3: Failed", - []string{"name"}, - nil, - ) - c.nodeHighestVersion = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "node_highest_version"), - "Provides access to the node's NodeHighestVersion property, which specifies the highest possible version of the cluster service with which the node can join or communicate.", - []string{"name"}, - nil, - ) - c.nodeLowestVersion = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "node_lowest_version"), - "Provides access to the node's NodeLowestVersion property, which specifies the lowest possible version of the cluster service with which the node can join or communicate.", - []string{"name"}, - nil, - ) - c.nodeWeight = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "node_weight"), - "The vote weight of the node.", - []string{"name"}, - nil, - ) - c.state = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "state"), - "Returns the current state of a node. 
-1: Unknown; 0: Up; 1: Down; 2: Paused; 3: Joining", - []string{"name"}, - nil, - ) - c.statusInformation = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "status_information"), - "The isolation or quarantine status of the node.", - []string{"name"}, - nil, - ) - return nil -} - -// MSCluster_Node docs: +// msClusterNode represents the MSCluster_Node WMI class // - https://docs.microsoft.com/en-us/previous-versions/windows/desktop/cluswmi/mscluster-node -type MSCluster_Node struct { +type msClusterNode struct { Name string BuildNumber uint @@ -174,119 +30,206 @@ type MSCluster_Node struct { StatusInformation uint } +func (c *Collector) buildNode() { + c.nodeBuildNumber = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameNode, "build_number"), + "Provides access to the node's BuildNumber property.", + []string{"name"}, + nil, + ) + c.nodeCharacteristics = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameNode, "characteristics"), + "Provides access to the characteristics set for the node.", + []string{"name"}, + nil, + ) + c.nodeDetectedCloudPlatform = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameNode, "detected_cloud_platform"), + "(DetectedCloudPlatform)", + []string{"name"}, + nil, + ) + c.nodeDynamicWeight = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameNode, "dynamic_weight"), + "The dynamic vote weight of the node adjusted by dynamic quorum feature.", + []string{"name"}, + nil, + ) + c.nodeFlags = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameNode, "flags"), + "Provides access to the flags set for the node.", + []string{"name"}, + nil, + ) + c.nodeMajorVersion = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameNode, "major_version"), + "Provides access to the node's MajorVersion property, which specifies the major portion of the Windows version installed.", + []string{"name"}, + nil, + ) + c.nodeMinorVersion = prometheus.NewDesc( + 
prometheus.BuildFQName(types.Namespace, nameNode, "minor_version"), + "Provides access to the node's MinorVersion property, which specifies the minor portion of the Windows version installed.", + []string{"name"}, + nil, + ) + c.nodeNeedsPreventQuorum = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameNode, "needs_prevent_quorum"), + "Whether the cluster service on that node should be started with prevent quorum flag.", + []string{"name"}, + nil, + ) + c.nodeNodeDrainStatus = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameNode, "node_drain_status"), + "The current node drain status of a node. 0: Not Initiated; 1: In Progress; 2: Completed; 3: Failed", + []string{"name"}, + nil, + ) + c.nodeNodeHighestVersion = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameNode, "node_highest_version"), + "Provides access to the node's NodeHighestVersion property, which specifies the highest possible version of the cluster service with which the node can join or communicate.", + []string{"name"}, + nil, + ) + c.nodeNodeLowestVersion = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameNode, "node_lowest_version"), + "Provides access to the node's NodeLowestVersion property, which specifies the lowest possible version of the cluster service with which the node can join or communicate.", + []string{"name"}, + nil, + ) + c.nodeNodeWeight = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameNode, "node_weight"), + "The vote weight of the node.", + []string{"name"}, + nil, + ) + c.nodeState = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameNode, "state"), + "Returns the current state of a node. 
-1: Unknown; 0: Up; 1: Down; 2: Paused; 3: Joining", + []string{"name"}, + nil, + ) + c.nodeStatusInformation = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameNode, "status_information"), + "The isolation or quarantine status of the node.", + []string{"name"}, + nil, + ) +} + // Collect sends the metric values for each metric // to the provided prometheus Metric channel. -func (c *Collector) Collect(_ *types.ScrapeContext, logger log.Logger, ch chan<- prometheus.Metric) error { - logger = log.With(logger, "collector", Name) - var dst []MSCluster_Node - q := wmi.QueryAll(&dst, logger) +func (c *Collector) collectNode(logger log.Logger, ch chan<- prometheus.Metric) ([]string, error) { + var dst []msClusterNode + + q := wmi.QueryAllForClass(&dst, "MSCluster_Node", logger) if err := wmi.QueryNamespace(q, &dst, "root/MSCluster"); err != nil { - return err + return nil, err } - NodeName = []string{} + nodeNames := make([]string, 0, len(dst)) for _, v := range dst { ch <- prometheus.MustNewConstMetric( - c.buildNumber, + c.nodeBuildNumber, prometheus.GaugeValue, float64(v.BuildNumber), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.characteristics, + c.nodeCharacteristics, prometheus.GaugeValue, float64(v.Characteristics), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.detectedCloudPlatform, + c.nodeDetectedCloudPlatform, prometheus.GaugeValue, float64(v.DetectedCloudPlatform), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.dynamicWeight, + c.nodeDynamicWeight, prometheus.GaugeValue, float64(v.DynamicWeight), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.flags, + c.nodeFlags, prometheus.GaugeValue, float64(v.Flags), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.majorVersion, + c.nodeMajorVersion, prometheus.GaugeValue, float64(v.MajorVersion), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.minorVersion, + c.nodeMinorVersion, prometheus.GaugeValue, float64(v.MinorVersion), v.Name, ) ch <- prometheus.MustNewConstMetric( - 
c.needsPreventQuorum, + c.nodeNeedsPreventQuorum, prometheus.GaugeValue, float64(v.NeedsPreventQuorum), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.nodeDrainStatus, + c.nodeNodeDrainStatus, prometheus.GaugeValue, float64(v.NodeDrainStatus), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.nodeHighestVersion, + c.nodeNodeHighestVersion, prometheus.GaugeValue, float64(v.NodeHighestVersion), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.nodeLowestVersion, + c.nodeNodeLowestVersion, prometheus.GaugeValue, float64(v.NodeLowestVersion), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.nodeWeight, + c.nodeNodeWeight, prometheus.GaugeValue, float64(v.NodeWeight), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.state, + c.nodeState, prometheus.GaugeValue, float64(v.State), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.statusInformation, + c.nodeStatusInformation, prometheus.GaugeValue, float64(v.StatusInformation), v.Name, ) - NodeName = append(NodeName, v.Name) + nodeNames = append(nodeNames, v.Name) } - return nil + return nodeNames, nil } diff --git a/pkg/collector/mscluster_resource/mscluster_resource.go b/pkg/collector/mscluster/mscluster_resource.go similarity index 61% rename from pkg/collector/mscluster_resource/mscluster_resource.go rename to pkg/collector/mscluster/mscluster_resource.go index 3236490e..323c3a01 100644 --- a/pkg/collector/mscluster_resource/mscluster_resource.go +++ b/pkg/collector/mscluster/mscluster_resource.go @@ -1,186 +1,17 @@ -package mscluster_resource +package mscluster import ( - "github.com/alecthomas/kingpin/v2" "github.com/go-kit/log" - "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_node" "github.com/prometheus-community/windows_exporter/pkg/types" "github.com/prometheus-community/windows_exporter/pkg/wmi" "github.com/prometheus/client_golang/prometheus" ) -const Name = "mscluster_resource" +const nameResource = Name + "_resource" -type Config struct{} - -var ConfigDefaults = Config{} - 
-// A Collector is a Prometheus Collector for WMI MSCluster_Resource metrics. -type Collector struct { - config Config - - characteristics *prometheus.Desc - deadlockTimeout *prometheus.Desc - embeddedFailureAction *prometheus.Desc - flags *prometheus.Desc - isAlivePollInterval *prometheus.Desc - looksAlivePollInterval *prometheus.Desc - monitorProcessId *prometheus.Desc - ownerNode *prometheus.Desc - pendingTimeout *prometheus.Desc - resourceClass *prometheus.Desc - restartAction *prometheus.Desc - restartDelay *prometheus.Desc - restartPeriod *prometheus.Desc - restartThreshold *prometheus.Desc - retryPeriodOnFailure *prometheus.Desc - state *prometheus.Desc - subclass *prometheus.Desc -} - -func New(config *Config) *Collector { - if config == nil { - config = &ConfigDefaults - } - - c := &Collector{ - config: *config, - } - - return c -} - -func NewWithFlags(_ *kingpin.Application) *Collector { - return &Collector{} -} - -func (c *Collector) GetName() string { - return Name -} - -func (c *Collector) GetPerfCounter(_ log.Logger) ([]string, error) { - return []string{"Memory"}, nil -} - -func (c *Collector) Close() error { - return nil -} - -func (c *Collector) Build(_ log.Logger) error { - c.characteristics = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "characteristics"), - "Provides the characteristics of the object.", - []string{"type", "owner_group", "name"}, - nil, - ) - c.deadlockTimeout = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "deadlock_timeout"), - "Indicates the length of time to wait, in milliseconds, before declaring a deadlock in any call into a resource.", - []string{"type", "owner_group", "name"}, - nil, - ) - c.embeddedFailureAction = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "embedded_failure_action"), - "The time, in milliseconds, that a resource should remain in a failed state before the Cluster service attempts to restart it.", - []string{"type", "owner_group", 
"name"}, - nil, - ) - c.flags = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "flags"), - "Provides access to the flags set for the object.", - []string{"type", "owner_group", "name"}, - nil, - ) - c.isAlivePollInterval = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "is_alive_poll_interval"), - "Provides access to the resource's IsAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it is operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the IsAlivePollInterval property for the resource type associated with the resource.", - []string{"type", "owner_group", "name"}, - nil, - ) - c.looksAlivePollInterval = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "looks_alive_poll_interval"), - "Provides access to the resource's LooksAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it appears operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the LooksAlivePollInterval property for the resource type associated with the resource.", - []string{"type", "owner_group", "name"}, - nil, - ) - c.monitorProcessId = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "monitor_process_id"), - "Provides the process ID of the resource host service that is currently hosting the resource.", - []string{"type", "owner_group", "name"}, - nil, - ) - c.ownerNode = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "owner_node"), - "The node hosting the resource. 0: Not hosted; 1: Hosted", - []string{"type", "owner_group", "node_name", "name"}, - nil, - ) - c.ownerNode = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "owner_node"), - "The node hosting the resource. 
0: Not hosted; 1: Hosted", - []string{"type", "owner_group", "node_name", "name"}, - nil, - ) - c.pendingTimeout = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "pending_timeout"), - "Provides access to the resource's PendingTimeout property. If a resource cannot be brought online or taken offline in the number of milliseconds specified by the PendingTimeout property, the resource is forcibly terminated.", - []string{"type", "owner_group", "name"}, - nil, - ) - c.resourceClass = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "resource_class"), - "Gets or sets the resource class of a resource. 0: Unknown; 1: Storage; 2: Network; 32768: Unknown ", - []string{"type", "owner_group", "name"}, - nil, - ) - c.restartAction = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "restart_action"), - "Provides access to the resource's RestartAction property, which is the action to be taken by the Cluster Service if the resource fails.", - []string{"type", "owner_group", "name"}, - nil, - ) - c.restartDelay = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "restart_delay"), - "Indicates the time delay before a failed resource is restarted.", - []string{"type", "owner_group", "name"}, - nil, - ) - c.restartPeriod = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "restart_period"), - "Provides access to the resource's RestartPeriod property, which is interval of time, in milliseconds, during which a specified number of restart attempts can be made on a nonresponsive resource.", - []string{"type", "owner_group", "name"}, - nil, - ) - c.restartThreshold = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "restart_threshold"), - "Provides access to the resource's RestartThreshold property which is the maximum number of restart attempts that can be made on a resource within an interval defined by the RestartPeriod property before the Cluster Service initiates the 
action specified by the RestartAction property.", - []string{"type", "owner_group", "name"}, - nil, - ) - c.retryPeriodOnFailure = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "retry_period_on_failure"), - "Provides access to the resource's RetryPeriodOnFailure property, which is the interval of time (in milliseconds) that a resource should remain in a failed state before the Cluster service attempts to restart it.", - []string{"type", "owner_group", "name"}, - nil, - ) - c.state = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "state"), - "The current state of the resource. -1: Unknown; 0: Inherited; 1: Initializing; 2: Online; 3: Offline; 4: Failed; 128: Pending; 129: Online Pending; 130: Offline Pending ", - []string{"type", "owner_group", "name"}, - nil, - ) - c.subclass = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "subclass"), - "Provides the list of references to nodes that can be the owner of this resource.", - []string{"type", "owner_group", "name"}, - nil, - ) - return nil -} - -// MSCluster_Resource docs: +// msClusterResource represents the MSCluster_Resource WMI class // - https://docs.microsoft.com/en-us/previous-versions/windows/desktop/cluswmi/mscluster-resource -type MSCluster_Resource struct { +type msClusterResource struct { Name string Type string OwnerGroup string @@ -204,139 +35,248 @@ type MSCluster_Resource struct { Subclass uint } +func (c *Collector) buildResource() { + c.resourceCharacteristics = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResource, "characteristics"), + "Provides the characteristics of the object.", + []string{"type", "owner_group", "name"}, + nil, + ) + c.resourceDeadlockTimeout = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResource, "deadlock_timeout"), + "Indicates the length of time to wait, in milliseconds, before declaring a deadlock in any call into a resource.", + []string{"type", "owner_group", 
"name"}, + nil, + ) + c.resourceEmbeddedFailureAction = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResource, "embedded_failure_action"), + "The time, in milliseconds, that a resource should remain in a failed state before the Cluster service attempts to restart it.", + []string{"type", "owner_group", "name"}, + nil, + ) + c.resourceFlags = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResource, "flags"), + "Provides access to the flags set for the object.", + []string{"type", "owner_group", "name"}, + nil, + ) + c.resourceIsAlivePollInterval = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResource, "is_alive_poll_interval"), + "Provides access to the resource's IsAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it is operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the IsAlivePollInterval property for the resource type associated with the resource.", + []string{"type", "owner_group", "name"}, + nil, + ) + c.resourceLooksAlivePollInterval = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResource, "looks_alive_poll_interval"), + "Provides access to the resource's LooksAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it appears operational. 
If the property is set to 0xFFFFFFFF, the Cluster Service uses the LooksAlivePollInterval property for the resource type associated with the resource.", + []string{"type", "owner_group", "name"}, + nil, + ) + c.resourceMonitorProcessId = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResource, "monitor_process_id"), + "Provides the process ID of the resource host service that is currently hosting the resource.", + []string{"type", "owner_group", "name"}, + nil, + ) + c.resourceOwnerNode = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResource, "owner_node"), + "The node hosting the resource. 0: Not hosted; 1: Hosted", + []string{"type", "owner_group", "node_name", "name"}, + nil, + ) + c.resourceOwnerNode = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResource, "owner_node"), + "The node hosting the resource. 0: Not hosted; 1: Hosted", + []string{"type", "owner_group", "node_name", "name"}, + nil, + ) + c.resourcePendingTimeout = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResource, "pending_timeout"), + "Provides access to the resource's PendingTimeout property. If a resource cannot be brought online or taken offline in the number of milliseconds specified by the PendingTimeout property, the resource is forcibly terminated.", + []string{"type", "owner_group", "name"}, + nil, + ) + c.resourceResourceClass = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResource, "resource_class"), + "Gets or sets the resource class of a resource. 
0: Unknown; 1: Storage; 2: Network; 32768: Unknown ", + []string{"type", "owner_group", "name"}, + nil, + ) + c.resourceRestartAction = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResource, "restart_action"), + "Provides access to the resource's RestartAction property, which is the action to be taken by the Cluster Service if the resource fails.", + []string{"type", "owner_group", "name"}, + nil, + ) + c.resourceRestartDelay = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResource, "restart_delay"), + "Indicates the time delay before a failed resource is restarted.", + []string{"type", "owner_group", "name"}, + nil, + ) + c.resourceRestartPeriod = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResource, "restart_period"), + "Provides access to the resource's RestartPeriod property, which is interval of time, in milliseconds, during which a specified number of restart attempts can be made on a nonresponsive resource.", + []string{"type", "owner_group", "name"}, + nil, + ) + c.resourceRestartThreshold = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResource, "restart_threshold"), + "Provides access to the resource's RestartThreshold property which is the maximum number of restart attempts that can be made on a resource within an interval defined by the RestartPeriod property before the Cluster Service initiates the action specified by the RestartAction property.", + []string{"type", "owner_group", "name"}, + nil, + ) + c.resourceRetryPeriodOnFailure = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResource, "retry_period_on_failure"), + "Provides access to the resource's RetryPeriodOnFailure property, which is the interval of time (in milliseconds) that a resource should remain in a failed state before the Cluster service attempts to restart it.", + []string{"type", "owner_group", "name"}, + nil, + ) + c.resourceState = prometheus.NewDesc( + 
prometheus.BuildFQName(types.Namespace, nameResource, "state"), + "The current state of the resource. -1: Unknown; 0: Inherited; 1: Initializing; 2: Online; 3: Offline; 4: Failed; 128: Pending; 129: Online Pending; 130: Offline Pending ", + []string{"type", "owner_group", "name"}, + nil, + ) + c.resourceSubClass = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResource, "subclass"), + "Provides the list of references to nodes that can be the owner of this resource.", + []string{"type", "owner_group", "name"}, + nil, + ) +} + // Collect sends the metric values for each metric // to the provided prometheus Metric channel. -func (c *Collector) Collect(_ *types.ScrapeContext, logger log.Logger, ch chan<- prometheus.Metric) error { - logger = log.With(logger, "collector", Name) - var dst []MSCluster_Resource - q := wmi.QueryAll(&dst, logger) +func (c *Collector) collectResource(logger log.Logger, ch chan<- prometheus.Metric, nodeNames []string) error { + var dst []msClusterResource + + q := wmi.QueryAllForClass(&dst, "MSCluster_Resource", logger) if err := wmi.QueryNamespace(q, &dst, "root/MSCluster"); err != nil { return err } for _, v := range dst { ch <- prometheus.MustNewConstMetric( - c.characteristics, + c.resourceCharacteristics, prometheus.GaugeValue, float64(v.Characteristics), v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( - c.deadlockTimeout, + c.resourceDeadlockTimeout, prometheus.GaugeValue, float64(v.DeadlockTimeout), v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( - c.embeddedFailureAction, + c.resourceEmbeddedFailureAction, prometheus.GaugeValue, float64(v.EmbeddedFailureAction), v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( - c.flags, + c.resourceFlags, prometheus.GaugeValue, float64(v.Flags), v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( - c.isAlivePollInterval, + c.resourceIsAlivePollInterval, prometheus.GaugeValue, 
float64(v.IsAlivePollInterval), v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( - c.looksAlivePollInterval, + c.resourceLooksAlivePollInterval, prometheus.GaugeValue, float64(v.LooksAlivePollInterval), v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( - c.monitorProcessId, + c.resourceMonitorProcessId, prometheus.GaugeValue, float64(v.MonitorProcessId), v.Type, v.OwnerGroup, v.Name, ) - if mscluster_node.NodeName != nil { - for _, node_name := range mscluster_node.NodeName { - isCurrentState := 0.0 - if v.OwnerNode == node_name { - isCurrentState = 1.0 - } - ch <- prometheus.MustNewConstMetric( - c.ownerNode, - prometheus.GaugeValue, - isCurrentState, - v.Type, v.OwnerGroup, node_name, v.Name, - ) + for _, nodeName := range nodeNames { + isCurrentState := 0.0 + if v.OwnerNode == nodeName { + isCurrentState = 1.0 } + ch <- prometheus.MustNewConstMetric( + c.resourceOwnerNode, + prometheus.GaugeValue, + isCurrentState, + v.Type, v.OwnerGroup, nodeName, v.Name, + ) } ch <- prometheus.MustNewConstMetric( - c.pendingTimeout, + c.resourcePendingTimeout, prometheus.GaugeValue, float64(v.PendingTimeout), v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( - c.resourceClass, + c.resourceResourceClass, prometheus.GaugeValue, float64(v.ResourceClass), v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( - c.restartAction, + c.resourceRestartAction, prometheus.GaugeValue, float64(v.RestartAction), v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( - c.restartDelay, + c.resourceRestartDelay, prometheus.GaugeValue, float64(v.RestartDelay), v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( - c.restartPeriod, + c.resourceRestartPeriod, prometheus.GaugeValue, float64(v.RestartPeriod), v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( - c.restartThreshold, + c.resourceRestartThreshold, prometheus.GaugeValue, float64(v.RestartThreshold), v.Type, 
v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( - c.retryPeriodOnFailure, + c.resourceRetryPeriodOnFailure, prometheus.GaugeValue, float64(v.RetryPeriodOnFailure), v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( - c.state, + c.resourceState, prometheus.GaugeValue, float64(v.State), v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( - c.subclass, + c.resourceSubClass, prometheus.GaugeValue, float64(v.Subclass), v.Type, v.OwnerGroup, v.Name, diff --git a/pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go b/pkg/collector/mscluster/mscluster_resourcegroup.go similarity index 51% rename from pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go rename to pkg/collector/mscluster/mscluster_resourcegroup.go index f85a5322..78030f05 100644 --- a/pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go +++ b/pkg/collector/mscluster/mscluster_resourcegroup.go @@ -1,165 +1,17 @@ -package mscluster_resourcegroup +package mscluster import ( - "github.com/alecthomas/kingpin/v2" "github.com/go-kit/log" - "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_node" "github.com/prometheus-community/windows_exporter/pkg/types" "github.com/prometheus-community/windows_exporter/pkg/wmi" "github.com/prometheus/client_golang/prometheus" ) -const Name = "mscluster_resourcegroup" +const nameResourceGroup = Name + "_resourcegroup" -type Config struct{} - -var ConfigDefaults = Config{} - -// A Collector is a Prometheus Collector for WMI MSCluster_ResourceGroup metrics. 
-type Collector struct { - config Config - - autoFailbackType *prometheus.Desc - characteristics *prometheus.Desc - coldStartSetting *prometheus.Desc - defaultOwner *prometheus.Desc - failbackWindowEnd *prometheus.Desc - failbackWindowStart *prometheus.Desc - failOverPeriod *prometheus.Desc - failOverThreshold *prometheus.Desc - flags *prometheus.Desc - groupType *prometheus.Desc - ownerNode *prometheus.Desc - priority *prometheus.Desc - resiliencyPeriod *prometheus.Desc - state *prometheus.Desc -} - -func New(config *Config) *Collector { - if config == nil { - config = &ConfigDefaults - } - - c := &Collector{ - config: *config, - } - - return c -} - -func NewWithFlags(_ *kingpin.Application) *Collector { - return &Collector{} -} - -func (c *Collector) GetName() string { - return Name -} - -func (c *Collector) GetPerfCounter(_ log.Logger) ([]string, error) { - return []string{"Memory"}, nil -} - -func (c *Collector) Close() error { - return nil -} - -func (c *Collector) Build(_ log.Logger) error { - c.autoFailbackType = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "auto_failback_type"), - "Provides access to the group's AutoFailbackType property.", - []string{"name"}, - nil, - ) - c.characteristics = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "characteristics"), - "Provides the characteristics of the group.", - []string{"name"}, - nil, - ) - c.coldStartSetting = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "cold_start_setting"), - "Indicates whether a group can start after a cluster cold start.", - []string{"name"}, - nil, - ) - c.defaultOwner = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "default_owner"), - "Number of the last node the resource group was activated on or explicitly moved to.", - []string{"name"}, - nil, - ) - c.failbackWindowEnd = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "failback_window_end"), - "The FailbackWindowEnd property 
provides the latest time that the group can be moved back to the node identified as its preferred node.", - []string{"name"}, - nil, - ) - c.failbackWindowStart = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "failback_window_start"), - "The FailbackWindowStart property provides the earliest time (that is, local time as kept by the cluster) that the group can be moved back to the node identified as its preferred node.", - []string{"name"}, - nil, - ) - c.failOverPeriod = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "failover_period"), - "The FailoverPeriod property specifies a number of hours during which a maximum number of failover attempts, specified by the FailoverThreshold property, can occur.", - []string{"name"}, - nil, - ) - c.failOverThreshold = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "failover_threshold"), - "The FailoverThreshold property specifies the maximum number of failover attempts.", - []string{"name"}, - nil, - ) - c.flags = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "flags"), - "Provides access to the flags set for the group. ", - []string{"name"}, - nil, - ) - c.groupType = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "group_type"), - "The Type of the resource group.", - []string{"name"}, - nil, - ) - c.ownerNode = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "owner_node"), - "The node hosting the resource group. 0: Not hosted; 1: Hosted", - []string{"node_name", "name"}, - nil, - ) - c.ownerNode = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "owner_node"), - "The node hosting the resource group. 
0: Not hosted; 1: Hosted", - []string{"node_name", "name"}, - nil, - ) - c.priority = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "priority"), - "Priority value of the resource group", - []string{"name"}, - nil, - ) - c.resiliencyPeriod = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "resiliency_period"), - "The resiliency period for this group, in seconds.", - []string{"name"}, - nil, - ) - c.state = prometheus.NewDesc( - prometheus.BuildFQName(types.Namespace, Name, "state"), - "The current state of the resource group. -1: Unknown; 0: Online; 1: Offline; 2: Failed; 3: Partial Online; 4: Pending", - []string{"name"}, - nil, - ) - return nil -} - -// MSCluster_ResourceGroup docs: +// msClusterResourceGroup represents the MSCluster_ResourceGroup WMI class // - https://docs.microsoft.com/en-us/previous-versions/windows/desktop/cluswmi/mscluster-resourcegroup -type MSCluster_ResourceGroup struct { +type msClusterResourceGroup struct { Name string AutoFailbackType uint @@ -178,118 +30,209 @@ type MSCluster_ResourceGroup struct { State uint } +func (c *Collector) buildResourceGroup() { + c.resourceGroupAutoFailbackType = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResourceGroup, "auto_failback_type"), + "Provides access to the group's AutoFailbackType property.", + []string{"name"}, + nil, + ) + c.resourceGroupCharacteristics = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResourceGroup, "characteristics"), + "Provides the characteristics of the group.", + []string{"name"}, + nil, + ) + c.resourceGroupColdStartSetting = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResourceGroup, "cold_start_setting"), + "Indicates whether a group can start after a cluster cold start.", + []string{"name"}, + nil, + ) + c.resourceGroupDefaultOwner = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResourceGroup, "default_owner"), + "Number of the last node the 
resource group was activated on or explicitly moved to.", + []string{"name"}, + nil, + ) + c.resourceGroupFailbackWindowEnd = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResourceGroup, "failback_window_end"), + "The FailbackWindowEnd property provides the latest time that the group can be moved back to the node identified as its preferred node.", + []string{"name"}, + nil, + ) + c.resourceGroupFailbackWindowStart = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResourceGroup, "failback_window_start"), + "The FailbackWindowStart property provides the earliest time (that is, local time as kept by the cluster) that the group can be moved back to the node identified as its preferred node.", + []string{"name"}, + nil, + ) + c.resourceGroupFailOverPeriod = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResourceGroup, "failover_period"), + "The FailoverPeriod property specifies a number of hours during which a maximum number of failover attempts, specified by the FailoverThreshold property, can occur.", + []string{"name"}, + nil, + ) + c.resourceGroupFailOverThreshold = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResourceGroup, "failover_threshold"), + "The FailoverThreshold property specifies the maximum number of failover attempts.", + []string{"name"}, + nil, + ) + c.resourceGroupFlags = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResourceGroup, "flags"), + "Provides access to the flags set for the group. ", + []string{"name"}, + nil, + ) + c.resourceGroupGroupType = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResourceGroup, "group_type"), + "The Type of the resource group.", + []string{"name"}, + nil, + ) + c.resourceGroupOwnerNode = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResourceGroup, "owner_node"), + "The node hosting the resource group. 
0: Not hosted; 1: Hosted", + []string{"node_name", "name"}, + nil, + ) + c.resourceGroupOwnerNode = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResourceGroup, "owner_node"), + "The node hosting the resource group. 0: Not hosted; 1: Hosted", + []string{"node_name", "name"}, + nil, + ) + c.resourceGroupPriority = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResourceGroup, "priority"), + "Priority value of the resource group", + []string{"name"}, + nil, + ) + c.resourceGroupResiliencyPeriod = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResourceGroup, "resiliency_period"), + "The resiliency period for this group, in seconds.", + []string{"name"}, + nil, + ) + c.resourceGroupState = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, nameResourceGroup, "state"), + "The current state of the resource group. -1: Unknown; 0: Online; 1: Offline; 2: Failed; 3: Partial Online; 4: Pending", + []string{"name"}, + nil, + ) +} + // Collect sends the metric values for each metric // to the provided prometheus Metric channel. 
-func (c *Collector) Collect(_ *types.ScrapeContext, logger log.Logger, ch chan<- prometheus.Metric) error { - logger = log.With(logger, "collector", Name) - var dst []MSCluster_ResourceGroup - q := wmi.QueryAll(&dst, logger) +func (c *Collector) collectResourceGroup(logger log.Logger, ch chan<- prometheus.Metric, nodeNames []string) error { + var dst []msClusterResourceGroup + + q := wmi.QueryAllForClass(&dst, "MSCluster_ResourceGroup", logger) if err := wmi.QueryNamespace(q, &dst, "root/MSCluster"); err != nil { return err } for _, v := range dst { ch <- prometheus.MustNewConstMetric( - c.autoFailbackType, + c.resourceGroupAutoFailbackType, prometheus.GaugeValue, float64(v.AutoFailbackType), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.characteristics, + c.resourceGroupCharacteristics, prometheus.GaugeValue, float64(v.Characteristics), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.coldStartSetting, + c.resourceGroupColdStartSetting, prometheus.GaugeValue, float64(v.ColdStartSetting), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.defaultOwner, + c.resourceGroupDefaultOwner, prometheus.GaugeValue, float64(v.DefaultOwner), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.failbackWindowEnd, + c.resourceGroupFailbackWindowEnd, prometheus.GaugeValue, float64(v.FailbackWindowEnd), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.failbackWindowStart, + c.resourceGroupFailbackWindowStart, prometheus.GaugeValue, float64(v.FailbackWindowStart), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.failOverPeriod, + c.resourceGroupFailOverPeriod, prometheus.GaugeValue, float64(v.FailoverPeriod), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.failOverThreshold, + c.resourceGroupFailOverThreshold, prometheus.GaugeValue, float64(v.FailoverThreshold), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.flags, + c.resourceGroupFlags, prometheus.GaugeValue, float64(v.Flags), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.groupType, + 
c.resourceGroupGroupType, prometheus.GaugeValue, float64(v.GroupType), v.Name, ) - if mscluster_node.NodeName != nil { - for _, node_name := range mscluster_node.NodeName { - isCurrentState := 0.0 - if v.OwnerNode == node_name { - isCurrentState = 1.0 - } - ch <- prometheus.MustNewConstMetric( - c.ownerNode, - prometheus.GaugeValue, - isCurrentState, - node_name, v.Name, - ) + for _, nodeName := range nodeNames { + isCurrentState := 0.0 + if v.OwnerNode == nodeName { + isCurrentState = 1.0 } + ch <- prometheus.MustNewConstMetric( + c.resourceGroupOwnerNode, + prometheus.GaugeValue, + isCurrentState, + nodeName, v.Name, + ) } ch <- prometheus.MustNewConstMetric( - c.priority, + c.resourceGroupPriority, prometheus.GaugeValue, float64(v.Priority), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.resiliencyPeriod, + c.resourceGroupResiliencyPeriod, prometheus.GaugeValue, float64(v.ResiliencyPeriod), v.Name, ) ch <- prometheus.MustNewConstMetric( - c.state, + c.resourceGroupState, prometheus.GaugeValue, float64(v.State), v.Name, diff --git a/pkg/collector/prometheus.go b/pkg/collector/prometheus.go index d3dea7d7..843f7bba 100644 --- a/pkg/collector/prometheus.go +++ b/pkg/collector/prometheus.go @@ -186,6 +186,7 @@ func (coll *Prometheus) execute(logger log.Logger, name string, c Collector, ctx _ = level.Error(coll.logger).Log("msg", fmt.Sprintf("collector %s failed after %fs", name, duration), "err", err) return failed } + _ = level.Debug(coll.logger).Log("msg", fmt.Sprintf("collector %s succeeded after %fs.", name, duration)) return success }