Merge pull request #1395 from webalexeu/feat/add_owner_node

This commit is contained in:
Jan-Otto Kröpke 2024-04-21 08:51:15 +02:00 committed by GitHub
commit b1c272a996
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 85 additions and 4 deletions

View File

@ -23,6 +23,7 @@ Name | Description | Type | Labels
`IsAlivePollInterval` | Provides access to the resource's IsAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it is operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the IsAlivePollInterval property for the resource type associated with the resource. | gauge | `type`, `owner_group`, `name`
`LooksAlivePollInterval` | Provides access to the resource's LooksAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it appears operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the LooksAlivePollInterval property for the resource type associated with the resource. | gauge | `type`, `owner_group`, `name`
`MonitorProcessId` | Provides the process ID of the resource host service that is currently hosting the resource. | gauge | `type`, `owner_group`, `name`
`OwnerNode` | The node hosting the resource. | gauge | `type`, `owner_group`, `node_name`, `name`
`PendingTimeout` | Provides access to the resource's PendingTimeout property. If a resource cannot be brought online or taken offline in the number of milliseconds specified by the PendingTimeout property, the resource is forcibly terminated. | gauge | `type`, `owner_group`, `name`
`ResourceClass` | Gets or sets the resource class of a resource. 0: Unknown; 1: Storage; 2: Network; 32768: Unknown | gauge | `type`, `owner_group`, `name`
`RestartAction` | Provides access to the resource's RestartAction property, which is the action to be taken by the Cluster Service if the resource fails. | gauge | `type`, `owner_group`, `name`
@ -34,10 +35,16 @@ Name | Description | Type | Labels
`Subclass` | Provides the list of references to nodes that can be the owner of this resource. | gauge | `type`, `owner_group`, `name`
### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_
Query the state of all cluster resource owned by node1
```
windows_mscluster_resource_owner_node{node_name="node1"}
```
## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
Counts the number of Network Name cluster resource
```
count(windows_mscluster_resource_state{type="Network Name"})
```
## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_

View File

@ -26,16 +26,23 @@ Name | Description | Type | Labels
`FailoverThreshold` | The FailoverThreshold property specifies the maximum number of failover attempts. | gauge | `name`
`Flags` | Provides access to the flags set for the group. The cluster defines flags only for resources. For a description of these flags, see [CLUSCTL_RESOURCE_GET_FLAGS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-flags). | gauge | `name`
`GroupType` | The Type of the resource group. | gauge | `name`
`OwnerNode` | The node hosting the resource group. | gauge | `node_name`, `name`
`Priority` | Priority value of the resource group | gauge | `name`
`ResiliencyPeriod` | The resiliency period for this group, in seconds. | gauge | `name`
`State` | The current state of the resource group. -1: Unknown; 0: Online; 1: Offline; 2: Failed; 3: Partial Online; 4: Pending | gauge | `name`
`UpdateDomain` | | gauge | `name`
### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_
Query the state of all cluster group owned by node1
```
windows_mscluster_resourcegroup_owner_node{node_name="node1"}
```
## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
Counts the number of cluster group by type
```
count_values("count", windows_mscluster_resourcegroup_group_type)
```
## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_

View File

@ -15,6 +15,9 @@ type Config struct{}
var ConfigDefaults = Config{}
// Variable used by mscluster_resource and mscluster_resourcegroup
var NodeName []string
// A collector is a Prometheus collector for WMI MSCluster_Node metrics
type collector struct {
logger log.Logger
@ -175,6 +178,8 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric)
return err
}
NodeName = []string{}
for _, v := range dst {
ch <- prometheus.MustNewConstMetric(
@ -274,6 +279,8 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric)
float64(v.StatusInformation),
v.Name,
)
NodeName = append(NodeName, v.Name)
}
return nil

View File

@ -1,6 +1,7 @@
package mscluster_resource
import (
"github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_node"
"github.com/prometheus-community/windows_exporter/pkg/types"
"github.com/prometheus-community/windows_exporter/pkg/wmi"
@ -26,6 +27,7 @@ type collector struct {
IsAlivePollInterval *prometheus.Desc
LooksAlivePollInterval *prometheus.Desc
MonitorProcessId *prometheus.Desc
OwnerNode *prometheus.Desc
PendingTimeout *prometheus.Desc
ResourceClass *prometheus.Desc
RestartAction *prometheus.Desc
@ -102,6 +104,18 @@ func (c *collector) Build() error {
[]string{"type", "owner_group", "name"},
nil,
)
c.OwnerNode = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "owner_node"),
"The node hosting the resource. 0: Not hosted; 1: Hosted",
[]string{"type", "owner_group", "node_name", "name"},
nil,
)
c.OwnerNode = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "owner_node"),
"The node hosting the resource. 0: Not hosted; 1: Hosted",
[]string{"type", "owner_group", "node_name", "name"},
nil,
)
c.PendingTimeout = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "pending_timeout"),
"Provides access to the resource's PendingTimeout property. If a resource cannot be brought online or taken offline in the number of milliseconds specified by the PendingTimeout property, the resource is forcibly terminated.",
@ -165,6 +179,7 @@ type MSCluster_Resource struct {
Name string
Type string
OwnerGroup string
OwnerNode string
Characteristics uint
DeadlockTimeout uint
@ -244,6 +259,21 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric)
v.Type, v.OwnerGroup, v.Name,
)
if mscluster_node.NodeName != nil {
for _, node_name := range mscluster_node.NodeName {
isCurrentState := 0.0
if v.OwnerNode == node_name {
isCurrentState = 1.0
}
ch <- prometheus.MustNewConstMetric(
c.OwnerNode,
prometheus.GaugeValue,
isCurrentState,
v.Type, v.OwnerGroup, node_name, v.Name,
)
}
}
ch <- prometheus.MustNewConstMetric(
c.PendingTimeout,
prometheus.GaugeValue,

View File

@ -1,6 +1,7 @@
package mscluster_resourcegroup
import (
"github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_node"
"github.com/prometheus-community/windows_exporter/pkg/types"
"github.com/prometheus-community/windows_exporter/pkg/wmi"
@ -31,6 +32,7 @@ type collector struct {
Flags *prometheus.Desc
GroupType *prometheus.Desc
PlacementOptions *prometheus.Desc
OwnerNode *prometheus.Desc
Priority *prometheus.Desc
ResiliencyPeriod *prometheus.Desc
State *prometheus.Desc
@ -119,6 +121,18 @@ func (c *collector) Build() error {
[]string{"name"},
nil,
)
c.OwnerNode = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "owner_node"),
"The node hosting the resource group. 0: Not hosted; 1: Hosted",
[]string{"node_name", "name"},
nil,
)
c.OwnerNode = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "owner_node"),
"The node hosting the resource group. 0: Not hosted; 1: Hosted",
[]string{"node_name", "name"},
nil,
)
c.Priority = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "priority"),
"Priority value of the resource group",
@ -155,6 +169,7 @@ type MSCluster_ResourceGroup struct {
FailoverThreshold uint
Flags uint
GroupType uint
OwnerNode string
Priority uint
ResiliencyPeriod uint
State uint
@ -241,6 +256,21 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric)
v.Name,
)
if mscluster_node.NodeName != nil {
for _, node_name := range mscluster_node.NodeName {
isCurrentState := 0.0
if v.OwnerNode == node_name {
isCurrentState = 1.0
}
ch <- prometheus.MustNewConstMetric(
c.OwnerNode,
prometheus.GaugeValue,
isCurrentState,
node_name, v.Name,
)
}
}
ch <- prometheus.MustNewConstMetric(
c.Priority,
prometheus.GaugeValue,