mirror of
https://github.com/prometheus-community/windows_exporter
synced 2025-02-13 18:20:02 +00:00
303 lines
11 KiB
Go
303 lines
11 KiB
Go
package mscluster
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"log/slog"
|
|
"slices"
|
|
"strings"
|
|
|
|
"github.com/alecthomas/kingpin/v2"
|
|
"github.com/prometheus-community/windows_exporter/internal/types"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/yusufpapurcu/wmi"
|
|
)
|
|
|
|
const Name = "mscluster"
|
|
|
|
type Config struct {
|
|
CollectorsEnabled []string `yaml:"collectors_enabled"`
|
|
}
|
|
|
|
var ConfigDefaults = Config{
|
|
CollectorsEnabled: []string{
|
|
"cluster",
|
|
"network",
|
|
"node",
|
|
"resource",
|
|
"resourcegroup",
|
|
},
|
|
}
|
|
|
|
// A Collector is a Prometheus Collector for WMI MSCluster_Cluster metrics.
|
|
type Collector struct {
|
|
config Config
|
|
wmiClient *wmi.Client
|
|
|
|
// cluster
|
|
clusterAddEvictDelay *prometheus.Desc
|
|
clusterAdminAccessPoint *prometheus.Desc
|
|
clusterAutoAssignNodeSite *prometheus.Desc
|
|
clusterAutoBalancerLevel *prometheus.Desc
|
|
clusterAutoBalancerMode *prometheus.Desc
|
|
clusterBackupInProgress *prometheus.Desc
|
|
clusterBlockCacheSize *prometheus.Desc
|
|
clusterClusSvcHangTimeout *prometheus.Desc
|
|
clusterClusSvcRegroupOpeningTimeout *prometheus.Desc
|
|
clusterClusSvcRegroupPruningTimeout *prometheus.Desc
|
|
clusterClusSvcRegroupStageTimeout *prometheus.Desc
|
|
clusterClusSvcRegroupTickInMilliseconds *prometheus.Desc
|
|
clusterClusterEnforcedAntiAffinity *prometheus.Desc
|
|
clusterClusterFunctionalLevel *prometheus.Desc
|
|
clusterClusterGroupWaitDelay *prometheus.Desc
|
|
clusterClusterLogLevel *prometheus.Desc
|
|
clusterClusterLogSize *prometheus.Desc
|
|
clusterClusterUpgradeVersion *prometheus.Desc
|
|
clusterCrossSiteDelay *prometheus.Desc
|
|
clusterCrossSiteThreshold *prometheus.Desc
|
|
clusterCrossSubnetDelay *prometheus.Desc
|
|
clusterCrossSubnetThreshold *prometheus.Desc
|
|
clusterCsvBalancer *prometheus.Desc
|
|
clusterDatabaseReadWriteMode *prometheus.Desc
|
|
clusterDefaultNetworkRole *prometheus.Desc
|
|
clusterDetectedCloudPlatform *prometheus.Desc
|
|
clusterDetectManagedEvents *prometheus.Desc
|
|
clusterDetectManagedEventsThreshold *prometheus.Desc
|
|
clusterDisableGroupPreferredOwnerRandomization *prometheus.Desc
|
|
clusterDrainOnShutdown *prometheus.Desc
|
|
clusterDynamicQuorumEnabled *prometheus.Desc
|
|
clusterEnableSharedVolumes *prometheus.Desc
|
|
clusterFixQuorum *prometheus.Desc
|
|
clusterGracePeriodEnabled *prometheus.Desc
|
|
clusterGracePeriodTimeout *prometheus.Desc
|
|
clusterGroupDependencyTimeout *prometheus.Desc
|
|
clusterHangRecoveryAction *prometheus.Desc
|
|
clusterIgnorePersistentStateOnStartup *prometheus.Desc
|
|
clusterLogResourceControls *prometheus.Desc
|
|
clusterLowerQuorumPriorityNodeId *prometheus.Desc
|
|
clusterMaxNumberOfNodes *prometheus.Desc
|
|
clusterMessageBufferLength *prometheus.Desc
|
|
clusterMinimumNeverPreemptPriority *prometheus.Desc
|
|
clusterMinimumPreemptorPriority *prometheus.Desc
|
|
clusterNetftIPSecEnabled *prometheus.Desc
|
|
clusterPlacementOptions *prometheus.Desc
|
|
clusterPlumbAllCrossSubnetRoutes *prometheus.Desc
|
|
clusterPreventQuorum *prometheus.Desc
|
|
clusterQuarantineDuration *prometheus.Desc
|
|
clusterQuarantineThreshold *prometheus.Desc
|
|
clusterQuorumArbitrationTimeMax *prometheus.Desc
|
|
clusterQuorumArbitrationTimeMin *prometheus.Desc
|
|
clusterQuorumLogFileSize *prometheus.Desc
|
|
clusterQuorumTypeValue *prometheus.Desc
|
|
clusterRequestReplyTimeout *prometheus.Desc
|
|
clusterResiliencyDefaultPeriod *prometheus.Desc
|
|
clusterResiliencyLevel *prometheus.Desc
|
|
clusterResourceDllDeadlockPeriod *prometheus.Desc
|
|
clusterRootMemoryReserved *prometheus.Desc
|
|
clusterRouteHistoryLength *prometheus.Desc
|
|
clusterS2DBusTypes *prometheus.Desc
|
|
clusterS2DCacheDesiredState *prometheus.Desc
|
|
clusterS2DCacheFlashReservePercent *prometheus.Desc
|
|
clusterS2DCachePageSizeKBytes *prometheus.Desc
|
|
clusterS2DEnabled *prometheus.Desc
|
|
clusterS2DIOLatencyThreshold *prometheus.Desc
|
|
clusterS2DOptimizations *prometheus.Desc
|
|
clusterSameSubnetDelay *prometheus.Desc
|
|
clusterSameSubnetThreshold *prometheus.Desc
|
|
clusterSecurityLevel *prometheus.Desc
|
|
clusterSecurityLevelForStorage *prometheus.Desc
|
|
clusterSharedVolumeVssWriterOperationTimeout *prometheus.Desc
|
|
clusterShutdownTimeoutInMinutes *prometheus.Desc
|
|
clusterUseClientAccessNetworksForSharedVolumes *prometheus.Desc
|
|
clusterWitnessDatabaseWriteTimeout *prometheus.Desc
|
|
clusterWitnessDynamicWeight *prometheus.Desc
|
|
clusterWitnessRestartInterval *prometheus.Desc
|
|
|
|
// network
|
|
networkCharacteristics *prometheus.Desc
|
|
networkFlags *prometheus.Desc
|
|
networkMetric *prometheus.Desc
|
|
networkRole *prometheus.Desc
|
|
networkState *prometheus.Desc
|
|
|
|
// node
|
|
nodeBuildNumber *prometheus.Desc
|
|
nodeCharacteristics *prometheus.Desc
|
|
nodeDetectedCloudPlatform *prometheus.Desc
|
|
nodeDynamicWeight *prometheus.Desc
|
|
nodeFlags *prometheus.Desc
|
|
nodeMajorVersion *prometheus.Desc
|
|
nodeMinorVersion *prometheus.Desc
|
|
nodeNeedsPreventQuorum *prometheus.Desc
|
|
nodeNodeDrainStatus *prometheus.Desc
|
|
nodeNodeHighestVersion *prometheus.Desc
|
|
nodeNodeLowestVersion *prometheus.Desc
|
|
nodeNodeWeight *prometheus.Desc
|
|
nodeState *prometheus.Desc
|
|
nodeStatusInformation *prometheus.Desc
|
|
|
|
resourceCharacteristics *prometheus.Desc
|
|
resourceDeadlockTimeout *prometheus.Desc
|
|
resourceEmbeddedFailureAction *prometheus.Desc
|
|
resourceFlags *prometheus.Desc
|
|
resourceIsAlivePollInterval *prometheus.Desc
|
|
resourceLooksAlivePollInterval *prometheus.Desc
|
|
resourceMonitorProcessId *prometheus.Desc
|
|
resourceOwnerNode *prometheus.Desc
|
|
resourcePendingTimeout *prometheus.Desc
|
|
resourceResourceClass *prometheus.Desc
|
|
resourceRestartAction *prometheus.Desc
|
|
resourceRestartDelay *prometheus.Desc
|
|
resourceRestartPeriod *prometheus.Desc
|
|
resourceRestartThreshold *prometheus.Desc
|
|
resourceRetryPeriodOnFailure *prometheus.Desc
|
|
resourceState *prometheus.Desc
|
|
resourceSubClass *prometheus.Desc
|
|
|
|
// ResourceGroup
|
|
resourceGroupAutoFailbackType *prometheus.Desc
|
|
resourceGroupCharacteristics *prometheus.Desc
|
|
resourceGroupColdStartSetting *prometheus.Desc
|
|
resourceGroupDefaultOwner *prometheus.Desc
|
|
resourceGroupFailbackWindowEnd *prometheus.Desc
|
|
resourceGroupFailbackWindowStart *prometheus.Desc
|
|
resourceGroupFailOverPeriod *prometheus.Desc
|
|
resourceGroupFailOverThreshold *prometheus.Desc
|
|
resourceGroupFlags *prometheus.Desc
|
|
resourceGroupGroupType *prometheus.Desc
|
|
resourceGroupOwnerNode *prometheus.Desc
|
|
resourceGroupPriority *prometheus.Desc
|
|
resourceGroupResiliencyPeriod *prometheus.Desc
|
|
resourceGroupState *prometheus.Desc
|
|
}
|
|
|
|
func New(config *Config) *Collector {
|
|
if config == nil {
|
|
config = &ConfigDefaults
|
|
}
|
|
|
|
if config.CollectorsEnabled == nil {
|
|
config.CollectorsEnabled = ConfigDefaults.CollectorsEnabled
|
|
}
|
|
|
|
c := &Collector{
|
|
config: *config,
|
|
}
|
|
|
|
return c
|
|
}
|
|
|
|
func NewWithFlags(app *kingpin.Application) *Collector {
|
|
c := &Collector{
|
|
config: ConfigDefaults,
|
|
}
|
|
c.config.CollectorsEnabled = make([]string, 0)
|
|
|
|
var collectorsEnabled string
|
|
|
|
app.Flag(
|
|
"collector.mscluster.enabled",
|
|
"Comma-separated list of collectors to use.",
|
|
).Default(strings.Join(ConfigDefaults.CollectorsEnabled, ",")).StringVar(&collectorsEnabled)
|
|
|
|
app.Action(func(*kingpin.ParseContext) error {
|
|
c.config.CollectorsEnabled = strings.Split(collectorsEnabled, ",")
|
|
|
|
return nil
|
|
})
|
|
|
|
return c
|
|
}
|
|
|
|
func (c *Collector) GetName() string {
|
|
return Name
|
|
}
|
|
|
|
func (c *Collector) GetPerfCounter(_ *slog.Logger) ([]string, error) {
|
|
return []string{"Memory"}, nil
|
|
}
|
|
|
|
func (c *Collector) Close(_ *slog.Logger) error {
|
|
return nil
|
|
}
|
|
|
|
func (c *Collector) Build(_ *slog.Logger, wmiClient *wmi.Client) error {
|
|
if len(c.config.CollectorsEnabled) == 0 {
|
|
return nil
|
|
}
|
|
|
|
if wmiClient == nil || wmiClient.SWbemServicesClient == nil {
|
|
return errors.New("wmiClient or SWbemServicesClient is nil")
|
|
}
|
|
|
|
c.wmiClient = wmiClient
|
|
|
|
if slices.Contains(c.config.CollectorsEnabled, "cluster") {
|
|
c.buildCluster()
|
|
}
|
|
|
|
if slices.Contains(c.config.CollectorsEnabled, "network") {
|
|
c.buildNetwork()
|
|
}
|
|
|
|
if slices.Contains(c.config.CollectorsEnabled, "node") {
|
|
c.buildNode()
|
|
}
|
|
|
|
if slices.Contains(c.config.CollectorsEnabled, "resource") {
|
|
c.buildResource()
|
|
}
|
|
|
|
if slices.Contains(c.config.CollectorsEnabled, "resourcegroup") {
|
|
c.buildResourceGroup()
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Collect sends the metric values for each metric
|
|
// to the provided prometheus Metric channel.
|
|
func (c *Collector) Collect(_ *types.ScrapeContext, _ *slog.Logger, ch chan<- prometheus.Metric) error {
|
|
if len(c.config.CollectorsEnabled) == 0 {
|
|
return nil
|
|
}
|
|
|
|
var (
|
|
err error
|
|
errs []error
|
|
nodeNames []string
|
|
)
|
|
|
|
if slices.Contains(c.config.CollectorsEnabled, "cluster") {
|
|
if err = c.collectCluster(ch); err != nil {
|
|
errs = append(errs, fmt.Errorf("failed to collect cluster metrics: %w", err))
|
|
}
|
|
}
|
|
|
|
if slices.Contains(c.config.CollectorsEnabled, "network") {
|
|
if err = c.collectNetwork(ch); err != nil {
|
|
errs = append(errs, fmt.Errorf("failed to collect network metrics: %w", err))
|
|
}
|
|
}
|
|
|
|
if slices.Contains(c.config.CollectorsEnabled, "node") {
|
|
if nodeNames, err = c.collectNode(ch); err != nil {
|
|
errs = append(errs, fmt.Errorf("failed to collect node metrics: %w", err))
|
|
}
|
|
}
|
|
|
|
if slices.Contains(c.config.CollectorsEnabled, "resource") {
|
|
if err = c.collectResource(ch, nodeNames); err != nil {
|
|
errs = append(errs, fmt.Errorf("failed to collect resource metrics: %w", err))
|
|
}
|
|
}
|
|
|
|
if slices.Contains(c.config.CollectorsEnabled, "resourcegroup") {
|
|
if err = c.collectResourceGroup(ch, nodeNames); err != nil {
|
|
errs = append(errs, fmt.Errorf("failed to collect resource group metrics: %w", err))
|
|
}
|
|
}
|
|
|
|
return errors.Join(errs...)
|
|
}
|