From 14d5e6716f890115b394922e90d3a600a155e582 Mon Sep 17 00:00:00 2001
From: Aleksandr Snisarenko
Date: Thu, 12 Jan 2023 17:18:28 +0300
Subject: [PATCH] added chassis cooling fault and drive fault metrics

---
Review notes (text between the "---" line and the first "diff --git" line is
not part of the commit message):

  * collector_chassis.go: the error returned by each freeipmi.GetChassis* call
    is now checked directly after the call. The submitted version assigned err
    three times in a row and only tested the last value, so a failed
    power-state or drive-fault lookup went unreported and the -1 returned by
    GetChassisDriveFault on error would have been exported as a gauge value.
  * freeipmi/freeipmi.go: the capture groups are written as (?P<value>...).
    The submitted text showed "(?P.*)", which is not valid Go regexp syntax and
    makes regexp.MustCompile panic. The group name "value" is an assumption to
    confirm against getValue.
  * freeipmi/freeipmi.go: both fault getters share one helper, return a literal
    nil on success, and reject values other than "true"/"false" instead of
    reporting them as a fault.
  * Whitespace in context lines was reconstructed (tabs for indentation,
    gofmt-style alignment over the visible names). If it does not match the
    target tree, apply with `git apply --ignore-whitespace` and run gofmt.
  * The From: header carries no e-mail address; `git am` may refuse the patch
    until one is supplied. The commit id and the blob ids on the "index" lines
    are those of the original submission.

 collector_chassis.go | 32 ++++++++++++++++++++++++++++++++
 freeipmi/freeipmi.go | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+)

diff --git a/collector_chassis.go b/collector_chassis.go
index d18aeaa..55bc5b4 100644
--- a/collector_chassis.go
+++ b/collector_chassis.go
@@ -31,6 +31,18 @@ var (
 		[]string{},
 		nil,
 	)
+	chassisDriveFaultDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "chassis", "drive_fault_state"),
+		"Current drive fault state (1=false, 0=true).",
+		[]string{},
+		nil,
+	)
+	chassisCoolingFaultDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "chassis", "cooling_fault_state"),
+		"Current Cooling/fan fault state (1=false, 0=true).",
+		[]string{},
+		nil,
+	)
 )
 
 type ChassisCollector struct{}
@@ -49,6 +61,16 @@ func (c ChassisCollector) Args() []string {
 
 func (c ChassisCollector) Collect(result freeipmi.Result, ch chan<- prometheus.Metric, target ipmiTarget) (int, error) {
 	currentChassisPowerState, err := freeipmi.GetChassisPowerState(result)
+	if err != nil {
+		level.Error(logger).Log("msg", "Failed to collect chassis data", "target", targetName(target.host), "error", err)
+		return 0, err
+	}
+	currentChassisDriveFault, err := freeipmi.GetChassisDriveFault(result)
+	if err != nil {
+		level.Error(logger).Log("msg", "Failed to collect chassis data", "target", targetName(target.host), "error", err)
+		return 0, err
+	}
+	currentChassisCoolingFault, err := freeipmi.GetChassisCoolingFault(result)
 	if err != nil {
 		level.Error(logger).Log("msg", "Failed to collect chassis data", "target", targetName(target.host), "error", err)
 		return 0, err
@@ -58,5 +80,15 @@ func (c ChassisCollector) Collect(result freeipmi.Result, ch chan<- prometheus.M
 		prometheus.GaugeValue,
 		currentChassisPowerState,
 	)
+	ch <- prometheus.MustNewConstMetric(
+		chassisDriveFaultDesc,
+		prometheus.GaugeValue,
+		currentChassisDriveFault,
+	)
+	ch <- prometheus.MustNewConstMetric(
+		chassisCoolingFaultDesc,
+		prometheus.GaugeValue,
+		currentChassisCoolingFault,
+	)
 	return 1, nil
 }
diff --git a/freeipmi/freeipmi.go b/freeipmi/freeipmi.go
index 7210141..0231aee 100644
--- a/freeipmi/freeipmi.go
+++ b/freeipmi/freeipmi.go
@@ -35,6 +35,8 @@ import (
 var (
 	ipmiDCMICurrentPowerRegex    = regexp.MustCompile(`^Current Power\s*:\s*(?P<value>[0-9.]*)\s*Watts.*`)
 	ipmiChassisPowerRegex        = regexp.MustCompile(`^System Power\s*:\s(?P<value>.*)`)
+	ipmiChassisDriveFaultRegex   = regexp.MustCompile(`^Drive Fault\s*:\s(?P<value>.*)`)
+	ipmiChassisCoolingFaultRegex = regexp.MustCompile(`^Cooling/fan fault\s*:\s(?P<value>.*)`)
 	ipmiSELEntriesRegex          = regexp.MustCompile(`^Number of log entries\s*:\s(?P<value>[0-9.]*)`)
 	ipmiSELFreeSpaceRegex        = regexp.MustCompile(`^Free space remaining\s*:\s(?P<value>[0-9.]*)\s*bytes.*`)
 	bmcInfoFirmwareRevisionRegex = regexp.MustCompile(`^Firmware Revision\s*:\s*(?P<value>[0-9.]*).*`)
@@ -219,6 +221,39 @@ func GetChassisPowerState(ipmiOutput Result) (float64, error) {
 	return 0, err
 }
 
+// getChassisFaultState extracts the field matched by re from the command
+// output held in ipmiOutput and maps it to a gauge value: 1 when the field
+// reads "false" and 0 when it reads "true". Any other value is an error.
+func getChassisFaultState(ipmiOutput Result, re *regexp.Regexp) (float64, error) {
+	if ipmiOutput.err != nil {
+		return -1, fmt.Errorf("%s: %s", ipmiOutput.err, ipmiOutput.output)
+	}
+	value, err := getValue(ipmiOutput.output, re)
+	if err != nil {
+		return -1, err
+	}
+	switch value {
+	case "false":
+		return 1, nil
+	case "true":
+		return 0, nil
+	default:
+		return -1, fmt.Errorf("unexpected chassis fault value %q", value)
+	}
+}
+
+// GetChassisDriveFault reports the "Drive Fault" field as a gauge value
+// (1=false, 0=true). It returns -1 and an error when the field is unavailable.
+func GetChassisDriveFault(ipmiOutput Result) (float64, error) {
+	return getChassisFaultState(ipmiOutput, ipmiChassisDriveFaultRegex)
+}
+
+// GetChassisCoolingFault reports the "Cooling/fan fault" field as a gauge value
+// (1=false, 0=true). It returns -1 and an error when the field is unavailable.
+func GetChassisCoolingFault(ipmiOutput Result) (float64, error) {
+	return getChassisFaultState(ipmiOutput, ipmiChassisCoolingFaultRegex)
+}
+
 func GetBMCInfoFirmwareRevision(ipmiOutput Result) (string, error) {
 	// Workaround for an issue described here: https://github.com/prometheus-community/ipmi_exporter/issues/57
 	// The command may fail, but produce usable output (minus the system firmware revision).