added chassis cooling fault and drive fault metrics

This commit is contained in:
Aleksandr Snisarenko 2023-01-12 17:18:28 +03:00
parent 374e5593a1
commit 14d5e6716f
2 changed files with 54 additions and 0 deletions

View File

@ -31,6 +31,18 @@ var (
[]string{},
nil,
)
// chassisDriveFaultDesc describes the chassis drive fault metric. It is
// declared with no variable labels and no constant labels. Note the encoding
// stated in the help text: 1 means "no fault" (false) and 0 means "fault"
// (true).
chassisDriveFaultDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "chassis", "drive_fault_state"),
"Current drive fault state (1=false, 0=true).",
[]string{},
nil,
)
// chassisCoolingFaultDesc describes the chassis cooling/fan fault metric. It
// is declared with no variable labels and no constant labels. Note the
// encoding stated in the help text: 1 means "no fault" (false) and 0 means
// "fault" (true).
chassisCoolingFaultDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "chassis", "cooling_fault_state"),
"Current Cooling/fan fault state (1=false, 0=true).",
[]string{},
nil,
)
)
type ChassisCollector struct{}
@ -49,6 +61,8 @@ func (c ChassisCollector) Args() []string {
// Collect extracts the chassis power state, drive fault state and cooling
// fault state from result and sends one gauge per value on ch.
//
// Every lookup's error is checked right after the call. Checking only once
// after all three calls would let a later successful call overwrite an
// earlier failure, and the placeholder value returned alongside that failure
// would then be exported as if it were a real reading. On the first failure
// the error is logged for the target and returned together with 0.
func (c ChassisCollector) Collect(result freeipmi.Result, ch chan<- prometheus.Metric, target ipmiTarget) (int, error) {
currentChassisPowerState, err := freeipmi.GetChassisPowerState(result)
if err != nil {
level.Error(logger).Log("msg", "Failed to collect chassis data", "target", targetName(target.host), "error", err)
return 0, err
}
currentChassisDriveFault, err := freeipmi.GetChassisDriveFault(result)
if err != nil {
level.Error(logger).Log("msg", "Failed to collect chassis data", "target", targetName(target.host), "error", err)
return 0, err
}
currentChassisCoolingFault, err := freeipmi.GetChassisCoolingFault(result)
if err != nil {
level.Error(logger).Log("msg", "Failed to collect chassis data", "target", targetName(target.host), "error", err)
return 0, err
@ -58,5 +72,15 @@ func (c ChassisCollector) Collect(result freeipmi.Result, ch chan<- prometheus.M
prometheus.GaugeValue,
currentChassisPowerState,
)
ch <- prometheus.MustNewConstMetric(
chassisDriveFaultDesc,
prometheus.GaugeValue,
currentChassisDriveFault,
)
ch <- prometheus.MustNewConstMetric(
chassisCoolingFaultDesc,
prometheus.GaugeValue,
currentChassisCoolingFault,
)
return 1, nil
}

View File

@ -35,6 +35,8 @@ import (
var (
ipmiDCMICurrentPowerRegex = regexp.MustCompile(`^Current Power\s*:\s*(?P<value>[0-9.]*)\s*Watts.*`)
ipmiChassisPowerRegex = regexp.MustCompile(`^System Power\s*:\s(?P<value>.*)`)
// ipmiChassisDriveFaultRegex matches a line starting with "Drive Fault",
// followed by optional whitespace, a colon and one whitespace character; the
// rest of the line is captured in the named group "value".
ipmiChassisDriveFaultRegex = regexp.MustCompile(`^Drive Fault\s*:\s(?P<value>.*)`)
// ipmiChassisCoolingFaultRegex matches a line starting with
// "Cooling/fan fault", followed by optional whitespace, a colon and one
// whitespace character; the rest of the line is captured in the named group
// "value".
ipmiChassisCoolingFaultRegex = regexp.MustCompile(`^Cooling/fan fault\s*:\s(?P<value>.*)`)
ipmiSELEntriesRegex = regexp.MustCompile(`^Number of log entries\s*:\s(?P<value>[0-9.]*)`)
ipmiSELFreeSpaceRegex = regexp.MustCompile(`^Free space remaining\s*:\s(?P<value>[0-9.]*)\s*bytes.*`)
bmcInfoFirmwareRevisionRegex = regexp.MustCompile(`^Firmware Revision\s*:\s*(?P<value>[0-9.]*).*`)
@ -219,6 +221,34 @@ func GetChassisPowerState(ipmiOutput Result) (float64, error) {
return 0, err
}
// GetChassisDriveFault reports the chassis drive fault state found in
// ipmiOutput.
//
// It returns 1 when the value captured by ipmiChassisDriveFaultRegex is
// exactly "false" (no fault) and 0 for any other captured value. If
// ipmiOutput carries a command error, or getValue fails, it returns -1 and a
// non-nil error.
func GetChassisDriveFault(ipmiOutput Result) (float64, error) {
	if ipmiOutput.err != nil {
		// %w keeps the rendered message identical to the previous %s form
		// while letting callers unwrap the underlying command error.
		return -1, fmt.Errorf("%w: %s", ipmiOutput.err, ipmiOutput.output)
	}
	value, err := getValue(ipmiOutput.output, ipmiChassisDriveFaultRegex)
	if err != nil {
		return -1, err
	}
	if value == "false" {
		return 1, nil
	}
	return 0, nil
}
// GetChassisCoolingFault reports the chassis cooling/fan fault state found in
// ipmiOutput.
//
// It returns 1 when the value captured by ipmiChassisCoolingFaultRegex is
// exactly "false" (no fault) and 0 for any other captured value. If
// ipmiOutput carries a command error, or getValue fails, it returns -1 and a
// non-nil error.
func GetChassisCoolingFault(ipmiOutput Result) (float64, error) {
	if ipmiOutput.err != nil {
		// %w keeps the rendered message identical to the previous %s form
		// while letting callers unwrap the underlying command error.
		return -1, fmt.Errorf("%w: %s", ipmiOutput.err, ipmiOutput.output)
	}
	value, err := getValue(ipmiOutput.output, ipmiChassisCoolingFaultRegex)
	if err != nil {
		return -1, err
	}
	if value == "false" {
		return 1, nil
	}
	return 0, nil
}
func GetBMCInfoFirmwareRevision(ipmiOutput Result) (string, error) {
// Workaround for an issue described here: https://github.com/prometheus-community/ipmi_exporter/issues/57
// The command may fail, but produce usable output (minus the system firmware revision).