Merge pull request #145 from petshopjke1337/add_more_faults_metric

Add chassis cooling fault and drive fault metrics
This commit is contained in:
Conrad Hoffmann 2023-03-13 15:26:47 +01:00 committed by GitHub
commit 22285c9300
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 54 additions and 0 deletions

View File

@ -31,6 +31,18 @@ var (
[]string{}, []string{},
nil, nil,
) )
// chassisDriveFaultDesc describes the chassis "drive_fault_state" metric.
// Per its help text the value is inverted relative to the fault flag:
// 1 means the fault is reported as false, 0 means it is reported as true.
// The descriptor declares no variable labels and no constant labels.
chassisDriveFaultDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "chassis", "drive_fault_state"),
"Current drive fault state (1=false, 0=true).",
[]string{},
nil,
)
// chassisCoolingFaultDesc describes the chassis "cooling_fault_state" metric.
// Per its help text the value is inverted relative to the fault flag:
// 1 means the fault is reported as false, 0 means it is reported as true.
// The descriptor declares no variable labels and no constant labels.
chassisCoolingFaultDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "chassis", "cooling_fault_state"),
"Current Cooling/fan fault state (1=false, 0=true).",
[]string{},
nil,
)
) )
type ChassisCollector struct{} type ChassisCollector struct{}
@ -49,6 +61,8 @@ func (c ChassisCollector) Args() []string {
func (c ChassisCollector) Collect(result freeipmi.Result, ch chan<- prometheus.Metric, target ipmiTarget) (int, error) { func (c ChassisCollector) Collect(result freeipmi.Result, ch chan<- prometheus.Metric, target ipmiTarget) (int, error) {
currentChassisPowerState, err := freeipmi.GetChassisPowerState(result) currentChassisPowerState, err := freeipmi.GetChassisPowerState(result)
currentChassisDriveFault, err := freeipmi.GetChassisDriveFault(result)
currentChassisCoolingFault, err := freeipmi.GetChassisCoolingFault(result)
if err != nil { if err != nil {
level.Error(logger).Log("msg", "Failed to collect chassis data", "target", targetName(target.host), "error", err) level.Error(logger).Log("msg", "Failed to collect chassis data", "target", targetName(target.host), "error", err)
return 0, err return 0, err
@ -58,5 +72,15 @@ func (c ChassisCollector) Collect(result freeipmi.Result, ch chan<- prometheus.M
prometheus.GaugeValue, prometheus.GaugeValue,
currentChassisPowerState, currentChassisPowerState,
) )
ch <- prometheus.MustNewConstMetric(
chassisDriveFaultDesc,
prometheus.GaugeValue,
currentChassisDriveFault,
)
ch <- prometheus.MustNewConstMetric(
chassisCoolingFaultDesc,
prometheus.GaugeValue,
currentChassisCoolingFault,
)
return 1, nil return 1, nil
} }

View File

@ -35,6 +35,8 @@ import (
var ( var (
ipmiDCMICurrentPowerRegex = regexp.MustCompile(`^Current Power\s*:\s*(?P<value>[0-9.]*)\s*Watts.*`) ipmiDCMICurrentPowerRegex = regexp.MustCompile(`^Current Power\s*:\s*(?P<value>[0-9.]*)\s*Watts.*`)
ipmiChassisPowerRegex = regexp.MustCompile(`^System Power\s*:\s(?P<value>.*)`) ipmiChassisPowerRegex = regexp.MustCompile(`^System Power\s*:\s(?P<value>.*)`)
// ipmiChassisDriveFaultRegex matches a line starting with "Drive Fault" and
// captures everything after the colon and one whitespace character as "value".
ipmiChassisDriveFaultRegex = regexp.MustCompile(`^Drive Fault\s*:\s(?P<value>.*)`)
// ipmiChassisCoolingFaultRegex matches a line starting with "Cooling/fan fault"
// and captures everything after the colon and one whitespace character as "value".
ipmiChassisCoolingFaultRegex = regexp.MustCompile(`^Cooling/fan fault\s*:\s(?P<value>.*)`)
ipmiSELEntriesRegex = regexp.MustCompile(`^Number of log entries\s*:\s(?P<value>[0-9.]*)`) ipmiSELEntriesRegex = regexp.MustCompile(`^Number of log entries\s*:\s(?P<value>[0-9.]*)`)
ipmiSELFreeSpaceRegex = regexp.MustCompile(`^Free space remaining\s*:\s(?P<value>[0-9.]*)\s*bytes.*`) ipmiSELFreeSpaceRegex = regexp.MustCompile(`^Free space remaining\s*:\s(?P<value>[0-9.]*)\s*bytes.*`)
bmcInfoFirmwareRevisionRegex = regexp.MustCompile(`^Firmware Revision\s*:\s*(?P<value>[0-9.]*).*`) bmcInfoFirmwareRevisionRegex = regexp.MustCompile(`^Firmware Revision\s*:\s*(?P<value>[0-9.]*).*`)
@ -219,6 +221,34 @@ func GetChassisPowerState(ipmiOutput Result) (float64, error) {
return 0, err return 0, err
} }
// GetChassisDriveFault converts the "Drive Fault" field found in ipmiOutput
// into a gauge value.
//
// It returns 1 when the extracted value is exactly "false" and 0 for any
// other extracted value. It returns -1 and a non-nil error when the command
// result already carries an error (the error text is combined with the raw
// output) or when getValue fails to extract the field.
func GetChassisDriveFault(ipmiOutput Result) (float64, error) {
	if cmdErr := ipmiOutput.err; cmdErr != nil {
		return -1, fmt.Errorf("%s: %s", cmdErr, ipmiOutput.output)
	}

	state, err := getValue(ipmiOutput.output, ipmiChassisDriveFaultRegex)
	switch {
	case err != nil:
		return -1, err
	case state == "false":
		// Inverted on purpose: "false" (no fault) is exported as 1.
		return 1, nil
	default:
		return 0, nil
	}
}
// GetChassisCoolingFault converts the "Cooling/fan fault" field found in
// ipmiOutput into a gauge value.
//
// It returns 1 when the extracted value is exactly "false" and 0 for any
// other extracted value. It returns -1 and a non-nil error when the command
// result already carries an error (the error text is combined with the raw
// output) or when getValue fails to extract the field.
func GetChassisCoolingFault(ipmiOutput Result) (float64, error) {
	if cmdErr := ipmiOutput.err; cmdErr != nil {
		return -1, fmt.Errorf("%s: %s", cmdErr, ipmiOutput.output)
	}

	state, err := getValue(ipmiOutput.output, ipmiChassisCoolingFaultRegex)
	switch {
	case err != nil:
		return -1, err
	case state == "false":
		// Inverted on purpose: "false" (no fault) is exported as 1.
		return 1, nil
	default:
		return 0, nil
	}
}
func GetBMCInfoFirmwareRevision(ipmiOutput Result) (string, error) { func GetBMCInfoFirmwareRevision(ipmiOutput Result) (string, error) {
// Workaround for an issue described here: https://github.com/prometheus-community/ipmi_exporter/issues/57 // Workaround for an issue described here: https://github.com/prometheus-community/ipmi_exporter/issues/57
// The command may fail, but produce usable output (minus the system firmware revision). // The command may fail, but produce usable output (minus the system firmware revision).