ipmi_exporter/collector_bmc_watchdog.go

165 lines
6.5 KiB
Go

// Copyright 2021 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus-community/ipmi_exporter/freeipmi"
)
// BMCWatchdogCollectorName is the CollectorName value returned by
// BMCWatchdogCollector.Name.
const BMCWatchdogCollectorName CollectorName = "bmc-watchdog"
// Metric descriptors and label value sets for the BMC watchdog collector.
// All metrics share the "bmc_watchdog" subsystem under the exporter namespace.
var (
	// bmcWatchdogTimerDesc describes the gauge reporting whether the watchdog
	// timer is running (1) or stopped (0).
	bmcWatchdogTimerDesc = prometheus.NewDesc(
		prometheus.BuildFQName(namespace, "bmc_watchdog", "timer_state"),
		"Watchdog timer running (1: running, 0: stopped)",
		[]string{},
		nil,
	)

	// watchdogTimerUses lists the values emitted for the "name" label of
	// the timer_use_state metric; one series is exported per entry.
	watchdogTimerUses = []string{"BIOS FRB2", "BIOS POST", "OS LOAD", "SMS/OS", "OEM"}

	// bmcWatchdogTimerUseDesc describes the per-use gauge: 1 for the use
	// currently reported by the BMC, 0 for every other entry.
	bmcWatchdogTimerUseDesc = prometheus.NewDesc(
		prometheus.BuildFQName(namespace, "bmc_watchdog", "timer_use_state"),
		"Watchdog timer use (1: active, 0: inactive)",
		[]string{"name"},
		nil,
	)

	// bmcWatchdogLoggingDesc describes the logging flag gauge. Per the help
	// text, the value is the reverse of what freeipmi reports.
	bmcWatchdogLoggingDesc = prometheus.NewDesc(
		prometheus.BuildFQName(namespace, "bmc_watchdog", "logging_state"),
		"Watchdog log flag (1: Enabled, 0: Disabled / note: reverse of freeipmi)",
		[]string{},
		nil,
	)

	// watchdogTimeoutActions lists the values emitted for the "action" label
	// of the timeout_action_state metric; one series is exported per entry.
	watchdogTimeoutActions = []string{"None", "Hard Reset", "Power Down", "Power Cycle"}

	// bmcWatchdogTimeoutActionDesc describes the per-action gauge: 1 for the
	// configured timeout action, 0 for every other entry.
	bmcWatchdogTimeoutActionDesc = prometheus.NewDesc(
		prometheus.BuildFQName(namespace, "bmc_watchdog", "timeout_action_state"),
		"Watchdog timeout action (1: active, 0: inactive)",
		[]string{"action"},
		nil,
	)

	// watchdogPretimeoutInterrupts lists the values emitted for the
	// "interrupt" label of the pretimeout_interrupt_state metric.
	watchdogPretimeoutInterrupts = []string{"None", "SMI", "NMI / Diagnostic Interrupt", "Messaging Interrupt"}

	// bmcWatchdogPretimeoutInterruptDesc describes the per-interrupt gauge:
	// 1 for the configured pre-timeout interrupt, 0 for every other entry.
	bmcWatchdogPretimeoutInterruptDesc = prometheus.NewDesc(
		prometheus.BuildFQName(namespace, "bmc_watchdog", "pretimeout_interrupt_state"),
		"Watchdog pre-timeout interrupt (1: active, 0: inactive)",
		[]string{"interrupt"},
		nil,
	)

	// bmcWatchdogPretimeoutIntervalDesc describes the pre-timeout interval gauge.
	bmcWatchdogPretimeoutIntervalDesc = prometheus.NewDesc(
		prometheus.BuildFQName(namespace, "bmc_watchdog", "pretimeout_interval_seconds"),
		"Watchdog pre-timeout interval in seconds",
		[]string{},
		nil,
	)

	// bmcWatchdogInitialCountdownDesc describes the initial countdown gauge.
	bmcWatchdogInitialCountdownDesc = prometheus.NewDesc(
		prometheus.BuildFQName(namespace, "bmc_watchdog", "initial_countdown_seconds"),
		"Watchdog initial countdown in seconds",
		[]string{},
		nil,
	)

	// bmcWatchdogCurrentCountdownDesc describes the current countdown gauge.
	// The help text previously duplicated the initial countdown description.
	bmcWatchdogCurrentCountdownDesc = prometheus.NewDesc(
		prometheus.BuildFQName(namespace, "bmc_watchdog", "current_countdown_seconds"),
		"Watchdog current countdown in seconds",
		[]string{},
		nil,
	)
)
// BMCWatchdogCollector turns the output of the "bmc-watchdog" command into
// BMC watchdog metrics. It carries no state.
type BMCWatchdogCollector struct{}
// Name returns BMCWatchdogCollectorName, the name of this collector.
func (c BMCWatchdogCollector) Name() CollectorName {
return BMCWatchdogCollectorName
}
// Cmd returns the command name "bmc-watchdog".
// NOTE(review): presumably the FreeIPMI tool the exporter executes for this
// collector; the caller is not visible in this file.
func (c BMCWatchdogCollector) Cmd() string {
return "bmc-watchdog"
}
// Args returns the command-line arguments for this collector: the single
// flag "--get".
// NOTE(review): presumably passed to the command named by Cmd; confirm
// against the caller.
func (c BMCWatchdogCollector) Args() []string {
return []string{"--get"}
}
// Collect extracts the watchdog values from result and sends the
// corresponding metrics on ch.
//
// Every value is parsed before anything is sent, so a parse failure emits no
// metrics at all: the failure is logged for the target and Collect returns 0
// together with the error. On success it returns 1 and a nil error.
//
// For the timer use, timeout action and pre-timeout interrupt, one series is
// sent per known label value, set to 1 for the value currently reported and
// 0 for all others.
func (c BMCWatchdogCollector) Collect(result freeipmi.Result, ch chan<- prometheus.Metric, target ipmiTarget) (int, error) {
	// abort logs a parse failure for this target and produces the failure
	// return values.
	abort := func(msg string, err error) (int, error) {
		level.Error(logger).Log("msg", msg, "target", targetName(target.host), "error", err)
		return 0, err
	}
	// gauge maps a match result onto the 1/0 value used by the state metrics.
	gauge := func(on bool) float64 {
		if on {
			return 1
		}
		return 0
	}

	running, err := freeipmi.GetBMCWatchdogTimerState(result)
	if err != nil {
		return abort("Failed to collect BMC watchdog timer", err)
	}
	activeUse, err := freeipmi.GetBMCWatchdogTimerUse(result)
	if err != nil {
		return abort("Failed to collect BMC watchdog timer use", err)
	}
	logging, err := freeipmi.GetBMCWatchdogLoggingState(result)
	if err != nil {
		return abort("Failed to collect BMC watchdog logging", err)
	}
	activeAction, err := freeipmi.GetBMCWatchdogTimeoutAction(result)
	if err != nil {
		return abort("Failed to collect BMC watchdog timeout action", err)
	}
	activeInterrupt, err := freeipmi.GetBMCWatchdogPretimeoutInterrupt(result)
	if err != nil {
		return abort("Failed to collect BMC watchdog pretimeout interrupt", err)
	}
	interval, err := freeipmi.GetBMCWatchdogPretimeoutInterval(result)
	if err != nil {
		return abort("Failed to collect BMC watchdog pretimeout interval", err)
	}
	initial, err := freeipmi.GetBMCWatchdogInitialCountdown(result)
	if err != nil {
		return abort("Failed to collect BMC watchdog initial countdown", err)
	}
	remaining, err := freeipmi.GetBMCWatchdogCurrentCountdown(result)
	if err != nil {
		return abort("Failed to collect BMC watchdog current countdown", err)
	}

	// All values parsed; emit the metrics.
	ch <- prometheus.MustNewConstMetric(bmcWatchdogTimerDesc, prometheus.GaugeValue, running)
	for _, use := range watchdogTimerUses {
		ch <- prometheus.MustNewConstMetric(bmcWatchdogTimerUseDesc, prometheus.GaugeValue, gauge(activeUse == use), use)
	}
	ch <- prometheus.MustNewConstMetric(bmcWatchdogLoggingDesc, prometheus.GaugeValue, logging)
	for _, action := range watchdogTimeoutActions {
		ch <- prometheus.MustNewConstMetric(bmcWatchdogTimeoutActionDesc, prometheus.GaugeValue, gauge(activeAction == action), action)
	}
	for _, interrupt := range watchdogPretimeoutInterrupts {
		ch <- prometheus.MustNewConstMetric(bmcWatchdogPretimeoutInterruptDesc, prometheus.GaugeValue, gauge(activeInterrupt == interrupt), interrupt)
	}
	ch <- prometheus.MustNewConstMetric(bmcWatchdogPretimeoutIntervalDesc, prometheus.GaugeValue, interval)
	ch <- prometheus.MustNewConstMetric(bmcWatchdogInitialCountdownDesc, prometheus.GaugeValue, initial)
	ch <- prometheus.MustNewConstMetric(bmcWatchdogCurrentCountdownDesc, prometheus.GaugeValue, remaining)
	return 1, nil
}