Add SEL collector

It exposes two metrics about the IPMI system event log (SEL), the
current number of entries stored in it and the free space for new
records. The collector is not enabled by default, it has to be
explicitly enabled in the config.

Related to #41.
This commit is contained in:
Conrad Hoffmann 2020-04-22 22:21:25 +02:00
parent 7d7e33dc93
commit 0aa63d4c21
5 changed files with 95 additions and 6 deletions

View File

@ -65,6 +65,7 @@ Make sure you have the following tools from the
- `ipmimonitoring`/`ipmi-sensors`
- `ipmi-dcmi`
- `bmc-info`
- `ipmi-sel`
### Running as unprivileged user
@ -72,8 +73,13 @@ If you are running the exporter as unprivileged user, but need to execute the
FreeIPMI tools as root, you can do the following:
1. Add sudoers files to permit the following commands
```bash
ipmi-exporter ALL = NOPASSWD:/usr/sbin/ipmimonitoring, /usr/sbin/ipmi-sensors, /usr/sbin/ipmi-dcmi, /usr/sbin/bmc-info, /usr/sbin/ipmi-chassis
```
ipmi-exporter ALL = NOPASSWD: /usr/sbin/ipmimonitoring,\
/usr/sbin/ipmi-sensors,\
/usr/sbin/ipmi-dcmi,\
/usr/sbin/bmc-info,\
/usr/sbin/ipmi-chassis,\
/usr/sbin/ipmi-sel
```
2. Create the script under user dir with execute permission
```bash
@ -292,6 +298,18 @@ the live power consumption of the machine in Watts. If in doubt, this metric
should be used over any of the sensor data (see below), even if their name
might suggest that they measure the same thing. This metric has no labels.
### System event log (SEL) info
These metrics are only provided if the `sel` collector is enabled (it isn't by
default).
The metric `ipmi_sel_logs_count` contains the current number of entries in
the SEL. It is a gauge, as the SEL can be cleared at any time. This metric has
no labels.
The metric `ipmi_sel_free_space_bytes` contains the current amount of free
space for new SEL entries, in bytes. This metric has no labels.
### Sensors
These metrics are only provided if the `ipmi` collector is enabled.

View File

@ -29,6 +29,8 @@ const (
// Line-oriented patterns for extracting single values from FreeIPMI tool
// output. Each pattern is anchored at the start of a line and exposes the
// extracted text through the named capture group "value".
var (
// Matches "Current Power : <number> Watts" and captures the number.
ipmiDCMICurrentPowerRegex = regexp.MustCompile(`^Current Power\s*:\s*(?P<value>[0-9.]*)\s*Watts.*`)
// Matches "System Power : <text>" and captures the rest of the line.
ipmiChassisPowerRegex = regexp.MustCompile(`^System Power\s*:\s(?P<value>.*)`)
// Matches "Number of log entries : <number>" and captures the number.
// Used by getSELInfoEntriesCount on the output of `ipmi-sel --info`.
ipmiSELEntriesRegex = regexp.MustCompile(`^Number of log entries\s*:\s(?P<value>[0-9.]*)`)
// Matches "Free space remaining : <number> bytes" and captures the number.
// Used by getSELInfoFreeSpace on the output of `ipmi-sel --info`.
ipmiSELFreeSpaceRegex = regexp.MustCompile(`^Free space remaining\s*:\s(?P<value>[0-9.]*)\s*bytes.*`)
// Matches "Firmware Revision : <number>" and captures the leading
// digits-and-dots portion; anything after it on the line is ignored.
bmcInfoFirmwareRevisionRegex = regexp.MustCompile(`^Firmware Revision\s*:\s*(?P<value>[0-9.]*).*`)
// Matches "Manufacturer ID : <text>" and captures the rest of the line.
bmcInfoManufacturerIDRegex = regexp.MustCompile(`^Manufacturer ID\s*:\s*(?P<value>.*)`)
)
@ -160,6 +162,20 @@ var (
nil,
)
// selEntriesCountDesc describes the metric reporting how many entries the SEL
// currently holds. Its fully-qualified name is built from the namespace, the
// "sel" subsystem and the name "logs_count"; it declares no variable labels.
selEntriesCountDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "sel", "logs_count"),
"Current number of log entries in the SEL.",
[]string{},
nil,
)
// selFreeSpaceDesc describes the metric reporting the space still available
// for new SEL entries. Its fully-qualified name is built from the namespace,
// the "sel" subsystem and the name "free_space_bytes"; it declares no
// variable labels.
selFreeSpaceDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "sel", "free_space_bytes"),
"Current free space remaining for new SEL entries.",
[]string{},
nil,
)
upDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "up"),
"'1' if a scrape of the IPMI device was successful, '0' otherwise.",
@ -271,6 +287,10 @@ func ipmiChassisOutput(target ipmiTarget) ([]byte, error) {
return freeipmiOutput("ipmi-chassis", target, "--get-chassis-status")
}
// ipmiSELOutput invokes the FreeIPMI `ipmi-sel` tool with the `--info` flag
// for the given target via freeipmiOutput and returns whatever output and
// error that call produces.
func ipmiSELOutput(target ipmiTarget) ([]byte, error) {
return freeipmiOutput("ipmi-sel", target, "--info")
}
func splitMonitoringOutput(impiOutput []byte, excludeSensorIds []int64) ([]sensorData, error) {
var result []sensorData
@ -356,6 +376,22 @@ func getBMCInfoManufacturerID(ipmiOutput []byte) (string, error) {
return getValue(ipmiOutput, bmcInfoManufacturerIDRegex)
}
// getSELInfoEntriesCount pulls the SEL entry count out of `ipmi-sel --info`
// output.
//
// The value is located with ipmiSELEntriesRegex and converted to a float64.
// If the value cannot be located, -1 and the lookup error are returned;
// otherwise the result of the float conversion (value and error) is passed
// through unchanged.
func getSELInfoEntriesCount(ipmiOutput []byte) (float64, error) {
	entries, lookupErr := getValue(ipmiOutput, ipmiSELEntriesRegex)
	if lookupErr != nil {
		return -1, lookupErr
	}
	count, convErr := strconv.ParseFloat(entries, 64)
	return count, convErr
}
// getSELInfoFreeSpace pulls the remaining SEL free space out of
// `ipmi-sel --info` output.
//
// The value is located with ipmiSELFreeSpaceRegex and converted to a float64.
// If the value cannot be located, -1 and the lookup error are returned;
// otherwise the result of the float conversion (value and error) is passed
// through unchanged.
func getSELInfoFreeSpace(ipmiOutput []byte) (float64, error) {
	remaining, lookupErr := getValue(ipmiOutput, ipmiSELFreeSpaceRegex)
	if lookupErr != nil {
		return -1, lookupErr
	}
	freeBytes, convErr := strconv.ParseFloat(remaining, 64)
	return freeBytes, convErr
}
// Describe implements Prometheus.Collector.
func (c collector) Describe(ch chan<- *prometheus.Desc) {
ch <- sensorStateDesc
@ -364,6 +400,8 @@ func (c collector) Describe(ch chan<- *prometheus.Desc) {
ch <- temperatureDesc
ch <- powerConsumption
ch <- bmcInfo
ch <- selEntriesCountDesc
ch <- selFreeSpaceDesc
ch <- upDesc
ch <- durationDesc
}
@ -516,6 +554,35 @@ func collectBmcInfo(ch chan<- prometheus.Metric, target ipmiTarget) (int, error)
return 1, nil
}
// collectSELInfo gathers the SEL metrics for target and sends them on ch.
//
// It runs ipmiSELOutput, parses the entry count and then the free space from
// the result, and only once both values have been parsed emits them as gauges
// (entry count first, free space second). It returns (1, nil) on success. If
// the command fails the error is logged at debug level; if either value
// cannot be parsed the error is logged at error level. In both failure cases
// no metric is sent and (0, err) is returned.
func collectSELInfo(ch chan<- prometheus.Metric, target ipmiTarget) (int, error) {
	raw, err := ipmiSELOutput(target)
	if err != nil {
		log.Debugf("Failed to collect ipmi-sel data from %s: %s", targetName(target.host), err)
		return 0, err
	}

	// Parsers are listed in the order they must run and be emitted.
	gauges := []struct {
		desc  *prometheus.Desc
		parse func([]byte) (float64, error)
	}{
		{desc: selEntriesCountDesc, parse: getSELInfoEntriesCount},
		{desc: selFreeSpaceDesc, parse: getSELInfoFreeSpace},
	}

	// Parse everything up front so that nothing is emitted on a partial failure.
	readings := make([]float64, len(gauges))
	for i, g := range gauges {
		reading, err := g.parse(raw)
		if err != nil {
			log.Errorf("Failed to parse ipmi-sel data from %s: %s", targetName(target.host), err)
			return 0, err
		}
		readings[i] = reading
	}

	for i, g := range gauges {
		ch <- prometheus.MustNewConstMetric(g.desc, prometheus.GaugeValue, readings[i])
	}
	return 1, nil
}
func markCollectorUp(ch chan<- prometheus.Metric, name string, up int) {
ch <- prometheus.MustNewConstMetric(
upDesc,
@ -556,6 +623,8 @@ func (c collector) Collect(ch chan<- prometheus.Metric) {
up, _ = collectBmcInfo(ch, target)
case "chassis":
up, _ = collectChassisState(ch, target)
case "sel":
up, _ = collectSELInfo(ch, target)
}
markCollectorUp(ch, collector, up)
}

View File

@ -79,7 +79,7 @@ func (s *IPMIConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err
}
for _, c := range s.Collectors {
if !(c == "ipmi" || c == "dcmi" || c == "bmc" || c == "chassis") {
if !(c == "ipmi" || c == "dcmi" || c == "bmc" || c == "chassis" || c == "sel") {
return fmt.Errorf("unknown collector name: %s", c)
}
}

View File

@ -4,12 +4,13 @@
# In most cases, this should work without using a config file at all.
modules:
default:
# Available collectors are bmc, ipmi, chassis, and dcmi
# Available collectors are bmc, ipmi, chassis, dcmi, and sel
collectors:
- bmc
- ipmi
- dcmi
- chassis
- sel
# Got any sensors you don't care about? Add them here.
exclude_sensor_ids:
- 2

View File

@ -21,8 +21,8 @@ modules:
# to (session-timeout * #-of-collectors) milliseconds, so set the scrape
# timeout in Prometheus accordingly.
timeout: 10000
# Available collectors are bmc, ipmi, chassis, and dcmi
# If not specified, all three are used
# Available collectors are bmc, ipmi, chassis, dcmi, and sel
# If _not_ specified, bmc, ipmi, chassis, and dcmi are used
collectors:
- bmc
- ipmi
@ -51,6 +51,7 @@ modules:
driver: "LAN"
collectors:
- ipmi
- sel
# Need any special workaround flags set? Add them here.
# Workaround flags might be needed to address issues with specific vendor implementations
# e.g. https://www.gnu.org/software/freeipmi/freeipmi-faq.html#Why-is-the-output-from-FreeIPMI-different-than-another-software_003f