Add SEL collector

It exposes two metrics about the IPMI system event log (SEL): the
current number of entries stored in it and the free space for new
records. The collector is not enabled by default; it has to be
explicitly enabled in the config.

Related to #41.
This commit is contained in:
Conrad Hoffmann 2020-04-22 22:21:25 +02:00
parent 7d7e33dc93
commit 0aa63d4c21
5 changed files with 95 additions and 6 deletions

View File

@ -65,6 +65,7 @@ Make sure you have the following tools from the
- `ipmimonitoring`/`ipmi-sensors` - `ipmimonitoring`/`ipmi-sensors`
- `ipmi-dcmi` - `ipmi-dcmi`
- `bmc-info` - `bmc-info`
- `ipmi-sel`
### Running as unprivileged user ### Running as unprivileged user
@ -72,8 +73,13 @@ If you are running the exporter as unprivileged user, but need to execute the
FreeIPMI tools as root, you can do the following: FreeIPMI tools as root, you can do the following:
1. Add sudoers files to permit the following commands 1. Add sudoers files to permit the following commands
```bash ```
ipmi-exporter ALL = NOPASSWD:/usr/sbin/ipmimonitoring, /usr/sbin/ipmi-sensors, /usr/sbin/ipmi-dcmi, /usr/sbin/bmc-info, /usr/sbin/ipmi-chassis ipmi-exporter ALL = NOPASSWD: /usr/sbin/ipmimonitoring,\
/usr/sbin/ipmi-sensors,\
/usr/sbin/ipmi-dcmi,\
/usr/sbin/bmc-info,\
/usr/sbin/ipmi-chassis,\
/usr/sbin/ipmi-sel
``` ```
2. Create the script under user dir with execute permission 2. Create the script under user dir with execute permission
```bash ```bash
@ -292,6 +298,18 @@ the live power consumption of the machine in Watts. If in doubt, this metric
should be used over any of the sensor data (see below), even if their name should be used over any of the sensor data (see below), even if their name
might suggest that they measure the same thing. This metric has no labels. might suggest that they measure the same thing. This metric has no labels.
### System event log (SEL) info
These metrics are only provided if the `sel` collector is enabled (it isn't by
default).
The metric `ipmi_sel_logs_count` contains the current number of entries in
the SEL. It is a gauge, as the SEL can be cleared at any time. This metric has
no labels.
The metric `ipmi_sel_free_space_bytes` contains the current amount of free
space for new SEL entries, in bytes. This metric has no labels.
### Sensors ### Sensors
These metrics are only provided if the `ipmi` collector is enabled. These metrics are only provided if the `ipmi` collector is enabled.

View File

@ -29,6 +29,8 @@ const (
var ( var (
ipmiDCMICurrentPowerRegex = regexp.MustCompile(`^Current Power\s*:\s*(?P<value>[0-9.]*)\s*Watts.*`) ipmiDCMICurrentPowerRegex = regexp.MustCompile(`^Current Power\s*:\s*(?P<value>[0-9.]*)\s*Watts.*`)
ipmiChassisPowerRegex = regexp.MustCompile(`^System Power\s*:\s(?P<value>.*)`) ipmiChassisPowerRegex = regexp.MustCompile(`^System Power\s*:\s(?P<value>.*)`)
ipmiSELEntriesRegex = regexp.MustCompile(`^Number of log entries\s*:\s(?P<value>[0-9.]*)`)
ipmiSELFreeSpaceRegex = regexp.MustCompile(`^Free space remaining\s*:\s(?P<value>[0-9.]*)\s*bytes.*`)
bmcInfoFirmwareRevisionRegex = regexp.MustCompile(`^Firmware Revision\s*:\s*(?P<value>[0-9.]*).*`) bmcInfoFirmwareRevisionRegex = regexp.MustCompile(`^Firmware Revision\s*:\s*(?P<value>[0-9.]*).*`)
bmcInfoManufacturerIDRegex = regexp.MustCompile(`^Manufacturer ID\s*:\s*(?P<value>.*)`) bmcInfoManufacturerIDRegex = regexp.MustCompile(`^Manufacturer ID\s*:\s*(?P<value>.*)`)
) )
@ -160,6 +162,20 @@ var (
nil, nil,
) )
// selEntriesCountDesc describes the metric built from namespace, subsystem
// "sel" and name "logs_count". It declares no variable labels and no constant
// labels; collectSELInfo emits it as a gauge.
selEntriesCountDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "sel", "logs_count"),
"Current number of log entries in the SEL.",
[]string{},
nil,
)
// selFreeSpaceDesc describes the metric built from namespace, subsystem "sel"
// and name "free_space_bytes". It declares no variable labels and no constant
// labels; collectSELInfo emits it as a gauge.
selFreeSpaceDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "sel", "free_space_bytes"),
"Current free space remaining for new SEL entries.",
[]string{},
nil,
)
upDesc = prometheus.NewDesc( upDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "up"), prometheus.BuildFQName(namespace, "", "up"),
"'1' if a scrape of the IPMI device was successful, '0' otherwise.", "'1' if a scrape of the IPMI device was successful, '0' otherwise.",
@ -271,6 +287,10 @@ func ipmiChassisOutput(target ipmiTarget) ([]byte, error) {
return freeipmiOutput("ipmi-chassis", target, "--get-chassis-status") return freeipmiOutput("ipmi-chassis", target, "--get-chassis-status")
} }
// ipmiSELOutput delegates to freeipmiOutput with the command name "ipmi-sel"
// and the single argument "--info" for the given target, and returns that
// call's output bytes and error unchanged.
func ipmiSELOutput(target ipmiTarget) ([]byte, error) {
	out, err := freeipmiOutput("ipmi-sel", target, "--info")
	return out, err
}
func splitMonitoringOutput(impiOutput []byte, excludeSensorIds []int64) ([]sensorData, error) { func splitMonitoringOutput(impiOutput []byte, excludeSensorIds []int64) ([]sensorData, error) {
var result []sensorData var result []sensorData
@ -356,6 +376,22 @@ func getBMCInfoManufacturerID(ipmiOutput []byte) (string, error) {
return getValue(ipmiOutput, bmcInfoManufacturerIDRegex) return getValue(ipmiOutput, bmcInfoManufacturerIDRegex)
} }
// getSELInfoEntriesCount looks up the value captured by ipmiSELEntriesRegex in
// ipmiOutput (via getValue) and parses it as a 64-bit float.
//
// If getValue fails, -1 and that error are returned. Otherwise the result and
// error of strconv.ParseFloat are returned as-is.
func getSELInfoEntriesCount(ipmiOutput []byte) (float64, error) {
	raw, err := getValue(ipmiOutput, ipmiSELEntriesRegex)
	if err == nil {
		return strconv.ParseFloat(raw, 64)
	}
	return -1, err
}
// getSELInfoFreeSpace looks up the value captured by ipmiSELFreeSpaceRegex in
// ipmiOutput (via getValue) and parses it as a 64-bit float.
//
// If getValue fails, -1 and that error are returned. Otherwise the result and
// error of strconv.ParseFloat are returned as-is.
func getSELInfoFreeSpace(ipmiOutput []byte) (float64, error) {
	raw, err := getValue(ipmiOutput, ipmiSELFreeSpaceRegex)
	if err == nil {
		return strconv.ParseFloat(raw, 64)
	}
	return -1, err
}
// Describe implements Prometheus.Collector. // Describe implements Prometheus.Collector.
func (c collector) Describe(ch chan<- *prometheus.Desc) { func (c collector) Describe(ch chan<- *prometheus.Desc) {
ch <- sensorStateDesc ch <- sensorStateDesc
@ -364,6 +400,8 @@ func (c collector) Describe(ch chan<- *prometheus.Desc) {
ch <- temperatureDesc ch <- temperatureDesc
ch <- powerConsumption ch <- powerConsumption
ch <- bmcInfo ch <- bmcInfo
ch <- selEntriesCountDesc
ch <- selFreeSpaceDesc
ch <- upDesc ch <- upDesc
ch <- durationDesc ch <- durationDesc
} }
@ -516,6 +554,35 @@ func collectBmcInfo(ch chan<- prometheus.Metric, target ipmiTarget) (int, error)
return 1, nil return 1, nil
} }
// collectSELInfo fetches the ipmi-sel output for target, extracts the entry
// count and the free space from it, and sends both as gauge metrics on ch.
//
// It returns (1, nil) once both metrics have been sent. If fetching the output
// or parsing either value fails, the failure is logged and (0, err) is
// returned. Both values are parsed before anything is sent, so a failure
// never leaves only one of the two metrics on ch.
func collectSELInfo(ch chan<- prometheus.Metric, target ipmiTarget) (int, error) {
	raw, err := ipmiSELOutput(target)
	if err != nil {
		log.Debugf("Failed to collect ipmi-sel data from %s: %s", targetName(target.host), err)
		return 0, err
	}

	var entries, free float64
	if entries, err = getSELInfoEntriesCount(raw); err != nil {
		log.Errorf("Failed to parse ipmi-sel data from %s: %s", targetName(target.host), err)
		return 0, err
	}
	if free, err = getSELInfoFreeSpace(raw); err != nil {
		log.Errorf("Failed to parse ipmi-sel data from %s: %s", targetName(target.host), err)
		return 0, err
	}

	ch <- prometheus.MustNewConstMetric(selEntriesCountDesc, prometheus.GaugeValue, entries)
	ch <- prometheus.MustNewConstMetric(selFreeSpaceDesc, prometheus.GaugeValue, free)
	return 1, nil
}
func markCollectorUp(ch chan<- prometheus.Metric, name string, up int) { func markCollectorUp(ch chan<- prometheus.Metric, name string, up int) {
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
upDesc, upDesc,
@ -556,6 +623,8 @@ func (c collector) Collect(ch chan<- prometheus.Metric) {
up, _ = collectBmcInfo(ch, target) up, _ = collectBmcInfo(ch, target)
case "chassis": case "chassis":
up, _ = collectChassisState(ch, target) up, _ = collectChassisState(ch, target)
case "sel":
up, _ = collectSELInfo(ch, target)
} }
markCollectorUp(ch, collector, up) markCollectorUp(ch, collector, up)
} }

View File

@ -79,7 +79,7 @@ func (s *IPMIConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err return err
} }
for _, c := range s.Collectors { for _, c := range s.Collectors {
if !(c == "ipmi" || c == "dcmi" || c == "bmc" || c == "chassis") { if !(c == "ipmi" || c == "dcmi" || c == "bmc" || c == "chassis" || c == "sel") {
return fmt.Errorf("unknown collector name: %s", c) return fmt.Errorf("unknown collector name: %s", c)
} }
} }

View File

@ -4,12 +4,13 @@
# In most cases, this should work without using a config file at all. # In most cases, this should work without using a config file at all.
modules: modules:
default: default:
# Available collectors are bmc, ipmi, chassis, and dcmi # Available collectors are bmc, ipmi, chassis, dcmi, and sel
collectors: collectors:
- bmc - bmc
- ipmi - ipmi
- dcmi - dcmi
- chassis - chassis
- sel
# Got any sensors you don't care about? Add them here. # Got any sensors you don't care about? Add them here.
exclude_sensor_ids: exclude_sensor_ids:
- 2 - 2

View File

@ -21,8 +21,8 @@ modules:
# to (session-timeout * #-of-collectors) milliseconds, so set the scrape # to (session-timeout * #-of-collectors) milliseconds, so set the scrape
# timeout in Prometheus accordingly. # timeout in Prometheus accordingly.
timeout: 10000 timeout: 10000
# Available collectors are bmc, ipmi, chassis, and dcmi # Available collectors are bmc, ipmi, chassis, dcmi, and sel
# If not specified, all three are used # If _not_ specified, bmc, ipmi, chassis, and dcmi are used
collectors: collectors:
- bmc - bmc
- ipmi - ipmi
@ -51,6 +51,7 @@ modules:
driver: "LAN" driver: "LAN"
collectors: collectors:
- ipmi - ipmi
- sel
# Need any special workaround flags set? Add them here. # Need any special workaround flags set? Add them here.
# Workaround flags might be needed to address issues with specific vendor implementations # Workaround flags might be needed to address issues with specific vendor implementations
# e.g. https://www.gnu.org/software/freeipmi/freeipmi-faq.html#Why-is-the-output-from-FreeIPMI-different-than-another-software_003f # e.g. https://www.gnu.org/software/freeipmi/freeipmi-faq.html#Why-is-the-output-from-FreeIPMI-different-than-another-software_003f