diff --git a/README.md b/README.md index 8263d8c..1f745f4 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,7 @@ Make sure you have the following tools from the - `ipmimonitoring`/`ipmi-sensors` - `ipmi-dcmi` - `bmc-info` + - `ipmi-sel` ### Running as unprivileged user @@ -72,8 +73,13 @@ If you are running the exporter as unprivileged user, but need to execute the FreeIPMI tools as root, you can do the following: 1. Add sudoers files to permit the following commands - ```bash - ipmi-exporter ALL = NOPASSWD:/usr/sbin/ipmimonitoring, /usr/sbin/ipmi-sensors, /usr/sbin/ipmi-dcmi, /usr/sbin/bmc-info, /usr/sbin/ipmi-chassis + ``` + ipmi-exporter ALL = NOPASSWD: /usr/sbin/ipmimonitoring,\ + /usr/sbin/ipmi-sensors,\ + /usr/sbin/ipmi-dcmi,\ + /usr/sbin/bmc-info,\ + /usr/sbin/ipmi-chassis,\ + /usr/sbin/ipmi-sel ``` 2. Create the script under user dir with execute permission ```bash @@ -292,6 +298,18 @@ the live power consumption of the machine in Watts. If in doubt, this metric should be used over any of the sensor data (see below), even if their name might suggest that they measure the same thing. This metric has no labels. +### System event log (SEL) info + +These metrics are only provided if the `sel` collector is enabled (it isn't by +default). + +The metric `ipmi_sel_logs_count` contains the current number of entries in +the SEL. It is a gauge, as the SEL can be cleared at any time. This metric has +no labels. + +The metric `ipmi_sel_free_space_bytes` contains the current amount of free +space for new SEL entries, in bytes. This metric has no labels. + ### Sensors These metric are only provided if the `ipmi` collector is enabled. 
diff --git a/collector.go b/collector.go index d12b309..a557e77 100644 --- a/collector.go +++ b/collector.go @@ -29,6 +29,8 @@ const ( var ( ipmiDCMICurrentPowerRegex = regexp.MustCompile(`^Current Power\s*:\s*(?P<value>[0-9.]*)\s*Watts.*`) ipmiChassisPowerRegex = regexp.MustCompile(`^System Power\s*:\s(?P<value>.*)`) + ipmiSELEntriesRegex = regexp.MustCompile(`^Number of log entries\s*:\s(?P<value>[0-9.]*)`) + ipmiSELFreeSpaceRegex = regexp.MustCompile(`^Free space remaining\s*:\s(?P<value>[0-9.]*)\s*bytes.*`) bmcInfoFirmwareRevisionRegex = regexp.MustCompile(`^Firmware Revision\s*:\s*(?P<value>[0-9.]*).*`) bmcInfoManufacturerIDRegex = regexp.MustCompile(`^Manufacturer ID\s*:\s*(?P<value>.*)`) ) @@ -160,6 +162,20 @@ var ( nil, ) + selEntriesCountDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "sel", "logs_count"), + "Current number of log entries in the SEL.", + []string{}, + nil, + ) + + selFreeSpaceDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "sel", "free_space_bytes"), + "Current free space remaining for new SEL entries.", + []string{}, + nil, + ) + upDesc = prometheus.NewDesc( prometheus.BuildFQName(namespace, "", "up"), "'1' if a scrape of the IPMI device was successful, '0' otherwise.", @@ -271,6 +287,10 @@ func ipmiChassisOutput(target ipmiTarget) ([]byte, error) { return freeipmiOutput("ipmi-chassis", target, "--get-chassis-status") } +func ipmiSELOutput(target ipmiTarget) ([]byte, error) { + return freeipmiOutput("ipmi-sel", target, "--info") +} + func splitMonitoringOutput(impiOutput []byte, excludeSensorIds []int64) ([]sensorData, error) { var result []sensorData @@ -356,6 +376,22 @@ func getBMCInfoManufacturerID(ipmiOutput []byte) (string, error) { return getValue(ipmiOutput, bmcInfoManufacturerIDRegex) } +func getSELInfoEntriesCount(ipmiOutput []byte) (float64, error) { + value, err := getValue(ipmiOutput, ipmiSELEntriesRegex) + if err != nil { + return -1, err + } + return strconv.ParseFloat(value, 64) +} + +func getSELInfoFreeSpace(ipmiOutput []byte) 
(float64, error) { + value, err := getValue(ipmiOutput, ipmiSELFreeSpaceRegex) + if err != nil { + return -1, err + } + return strconv.ParseFloat(value, 64) +} + // Describe implements Prometheus.Collector. func (c collector) Describe(ch chan<- *prometheus.Desc) { ch <- sensorStateDesc @@ -364,6 +400,8 @@ func (c collector) Describe(ch chan<- *prometheus.Desc) { ch <- temperatureDesc ch <- powerConsumption ch <- bmcInfo + ch <- selEntriesCountDesc + ch <- selFreeSpaceDesc ch <- upDesc ch <- durationDesc } @@ -516,6 +554,35 @@ func collectBmcInfo(ch chan<- prometheus.Metric, target ipmiTarget) (int, error) return 1, nil } +func collectSELInfo(ch chan<- prometheus.Metric, target ipmiTarget) (int, error) { + output, err := ipmiSELOutput(target) + if err != nil { + log.Debugf("Failed to collect ipmi-sel data from %s: %s", targetName(target.host), err) + return 0, err + } + entriesCount, err := getSELInfoEntriesCount(output) + if err != nil { + log.Errorf("Failed to parse ipmi-sel data from %s: %s", targetName(target.host), err) + return 0, err + } + freeSpace, err := getSELInfoFreeSpace(output) + if err != nil { + log.Errorf("Failed to parse ipmi-sel data from %s: %s", targetName(target.host), err) + return 0, err + } + ch <- prometheus.MustNewConstMetric( + selEntriesCountDesc, + prometheus.GaugeValue, + entriesCount, + ) + ch <- prometheus.MustNewConstMetric( + selFreeSpaceDesc, + prometheus.GaugeValue, + freeSpace, + ) + return 1, nil +} + func markCollectorUp(ch chan<- prometheus.Metric, name string, up int) { ch <- prometheus.MustNewConstMetric( upDesc, @@ -556,6 +623,8 @@ func (c collector) Collect(ch chan<- prometheus.Metric) { up, _ = collectBmcInfo(ch, target) case "chassis": up, _ = collectChassisState(ch, target) + case "sel": + up, _ = collectSELInfo(ch, target) } markCollectorUp(ch, collector, up) } diff --git a/config.go b/config.go index 011af48..09c4afd 100644 --- a/config.go +++ b/config.go @@ -79,7 +79,7 @@ func (s *IPMIConfig) UnmarshalYAML(unmarshal 
func(interface{}) error) error { return err } for _, c := range s.Collectors { - if !(c == "ipmi" || c == "dcmi" || c == "bmc" || c == "chassis") { + if !(c == "ipmi" || c == "dcmi" || c == "bmc" || c == "chassis" || c == "sel") { return fmt.Errorf("unknown collector name: %s", c) } } diff --git a/ipmi_local.yml b/ipmi_local.yml index 1ce487a..0a7d4e1 100644 --- a/ipmi_local.yml +++ b/ipmi_local.yml @@ -4,12 +4,13 @@ # In most cases, this should work without using a config file at all. modules: default: - # Available collectors are bmc, ipmi, chassis, and dcmi + # Available collectors are bmc, ipmi, chassis, dcmi, and sel collectors: - bmc - ipmi - dcmi - chassis + - sel # Got any sensors you don't care about? Add them here. exclude_sensor_ids: - 2 diff --git a/ipmi_remote.yml b/ipmi_remote.yml index d2784ae..e049208 100644 --- a/ipmi_remote.yml +++ b/ipmi_remote.yml @@ -21,8 +21,8 @@ modules: # to (session-timeout * #-of-collectors) milliseconds, so set the scrape # timeout in Prometheus accordingly. timeout: 10000 - # Available collectors are bmc, ipmi, chassis, and dcmi - # If not specified, all three are used + # Available collectors are bmc, ipmi, chassis, dcmi, and sel + # If _not_ specified, bmc, ipmi, chassis, and dcmi are used collectors: - bmc - ipmi @@ -51,6 +51,7 @@ modules: driver: "LAN" collectors: - ipmi + - sel # Need any special workaround flags set? Add them here. # Workaround flags might be needed to address issues with specific vendor implementations # e.g. https://www.gnu.org/software/freeipmi/freeipmi-faq.html#Why-is-the-output-from-FreeIPMI-different-than-another-software_003f