Add SEL collector
It exposes two metrics about the IPMI system event log (SEL): the current number of entries stored in it and the free space remaining for new records. The collector is not enabled by default; it has to be explicitly enabled in the config. Related to #41.
This commit is contained in:
parent
7d7e33dc93
commit
0aa63d4c21
22
README.md
22
README.md
|
@ -65,6 +65,7 @@ Make sure you have the following tools from the
|
||||||
- `ipmimonitoring`/`ipmi-sensors`
|
- `ipmimonitoring`/`ipmi-sensors`
|
||||||
- `ipmi-dcmi`
|
- `ipmi-dcmi`
|
||||||
- `bmc-info`
|
- `bmc-info`
|
||||||
|
- `ipmi-sel`
|
||||||
|
|
||||||
### Running as unprivileged user
|
### Running as unprivileged user
|
||||||
|
|
||||||
|
@ -72,8 +73,13 @@ If you are running the exporter as unprivileged user, but need to execute the
|
||||||
FreeIPMI tools as root, you can do the following:
|
FreeIPMI tools as root, you can do the following:
|
||||||
|
|
||||||
1. Add sudoers files to permit the following commands
|
1. Add sudoers files to permit the following commands
|
||||||
```bash
|
```
|
||||||
ipmi-exporter ALL = NOPASSWD:/usr/sbin/ipmimonitoring, /usr/sbin/ipmi-sensors, /usr/sbin/ipmi-dcmi, /usr/sbin/bmc-info, /usr/sbin/ipmi-chassis
|
ipmi-exporter ALL = NOPASSWD: /usr/sbin/ipmimonitoring,\
|
||||||
|
/usr/sbin/ipmi-sensors,\
|
||||||
|
/usr/sbin/ipmi-dcmi,\
|
||||||
|
/usr/sbin/bmc-info,\
|
||||||
|
/usr/sbin/ipmi-chassis,\
|
||||||
|
/usr/sbin/ipmi-sel
|
||||||
```
|
```
|
||||||
2. Create the script under user dir with execute permission
|
2. Create the script under user dir with execute permission
|
||||||
```bash
|
```bash
|
||||||
|
@ -292,6 +298,18 @@ the live power consumption of the machine in Watts. If in doubt, this metric
|
||||||
should be used over any of the sensor data (see below), even if their name
|
should be used over any of the sensor data (see below), even if their name
|
||||||
might suggest that they measure the same thing. This metric has no labels.
|
might suggest that they measure the same thing. This metric has no labels.
|
||||||
|
|
||||||
|
### System event log (SEL) info
|
||||||
|
|
||||||
|
These metrics are only provided if the `sel` collector is enabled (it isn't by
|
||||||
|
default).
|
||||||
|
|
||||||
|
The metric `ipmi_sel_logs_count` contains the current number of entries in
|
||||||
|
the SEL. It is a gauge, as the SEL can be cleared at any time. This metric has
|
||||||
|
no labels.
|
||||||
|
|
||||||
|
The metric `ipmi_sel_free_space_bytes` contains the current amount of free
|
||||||
|
space for new SEL entries, in bytes. This metric has no labels.
|
||||||
|
|
||||||
### Sensors
|
### Sensors
|
||||||
|
|
||||||
These metrics are only provided if the `ipmi` collector is enabled.
|
These metrics are only provided if the `ipmi` collector is enabled.
|
||||||
|
|
69
collector.go
69
collector.go
|
@ -29,6 +29,8 @@ const (
|
||||||
var (
|
var (
|
||||||
ipmiDCMICurrentPowerRegex = regexp.MustCompile(`^Current Power\s*:\s*(?P<value>[0-9.]*)\s*Watts.*`)
|
ipmiDCMICurrentPowerRegex = regexp.MustCompile(`^Current Power\s*:\s*(?P<value>[0-9.]*)\s*Watts.*`)
|
||||||
ipmiChassisPowerRegex = regexp.MustCompile(`^System Power\s*:\s(?P<value>.*)`)
|
ipmiChassisPowerRegex = regexp.MustCompile(`^System Power\s*:\s(?P<value>.*)`)
|
||||||
|
ipmiSELEntriesRegex = regexp.MustCompile(`^Number of log entries\s*:\s(?P<value>[0-9.]*)`)
|
||||||
|
ipmiSELFreeSpaceRegex = regexp.MustCompile(`^Free space remaining\s*:\s(?P<value>[0-9.]*)\s*bytes.*`)
|
||||||
bmcInfoFirmwareRevisionRegex = regexp.MustCompile(`^Firmware Revision\s*:\s*(?P<value>[0-9.]*).*`)
|
bmcInfoFirmwareRevisionRegex = regexp.MustCompile(`^Firmware Revision\s*:\s*(?P<value>[0-9.]*).*`)
|
||||||
bmcInfoManufacturerIDRegex = regexp.MustCompile(`^Manufacturer ID\s*:\s*(?P<value>.*)`)
|
bmcInfoManufacturerIDRegex = regexp.MustCompile(`^Manufacturer ID\s*:\s*(?P<value>.*)`)
|
||||||
)
|
)
|
||||||
|
@ -160,6 +162,20 @@ var (
|
||||||
nil,
|
nil,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
selEntriesCountDesc = prometheus.NewDesc(
|
||||||
|
prometheus.BuildFQName(namespace, "sel", "logs_count"),
|
||||||
|
"Current number of log entries in the SEL.",
|
||||||
|
[]string{},
|
||||||
|
nil,
|
||||||
|
)
|
||||||
|
|
||||||
|
selFreeSpaceDesc = prometheus.NewDesc(
|
||||||
|
prometheus.BuildFQName(namespace, "sel", "free_space_bytes"),
|
||||||
|
"Current free space remaining for new SEL entries.",
|
||||||
|
[]string{},
|
||||||
|
nil,
|
||||||
|
)
|
||||||
|
|
||||||
upDesc = prometheus.NewDesc(
|
upDesc = prometheus.NewDesc(
|
||||||
prometheus.BuildFQName(namespace, "", "up"),
|
prometheus.BuildFQName(namespace, "", "up"),
|
||||||
"'1' if a scrape of the IPMI device was successful, '0' otherwise.",
|
"'1' if a scrape of the IPMI device was successful, '0' otherwise.",
|
||||||
|
@ -271,6 +287,10 @@ func ipmiChassisOutput(target ipmiTarget) ([]byte, error) {
|
||||||
return freeipmiOutput("ipmi-chassis", target, "--get-chassis-status")
|
return freeipmiOutput("ipmi-chassis", target, "--get-chassis-status")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ipmiSELOutput(target ipmiTarget) ([]byte, error) {
|
||||||
|
return freeipmiOutput("ipmi-sel", target, "--info")
|
||||||
|
}
|
||||||
|
|
||||||
func splitMonitoringOutput(impiOutput []byte, excludeSensorIds []int64) ([]sensorData, error) {
|
func splitMonitoringOutput(impiOutput []byte, excludeSensorIds []int64) ([]sensorData, error) {
|
||||||
var result []sensorData
|
var result []sensorData
|
||||||
|
|
||||||
|
@ -356,6 +376,22 @@ func getBMCInfoManufacturerID(ipmiOutput []byte) (string, error) {
|
||||||
return getValue(ipmiOutput, bmcInfoManufacturerIDRegex)
|
return getValue(ipmiOutput, bmcInfoManufacturerIDRegex)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getSELInfoEntriesCount(ipmiOutput []byte) (float64, error) {
|
||||||
|
value, err := getValue(ipmiOutput, ipmiSELEntriesRegex)
|
||||||
|
if err != nil {
|
||||||
|
return -1, err
|
||||||
|
}
|
||||||
|
return strconv.ParseFloat(value, 64)
|
||||||
|
}
|
||||||
|
|
||||||
|
func getSELInfoFreeSpace(ipmiOutput []byte) (float64, error) {
|
||||||
|
value, err := getValue(ipmiOutput, ipmiSELFreeSpaceRegex)
|
||||||
|
if err != nil {
|
||||||
|
return -1, err
|
||||||
|
}
|
||||||
|
return strconv.ParseFloat(value, 64)
|
||||||
|
}
|
||||||
|
|
||||||
// Describe implements Prometheus.Collector.
|
// Describe implements Prometheus.Collector.
|
||||||
func (c collector) Describe(ch chan<- *prometheus.Desc) {
|
func (c collector) Describe(ch chan<- *prometheus.Desc) {
|
||||||
ch <- sensorStateDesc
|
ch <- sensorStateDesc
|
||||||
|
@ -364,6 +400,8 @@ func (c collector) Describe(ch chan<- *prometheus.Desc) {
|
||||||
ch <- temperatureDesc
|
ch <- temperatureDesc
|
||||||
ch <- powerConsumption
|
ch <- powerConsumption
|
||||||
ch <- bmcInfo
|
ch <- bmcInfo
|
||||||
|
ch <- selEntriesCountDesc
|
||||||
|
ch <- selFreeSpaceDesc
|
||||||
ch <- upDesc
|
ch <- upDesc
|
||||||
ch <- durationDesc
|
ch <- durationDesc
|
||||||
}
|
}
|
||||||
|
@ -516,6 +554,35 @@ func collectBmcInfo(ch chan<- prometheus.Metric, target ipmiTarget) (int, error)
|
||||||
return 1, nil
|
return 1, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func collectSELInfo(ch chan<- prometheus.Metric, target ipmiTarget) (int, error) {
|
||||||
|
output, err := ipmiSELOutput(target)
|
||||||
|
if err != nil {
|
||||||
|
log.Debugf("Failed to collect ipmi-sel data from %s: %s", targetName(target.host), err)
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
entriesCount, err := getSELInfoEntriesCount(output)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Failed to parse ipmi-sel data from %s: %s", targetName(target.host), err)
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
freeSpace, err := getSELInfoFreeSpace(output)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Failed to parse ipmi-sel data from %s: %s", targetName(target.host), err)
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
ch <- prometheus.MustNewConstMetric(
|
||||||
|
selEntriesCountDesc,
|
||||||
|
prometheus.GaugeValue,
|
||||||
|
entriesCount,
|
||||||
|
)
|
||||||
|
ch <- prometheus.MustNewConstMetric(
|
||||||
|
selFreeSpaceDesc,
|
||||||
|
prometheus.GaugeValue,
|
||||||
|
freeSpace,
|
||||||
|
)
|
||||||
|
return 1, nil
|
||||||
|
}
|
||||||
|
|
||||||
func markCollectorUp(ch chan<- prometheus.Metric, name string, up int) {
|
func markCollectorUp(ch chan<- prometheus.Metric, name string, up int) {
|
||||||
ch <- prometheus.MustNewConstMetric(
|
ch <- prometheus.MustNewConstMetric(
|
||||||
upDesc,
|
upDesc,
|
||||||
|
@ -556,6 +623,8 @@ func (c collector) Collect(ch chan<- prometheus.Metric) {
|
||||||
up, _ = collectBmcInfo(ch, target)
|
up, _ = collectBmcInfo(ch, target)
|
||||||
case "chassis":
|
case "chassis":
|
||||||
up, _ = collectChassisState(ch, target)
|
up, _ = collectChassisState(ch, target)
|
||||||
|
case "sel":
|
||||||
|
up, _ = collectSELInfo(ch, target)
|
||||||
}
|
}
|
||||||
markCollectorUp(ch, collector, up)
|
markCollectorUp(ch, collector, up)
|
||||||
}
|
}
|
||||||
|
|
|
@ -79,7 +79,7 @@ func (s *IPMIConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
for _, c := range s.Collectors {
|
for _, c := range s.Collectors {
|
||||||
if !(c == "ipmi" || c == "dcmi" || c == "bmc" || c == "chassis") {
|
if !(c == "ipmi" || c == "dcmi" || c == "bmc" || c == "chassis" || c == "sel") {
|
||||||
return fmt.Errorf("unknown collector name: %s", c)
|
return fmt.Errorf("unknown collector name: %s", c)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,12 +4,13 @@
|
||||||
# In most cases, this should work without using a config file at all.
|
# In most cases, this should work without using a config file at all.
|
||||||
modules:
|
modules:
|
||||||
default:
|
default:
|
||||||
# Available collectors are bmc, ipmi, chassis, and dcmi
|
# Available collectors are bmc, ipmi, chassis, dcmi, and sel
|
||||||
collectors:
|
collectors:
|
||||||
- bmc
|
- bmc
|
||||||
- ipmi
|
- ipmi
|
||||||
- dcmi
|
- dcmi
|
||||||
- chassis
|
- chassis
|
||||||
|
- sel
|
||||||
# Got any sensors you don't care about? Add them here.
|
# Got any sensors you don't care about? Add them here.
|
||||||
exclude_sensor_ids:
|
exclude_sensor_ids:
|
||||||
- 2
|
- 2
|
||||||
|
|
|
@ -21,8 +21,8 @@ modules:
|
||||||
# to (session-timeout * #-of-collectors) milliseconds, so set the scrape
|
# to (session-timeout * #-of-collectors) milliseconds, so set the scrape
|
||||||
# timeout in Prometheus accordingly.
|
# timeout in Prometheus accordingly.
|
||||||
timeout: 10000
|
timeout: 10000
|
||||||
# Available collectors are bmc, ipmi, chassis, and dcmi
|
# Available collectors are bmc, ipmi, chassis, dcmi, and sel
|
||||||
# If not specified, all three are used
|
# If _not_ specified, bmc, ipmi, chassis, and dcmi are used
|
||||||
collectors:
|
collectors:
|
||||||
- bmc
|
- bmc
|
||||||
- ipmi
|
- ipmi
|
||||||
|
@ -51,6 +51,7 @@ modules:
|
||||||
driver: "LAN"
|
driver: "LAN"
|
||||||
collectors:
|
collectors:
|
||||||
- ipmi
|
- ipmi
|
||||||
|
- sel
|
||||||
# Need any special workaround flags set? Add them here.
|
# Need any special workaround flags set? Add them here.
|
||||||
# Workaround flags might be needed to address issues with specific vendor implementations
|
# Workaround flags might be needed to address issues with specific vendor implementations
|
||||||
# e.g. https://www.gnu.org/software/freeipmi/freeipmi-faq.html#Why-is-the-output-from-FreeIPMI-different-than-another-software_003f
|
# e.g. https://www.gnu.org/software/freeipmi/freeipmi-faq.html#Why-is-the-output-from-FreeIPMI-different-than-another-software_003f
|
||||||
|
|
Loading…
Reference in New Issue