Support collecting local IPMI metrics

This enables the standard `/metrics` endpoint. A scrape will trigger the
collection of IPMI metrics from the local machine (that the exporter is
running on).
This commit is contained in:
Conrad Hoffmann 2018-08-03 16:23:35 +02:00
parent 49612613b7
commit 2c927eb68e
3 changed files with 124 additions and 56 deletions

View File

@ -1,12 +1,17 @@
Prometheus IPMI Exporter
========================
This is an IPMI over LAN exporter for [Prometheus](https://prometheus.io).
This is an IPMI exporter for [Prometheus](https://prometheus.io).
An instance running on one host can be used to monitor a large number of IPMI
interfaces by passing the `target` parameter to a scrape. It uses tools from
the [FreeIPMI](https://www.gnu.org/software/freeipmi/) suite for the actual
IPMI communication.
It supports both the regular `/metrics` endpoint, exposing metrics from the
host that the exporter is running on, as well as an `/ipmi` endpoint that
supports IPMI over RMCP - one exporter running on one host can be used to
monitor a large number of IPMI interfaces by passing the `target` parameter to
a scrape.
The exporter relies on tools from the
[FreeIPMI](https://www.gnu.org/software/freeipmi/) suite for the actual IPMI
implementation.
## Installation
@ -36,31 +41,56 @@ Make sure you have the following tools from the
## Configuration
The general configuration pattern is similar to that of the [blackbox
exporter](https://github.com/prometheus/blackbox_exporter), i.e. Prometheus
scrapes a small number (possibly one) of IPMI exporters with a `target` URL
parameter to tell the exporter which IPMI device it should use to retrieve the
IPMI metrics. We have taken this approach as IPMI devices often provide useful
information even while the supervised host is turned off. If you are running
the exporter on a separate host anyway, it makes more sense to have only a few
of them, each probing many (possibly thousands of) IPMI devices, rather than
one exporter per IPMI device.
Simply scraping the standard `/metrics` endpoint will make the exporter emit
local IPMI metrics. No special configuration is required.
For remote metrics, the general configuration pattern is similar to that of the
[blackbox exporter](https://github.com/prometheus/blackbox_exporter), i.e.
Prometheus scrapes a small number (possibly one) of IPMI exporters with a
`target` URL parameter to tell the exporter which IPMI device it should use to
retrieve the IPMI metrics. We chose this approach because IPMI devices often
provide useful information even while the supervised host is turned off. If you are
running the exporter on a separate host anyway, it makes more sense to have
only a few of them, each probing many (possibly thousands of) IPMI devices,
rather than one exporter per IPMI device.
### IPMI exporter
The exporter requires a configuration file called `ipmi.yml` (can be
overridden, see above). It must contain user names and passwords for IPMI
access to all targets. It supports a “default” target, which is used as
overridden, see above). To collect local metrics, an empty file is technically
sufficient. For remote metrics, it must contain user names and passwords for
IPMI access to all targets. It supports a “default” target, which is used as
fallback if the target is not explicitly listed in the file.
The configuration file also supports a blacklist of sensors, useful in case of
OEM-specific sensors that FreeIPMI cannot deal with properly or otherwise
misbehaving sensors.
misbehaving sensors. This applies to both local and remote metrics.
See the included `ipmi.yml` file for an example.
### Prometheus
#### Local metrics
Collecting local IPMI metrics is fairly straightforward. Simply configure your
server to scrape the default metrics endpoint on the hosts running the
exporter.
```
- job_name: ipmi
scrape_interval: 1m
scrape_timeout: 30s
metrics_path: /metrics
scheme: http
static_configs:
- targets:
- 10.1.2.23:9290
- 10.1.2.24:9290
- 10.1.2.25:9290
```
#### Remote metrics
To add your IPMI targets to Prometheus, you can use any of the supported
service discovery mechanisms of your choice. The following example uses the
file-based SD and should be easy to adjust to other scenarios.
@ -113,7 +143,7 @@ add the following to your Prometheus config:
- separator: ;
regex: .*
target_label: __address__
replacement: ipmi-exporter.internal.example.com:9198
replacement: ipmi-exporter.internal.example.com:9290
action: replace
```

View File

@ -23,6 +23,8 @@ import (
const namespace = "ipmi"
const targetLocal = ""
var (
ipmiDCMICurrentPowerRegex = regexp.MustCompile(`^Current Power\s*:\s*(?P<value>[0-9.]*)\s*Watts.*`)
bmcInfoFirmwareRevisionRegex = regexp.MustCompile(`^Firmware Revision\s*:\s*(?P<value>[0-9.]*).*`)
@ -44,6 +46,12 @@ type sensorData struct {
Event string
}
// rmcpConfig holds the connection parameters for a remote IPMI
// interface reached over RMCP (IPMI over LAN). Elsewhere in this file a
// nil *rmcpConfig is used to mean "collect from the local machine".
type rmcpConfig struct {
host string // BMC address, passed to the FreeIPMI tools via -h
user string // IPMI username for the target
pass string // IPMI password for the target
}
var (
sensorStateDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "sensor", "state"),
@ -194,36 +202,42 @@ func freeipmiConfigPipe(driver, user, password string) (string, error) {
return pipe, nil
}
func freeipmiOutput(cmd, host, user, password string, arg ...string) ([]byte, error) {
pipe, err := freeipmiConfigPipe("LAN_2_0", user, password)
if err != nil {
return nil, err
func freeipmiOutput(cmd string, rmcp *rmcpConfig, arg ...string) ([]byte, error) {
args := []string{}
if rmcp != nil {
pipe, err := freeipmiConfigPipe("LAN_2_0", rmcp.user, rmcp.pass)
if err != nil {
return nil, err
}
defer os.Remove(pipe)
rmcpArgs := []string{
"--config-file", pipe,
"-h", rmcp.host,
}
args = append(args, rmcpArgs...)
}
defer os.Remove(pipe)
fqcmd := path.Join(*executablesPath, cmd)
args := []string{
"--config-file", pipe,
"-h", host,
}
args = append(args, arg...)
out, err := exec.Command(fqcmd, args...).CombinedOutput()
if err != nil {
log.Errorf("Error while calling %s for %s: %s", cmd, host, out)
log.Errorf("Error while calling %s: %s", cmd, out)
}
return out, err
}
func ipmiMonitoringOutput(host, user, password string) ([]byte, error) {
return freeipmiOutput("ipmimonitoring", host, user, password, "-Q", "--comma-separated-output", "--no-header-output", "--sdr-cache-recreate")
func ipmiMonitoringOutput(rmcp *rmcpConfig) ([]byte, error) {
return freeipmiOutput("ipmimonitoring", rmcp, "-Q", "--comma-separated-output", "--no-header-output", "--sdr-cache-recreate")
}
func ipmiDCMIOutput(host, user, password string) ([]byte, error) {
return freeipmiOutput("ipmi-dcmi", host, user, password, "--get-system-power-statistics")
func ipmiDCMIOutput(rmcp *rmcpConfig) ([]byte, error) {
return freeipmiOutput("ipmi-dcmi", rmcp, "--get-system-power-statistics")
}
func bmcInfoOutput(host, user, password string) ([]byte, error) {
return freeipmiOutput("bmc-info", host, user, password, "--get-device-id")
func bmcInfoOutput(rmcp *rmcpConfig) ([]byte, error) {
return freeipmiOutput("bmc-info", rmcp, "--get-device-id")
}
func splitMonitoringOutput(impiOutput []byte, excludeSensorIds []int64) ([]sensorData, error) {
@ -348,8 +362,8 @@ func collectGenericSensor(ch chan<- prometheus.Metric, state float64, data senso
)
}
func (c collector) collectMonitoring(ch chan<- prometheus.Metric, creds Credentials) (int, error) {
output, err := ipmiMonitoringOutput(c.target, creds.User, creds.Password)
func (c collector) collectMonitoring(ch chan<- prometheus.Metric, rmcp *rmcpConfig) (int, error) {
output, err := ipmiMonitoringOutput(rmcp)
if err != nil {
log.Errorf("Failed to collect ipmimonitoring data: %s", err)
return 0, err
@ -397,8 +411,8 @@ func (c collector) collectMonitoring(ch chan<- prometheus.Metric, creds Credenti
return 1, nil
}
func (c collector) collectDCMI(ch chan<- prometheus.Metric, creds Credentials) (int, error) {
output, err := ipmiDCMIOutput(c.target, creds.User, creds.Password)
func (c collector) collectDCMI(ch chan<- prometheus.Metric, rmcp *rmcpConfig) (int, error) {
output, err := ipmiDCMIOutput(rmcp)
if err != nil {
log.Debugf("Failed to collect ipmi-dcmi data: %s", err)
return 0, err
@ -416,8 +430,8 @@ func (c collector) collectDCMI(ch chan<- prometheus.Metric, creds Credentials) (
return 1, nil
}
func (c collector) collectBmcInfo(ch chan<- prometheus.Metric, creds Credentials) (int, error) {
output, err := bmcInfoOutput(c.target, creds.User, creds.Password)
func (c collector) collectBmcInfo(ch chan<- prometheus.Metric, rmcp *rmcpConfig) (int, error) {
output, err := bmcInfoOutput(rmcp)
if err != nil {
log.Debugf("Failed to collect bmc-info data: %s", err)
return 0, err
@ -467,7 +481,7 @@ func (c collector) Collect(ch chan<- prometheus.Metric) {
start := time.Now()
defer func() {
duration := time.Since(start).Seconds()
log.Debugf("Scrape of target %s took %f seconds.", c.target, duration)
log.Debugf("Scrape of target %s took %f seconds.", targetName(c.target), duration)
ch <- prometheus.MustNewConstMetric(
durationDesc,
prometheus.GaugeValue,
@ -475,16 +489,25 @@ func (c collector) Collect(ch chan<- prometheus.Metric) {
)
}()
creds, err := c.config.CredentialsForTarget(c.target)
if err != nil {
log.Errorf("No credentials available for target %s.", c.target)
c.markCollectorsUp(ch, 0, 0, 0)
return
rmcp := (*rmcpConfig)(nil)
if !targetIsLocal(c.target) {
creds, err := c.config.CredentialsForTarget(c.target)
if err != nil {
log.Errorf("No credentials available for target %s.", c.target)
c.markCollectorsUp(ch, 0, 0, 0)
return
}
rmcp = &rmcpConfig{
host: c.target,
user: creds.User,
pass: creds.Password,
}
}
ipmiUp, _ := c.collectMonitoring(ch, creds)
dcmiUp, _ := c.collectDCMI(ch, creds)
bmcUp, _ := c.collectBmcInfo(ch, creds)
ipmiUp, _ := c.collectMonitoring(ch, rmcp)
dcmiUp, _ := c.collectDCMI(ch, rmcp)
bmcUp, _ := c.collectBmcInfo(ch, rmcp)
c.markCollectorsUp(ch, bmcUp, dcmiUp, ipmiUp)
}
@ -497,3 +520,14 @@ func contains(s []int64, elm int64) bool {
}
return false
}
// targetName returns a human-readable label for a scrape target:
// "[local]" when the target denotes the local machine, otherwise the
// target string itself. Used for log output (see the scrape-duration
// Debugf in Collect).
func targetName(target string) string {
if targetIsLocal(target) {
return "[local]"
}
return target
}
// targetIsLocal reports whether target denotes the local machine, i.e.
// it equals the targetLocal sentinel (the empty string).
func targetIsLocal(target string) bool {
return target == targetLocal
}

18
main.go
View File

@ -34,7 +34,7 @@ var (
reloadCh chan chan error
)
func handler(w http.ResponseWriter, r *http.Request) {
func remoteIpmiHandler(w http.ResponseWriter, r *http.Request) {
target := r.URL.Query().Get("target")
if target == "" {
http.Error(w, "'target' parameter must be specified", 400)
@ -43,8 +43,8 @@ func handler(w http.ResponseWriter, r *http.Request) {
log.Debugf("Scraping target '%s'", target)
registry := prometheus.NewRegistry()
collector := collector{target: target, config: sc}
registry.MustRegister(collector)
remoteCollector := collector{target: target, config: sc}
registry.MustRegister(remoteCollector)
h := promhttp.HandlerFor(registry, promhttp.HandlerOpts{})
h.ServeHTTP(w, r)
}
@ -94,8 +94,11 @@ func main() {
}
}()
http.Handle("/metrics", promhttp.Handler()) // Normal metrics endpoint for IPMI exporter itself.
http.HandleFunc("/ipmi", handler) // Endpoint to do IPMI scrapes.
localCollector := collector{target: targetLocal, config: sc}
prometheus.MustRegister(&localCollector)
http.Handle("/metrics", promhttp.Handler()) // Regular metrics endpoint for local IPMI metrics.
http.HandleFunc("/ipmi", remoteIpmiHandler) // Endpoint to do IPMI scrapes.
http.HandleFunc("/-/reload", updateConfiguration) // Endpoint to reload configuration.
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
@ -120,8 +123,9 @@ func main() {
<form action="/ipmi">
<label>Target:</label> <input type="text" name="target" placeholder="X.X.X.X" value="1.2.3.4"><br>
<input type="submit" value="Submit">
</form>
<p><a href="/config">Config</a></p>
</form>
<p><a href="/metrics">Local metrics</a></p>
<p><a href="/config">Config</a></p>
</body>
</html>`))
})