From 2c927eb68eabc09ea3739ae91f5e50dbf9bca38a Mon Sep 17 00:00:00 2001 From: Conrad Hoffmann Date: Fri, 3 Aug 2018 16:23:35 +0200 Subject: [PATCH] Support collecting local IPMI metrics This enables the standard `/metrics` endpoint. A scrape will trigger the collection of IPMI metrics from the local machine (that the exporter is running on). --- README.md | 66 ++++++++++++++++++++++++++---------- collector.go | 96 +++++++++++++++++++++++++++++++++++----------------- main.go | 18 ++++++---- 3 files changed, 124 insertions(+), 56 deletions(-) diff --git a/README.md b/README.md index 51a78ff..29e1c5b 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,17 @@ Prometheus IPMI Exporter ======================== -This is an IPMI over LAN exporter for [Prometheus](https://prometheus.io). +This is an IPMI exporter for [Prometheus](https://prometheus.io). -An instance running on one host can be used to monitor a large number of IPMI -interfaces by passing the `target` parameter to a scrape. It uses tools from -the [FreeIPMI](https://www.gnu.org/software/freeipmi/) suite for the actual -IPMI communication. +It supports both the regular `/metrics` endpoint, exposing metrics from the +host that the exporter is running on, as well as an `/ipmi` endpoint that +supports IPMI over RMCP - one exporter running on one host can be used to +monitor a large number of IPMI interfaces by passing the `target` parameter to +a scrape. + +The exporter relies on tools from the +[FreeIPMI](https://www.gnu.org/software/freeipmi/) suite for the actual IPMI +implementation. ## Installation @@ -36,31 +41,56 @@ Make sure you have the following tools from the ## Configuration -The general configuration pattern is similar to that of the [blackbox -exporter](https://github.com/prometheus/blackbox_exporter), i.e. Prometheus -scrapes a small number (possibly one) of IPMI exporters with a `target` URL -parameter to tell the exporter which IPMI device it should use to retrieve the -IPMI metrics. We have taken this approach as IPMI devices often provide useful -information even while the supervised host is turned off. If you are running -the exporter on a separate host anyway, it makes more sense to have only a few -of them, each probing many (possibly thousands of) IPMI devices, rather than -one exporter per IPMI device. +Simply scraping the standard `/metrics` endpoint will make the exporter emit +local IPMI metrics. No special configuration is required. + +For remote metrics, the general configuration pattern is similar to that of the +[blackbox exporter](https://github.com/prometheus/blackbox_exporter), i.e. +Prometheus scrapes a small number (possibly one) of IPMI exporters with a +`target` URL parameter to tell the exporter which IPMI device it should use to +retrieve the IPMI metrics. We offer this approach as IPMI devices often provide +useful information even while the supervised host is turned off. If you are +running the exporter on a separate host anyway, it makes more sense to have +only a few of them, each probing many (possibly thousands of) IPMI devices, +rather than one exporter per IPMI device. ### IPMI exporter The exporter requires a configuration file called `ipmi.yml` (can be -overridden, see above). It must contain user names and passwords for IPMI -access to all targets. It supports a “default” target, which is used as +overridden, see above). To collect local metrics, an empty file is technically +sufficient. For remote metrics, it must contain user names and passwords for +IPMI access to all targets. It supports a “default” target, which is used as fallback if the target is not explicitly listed in the file. The configuration file also supports a blacklist of sensors, useful in case of OEM-specific sensors that FreeIPMI cannot deal with properly or otherwise -misbehaving sensors. +misbehaving sensors. This applies to both local and remote metrics. See the included `ipmi.yml` file for an example. ### Prometheus +#### Local metrics + +Collecting local IPMI metrics is fairly straightforward. Simply configure your +server to scrape the default metrics endpoint on the hosts running the +exporter. + +``` +- job_name: ipmi + scrape_interval: 1m + scrape_timeout: 30s + metrics_path: /metrics + scheme: http + static_configs: + - targets: + - 10.1.2.23:9290 + - 10.1.2.24:9290 + - 10.1.2.25:9290 +``` + +#### Remote metrics + To add your IPMI targets to Prometheus, you can use any of the supported service discovery mechanism of your choice. The following example uses the file-based SD and should be easy to adjust to other scenarios. @@ -113,7 +143,7 @@ add the following to your Prometheus config: - separator: ; regex: .* target_label: __address__ - replacement: ipmi-exporter.internal.example.com:9198 + replacement: ipmi-exporter.internal.example.com:9290 action: replace ``` diff --git a/collector.go b/collector.go index a6340cc..f30a7ae 100644 --- a/collector.go +++ b/collector.go @@ -23,6 +23,8 @@ import ( const namespace = "ipmi" +const targetLocal = "" + var ( ipmiDCMICurrentPowerRegex = regexp.MustCompile(`^Current Power\s*:\s*(?P[0-9.]*)\s*Watts.*`) bmcInfoFirmwareRevisionRegex = regexp.MustCompile(`^Firmware Revision\s*:\s*(?P[0-9.]*).*`) @@ -44,6 +46,12 @@ type sensorData struct { Event string } +type rmcpConfig struct { + host string + user string + pass string +} + var ( sensorStateDesc = prometheus.NewDesc( prometheus.BuildFQName(namespace, "sensor", "state"), @@ -194,36 +202,42 @@ func freeipmiConfigPipe(driver, user, password string) (string, error) { return pipe, nil } -func freeipmiOutput(cmd, host, user, password string, arg ...string) ([]byte, error) { - pipe, err := freeipmiConfigPipe("LAN_2_0", user, password) - if err != nil { - return nil, err +func freeipmiOutput(cmd string, rmcp *rmcpConfig, arg ...string) ([]byte, error) { + args := []string{} + + if rmcp != nil { + pipe, err := freeipmiConfigPipe("LAN_2_0", rmcp.user, rmcp.pass) + if err != nil { + return nil, err + } + defer os.Remove(pipe) + + rmcpArgs := []string{ + "--config-file", pipe, + "-h", rmcp.host, + } + args = append(args, rmcpArgs...) } - defer os.Remove(pipe) fqcmd := path.Join(*executablesPath, cmd) - args := []string{ - "--config-file", pipe, - "-h", host, - } args = append(args, arg...) out, err := exec.Command(fqcmd, args...).CombinedOutput() if err != nil { - log.Errorf("Error while calling %s for %s: %s", cmd, host, out) + log.Errorf("Error while calling %s: %s", cmd, out) } return out, err } -func ipmiMonitoringOutput(host, user, password string) ([]byte, error) { - return freeipmiOutput("ipmimonitoring", host, user, password, "-Q", "--comma-separated-output", "--no-header-output", "--sdr-cache-recreate") +func ipmiMonitoringOutput(rmcp *rmcpConfig) ([]byte, error) { + return freeipmiOutput("ipmimonitoring", rmcp, "-Q", "--comma-separated-output", "--no-header-output", "--sdr-cache-recreate") } -func ipmiDCMIOutput(host, user, password string) ([]byte, error) { - return freeipmiOutput("ipmi-dcmi", host, user, password, "--get-system-power-statistics") +func ipmiDCMIOutput(rmcp *rmcpConfig) ([]byte, error) { + return freeipmiOutput("ipmi-dcmi", rmcp, "--get-system-power-statistics") } -func bmcInfoOutput(host, user, password string) ([]byte, error) { - return freeipmiOutput("bmc-info", host, user, password, "--get-device-id") +func bmcInfoOutput(rmcp *rmcpConfig) ([]byte, error) { + return freeipmiOutput("bmc-info", rmcp, "--get-device-id") } func splitMonitoringOutput(impiOutput []byte, excludeSensorIds []int64) ([]sensorData, error) { @@ -348,8 +362,8 @@ func collectGenericSensor(ch chan<- prometheus.Metric, state float64, data senso ) } -func (c collector) collectMonitoring(ch chan<- prometheus.Metric, creds Credentials) (int, error) { - output, err := ipmiMonitoringOutput(c.target, creds.User, creds.Password) +func (c collector) collectMonitoring(ch chan<- prometheus.Metric, rmcp *rmcpConfig) (int, error) { + output, err := ipmiMonitoringOutput(rmcp) if err != nil { log.Errorf("Failed to collect ipmimonitoring data: %s", err) return 0, err @@ -397,8 +411,8 @@ func (c collector) collectMonitoring(ch chan<- prometheus.Metric, creds Credenti return 1, nil } -func (c collector) collectDCMI(ch chan<- prometheus.Metric, creds Credentials) (int, error) { - output, err := ipmiDCMIOutput(c.target, creds.User, creds.Password) +func (c collector) collectDCMI(ch chan<- prometheus.Metric, rmcp *rmcpConfig) (int, error) { + output, err := ipmiDCMIOutput(rmcp) if err != nil { log.Debugf("Failed to collect ipmi-dcmi data: %s", err) return 0, err @@ -416,8 +430,8 @@ func (c collector) collectDCMI(ch chan<- prometheus.Metric, creds Credentials) ( return 1, nil } -func (c collector) collectBmcInfo(ch chan<- prometheus.Metric, creds Credentials) (int, error) { - output, err := bmcInfoOutput(c.target, creds.User, creds.Password) +func (c collector) collectBmcInfo(ch chan<- prometheus.Metric, rmcp *rmcpConfig) (int, error) { + output, err := bmcInfoOutput(rmcp) if err != nil { log.Debugf("Failed to collect bmc-info data: %s", err) return 0, err @@ -467,7 +481,7 @@ func (c collector) Collect(ch chan<- prometheus.Metric) { start := time.Now() defer func() { duration := time.Since(start).Seconds() - log.Debugf("Scrape of target %s took %f seconds.", c.target, duration) + log.Debugf("Scrape of target %s took %f seconds.", targetName(c.target), duration) ch <- prometheus.MustNewConstMetric( durationDesc, prometheus.GaugeValue, @@ -475,16 +489,25 @@ func (c collector) Collect(ch chan<- prometheus.Metric) { ) }() - creds, err := c.config.CredentialsForTarget(c.target) - if err != nil { - log.Errorf("No credentials available for target %s.", c.target) - c.markCollectorsUp(ch, 0, 0, 0) - return + rmcp := (*rmcpConfig)(nil) + + if !targetIsLocal(c.target) { + creds, err := c.config.CredentialsForTarget(c.target) + if err != nil { + log.Errorf("No credentials available for target %s.", c.target) + c.markCollectorsUp(ch, 0, 0, 0) + return + } + rmcp = &rmcpConfig{ + host: c.target, + user: creds.User, + pass: creds.Password, + } } - ipmiUp, _ := c.collectMonitoring(ch, creds) - dcmiUp, _ := c.collectDCMI(ch, creds) - bmcUp, _ := c.collectBmcInfo(ch, creds) + ipmiUp, _ := c.collectMonitoring(ch, rmcp) + dcmiUp, _ := c.collectDCMI(ch, rmcp) + bmcUp, _ := c.collectBmcInfo(ch, rmcp) c.markCollectorsUp(ch, bmcUp, dcmiUp, ipmiUp) } @@ -497,3 +520,14 @@ func contains(s []int64, elm int64) bool { } return false } + +func targetName(target string) string { + if targetIsLocal(target) { + return "[local]" + } + return target +} + +func targetIsLocal(target string) bool { + return target == targetLocal +} diff --git a/main.go b/main.go index ff0b260..d54dacb 100644 --- a/main.go +++ b/main.go @@ -34,7 +34,7 @@ var ( reloadCh chan chan error ) -func handler(w http.ResponseWriter, r *http.Request) { +func remoteIpmiHandler(w http.ResponseWriter, r *http.Request) { target := r.URL.Query().Get("target") if target == "" { http.Error(w, "'target' parameter must be specified", 400) @@ -43,8 +43,8 @@ func handler(w http.ResponseWriter, r *http.Request) { log.Debugf("Scraping target '%s'", target) registry := prometheus.NewRegistry() - collector := collector{target: target, config: sc} - registry.MustRegister(collector) + remoteCollector := collector{target: target, config: sc} + registry.MustRegister(remoteCollector) h := promhttp.HandlerFor(registry, promhttp.HandlerOpts{}) h.ServeHTTP(w, r) } @@ -94,8 +94,11 @@ func main() { } }() - http.Handle("/metrics", promhttp.Handler()) // Normal metrics endpoint for IPMI exporter itself. - http.HandleFunc("/ipmi", handler) // Endpoint to do IPMI scrapes. + localCollector := collector{target: targetLocal, config: sc} + prometheus.MustRegister(&localCollector) + + http.Handle("/metrics", promhttp.Handler()) // Regular metrics endpoint for local IPMI metrics. + http.HandleFunc("/ipmi", remoteIpmiHandler) // Endpoint to do IPMI scrapes. http.HandleFunc("/-/reload", updateConfiguration) // Endpoint to reload configuration. http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { @@ -120,8 +123,9 @@ func main() {

-
-

Config

+ +

Local metrics

+

Config

`)) })