diff --git a/README.md b/README.md index 29e1c5b..9b30088 100644 --- a/README.md +++ b/README.md @@ -29,13 +29,17 @@ A minimal invocation looks like this: Supported parameters include: - `web.listen-address`: the address/port to listen on (default: `":9290"`) - - `config.file`: path to the configuration file (default: `ipmi.yml`) - - `path`: path to the FreeIPMI executables (default: rely on `$PATH`) + - `config.file`: path to the configuration file (default: none) + - `freeipmi.path`: path to the FreeIPMI executables (default: rely on `$PATH`) + +For syntax and a complete list of available parameters, run: + + ./ipmi_exporter -h Make sure you have the following tools from the [FreeIPMI](https://www.gnu.org/software/freeipmi/) suite installed: - - `ipmimonitoring` + - `ipmimonitoring`/`ipmi-sensors` - `ipmi-dcmi` - `bmc-info` @@ -47,26 +51,34 @@ local IPMI metrics. No special configuration is required. For remote metrics, the general configuration pattern is similar to that of the [blackbox exporter](https://github.com/prometheus/blackbox_exporter), i.e. Prometheus scrapes a small number (possibly one) of IPMI exporters with a -`target` URL parameter to tell the exporter which IPMI device it should use to -retrieve the IPMI metrics. We offer this approach as IPMI devices often provide -useful information even while the supervised host is turned off. If you are -running the exporter on a separate host anyway, it makes more sense to have -only a few of them, each probing many (possibly thousands of) IPMI devices, -rather than one exporter per IPMI device. +`target` and `module` URL parameter to tell the exporter which IPMI device it +should use to retrieve the IPMI metrics. We offer this approach as IPMI devices +often provide useful information even while the supervised host is turned off. 
+If you are running the exporter on a separate host anyway, it makes more sense +to have only a few of them, each probing many (possibly thousands of) IPMI +devices, rather than one exporter per IPMI device. ### IPMI exporter -The exporter requires a configuration file called `ipmi.yml` (can be -overridden, see above). To collect local metrics, an empty file is technically -sufficient. For remote metrics, it must contain user names and passwords for -IPMI access to all targets. It supports a “default” target, which is used as -fallback if the target is not explicitly listed in the file. +The exporter can read a configuration file by setting `config.file` (see +above). To collect local metrics, you might not even need one. For +remote metrics, it must contain at least user names and passwords for IPMI +access to all targets to be scraped. You can additionally specify the IPMI +driver type and privilege level to use (see `man 5 freeipmi.conf` for more +details and possible values). + +The config file supports the notion of "modules", so that different +configurations can be re-used for groups of targets. See the section below on +how to set the module parameter in Prometheus. The special module "default" is +used in case the scrape does not request a specific module. The configuration file also supports a blacklist of sensors, useful in case of OEM-specific sensors that FreeIPMI cannot deal with properly or otherwise misbehaving sensors. This applies to both local and remote metrics. -See the included `ipmi.yml` file for an example. +There are two commented example configuration files, see `ipmi_local.yml` for +scraping local host metrics and `ipmi_remote.yml` for scraping remote IPMI +interfaces. 
### Prometheus @@ -119,6 +131,8 @@ add the following to your Prometheus config: ``` - job_name: ipmi + params: + module: default scrape_interval: 1m scrape_timeout: 30s metrics_path: /ipmi @@ -130,7 +144,7 @@ add the following to your Prometheus config: relabel_configs: - source_labels: [__address__] separator: ; - regex: (.*)(:80)? + regex: (.*) target_label: __param_target replacement: ${1} action: replace @@ -147,6 +161,26 @@ add the following to your Prometheus config: action: replace ``` +This assumes that all hosts use the default module. If you are using modules in +the config file, like in the provided `ipmi_remote.yml` example config, you +will need to specify one job for each module, using the respective group of +targets. + +In a more extreme case, for example if you are using different passwords on +every host, a good approach is to generate an exporter config file that uses +the target names as module names, which would allow you to have a single job that +uses label replace to set the module. Leave out the `params` in the job +definition and instead add a relabel rule like this one: + +``` + - source_labels: [__address__] + separator: ; + regex: (.*) + target_label: __param_module + replacement: ${1} + action: replace +``` + For more information, e.g. how to use mechanisms other than a file to discover the list of hosts to scrape, please refer to the [Prometheus documentation](https://prometheus.io/docs). @@ -159,7 +193,7 @@ These metrics provide data about the scrape itself: - `ipmi_up{collector=""}` is `1` if the data for this collector could successfully be retrieved from the remote host, `0` otherwise. The following - collectors are available: + collectors are available and can be enabled or disabled in the config: - `ipmi`: collects IPMI sensor data. If it fails, sensor metrics (see below) will not be available - `dcmi`: collects DCMI data, currently only power consumption. 
If it fails, @@ -171,6 +205,8 @@ These metrics provide data about the scrape itself: ### BMC info +This metric is only provided if the `bmc` collector is enabled. + For some basic information, there is a constant metric `ipmi_bmc_info` with value `1` and labels providing the firmware revision and manufacturer as returned from the BMC. Example: @@ -179,6 +215,8 @@ returned from the BMC. Example: ### Power consumption +This metric is only provided if the `dcmi` collector is enabled. + The metric `ipmi_dcmi_power_consumption_current_watts` can be used to monitor the live power consumption of the machine in Watts. If in doubt, this metric should be used over any of the sensor data (see below), even if their name @@ -186,6 +224,8 @@ might suggest that they measure the same thing. This metric has no labels. ### Sensors +These metrics are only provided if the `ipmi` collector is enabled. + IPMI sensors in general have one or two distinct pieces of information that are of interest: a value and/or a state. The exporter always exports both, even if the value is NaN or the state non-sensical. 
This is so one can still always diff --git a/collector.go b/collector.go index 585e2fe..d2bb25e 100644 --- a/collector.go +++ b/collector.go @@ -34,6 +34,7 @@ var ( type collector struct { target string + module string config *SafeConfig } @@ -47,10 +48,9 @@ type sensorData struct { Event string } -type rmcpConfig struct { - host string - user string - pass string +type ipmiTarget struct { + host string + config IPMIConfig } var ( @@ -173,17 +173,25 @@ func pipeName() string { return filepath.Join(os.TempDir(), "ipmi_exporter-"+hex.EncodeToString(randBytes)) } -func freeipmiConfig(driver, user, password string) string { - return fmt.Sprintf(` -driver-type %s -privilege-level admin -username %s -password %s - `, driver, user, password) +func freeipmiConfig(config IPMIConfig) string { + var b strings.Builder + if config.Driver != "" { + fmt.Fprintf(&b, "driver-type %s\n", config.Driver) + } + if config.Privilege != "" { + fmt.Fprintf(&b, "privilege-level %s\n", config.Privilege) + } + if config.User != "" { + fmt.Fprintf(&b, "username %s\n", config.User) + } + if config.Password != "" { + fmt.Fprintf(&b, "password %s\n", config.Password) + } + return b.String() } -func freeipmiConfigPipe(driver, user, password string) (string, error) { - content := []byte(freeipmiConfig(driver, user, password)) +func freeipmiConfigPipe(config IPMIConfig) (string, error) { + content := []byte(freeipmiConfig(config)) pipe := pipeName() err := syscall.Mkfifo(pipe, 0600) if err != nil { @@ -203,25 +211,21 @@ func freeipmiConfigPipe(driver, user, password string) (string, error) { return pipe, nil } -func freeipmiOutput(cmd string, rmcp *rmcpConfig, arg ...string) ([]byte, error) { - args := []string{} +func freeipmiOutput(cmd string, target ipmiTarget, arg ...string) ([]byte, error) { + pipe, err := freeipmiConfigPipe(target.config) + if err != nil { + return nil, err + } + defer os.Remove(pipe) - if rmcp != nil { - pipe, err := freeipmiConfigPipe("LAN_2_0", rmcp.user, rmcp.pass) - if 
err != nil { - return nil, err - } - defer os.Remove(pipe) - - rmcpArgs := []string{ - "--config-file", pipe, - "-h", rmcp.host, - } - args = append(args, rmcpArgs...) + args := []string{"--config-file", pipe} + if !targetIsLocal(target.host) { + args = append(args, "-h", target.host) } fqcmd := path.Join(*executablesPath, cmd) args = append(args, arg...) + log.Debugf("Executing %s %v", fqcmd, args) out, err := exec.Command(fqcmd, args...).CombinedOutput() if err != nil { log.Errorf("Error while calling %s: %s", cmd, out) @@ -229,16 +233,16 @@ func freeipmiOutput(cmd string, rmcp *rmcpConfig, arg ...string) ([]byte, error) return out, err } -func ipmiMonitoringOutput(rmcp *rmcpConfig) ([]byte, error) { - return freeipmiOutput("ipmimonitoring", rmcp, "-Q", "--comma-separated-output", "--no-header-output", "--sdr-cache-recreate") +func ipmiMonitoringOutput(target ipmiTarget) ([]byte, error) { + return freeipmiOutput("ipmimonitoring", target, "-Q", "--comma-separated-output", "--no-header-output", "--sdr-cache-recreate") } -func ipmiDCMIOutput(rmcp *rmcpConfig) ([]byte, error) { - return freeipmiOutput("ipmi-dcmi", rmcp, "--get-system-power-statistics") +func ipmiDCMIOutput(target ipmiTarget) ([]byte, error) { + return freeipmiOutput("ipmi-dcmi", target, "--get-system-power-statistics") } -func bmcInfoOutput(rmcp *rmcpConfig) ([]byte, error) { - return freeipmiOutput("bmc-info", rmcp, "--get-device-id") +func bmcInfoOutput(target ipmiTarget) ([]byte, error) { + return freeipmiOutput("bmc-info", target, "--get-device-id") } func splitMonitoringOutput(impiOutput []byte, excludeSensorIds []int64) ([]sensorData, error) { @@ -363,13 +367,13 @@ func collectGenericSensor(ch chan<- prometheus.Metric, state float64, data senso ) } -func (c collector) collectMonitoring(ch chan<- prometheus.Metric, rmcp *rmcpConfig) (int, error) { - output, err := ipmiMonitoringOutput(rmcp) +func collectMonitoring(ch chan<- prometheus.Metric, target ipmiTarget) (int, error) { + output, err := 
ipmiMonitoringOutput(target) if err != nil { log.Errorf("Failed to collect ipmimonitoring data: %s", err) return 0, err } - excludeIds := c.config.ExcludeSensorIDs() + excludeIds := target.config.ExcludeSensorIDs results, err := splitMonitoringOutput(output, excludeIds) if err != nil { log.Errorf("Failed to parse ipmimonitoring data: %s", err) @@ -412,8 +416,8 @@ func (c collector) collectMonitoring(ch chan<- prometheus.Metric, rmcp *rmcpConf return 1, nil } -func (c collector) collectDCMI(ch chan<- prometheus.Metric, rmcp *rmcpConfig) (int, error) { - output, err := ipmiDCMIOutput(rmcp) +func collectDCMI(ch chan<- prometheus.Metric, target ipmiTarget) (int, error) { + output, err := ipmiDCMIOutput(target) if err != nil { log.Debugf("Failed to collect ipmi-dcmi data: %s", err) return 0, err @@ -431,8 +435,8 @@ func (c collector) collectDCMI(ch chan<- prometheus.Metric, rmcp *rmcpConfig) (i return 1, nil } -func (c collector) collectBmcInfo(ch chan<- prometheus.Metric, rmcp *rmcpConfig) (int, error) { - output, err := bmcInfoOutput(rmcp) +func collectBmcInfo(ch chan<- prometheus.Metric, target ipmiTarget) (int, error) { + output, err := bmcInfoOutput(target) if err != nil { log.Debugf("Failed to collect bmc-info data: %s", err) return 0, err @@ -456,24 +460,12 @@ func (c collector) collectBmcInfo(ch chan<- prometheus.Metric, rmcp *rmcpConfig) return 1, nil } -func (c collector) markCollectorsUp(ch chan<- prometheus.Metric, bmc, dcmi, ipmi int) { +func markCollectorUp(ch chan<- prometheus.Metric, name string, up int) { ch <- prometheus.MustNewConstMetric( upDesc, prometheus.GaugeValue, - float64(bmc), - "bmc", - ) - ch <- prometheus.MustNewConstMetric( - upDesc, - prometheus.GaugeValue, - float64(dcmi), - "dcmi", - ) - ch <- prometheus.MustNewConstMetric( - upDesc, - prometheus.GaugeValue, - float64(ipmi), - "ipmi", + float64(up), + name, ) } @@ -490,27 +482,25 @@ func (c collector) Collect(ch chan<- prometheus.Metric) { ) }() - rmcp := (*rmcpConfig)(nil) - - if 
!targetIsLocal(c.target) { - creds, err := c.config.CredentialsForTarget(c.target) - if err != nil { - log.Errorf("No credentials available for target %s.", c.target) - c.markCollectorsUp(ch, 0, 0, 0) - return - } - rmcp = &rmcpConfig{ - host: c.target, - user: creds.User, - pass: creds.Password, - } + config := c.config.ConfigForTarget(c.target, c.module) + target := ipmiTarget{ + host: c.target, + config: config, } - ipmiUp, _ := c.collectMonitoring(ch, rmcp) - dcmiUp, _ := c.collectDCMI(ch, rmcp) - bmcUp, _ := c.collectBmcInfo(ch, rmcp) - - c.markCollectorsUp(ch, bmcUp, dcmiUp, ipmiUp) + for _, collector := range config.Collectors { + var up int + log.Debugf("Running collector: %s", collector) + switch collector { + case "ipmi": + up, _ = collectMonitoring(ch, target) + case "dcmi": + up, _ = collectDCMI(ch, target) + case "bmc": + up, _ = collectBmcInfo(ch, target) + } + markCollectorUp(ch, collector, up) + } } func contains(s []int64, elm int64) bool { diff --git a/config.go b/config.go index 4fbb0d4..a856a33 100644 --- a/config.go +++ b/config.go @@ -12,9 +12,7 @@ import ( // Config is the Go representation of the yaml config file. type Config struct { - Credentials map[string]Credentials `yaml:"credentials"` - - ExcludeSensorIDs []int64 `yaml:"exclude_sensor_ids"` + Modules map[string]IPMIConfig `yaml:"modules"` // Catches all undefined fields and must be empty after parsing. XXX map[string]interface{} `yaml:",inline"` @@ -26,16 +24,25 @@ type SafeConfig struct { C *Config } -// Credentials is the Go representation of the credentials section in the yaml +// IPMIConfig is the Go representation of a module configuration in the yaml // config file. 
-type Credentials struct { - User string `yaml:"user"` - Password string `yaml:"pass"` +type IPMIConfig struct { + User string `yaml:"user"` + Password string `yaml:"pass"` + Privilege string `yaml:"privilege"` + Driver string `yaml:"driver"` + Collectors []string `yaml:"collectors"` + ExcludeSensorIDs []int64 `yaml:"exclude_sensor_ids"` // Catches all undefined fields and must be empty after parsing. XXX map[string]interface{} `yaml:",inline"` } +var emptyConfig = IPMIConfig{Collectors: []string{"ipmi", "dcmi", "bmc"}} + +// CollectorName is used for unmarshaling the list of collectors in the yaml config file +type CollectorName string + func checkOverflow(m map[string]interface{}, ctx string) error { if len(m) > 0 { var keys []string @@ -60,14 +67,20 @@ func (s *Config) UnmarshalYAML(unmarshal func(interface{}) error) error { } // UnmarshalYAML implements the yaml.Unmarshaler interface. -func (s *Credentials) UnmarshalYAML(unmarshal func(interface{}) error) error { - type plain Credentials +func (s *IPMIConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { + *s = emptyConfig + type plain IPMIConfig if err := unmarshal((*plain)(s)); err != nil { return err } - if err := checkOverflow(s.XXX, "credentials"); err != nil { + if err := checkOverflow(s.XXX, "modules"); err != nil { return err } + for _, c := range s.Collectors { + if !(c == "ipmi" || c == "dcmi" || c == "bmc") { + return fmt.Errorf("unknown collector name: %s", c) + } + } return nil } @@ -75,15 +88,20 @@ func (s *Credentials) UnmarshalYAML(unmarshal func(interface{}) error) error { // is unreadable or unparsable, an error is returned and the old config is kept. 
func (sc *SafeConfig) ReloadConfig(configFile string) error { var c = &Config{} + var config []byte + var err error - yamlFile, err := ioutil.ReadFile(configFile) - if err != nil { - log.Errorf("Error reading config file: %s", err) - return err + if configFile != "" { + config, err = ioutil.ReadFile(configFile) + if err != nil { + log.Errorf("Error reading config file: %s", err) + return err + } + } else { + config = []byte("# use empty file as default") } - if err := yaml.Unmarshal(yamlFile, c); err != nil { - log.Errorf("Error parsing config file: %s", err) + if err = yaml.Unmarshal(config, c); err != nil { return err } @@ -91,34 +109,46 @@ func (sc *SafeConfig) ReloadConfig(configFile string) error { sc.C = c sc.Unlock() - log.Infoln("Loaded config file") + if configFile != "" { + log.Infoln("Loaded config file", configFile) + } return nil } -// CredentialsForTarget returns the Credentials for a given target, or the -// default. It is concurrency-safe. -func (sc *SafeConfig) CredentialsForTarget(target string) (Credentials, error) { +// HasModule returns true if a given module is configured. It is concurrency-safe. +func (sc *SafeConfig) HasModule(module string) bool { sc.Lock() defer sc.Unlock() - if credentials, ok := sc.C.Credentials[target]; ok { - return Credentials{ - User: credentials.User, - Password: credentials.Password, - }, nil - } - if credentials, ok := sc.C.Credentials["default"]; ok { - return Credentials{ - User: credentials.User, - Password: credentials.Password, - }, nil - } - return Credentials{}, fmt.Errorf("no credentials found for target %s", target) + + _, ok := sc.C.Modules[module] + return ok } -// ExcludeSensorIDs returns the list of excluded sensor IDs in a -// concurrency-safe way. -func (sc *SafeConfig) ExcludeSensorIDs() []int64 { +// ConfigForTarget returns the config for a given target/module, or the +// default. It is concurrency-safe. 
+func (sc *SafeConfig) ConfigForTarget(target, module string) IPMIConfig { sc.Lock() defer sc.Unlock() - return sc.C.ExcludeSensorIDs + + var config IPMIConfig + var ok = false + + if module != "default" { + config, ok = sc.C.Modules[module] + if !ok { + log.Errorf("Requested module %s for target %s not found, using default", module, targetName(target)) + } + } + + // If nothing found, fall back to defaults + if !ok { + config, ok = sc.C.Modules["default"] + if !ok { + // This is probably fine for running locally, so not making this a warning + log.Debugf("Needed default config for target %s, but none configured, using FreeIPMI defaults", targetName(target)) + config = emptyConfig + } + } + + return config } diff --git a/ipmi.yml b/ipmi.yml deleted file mode 100644 index 3fa9981..0000000 --- a/ipmi.yml +++ /dev/null @@ -1,24 +0,0 @@ -credentials: - default: - user: "default_user" - pass: "example_pw" - 10.8.0.2: - user: "host_specific_user" - pass: "another_pw" - -exclude_sensor_ids: - - 2 - - 29 - - 32 - - 50 - - 52 - - 55 - - 80 - - 81 - - 82 - - 83 - - 84 - - 85 - - 86 - - 87 - \ No newline at end of file diff --git a/ipmi_local.yml b/ipmi_local.yml new file mode 100644 index 0000000..c8a3b23 --- /dev/null +++ b/ipmi_local.yml @@ -0,0 +1,16 @@ +# Configuration file for ipmi_exporter + +# This is an example config for scraping the local host. +# In most cases, this should work without using a config file at all. +modules: + default: + # Available collectors are bmc, ipmi, and dcmi + collectors: + - bmc + - ipmi + - dcmi + # Got any sensors you don't care about? Add them here. + exclude_sensor_ids: + - 2 + - 29 + - 32 diff --git a/ipmi_remote.yml b/ipmi_remote.yml new file mode 100644 index 0000000..c79c97c --- /dev/null +++ b/ipmi_remote.yml @@ -0,0 +1,45 @@ +# Configuration file for ipmi_exporter + +# This is an example config for scraping remote hosts via IPMI. +# Information required to access remote IPMI interfaces can be supplied in the +# 'modules' section. 
A scrape can request the usage of a given config by +# setting the `module` URL parameter. + +modules: + default: + # These settings are used if no module is specified, the + # specified module doesn't exist, or of course if + # module=default is specified. + user: "default_user" + pass: "example_pw" + driver: "LAN_2_0" + privilege: "user" + # Available collectors are bmc, ipmi, and dcmi + # If not specified, all three are used + collectors: + - bmc + - ipmi + # Got any sensors you don't care about? Add them here. + exclude_sensor_ids: + - 2 + - 29 + - 32 + - 50 + - 52 + - 55 + dcmi: + # Use these settings when scraped with module=dcmi. + user: "admin_user" + pass: "another_pw" + privilege: "admin" + driver: "LAN_2_0" + collectors: + - dcmi + thatspecialhost: + # Use these settings when scraped with module=thatspecialhost. + user: "some_user" + pass: "secret_pw" + privilege: "admin" + driver: "LAN" + collectors: + - ipmi diff --git a/main.go b/main.go index cc872db..662a623 100644 --- a/main.go +++ b/main.go @@ -1,7 +1,6 @@ package main import ( - "flag" "fmt" "net/http" "os" @@ -11,21 +10,22 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/prometheus/common/log" + kingpin "gopkg.in/alecthomas/kingpin.v2" ) var ( - configFile = flag.String( - "config.file", "ipmi.yml", + configFile = kingpin.Flag( + "config.file", "Path to configuration file.", - ) - executablesPath = flag.String( - "path", "", + ).String() + executablesPath = kingpin.Flag( + "freeipmi.path", "Path to FreeIPMI executables (default: rely on $PATH).", - ) - listenAddress = flag.String( - "web.listen-address", ":9290", + ).String() + listenAddress = kingpin.Flag( + "web.listen-address", "Address to listen on for web interface and telemetry.", - ) + ).Default(":9290").String() sc = &SafeConfig{ C: &Config{}, @@ -39,10 +39,21 @@ func remoteIPMIHandler(w http.ResponseWriter, r *http.Request) { http.Error(w, "'target' 
parameter must be specified", 400) return } - log.Debugf("Scraping target '%s'", target) + + // Remote scrape will not work without some kind of config, so be pedantic about it + module := r.URL.Query().Get("module") + if module == "" { + module = "default" + } + if !sc.HasModule(module) { + http.Error(w, fmt.Sprintf("Unknown module %q", module), http.StatusBadRequest) + return + } + + log.Debugf("Scraping target '%s' with module '%s'", target, module) registry := prometheus.NewRegistry() - remoteCollector := collector{target: target, config: sc} + remoteCollector := collector{target: target, module: module, config: sc} registry.MustRegister(remoteCollector) h := promhttp.HandlerFor(registry, promhttp.HandlerOpts{}) h.ServeHTTP(w, r) @@ -64,7 +75,9 @@ func updateConfiguration(w http.ResponseWriter, r *http.Request) { } func main() { - flag.Parse() + log.AddFlags(kingpin.CommandLine) + kingpin.HelpFlag.Short('h') + kingpin.Parse() log.Infoln("Starting ipmi_exporter") // Bail early if the config is bad. @@ -93,7 +106,7 @@ func main() { } }() - localCollector := collector{target: targetLocal, config: sc} + localCollector := collector{target: targetLocal, module: "default", config: sc} prometheus.MustRegister(&localCollector) http.Handle("/metrics", promhttp.Handler()) // Regular metrics endpoint for local IPMI metrics.