windows_exporter/exporter.go
2020-05-24 20:54:35 +02:00

461 lines
11 KiB
Go

// +build windows
package main
import (
"fmt"
"net/http"
_ "net/http/pprof"
"sort"
"strconv"
"strings"
"sync"
"time"
"golang.org/x/sys/windows/svc"
"github.com/StackExchange/wmi"
"github.com/prometheus-community/windows_exporter/collector"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/prometheus/common/log"
"github.com/prometheus/common/version"
"gopkg.in/alecthomas/kingpin.v2"
)
// WmiCollector implements the prometheus.Collector interface.
type WmiCollector struct {
maxScrapeDuration time.Duration
collectors map[string]collector.Collector
}
const (
defaultCollectors = "cpu,cs,logical_disk,net,os,service,system,textfile"
defaultCollectorsPlaceholder = "[defaults]"
serviceName = "wmi_exporter"
)
var (
scrapeDurationDesc = prometheus.NewDesc(
prometheus.BuildFQName(collector.Namespace, "exporter", "collector_duration_seconds"),
"windows_exporter: Duration of a collection.",
[]string{"collector"},
nil,
)
scrapeSuccessDesc = prometheus.NewDesc(
prometheus.BuildFQName(collector.Namespace, "exporter", "collector_success"),
"windows_exporter: Whether the collector was successful.",
[]string{"collector"},
nil,
)
scrapeTimeoutDesc = prometheus.NewDesc(
prometheus.BuildFQName(collector.Namespace, "exporter", "collector_timeout"),
"windows_exporter: Whether the collector timed out.",
[]string{"collector"},
nil,
)
snapshotDuration = prometheus.NewDesc(
prometheus.BuildFQName(collector.Namespace, "exporter", "perflib_snapshot_duration_seconds"),
"Duration of perflib snapshot capture",
nil,
nil,
)
// This can be removed when client_golang exposes this on Windows
// (See https://github.com/prometheus/client_golang/issues/376)
startTime = float64(time.Now().Unix())
startTimeDesc = prometheus.NewDesc(
"process_start_time_seconds",
"Start time of the process since unix epoch in seconds.",
nil,
nil,
)
)
// Describe sends all the descriptors of the collectors included to
// the provided channel.
func (coll WmiCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- scrapeDurationDesc
ch <- scrapeSuccessDesc
}
type collectorOutcome int
const (
pending collectorOutcome = iota
success
failed
)
// Collect sends the collected metrics from each of the collectors to
// prometheus.
func (coll WmiCollector) Collect(ch chan<- prometheus.Metric) {
ch <- prometheus.MustNewConstMetric(
startTimeDesc,
prometheus.CounterValue,
startTime,
)
t := time.Now()
cs := make([]string, 0, len(coll.collectors))
for name := range coll.collectors {
cs = append(cs, name)
}
scrapeContext, err := collector.PrepareScrapeContext(cs)
ch <- prometheus.MustNewConstMetric(
snapshotDuration,
prometheus.GaugeValue,
time.Since(t).Seconds(),
)
if err != nil {
ch <- prometheus.NewInvalidMetric(scrapeSuccessDesc, fmt.Errorf("failed to prepare scrape: %v", err))
return
}
wg := sync.WaitGroup{}
wg.Add(len(coll.collectors))
collectorOutcomes := make(map[string]collectorOutcome)
for name := range coll.collectors {
collectorOutcomes[name] = pending
}
metricsBuffer := make(chan prometheus.Metric)
l := sync.Mutex{}
finished := false
go func() {
for m := range metricsBuffer {
l.Lock()
if !finished {
ch <- m
}
l.Unlock()
}
}()
for name, c := range coll.collectors {
go func(name string, c collector.Collector) {
defer wg.Done()
outcome := execute(name, c, scrapeContext, metricsBuffer)
l.Lock()
if !finished {
collectorOutcomes[name] = outcome
}
l.Unlock()
}(name, c)
}
allDone := make(chan struct{})
go func() {
wg.Wait()
close(allDone)
close(metricsBuffer)
}()
// Wait until either all collectors finish, or timeout expires
select {
case <-allDone:
case <-time.After(coll.maxScrapeDuration):
}
l.Lock()
finished = true
remainingCollectorNames := make([]string, 0)
for name, outcome := range collectorOutcomes {
var successValue, timeoutValue float64
if outcome == pending {
timeoutValue = 1.0
remainingCollectorNames = append(remainingCollectorNames, name)
}
if outcome == success {
successValue = 1.0
}
ch <- prometheus.MustNewConstMetric(
scrapeSuccessDesc,
prometheus.GaugeValue,
successValue,
name,
)
ch <- prometheus.MustNewConstMetric(
scrapeTimeoutDesc,
prometheus.GaugeValue,
timeoutValue,
name,
)
}
if len(remainingCollectorNames) > 0 {
log.Warn("Collection timed out, still waiting for ", remainingCollectorNames)
}
l.Unlock()
}
func execute(name string, c collector.Collector, ctx *collector.ScrapeContext, ch chan<- prometheus.Metric) collectorOutcome {
t := time.Now()
err := c.Collect(ctx, ch)
duration := time.Since(t).Seconds()
ch <- prometheus.MustNewConstMetric(
scrapeDurationDesc,
prometheus.GaugeValue,
duration,
name,
)
if err != nil {
log.Errorf("collector %s failed after %fs: %s", name, duration, err)
return failed
}
log.Debugf("collector %s succeeded after %fs.", name, duration)
return success
}
func expandEnabledCollectors(enabled string) []string {
expanded := strings.Replace(enabled, defaultCollectorsPlaceholder, defaultCollectors, -1)
separated := strings.Split(expanded, ",")
unique := map[string]bool{}
for _, s := range separated {
if s != "" {
unique[s] = true
}
}
result := make([]string, 0, len(unique))
for s := range unique {
result = append(result, s)
}
return result
}
func loadCollectors(list string) (map[string]collector.Collector, error) {
collectors := map[string]collector.Collector{}
enabled := expandEnabledCollectors(list)
for _, name := range enabled {
c, err := collector.Build(name)
if err != nil {
return nil, err
}
collectors[name] = c
}
return collectors, nil
}
func initWbem() {
// This initialization prevents a memory leak on WMF 5+. See
// https://github.com/martinlindhe/wmi_exporter/issues/77 and linked issues
// for details.
log.Debugf("Initializing SWbemServices")
s, err := wmi.InitializeSWbemServices(wmi.DefaultClient)
if err != nil {
log.Fatal(err)
}
wmi.DefaultClient.AllowMissingFields = true
wmi.DefaultClient.SWbemServicesClient = s
}
func main() {
var (
listenAddress = kingpin.Flag(
"telemetry.addr",
"host:port for WMI exporter.",
).Default(":9182").String()
metricsPath = kingpin.Flag(
"telemetry.path",
"URL path for surfacing collected metrics.",
).Default("/metrics").String()
maxRequests = kingpin.Flag(
"telemetry.max-requests",
"Maximum number of concurrent requests. 0 to disable.",
).Default("5").Int()
enabledCollectors = kingpin.Flag(
"collectors.enabled",
"Comma-separated list of collectors to use. Use '[defaults]' as a placeholder for all the collectors enabled by default.").
Default(defaultCollectors).String()
printCollectors = kingpin.Flag(
"collectors.print",
"If true, print available collectors and exit.",
).Bool()
timeoutMargin = kingpin.Flag(
"scrape.timeout-margin",
"Seconds to subtract from the timeout allowed by the client. Tune to allow for overhead or high loads.",
).Default("0.5").Float64()
)
log.AddFlags(kingpin.CommandLine)
kingpin.Version(version.Print("wmi_exporter"))
kingpin.HelpFlag.Short('h')
kingpin.Parse()
if *printCollectors {
collectors := collector.Available()
collectorNames := make(sort.StringSlice, 0, len(collectors))
for _, n := range collectors {
collectorNames = append(collectorNames, n)
}
collectorNames.Sort()
fmt.Printf("Available collectors:\n")
for _, n := range collectorNames {
fmt.Printf(" - %s\n", n)
}
return
}
initWbem()
isInteractive, err := svc.IsAnInteractiveSession()
if err != nil {
log.Fatal(err)
}
stopCh := make(chan bool)
if !isInteractive {
go func() {
err = svc.Run(serviceName, &wmiExporterService{stopCh: stopCh})
if err != nil {
log.Errorf("Failed to start service: %v", err)
}
}()
}
collectors, err := loadCollectors(*enabledCollectors)
if err != nil {
log.Fatalf("Couldn't load collectors: %s", err)
}
log.Infof("Enabled collectors: %v", strings.Join(keys(collectors), ", "))
h := &metricsHandler{
timeoutMargin: *timeoutMargin,
collectorFactory: func(timeout time.Duration) *WmiCollector {
return &WmiCollector{
collectors: collectors,
maxScrapeDuration: timeout,
}
},
}
http.HandleFunc(*metricsPath, withConcurrencyLimit(*maxRequests, h.ServeHTTP))
http.HandleFunc("/health", healthCheck)
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
_, _ = w.Write([]byte(`<html>
<head><title>WMI Exporter</title></head>
<body>
<h1>WMI Exporter</h1>
<p><a href="` + *metricsPath + `">Metrics</a></p>
</body>
</html>`))
})
log.Infoln("Starting WMI exporter", version.Info())
log.Infoln("Build context", version.BuildContext())
go func() {
log.Infoln("Starting server on", *listenAddress)
log.Fatalf("cannot start WMI exporter: %s", http.ListenAndServe(*listenAddress, nil))
}()
for {
if <-stopCh {
log.Info("Shutting down WMI exporter")
break
}
}
}
func healthCheck(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
_, err := fmt.Fprintln(w, `{"status":"ok"}`)
if err != nil {
log.Debugf("Failed to write to stream: %v", err)
}
}
func keys(m map[string]collector.Collector) []string {
ret := make([]string, 0, len(m))
for key := range m {
ret = append(ret, key)
}
return ret
}
func withConcurrencyLimit(n int, next http.HandlerFunc) http.HandlerFunc {
if n <= 0 {
return next
}
sem := make(chan struct{}, n)
return func(w http.ResponseWriter, r *http.Request) {
select {
case sem <- struct{}{}:
defer func() { <-sem }()
default:
w.WriteHeader(http.StatusServiceUnavailable)
_, _ = w.Write([]byte("Too many concurrent requests"))
return
}
next(w, r)
}
}
type wmiExporterService struct {
stopCh chan<- bool
}
func (s *wmiExporterService) Execute(args []string, r <-chan svc.ChangeRequest, changes chan<- svc.Status) (ssec bool, errno uint32) {
const cmdsAccepted = svc.AcceptStop | svc.AcceptShutdown
changes <- svc.Status{State: svc.StartPending}
changes <- svc.Status{State: svc.Running, Accepts: cmdsAccepted}
loop:
for {
select {
case c := <-r:
switch c.Cmd {
case svc.Interrogate:
changes <- c.CurrentStatus
case svc.Stop, svc.Shutdown:
s.stopCh <- true
break loop
default:
log.Error(fmt.Sprintf("unexpected control request #%d", c))
}
}
}
changes <- svc.Status{State: svc.StopPending}
return
}
type metricsHandler struct {
timeoutMargin float64
collectorFactory func(timeout time.Duration) *WmiCollector
}
func (mh *metricsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
const defaultTimeout = 10.0
var timeoutSeconds float64
if v := r.Header.Get("X-Prometheus-Scrape-Timeout-Seconds"); v != "" {
var err error
timeoutSeconds, err = strconv.ParseFloat(v, 64)
if err != nil {
log.Warnf("Couldn't parse X-Prometheus-Scrape-Timeout-Seconds: %q. Defaulting timeout to %f", v, defaultTimeout)
}
}
if timeoutSeconds == 0 {
timeoutSeconds = defaultTimeout
}
timeoutSeconds = timeoutSeconds - mh.timeoutMargin
reg := prometheus.NewRegistry()
reg.MustRegister(mh.collectorFactory(time.Duration(timeoutSeconds * float64(time.Second))))
reg.MustRegister(
prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{}),
prometheus.NewGoCollector(),
version.NewCollector("windows_exporter"),
)
h := promhttp.HandlerFor(reg, promhttp.HandlerOpts{})
h.ServeHTTP(w, r)
}