// +build windows package main import ( "fmt" "net/http" _ "net/http/pprof" "os" "sort" "strconv" "strings" "sync" "time" "golang.org/x/sys/windows/svc" "github.com/StackExchange/wmi" "github.com/martinlindhe/wmi_exporter/collector" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/prometheus/common/log" "github.com/prometheus/common/version" "gopkg.in/alecthomas/kingpin.v2" ) // WmiCollector implements the prometheus.Collector interface. type WmiCollector struct { maxScrapeDuration time.Duration collectors map[string]collector.Collector } const ( defaultCollectors = "cpu,cs,logical_disk,net,os,service,system,textfile" defaultCollectorsPlaceholder = "[defaults]" serviceName = "wmi_exporter" ) var ( scrapeDurationDesc = prometheus.NewDesc( prometheus.BuildFQName(collector.Namespace, "exporter", "collector_duration_seconds"), "wmi_exporter: Duration of a collection.", []string{"collector"}, nil, ) scrapeSuccessDesc = prometheus.NewDesc( prometheus.BuildFQName(collector.Namespace, "exporter", "collector_success"), "wmi_exporter: Whether the collector was successful.", []string{"collector"}, nil, ) scrapeTimeoutDesc = prometheus.NewDesc( prometheus.BuildFQName(collector.Namespace, "exporter", "collector_timeout"), "wmi_exporter: Whether the collector timed out.", []string{"collector"}, nil, ) snapshotDuration = prometheus.NewDesc( prometheus.BuildFQName(collector.Namespace, "exporter", "perflib_snapshot_duration_seconds"), "Duration of perflib snapshot capture", nil, nil, ) // This can be removed when client_golang exposes this on Windows // (See https://github.com/prometheus/client_golang/issues/376) startTime = float64(time.Now().Unix()) startTimeDesc = prometheus.NewDesc( "process_start_time_seconds", "Start time of the process since unix epoch in seconds.", nil, nil, ) ) // Describe sends all the descriptors of the collectors included to // the provided channel. func (coll WmiCollector) Describe(ch chan<- *prometheus.Desc) { ch <- scrapeDurationDesc ch <- scrapeSuccessDesc } type collectorOutcome int const ( pending collectorOutcome = iota success failed ) // Collect sends the collected metrics from each of the collectors to // prometheus. func (coll WmiCollector) Collect(ch chan<- prometheus.Metric) { ch <- prometheus.MustNewConstMetric( startTimeDesc, prometheus.CounterValue, startTime, ) t := time.Now() scrapeContext, err := collector.PrepareScrapeContext() ch <- prometheus.MustNewConstMetric( snapshotDuration, prometheus.GaugeValue, time.Since(t).Seconds(), ) if err != nil { ch <- prometheus.NewInvalidMetric(scrapeSuccessDesc, fmt.Errorf("failed to prepare scrape: %v", err)) return } wg := sync.WaitGroup{} wg.Add(len(coll.collectors)) collectorOutcomes := make(map[string]collectorOutcome) for name := range coll.collectors { collectorOutcomes[name] = pending } metricsBuffer := make(chan prometheus.Metric) l := sync.Mutex{} finished := false go func() { for m := range metricsBuffer { l.Lock() if !finished { ch <- m } l.Unlock() } }() for name, c := range coll.collectors { go func(name string, c collector.Collector) { defer wg.Done() outcome := execute(name, c, scrapeContext, metricsBuffer) l.Lock() if !finished { collectorOutcomes[name] = outcome } l.Unlock() }(name, c) } allDone := make(chan struct{}) go func() { wg.Wait() close(allDone) close(metricsBuffer) }() // Wait until either all collectors finish, or timeout expires select { case <-allDone: case <-time.After(coll.maxScrapeDuration): } l.Lock() finished = true remainingCollectorNames := make([]string, 0) for name, outcome := range collectorOutcomes { var successValue, timeoutValue float64 if outcome == pending { timeoutValue = 1.0 remainingCollectorNames = append(remainingCollectorNames, name) } if outcome == success { successValue = 1.0 } ch <- prometheus.MustNewConstMetric( scrapeSuccessDesc, prometheus.GaugeValue, successValue, name, ) ch <- prometheus.MustNewConstMetric( scrapeTimeoutDesc, prometheus.GaugeValue, timeoutValue, name, ) } if len(remainingCollectorNames) > 0 { log.Warn("Collection timed out, still waiting for ", remainingCollectorNames) } l.Unlock() } func filterAvailableCollectors(collectors string) string { var availableCollectors []string for _, c := range strings.Split(collectors, ",") { _, ok := collector.Factories[c] if ok { availableCollectors = append(availableCollectors, c) } } return strings.Join(availableCollectors, ",") } func execute(name string, c collector.Collector, ctx *collector.ScrapeContext, ch chan<- prometheus.Metric) collectorOutcome { t := time.Now() err := c.Collect(ctx, ch) duration := time.Since(t).Seconds() ch <- prometheus.MustNewConstMetric( scrapeDurationDesc, prometheus.GaugeValue, duration, name, ) if err != nil { log.Errorf("collector %s failed after %fs: %s", name, duration, err) return failed } log.Debugf("collector %s succeeded after %fs.", name, duration) return success } func expandEnabledCollectors(enabled string) []string { expanded := strings.Replace(enabled, defaultCollectorsPlaceholder, defaultCollectors, -1) separated := strings.Split(expanded, ",") unique := map[string]bool{} for _, s := range separated { if s != "" { unique[s] = true } } result := make([]string, 0, len(unique)) for s := range unique { result = append(result, s) } return result } func loadCollectors(list string) (map[string]collector.Collector, error) { collectors := map[string]collector.Collector{} enabled := expandEnabledCollectors(list) for _, name := range enabled { fn, ok := collector.Factories[name] if !ok { return nil, fmt.Errorf("collector '%s' not available", name) } c, err := fn() if err != nil { return nil, err } collectors[name] = c } return collectors, nil } func initWbem() { // This initialization prevents a memory leak on WMF 5+. See // https://github.com/martinlindhe/wmi_exporter/issues/77 and linked issues // for details. log.Debugf("Initializing SWbemServices") s, err := wmi.InitializeSWbemServices(wmi.DefaultClient) if err != nil { log.Fatal(err) } wmi.DefaultClient.AllowMissingFields = true wmi.DefaultClient.SWbemServicesClient = s } func main() { var ( listenAddress = kingpin.Flag( "telemetry.addr", "host:port for WMI exporter.", ).Default(":9182").String() metricsPath = kingpin.Flag( "telemetry.path", "URL path for surfacing collected metrics.", ).Default("/metrics").String() enabledCollectors = kingpin.Flag( "collectors.enabled", "Comma-separated list of collectors to use. Use '[defaults]' as a placeholder for all the collectors enabled by default."). Default(filterAvailableCollectors(defaultCollectors)).String() printCollectors = kingpin.Flag( "collectors.print", "If true, print available collectors and exit.", ).Bool() timeoutMargin = kingpin.Flag( "scrape.timeout-margin", "Seconds to subtract from the timeout allowed by the client. Tune to allow for overhead or high loads.", ).Default("0.5").Float64() ) log.AddFlags(kingpin.CommandLine) kingpin.Version(version.Print("wmi_exporter")) kingpin.HelpFlag.Short('h') kingpin.Parse() if *printCollectors { collectorNames := make(sort.StringSlice, 0, len(collector.Factories)) for n := range collector.Factories { collectorNames = append(collectorNames, n) } collectorNames.Sort() fmt.Printf("Available collectors:\n") for _, n := range collectorNames { fmt.Printf(" - %s\n", n) } return } initWbem() isInteractive, err := svc.IsAnInteractiveSession() if err != nil { log.Fatal(err) } stopCh := make(chan bool) if !isInteractive { go func() { err = svc.Run(serviceName, &wmiExporterService{stopCh: stopCh}) if err != nil { log.Errorf("Failed to start service: %v", err) } }() } collectors, err := loadCollectors(*enabledCollectors) if err != nil { log.Fatalf("Couldn't load collectors: %s", err) } log.Infof("Enabled collectors: %v", strings.Join(keys(collectors), ", ")) h := &metricsHandler{ timeoutMargin: *timeoutMargin, collectorFactory: func(timeout time.Duration) *WmiCollector { return &WmiCollector{ collectors: collectors, maxScrapeDuration: timeout, } }, } http.Handle(*metricsPath, h) http.HandleFunc("/health", healthCheck) http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { http.Redirect(w, r, *metricsPath, http.StatusMovedPermanently) }) log.Infoln("Starting WMI exporter", version.Info()) log.Infoln("Build context", version.BuildContext()) go func() { log.Infoln("Starting server on", *listenAddress) log.Fatalf("cannot start WMI exporter: %s", http.ListenAndServe(*listenAddress, nil)) }() for { if <-stopCh { log.Info("Shutting down WMI exporter") break } } } func healthCheck(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") _, err := fmt.Fprintln(w, `{"status":"ok"}`) if err != nil { log.Debugf("Failed to write to stream: %v", err) } } func keys(m map[string]collector.Collector) []string { ret := make([]string, 0, len(m)) for key := range m { ret = append(ret, key) } return ret } type wmiExporterService struct { stopCh chan<- bool } func (s *wmiExporterService) Execute(args []string, r <-chan svc.ChangeRequest, changes chan<- svc.Status) (ssec bool, errno uint32) { const cmdsAccepted = svc.AcceptStop | svc.AcceptShutdown changes <- svc.Status{State: svc.StartPending} changes <- svc.Status{State: svc.Running, Accepts: cmdsAccepted} loop: for { select { case c := <-r: switch c.Cmd { case svc.Interrogate: changes <- c.CurrentStatus case svc.Stop, svc.Shutdown: s.stopCh <- true break loop default: log.Error(fmt.Sprintf("unexpected control request #%d", c)) } } } changes <- svc.Status{State: svc.StopPending} return } type metricsHandler struct { timeoutMargin float64 collectorFactory func(timeout time.Duration) *WmiCollector } func (mh *metricsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { const defaultTimeout = 10.0 var timeoutSeconds float64 if v := r.Header.Get("X-Prometheus-Scrape-Timeout-Seconds"); v != "" { var err error timeoutSeconds, err = strconv.ParseFloat(v, 64) if err != nil { log.Warnf("Couldn't parse X-Prometheus-Scrape-Timeout-Seconds: %q. Defaulting timeout to %f", v, defaultTimeout) } } if timeoutSeconds == 0 { timeoutSeconds = defaultTimeout } timeoutSeconds = timeoutSeconds - mh.timeoutMargin reg := prometheus.NewRegistry() reg.MustRegister(mh.collectorFactory(time.Duration(timeoutSeconds * float64(time.Second)))) reg.MustRegister( prometheus.NewProcessCollector(os.Getpid(), ""), prometheus.NewGoCollector(), version.NewCollector("wmi_exporter"), ) h := promhttp.HandlerFor(reg, promhttp.HandlerOpts{}) h.ServeHTTP(w, r) }