2021-11-05 17:12:02 +00:00
//go:build windows
2018-11-30 00:51:12 +00:00
// +build windows
2016-08-26 06:59:27 +00:00
package main
import (
2022-08-24 09:04:23 +00:00
// It's important that we do these first so that we can register with the Windows service control ASAP to avoid timeouts.
2022-08-23 13:57:16 +00:00
"github.com/prometheus-community/windows_exporter/initiate"
"github.com/prometheus-community/windows_exporter/log"
2020-06-23 10:48:19 +00:00
"encoding/json"
2016-09-01 14:04:43 +00:00
"fmt"
2016-08-26 06:59:27 +00:00
"net/http"
2019-08-08 19:09:21 +00:00
_ "net/http/pprof"
2020-10-22 01:19:22 +00:00
"os"
2020-07-30 23:36:58 +00:00
"os/user"
2016-09-01 14:04:43 +00:00
"sort"
2019-06-23 20:01:43 +00:00
"strconv"
2016-09-01 14:04:43 +00:00
"strings"
2016-08-26 06:59:27 +00:00
"sync"
2016-09-01 14:04:43 +00:00
"time"
2016-08-26 06:59:27 +00:00
2020-05-24 18:38:05 +00:00
"github.com/prometheus-community/windows_exporter/collector"
2020-10-22 04:07:30 +00:00
"github.com/prometheus-community/windows_exporter/config"
2023-03-12 00:27:31 +00:00
"github.com/yusufpapurcu/wmi"
2022-08-23 13:57:16 +00:00
2016-08-26 06:59:27 +00:00
"github.com/prometheus/client_golang/prometheus"
2022-04-29 01:18:05 +00:00
"github.com/prometheus/client_golang/prometheus/collectors"
2017-04-30 22:12:05 +00:00
"github.com/prometheus/client_golang/prometheus/promhttp"
2016-09-01 14:04:43 +00:00
"github.com/prometheus/common/version"
2021-02-23 23:23:38 +00:00
"github.com/prometheus/exporter-toolkit/web"
webflag "github.com/prometheus/exporter-toolkit/web/kingpinflag"
2017-08-12 06:44:59 +00:00
"gopkg.in/alecthomas/kingpin.v2"
2016-08-26 06:59:27 +00:00
)
2020-05-24 18:45:54 +00:00
type windowsCollector struct {
2019-05-15 19:22:29 +00:00
maxScrapeDuration time . Duration
collectors map [ string ] collector . Collector
2016-08-26 06:59:27 +00:00
}
2020-06-23 10:48:19 +00:00
// prometheusVersion is the JSON payload served by the /version endpoint.
//
// It is the same struct prometheus uses for their /version endpoint, kept as
// a separate copy to avoid pulling all of prometheus in as a dependency.
type prometheusVersion struct {
	Version   string `json:"version"`
	Revision  string `json:"revision"`
	Branch    string `json:"branch"`
	BuildUser string `json:"buildUser"`
	BuildDate string `json:"buildDate"`
	GoVersion string `json:"goVersion"`
}
2016-09-01 14:04:43 +00:00
const (
	// defaultCollectors is the comma-separated list of collector names used
	// as the default value of the collectors.enabled flag.
	defaultCollectors = "cpu,cs,logical_disk,net,os,service,system,textfile"

	// defaultCollectorsPlaceholder is the token that expandEnabledCollectors
	// substitutes with defaultCollectors.
	defaultCollectorsPlaceholder = "[defaults]"
)
var (
2017-07-05 11:12:42 +00:00
scrapeDurationDesc = prometheus . NewDesc (
prometheus . BuildFQName ( collector . Namespace , "exporter" , "collector_duration_seconds" ) ,
2020-05-24 18:36:25 +00:00
"windows_exporter: Duration of a collection." ,
2017-07-05 11:12:42 +00:00
[ ] string { "collector" } ,
nil ,
)
scrapeSuccessDesc = prometheus . NewDesc (
prometheus . BuildFQName ( collector . Namespace , "exporter" , "collector_success" ) ,
2020-05-24 18:36:25 +00:00
"windows_exporter: Whether the collector was successful." ,
2017-07-05 11:12:42 +00:00
[ ] string { "collector" } ,
nil ,
2016-09-01 14:04:43 +00:00
)
2019-05-15 19:22:29 +00:00
scrapeTimeoutDesc = prometheus . NewDesc (
prometheus . BuildFQName ( collector . Namespace , "exporter" , "collector_timeout" ) ,
2020-05-24 18:36:25 +00:00
"windows_exporter: Whether the collector timed out." ,
2019-05-15 19:22:29 +00:00
[ ] string { "collector" } ,
nil ,
)
2019-06-23 20:01:43 +00:00
snapshotDuration = prometheus . NewDesc (
prometheus . BuildFQName ( collector . Namespace , "exporter" , "perflib_snapshot_duration_seconds" ) ,
"Duration of perflib snapshot capture" ,
nil ,
nil ,
)
2016-09-01 14:04:43 +00:00
)
2016-08-26 06:59:27 +00:00
// Describe sends all the descriptors of the collectors included to
// the provided channel.
2020-05-24 18:45:54 +00:00
func ( coll windowsCollector ) Describe ( ch chan <- * prometheus . Desc ) {
2017-07-05 11:12:42 +00:00
ch <- scrapeDurationDesc
ch <- scrapeSuccessDesc
2016-08-26 06:59:27 +00:00
}
2019-08-03 17:29:01 +00:00
// collectorOutcome records how far a single collector got during a scrape.
type collectorOutcome int

const (
	// pending is the zero value: the collector has not reported a result.
	pending = collectorOutcome(iota)
	// success means the collector returned without an error.
	success
	// failed means the collector returned an error.
	failed
)
2016-08-26 06:59:27 +00:00
// Collect sends the collected metrics from each of the collectors to
2019-05-15 19:22:29 +00:00
// prometheus.
2020-05-24 18:45:54 +00:00
func ( coll windowsCollector ) Collect ( ch chan <- prometheus . Metric ) {
2019-06-23 20:01:43 +00:00
t := time . Now ( )
2020-02-09 20:09:26 +00:00
cs := make ( [ ] string , 0 , len ( coll . collectors ) )
for name := range coll . collectors {
cs = append ( cs , name )
}
scrapeContext , err := collector . PrepareScrapeContext ( cs )
2019-06-23 20:01:43 +00:00
ch <- prometheus . MustNewConstMetric (
snapshotDuration ,
prometheus . GaugeValue ,
time . Since ( t ) . Seconds ( ) ,
)
2019-04-05 13:59:40 +00:00
if err != nil {
ch <- prometheus . NewInvalidMetric ( scrapeSuccessDesc , fmt . Errorf ( "failed to prepare scrape: %v" , err ) )
return
}
2019-08-03 17:29:01 +00:00
wg := sync . WaitGroup { }
wg . Add ( len ( coll . collectors ) )
collectorOutcomes := make ( map [ string ] collectorOutcome )
2019-05-15 19:22:29 +00:00
for name := range coll . collectors {
2019-08-03 17:29:01 +00:00
collectorOutcomes [ name ] = pending
2019-05-15 19:22:29 +00:00
}
metricsBuffer := make ( chan prometheus . Metric )
2019-08-03 17:29:01 +00:00
l := sync . Mutex { }
finished := false
2019-05-15 19:22:29 +00:00
go func ( ) {
2019-08-03 17:29:01 +00:00
for m := range metricsBuffer {
l . Lock ( )
if ! finished {
ch <- m
2019-05-15 19:22:29 +00:00
}
2019-08-03 17:29:01 +00:00
l . Unlock ( )
2019-05-15 19:22:29 +00:00
}
} ( )
2016-09-01 14:04:43 +00:00
for name , c := range coll . collectors {
go func ( name string , c collector . Collector ) {
2019-06-23 20:01:43 +00:00
defer wg . Done ( )
2019-08-03 17:29:01 +00:00
outcome := execute ( name , c , scrapeContext , metricsBuffer )
l . Lock ( )
if ! finished {
collectorOutcomes [ name ] = outcome
}
l . Unlock ( )
2016-09-01 14:04:43 +00:00
} ( name , c )
}
2018-04-29 14:53:34 +00:00
2019-08-03 17:29:01 +00:00
allDone := make ( chan struct { } )
go func ( ) {
wg . Wait ( )
close ( allDone )
2019-08-08 19:09:21 +00:00
close ( metricsBuffer )
2019-08-03 17:29:01 +00:00
} ( )
2019-05-15 19:22:29 +00:00
2019-08-03 17:29:01 +00:00
// Wait until either all collectors finish, or timeout expires
2019-05-15 19:22:29 +00:00
select {
case <- allDone :
case <- time . After ( coll . maxScrapeDuration ) :
2019-08-03 17:29:01 +00:00
}
l . Lock ( )
finished = true
remainingCollectorNames := make ( [ ] string , 0 )
for name , outcome := range collectorOutcomes {
var successValue , timeoutValue float64
if outcome == pending {
timeoutValue = 1.0
remainingCollectorNames = append ( remainingCollectorNames , name )
2019-05-15 19:22:29 +00:00
}
2019-08-03 17:29:01 +00:00
if outcome == success {
successValue = 1.0
2019-05-15 19:22:29 +00:00
}
2019-08-03 17:29:01 +00:00
ch <- prometheus . MustNewConstMetric (
scrapeSuccessDesc ,
prometheus . GaugeValue ,
successValue ,
name ,
)
ch <- prometheus . MustNewConstMetric (
scrapeTimeoutDesc ,
prometheus . GaugeValue ,
timeoutValue ,
name ,
)
2019-05-15 19:22:29 +00:00
}
2019-08-03 17:29:01 +00:00
if len ( remainingCollectorNames ) > 0 {
log . Warn ( "Collection timed out, still waiting for " , remainingCollectorNames )
}
l . Unlock ( )
2016-09-01 14:04:43 +00:00
}
2016-08-26 06:59:27 +00:00
2019-08-03 17:29:01 +00:00
func execute ( name string , c collector . Collector , ctx * collector . ScrapeContext , ch chan <- prometheus . Metric ) collectorOutcome {
t := time . Now ( )
2019-04-05 13:59:40 +00:00
err := c . Collect ( ctx , ch )
2019-08-03 17:29:01 +00:00
duration := time . Since ( t ) . Seconds ( )
2017-07-05 11:12:42 +00:00
ch <- prometheus . MustNewConstMetric (
scrapeDurationDesc ,
prometheus . GaugeValue ,
2019-08-03 17:29:01 +00:00
duration ,
2019-05-15 19:22:29 +00:00
name ,
)
2019-08-03 17:29:01 +00:00
if err != nil {
log . Errorf ( "collector %s failed after %fs: %s" , name , duration , err )
return failed
}
log . Debugf ( "collector %s succeeded after %fs." , name , duration )
return success
2016-09-01 14:04:43 +00:00
}
2017-03-04 11:44:47 +00:00
func expandEnabledCollectors ( enabled string ) [ ] string {
expanded := strings . Replace ( enabled , defaultCollectorsPlaceholder , defaultCollectors , - 1 )
separated := strings . Split ( expanded , "," )
unique := map [ string ] bool { }
for _ , s := range separated {
if s != "" {
unique [ s ] = true
}
}
result := make ( [ ] string , 0 , len ( unique ) )
2018-04-05 05:11:36 +00:00
for s := range unique {
2017-03-04 11:44:47 +00:00
result = append ( result , s )
}
return result
}
2016-09-01 14:04:43 +00:00
func loadCollectors ( list string ) ( map [ string ] collector . Collector , error ) {
collectors := map [ string ] collector . Collector { }
2017-03-04 11:44:47 +00:00
enabled := expandEnabledCollectors ( list )
for _ , name := range enabled {
2020-02-09 20:09:26 +00:00
c , err := collector . Build ( name )
2016-09-01 14:04:43 +00:00
if err != nil {
return nil , err
}
collectors [ name ] = c
}
2020-02-09 20:09:26 +00:00
2016-09-01 14:04:43 +00:00
return collectors , nil
}
2017-06-26 19:03:17 +00:00
func initWbem ( ) {
// This initialization prevents a memory leak on WMF 5+. See
2020-05-24 18:45:54 +00:00
// https://github.com/prometheus-community/windows_exporter/issues/77 and
// linked issues for details.
2017-06-26 19:03:17 +00:00
log . Debugf ( "Initializing SWbemServices" )
s , err := wmi . InitializeSWbemServices ( wmi . DefaultClient )
if err != nil {
log . Fatal ( err )
}
2018-06-06 08:31:50 +00:00
wmi . DefaultClient . AllowMissingFields = true
2017-06-26 19:03:17 +00:00
wmi . DefaultClient . SWbemServicesClient = s
}
2016-08-26 06:59:27 +00:00
func main ( ) {
var (
2020-11-07 06:56:39 +00:00
configFile = kingpin . Flag (
2020-10-22 04:07:30 +00:00
"config.file" ,
2021-12-18 18:18:16 +00:00
"YAML configuration file to use. Values set in this file will be overridden by CLI flags." ,
2020-11-03 07:07:23 +00:00
) . String ( )
2022-10-24 11:12:46 +00:00
webConfig = webflag . AddFlags ( kingpin . CommandLine , ":9182" )
2020-11-07 06:56:39 +00:00
metricsPath = kingpin . Flag (
2017-08-12 06:44:59 +00:00
"telemetry.path" ,
"URL path for surfacing collected metrics." ,
) . Default ( "/metrics" ) . String ( )
2020-11-07 06:56:39 +00:00
maxRequests = kingpin . Flag (
2020-03-02 21:39:39 +00:00
"telemetry.max-requests" ,
"Maximum number of concurrent requests. 0 to disable." ,
) . Default ( "5" ) . Int ( )
2020-11-07 06:56:39 +00:00
enabledCollectors = kingpin . Flag (
2017-08-12 06:44:59 +00:00
"collectors.enabled" ,
2018-08-06 01:39:43 +00:00
"Comma-separated list of collectors to use. Use '[defaults]' as a placeholder for all the collectors enabled by default." ) .
2020-02-09 20:09:26 +00:00
Default ( defaultCollectors ) . String ( )
2020-11-07 06:56:39 +00:00
printCollectors = kingpin . Flag (
2017-08-12 06:44:59 +00:00
"collectors.print" ,
"If true, print available collectors and exit." ,
) . Bool ( )
2020-11-07 06:56:39 +00:00
timeoutMargin = kingpin . Flag (
2019-06-23 20:01:43 +00:00
"scrape.timeout-margin" ,
"Seconds to subtract from the timeout allowed by the client. Tune to allow for overhead or high loads." ,
) . Default ( "0.5" ) . Float64 ( )
2016-08-26 06:59:27 +00:00
)
2020-11-07 06:56:39 +00:00
log . AddFlags ( kingpin . CommandLine )
kingpin . Version ( version . Print ( "windows_exporter" ) )
kingpin . HelpFlag . Short ( 'h' )
2020-10-22 04:07:30 +00:00
// Load values from configuration file(s). Executable flags must first be parsed, in order
// to load the specified file(s).
2020-11-07 06:56:39 +00:00
kingpin . Parse ( )
2022-08-23 13:57:16 +00:00
log . Debug ( "Logging has Started" )
2020-11-03 07:07:23 +00:00
if * configFile != "" {
resolver , err := config . NewResolver ( * configFile )
if err != nil {
log . Fatalf ( "could not load config file: %v\n" , err )
}
2020-11-07 06:56:39 +00:00
err = resolver . Bind ( kingpin . CommandLine , os . Args [ 1 : ] )
2020-11-03 07:07:23 +00:00
if err != nil {
log . Fatalf ( "%v\n" , err )
}
2022-12-03 23:44:53 +00:00
// NOTE: This is temporary fix for issue #1092, calling kingpin.Parse
// twice makes slices flags duplicate its value, this clean up
// the first parse before the second call.
* webConfig . WebListenAddresses = ( * webConfig . WebListenAddresses ) [ 1 : ]
2020-11-03 07:07:23 +00:00
// Parse flags once more to include those discovered in configuration file(s).
2020-11-07 06:56:39 +00:00
kingpin . Parse ( )
2020-10-22 04:07:30 +00:00
}
2016-09-01 14:04:43 +00:00
if * printCollectors {
2020-02-09 20:09:26 +00:00
collectors := collector . Available ( )
collectorNames := make ( sort . StringSlice , 0 , len ( collectors ) )
for _ , n := range collectors {
2016-09-01 14:04:43 +00:00
collectorNames = append ( collectorNames , n )
}
collectorNames . Sort ( )
fmt . Printf ( "Available collectors:\n" )
for _ , n := range collectorNames {
fmt . Printf ( " - %s\n" , n )
}
return
}
2017-06-26 19:03:17 +00:00
initWbem ( )
2016-09-01 14:04:43 +00:00
collectors , err := loadCollectors ( * enabledCollectors )
if err != nil {
log . Fatalf ( "Couldn't load collectors: %s" , err )
}
2020-07-30 23:36:58 +00:00
u , err := user . Current ( )
if err != nil {
log . Fatalf ( err . Error ( ) )
}
log . Infof ( "Running as %v" , u . Username )
if strings . Contains ( u . Username , "ContainerAdministrator" ) || strings . Contains ( u . Username , "ContainerUser" ) {
log . Warnf ( "Running as a preconfigured Windows Container user. This may mean you do not have Windows HostProcess containers configured correctly and some functionality will not work as expected." )
}
2016-09-16 06:36:58 +00:00
log . Infof ( "Enabled collectors: %v" , strings . Join ( keys ( collectors ) , ", " ) )
2016-09-01 14:04:43 +00:00
2019-06-23 20:01:43 +00:00
h := & metricsHandler {
timeoutMargin : * timeoutMargin ,
2020-10-26 13:01:25 +00:00
collectorFactory : func ( timeout time . Duration , requestedCollectors [ ] string ) ( error , * windowsCollector ) {
filteredCollectors := make ( map [ string ] collector . Collector )
// scrape all enabled collectors if no collector is requested
if len ( requestedCollectors ) == 0 {
filteredCollectors = collectors
}
for _ , name := range requestedCollectors {
col , exists := collectors [ name ]
if ! exists {
return fmt . Errorf ( "unavailable collector: %s" , name ) , nil
}
filteredCollectors [ name ] = col
}
return nil , & windowsCollector {
collectors : filteredCollectors ,
2019-06-23 20:01:43 +00:00
maxScrapeDuration : timeout ,
}
} ,
2019-05-15 19:22:29 +00:00
}
2016-08-26 06:59:27 +00:00
2020-03-02 21:39:39 +00:00
http . HandleFunc ( * metricsPath , withConcurrencyLimit ( * maxRequests , h . ServeHTTP ) )
2016-09-27 12:37:12 +00:00
http . HandleFunc ( "/health" , healthCheck )
2020-06-23 10:48:19 +00:00
http . HandleFunc ( "/version" , func ( w http . ResponseWriter , r * http . Request ) {
// we can't use "version" directly as it is a package, and not an object that
// can be serialized.
err := json . NewEncoder ( w ) . Encode ( prometheusVersion {
Version : version . Version ,
Revision : version . Revision ,
Branch : version . Branch ,
BuildUser : version . BuildUser ,
BuildDate : version . BuildDate ,
GoVersion : version . GoVersion ,
} )
if err != nil {
http . Error ( w , fmt . Sprintf ( "error encoding JSON: %s" , err ) , http . StatusInternalServerError )
}
} )
2016-08-26 06:59:27 +00:00
http . HandleFunc ( "/" , func ( w http . ResponseWriter , r * http . Request ) {
2020-03-02 19:20:02 +00:00
_ , _ = w . Write ( [ ] byte ( ` < html >
2020-05-24 18:45:54 +00:00
< head > < title > windows_exporter < / title > < / head >
2020-03-02 19:20:02 +00:00
< body >
2020-05-24 18:45:54 +00:00
< h1 > windows_exporter < / h1 >
2020-03-02 19:20:02 +00:00
< p > < a href = "` + *metricsPath + `" > Metrics < / a > < / p >
2020-05-24 18:45:54 +00:00
< p > < i > ` + version.Info() + ` < / i > < / p >
2020-03-02 19:20:02 +00:00
< / body >
< / html > ` ) )
2016-08-26 06:59:27 +00:00
} )
2020-05-24 18:45:54 +00:00
log . Infoln ( "Starting windows_exporter" , version . Info ( ) )
2016-09-01 14:04:43 +00:00
log . Infoln ( "Build context" , version . BuildContext ( ) )
2016-09-16 06:36:58 +00:00
go func ( ) {
2022-10-24 11:12:46 +00:00
server := & http . Server { }
if err := web . ListenAndServe ( server , webConfig , log . NewToolkitAdapter ( ) ) ; err != nil {
2021-01-30 10:17:29 +00:00
log . Fatalf ( "cannot start windows_exporter: %s" , err )
2021-01-03 14:54:32 +00:00
}
2016-09-16 06:36:58 +00:00
} ( )
for {
2022-08-23 13:57:16 +00:00
if <- initiate . StopCh {
2020-05-24 18:45:54 +00:00
log . Info ( "Shutting down windows_exporter" )
2016-09-16 06:36:58 +00:00
break
}
}
}
2016-09-27 12:37:12 +00:00
func healthCheck ( w http . ResponseWriter , r * http . Request ) {
w . Header ( ) . Set ( "Content-Type" , "application/json" )
2018-10-05 06:01:02 +00:00
_ , err := fmt . Fprintln ( w , ` { "status":"ok"} ` )
if err != nil {
log . Debugf ( "Failed to write to stream: %v" , err )
}
2016-09-27 12:37:12 +00:00
}
2016-09-16 06:36:58 +00:00
func keys ( m map [ string ] collector . Collector ) [ ] string {
ret := make ( [ ] string , 0 , len ( m ) )
2016-09-27 12:37:12 +00:00
for key := range m {
2016-09-16 06:36:58 +00:00
ret = append ( ret , key )
}
return ret
}
2020-03-02 21:39:39 +00:00
// withConcurrencyLimit wraps next so that at most n requests run it at the
// same time. Requests beyond the limit are rejected immediately with
// 503 Service Unavailable. When n is zero or negative, next is returned
// unwrapped and no limit applies.
func withConcurrencyLimit(n int, next http.HandlerFunc) http.HandlerFunc {
	if n <= 0 {
		return next
	}

	// slots is a counting semaphore: one buffered element per running request.
	slots := make(chan struct{}, n)
	release := func() { <-slots }

	return func(w http.ResponseWriter, r *http.Request) {
		select {
		case slots <- struct{}{}:
			// Slot acquired; fall through to the wrapped handler.
		default:
			w.WriteHeader(http.StatusServiceUnavailable)
			_, _ = w.Write([]byte("Too many concurrent requests"))
			return
		}
		defer release()
		next(w, r)
	}
}
2019-06-23 20:01:43 +00:00
type metricsHandler struct {
timeoutMargin float64
2020-10-26 13:01:25 +00:00
collectorFactory func ( timeout time . Duration , requestedCollectors [ ] string ) ( error , * windowsCollector )
2019-06-23 20:01:43 +00:00
}
func ( mh * metricsHandler ) ServeHTTP ( w http . ResponseWriter , r * http . Request ) {
const defaultTimeout = 10.0
var timeoutSeconds float64
if v := r . Header . Get ( "X-Prometheus-Scrape-Timeout-Seconds" ) ; v != "" {
var err error
timeoutSeconds , err = strconv . ParseFloat ( v , 64 )
if err != nil {
2019-06-24 19:55:33 +00:00
log . Warnf ( "Couldn't parse X-Prometheus-Scrape-Timeout-Seconds: %q. Defaulting timeout to %f" , v , defaultTimeout )
2019-06-23 20:01:43 +00:00
}
}
if timeoutSeconds == 0 {
timeoutSeconds = defaultTimeout
}
timeoutSeconds = timeoutSeconds - mh . timeoutMargin
reg := prometheus . NewRegistry ( )
2020-10-26 13:01:25 +00:00
err , wc := mh . collectorFactory ( time . Duration ( timeoutSeconds * float64 ( time . Second ) ) , r . URL . Query ( ) [ "collect[]" ] )
if err != nil {
log . Warnln ( "Couldn't create filtered metrics handler: " , err )
w . WriteHeader ( http . StatusBadRequest )
2021-12-24 10:19:05 +00:00
w . Write ( [ ] byte ( fmt . Sprintf ( "Couldn't create filtered metrics handler: %s" , err ) ) ) //nolint:errcheck
2020-10-26 13:01:25 +00:00
return
}
reg . MustRegister ( wc )
2019-06-23 20:01:43 +00:00
reg . MustRegister (
2022-04-29 01:18:05 +00:00
collectors . NewProcessCollector ( collectors . ProcessCollectorOpts { } ) ,
collectors . NewGoCollector ( ) ,
2020-05-24 18:36:25 +00:00
version . NewCollector ( "windows_exporter" ) ,
2019-06-23 20:01:43 +00:00
)
h := promhttp . HandlerFor ( reg , promhttp . HandlerOpts { } )
h . ServeHTTP ( w , r )
}