collector: fix flapping metrics if process is enabled. (#1643)
This commit is contained in:
parent
2d334e4df0
commit
65d19f433e
8
go.mod
8
go.mod
|
@ -3,7 +3,7 @@ module github.com/prometheus-community/windows_exporter
|
|||
go 1.23
|
||||
|
||||
require (
|
||||
github.com/Microsoft/hcsshim v0.12.6
|
||||
github.com/Microsoft/hcsshim v0.12.7
|
||||
github.com/alecthomas/kingpin/v2 v2.4.0
|
||||
github.com/bmatcuk/doublestar/v4 v4.6.1
|
||||
github.com/dimchansky/utfbom v1.1.1
|
||||
|
@ -30,7 +30,7 @@ require (
|
|||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
|
||||
github.com/jpillora/backoff v1.0.0 // indirect
|
||||
github.com/klauspost/compress v1.17.9 // indirect
|
||||
github.com/klauspost/compress v1.17.10 // indirect
|
||||
github.com/mdlayher/socket v0.5.1 // indirect
|
||||
github.com/mdlayher/vsock v1.2.1 // indirect
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||
|
@ -46,8 +46,8 @@ require (
|
|||
golang.org/x/oauth2 v0.23.0 // indirect
|
||||
golang.org/x/sync v0.8.0 // indirect
|
||||
golang.org/x/text v0.18.0 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect
|
||||
google.golang.org/grpc v1.66.2 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240924160255-9d4c2d233b61 // indirect
|
||||
google.golang.org/grpc v1.67.0 // indirect
|
||||
google.golang.org/protobuf v1.34.2 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
)
|
||||
|
|
16
go.sum
16
go.sum
|
@ -2,8 +2,8 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT
|
|||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
|
||||
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
|
||||
github.com/Microsoft/hcsshim v0.12.6 h1:qEnZjoHXv+4/s0LmKZWE0/AiZmMWEIkFfWBSf1a0wlU=
|
||||
github.com/Microsoft/hcsshim v0.12.6/go.mod h1:ZABCLVcvLMjIkzr9rUGcQ1QA0p0P3Ps+d3N1g2DsFfk=
|
||||
github.com/Microsoft/hcsshim v0.12.7 h1:MP6R1spmjxTE4EU4J3YsrTxn8CjvN9qwjTKJXldFaRg=
|
||||
github.com/Microsoft/hcsshim v0.12.7/go.mod h1:HPbAuJ9BvQYYZbB4yEQcyGIsTP5L4yHKeO9XO149AEM=
|
||||
github.com/alecthomas/kingpin/v2 v2.4.0 h1:f48lwail6p8zpO1bC4TxtqACaGqHYA22qkHjHpqDjYY=
|
||||
github.com/alecthomas/kingpin/v2 v2.4.0/go.mod h1:0gyi0zQnjuFk8xrkNKamJoyUo382HRL7ATRpFZCw6tE=
|
||||
github.com/alecthomas/units v0.0.0-20240626203959-61d1e3462e30 h1:t3eaIm0rUkzbrIewtiFmMK5RXHej2XnoXNhxVsAYUfg=
|
||||
|
@ -63,8 +63,8 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
|||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA=
|
||||
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
|
||||
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
|
||||
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
|
||||
github.com/klauspost/compress v1.17.10 h1:oXAz+Vh0PMUvJczoi+flxpnBEPxoER1IaAnU/NMPtT0=
|
||||
github.com/klauspost/compress v1.17.10/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
|
@ -163,15 +163,15 @@ google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7
|
|||
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
|
||||
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
|
||||
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 h1:pPJltXNxVzT4pK9yD8vR9X75DaWYYmLGMsEvBfFQZzQ=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240924160255-9d4c2d233b61 h1:N9BgCIAUvn/M+p4NJccWPWb3BWh88+zyL0ll9HgbEeM=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240924160255-9d4c2d233b61/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU=
|
||||
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
|
||||
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
|
||||
google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
|
||||
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
|
||||
google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc=
|
||||
google.golang.org/grpc v1.66.2 h1:3QdXkuq3Bkh7w+ywLdLvM56cmGvQHUMZpiCzt6Rqaoo=
|
||||
google.golang.org/grpc v1.66.2/go.mod h1:s3/l6xSSCURdVfAnL+TqCNMyTDAGN6+lZeVxnZR128Y=
|
||||
google.golang.org/grpc v1.67.0 h1:IdH9y6PF5MPSdAntIcpjQ+tXO41pcQsfZV2RxtQgVcw=
|
||||
google.golang.org/grpc v1.67.0/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA=
|
||||
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
|
||||
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
|
||||
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
|
||||
|
|
|
@ -97,6 +97,7 @@ func (c *Collector) Build(logger *slog.Logger, _ *wmi.Client) error {
|
|||
// to the provided prometheus Metric channel.
|
||||
func (c *Collector) Collect(_ *types.ScrapeContext, logger *slog.Logger, ch chan<- prometheus.Metric) error {
|
||||
logger = logger.With(slog.String("collector", Name))
|
||||
|
||||
if err := c.collect(ch); err != nil {
|
||||
logger.Error("failed collecting cs metrics",
|
||||
slog.Any("err", err),
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"sync"
|
||||
|
@ -164,37 +165,65 @@ func (p *Prometheus) Collect(ch chan<- prometheus.Metric) {
|
|||
)
|
||||
}
|
||||
|
||||
func (p *Prometheus) execute(name string, c Collector, ctx *types.ScrapeContext, ch chan<- prometheus.Metric) collectorStatusCode {
|
||||
func (p *Prometheus) execute(name string, c Collector, scrapeCtx *types.ScrapeContext, ch chan<- prometheus.Metric) collectorStatusCode {
|
||||
var (
|
||||
err error
|
||||
duration time.Duration
|
||||
timeout atomic.Bool
|
||||
err error
|
||||
numMetrics int
|
||||
duration time.Duration
|
||||
timeout atomic.Bool
|
||||
)
|
||||
|
||||
// bufCh is a buffer channel to store the metrics
|
||||
// This is needed because once timeout is reached, the prometheus registry channel is closed.
|
||||
bufCh := make(chan prometheus.Metric, 10)
|
||||
bufCh := make(chan prometheus.Metric, 1000)
|
||||
errCh := make(chan error, 1)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), p.maxScrapeDuration)
|
||||
defer cancel()
|
||||
|
||||
// Execute the collector
|
||||
go func() {
|
||||
errCh <- c.Collect(ctx, p.logger, bufCh)
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
p.logger.Error("panic in collector "+name,
|
||||
slog.Any("panic", r),
|
||||
)
|
||||
}
|
||||
}()
|
||||
|
||||
errCh <- c.Collect(scrapeCtx, p.logger, bufCh)
|
||||
|
||||
close(bufCh)
|
||||
}()
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(1)
|
||||
|
||||
go func() {
|
||||
defer func() {
|
||||
// This prevents a panic from race-condition when closing the ch channel too early.
|
||||
_ = recover()
|
||||
|
||||
wg.Done()
|
||||
}()
|
||||
|
||||
// Pass metrics to the prometheus registry
|
||||
// If timeout is reached, the channel is closed.
|
||||
// This will cause a panic if we try to write to it.
|
||||
for m := range bufCh {
|
||||
if !timeout.Load() {
|
||||
ch <- m
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case m, ok := <-bufCh:
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
|
||||
if !timeout.Load() {
|
||||
ch <- m
|
||||
|
||||
numMetrics++
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
@ -204,6 +233,8 @@ func (p *Prometheus) execute(name string, c Collector, ctx *types.ScrapeContext,
|
|||
// Wait for the collector to finish or timeout
|
||||
select {
|
||||
case err = <-errCh:
|
||||
wg.Wait() // Wait for the buffer channel to be closed and empty
|
||||
|
||||
duration = time.Since(t)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
p.collectorScrapeDurationDesc,
|
||||
|
@ -211,7 +242,7 @@ func (p *Prometheus) execute(name string, c Collector, ctx *types.ScrapeContext,
|
|||
duration.Seconds(),
|
||||
name,
|
||||
)
|
||||
case <-time.After(p.maxScrapeDuration):
|
||||
case <-ctx.Done():
|
||||
timeout.Store(true)
|
||||
|
||||
duration = time.Since(t)
|
||||
|
@ -222,20 +253,27 @@ func (p *Prometheus) execute(name string, c Collector, ctx *types.ScrapeContext,
|
|||
name,
|
||||
)
|
||||
|
||||
p.logger.Warn(fmt.Sprintf("collector %s timeouted after %s", name, p.maxScrapeDuration))
|
||||
p.logger.Warn(fmt.Sprintf("collector %s timeouted after %s, resulting in %d metrics", name, p.maxScrapeDuration, numMetrics))
|
||||
|
||||
go func() {
|
||||
// Drain channel in case of premature return to not leak a goroutine.
|
||||
//nolint:revive
|
||||
for range bufCh {
|
||||
}
|
||||
}()
|
||||
|
||||
return pending
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
p.logger.Error(fmt.Sprintf("collector %s failed after %s", name, duration),
|
||||
p.logger.Error(fmt.Sprintf("collector %s failed after %s, resulting in %d metrics", name, duration, numMetrics),
|
||||
slog.Any("err", err),
|
||||
)
|
||||
|
||||
return failed
|
||||
}
|
||||
|
||||
p.logger.Info(fmt.Sprintf("collector %s succeeded after %s", name, duration))
|
||||
p.logger.Info(fmt.Sprintf("collector %s succeeded after %s, resulting in %d metrics", name, duration, numMetrics))
|
||||
|
||||
return success
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue