Improve scraper shutdown time.
- Stop target pools in parallel. - Stop individual scrapers in goroutines, too. - Timing tweaks. Change-Id: I9dff1ee18616694f14b04408eaf1625d0f989696
This commit is contained in:
parent
92156ee89d
commit
74c143c4c9
5
main.go
5
main.go
|
@ -229,15 +229,12 @@ func (p *prometheus) Serve() {
|
|||
if err := p.storage.Stop(); err != nil {
|
||||
glog.Error("Error stopping local storage: ", err)
|
||||
}
|
||||
glog.Info("Local Storage: Done")
|
||||
|
||||
if p.remoteTSDBQueue != nil {
|
||||
p.remoteTSDBQueue.Stop()
|
||||
glog.Info("Remote Storage: Done")
|
||||
}
|
||||
|
||||
p.notificationHandler.Stop()
|
||||
glog.Info("Sundry Queues: Done")
|
||||
glog.Info("See you next time!")
|
||||
}
|
||||
|
||||
|
@ -258,9 +255,7 @@ func (p *prometheus) interruptHandler() {
|
|||
func (p *prometheus) close() {
|
||||
glog.Info("Shutdown has been requested; subsytems are closing:")
|
||||
p.targetManager.Stop()
|
||||
glog.Info("Remote Target Manager: Done")
|
||||
p.ruleManager.Stop()
|
||||
glog.Info("Rule Executor: Done")
|
||||
|
||||
close(p.unwrittenSamples)
|
||||
// Note: Before closing the remaining subsystems (storage, ...), we have
|
||||
|
|
|
@ -199,8 +199,10 @@ func (n *NotificationHandler) SubmitReqs(reqs NotificationReqs) {
|
|||
|
||||
// Stop shuts down the notification handler.
|
||||
func (n *NotificationHandler) Stop() {
|
||||
glog.Info("Stopping notification handler...")
|
||||
close(n.pendingNotifications)
|
||||
<-n.stopped
|
||||
glog.Info("Notification handler stopped.")
|
||||
}
|
||||
|
||||
// Describe implements prometheus.Collector.
|
||||
|
|
|
@ -223,6 +223,7 @@ func (t *target) RunScraper(ingester extraction.Ingester, interval time.Duration
|
|||
select {
|
||||
case <-jitterTimer.C:
|
||||
case <-t.stopScraper:
|
||||
jitterTimer.Stop()
|
||||
return
|
||||
}
|
||||
jitterTimer.Stop()
|
||||
|
@ -232,16 +233,32 @@ func (t *target) RunScraper(ingester extraction.Ingester, interval time.Duration
|
|||
|
||||
t.lastScrape = time.Now()
|
||||
t.scrape(ingester)
|
||||
|
||||
// Explanation of the contraption below:
|
||||
//
|
||||
// In case t.newBaseLabels or t.stopScraper have something to receive,
|
||||
// we want to read from those channels rather than starting a new scrape
|
||||
// (which might take very long). That's why the outer select has no
|
||||
// ticker.C. Should neither t.newBaseLabels nor t.stopScraper have
|
||||
// anything to receive, we go into the inner select, where ticker.C is
|
||||
// in the mix.
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
targetIntervalLength.WithLabelValues(interval.String()).Observe(float64(time.Since(t.lastScrape) / time.Second))
|
||||
t.lastScrape = time.Now()
|
||||
t.scrape(ingester)
|
||||
case newBaseLabels := <-t.newBaseLabels:
|
||||
t.baseLabels = newBaseLabels
|
||||
case <-t.stopScraper:
|
||||
return
|
||||
default:
|
||||
select {
|
||||
case newBaseLabels := <-t.newBaseLabels:
|
||||
t.baseLabels = newBaseLabels
|
||||
case <-t.stopScraper:
|
||||
return
|
||||
case <-ticker.C:
|
||||
targetIntervalLength.WithLabelValues(interval.String()).Observe(float64(time.Since(t.lastScrape) / time.Second))
|
||||
t.lastScrape = time.Now()
|
||||
t.scrape(ingester)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
package retrieval
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"github.com/golang/glog"
|
||||
"github.com/prometheus/client_golang/extraction"
|
||||
|
||||
|
@ -105,13 +106,22 @@ func (m *targetManager) AddTargetsFromConfig(config config.Config) {
|
|||
}
|
||||
|
||||
func (m *targetManager) Stop() {
|
||||
glog.Info("Target manager exiting...")
|
||||
for _, p := range m.poolsByJob {
|
||||
p.Stop()
|
||||
glog.Info("Stopping target manager...")
|
||||
var wg sync.WaitGroup
|
||||
for j, p := range m.poolsByJob {
|
||||
wg.Add(1)
|
||||
go func(j string, p *TargetPool) {
|
||||
defer wg.Done()
|
||||
glog.Infof("Stopping target pool %q...", j)
|
||||
p.Stop()
|
||||
glog.Infof("Target pool %q stopped.", j)
|
||||
}(j, p)
|
||||
}
|
||||
wg.Wait()
|
||||
glog.Info("Target manager stopped.")
|
||||
}
|
||||
|
||||
// TODO: Not really thread-safe. Only used in /status page for now.
|
||||
// TODO: Not goroutine-safe. Only used in /status page for now.
|
||||
func (m *targetManager) Pools() map[string]*TargetPool {
|
||||
return m.poolsByJob
|
||||
}
|
||||
|
|
|
@ -71,7 +71,6 @@ func (p *TargetPool) Run() {
|
|||
p.addTarget(newTarget)
|
||||
case stopped := <-p.done:
|
||||
p.ReplaceTargets([]Target{})
|
||||
glog.Info("TargetPool exiting...")
|
||||
close(stopped)
|
||||
return
|
||||
}
|
||||
|
@ -115,13 +114,20 @@ func (p *TargetPool) ReplaceTargets(newTargets []Target) {
|
|||
}
|
||||
}
|
||||
// Stop any targets no longer present.
|
||||
var wg sync.WaitGroup
|
||||
for k, oldTarget := range p.targetsByAddress {
|
||||
if !newTargetAddresses.Has(k) {
|
||||
glog.V(1).Info("Stopping scraper for target ", k)
|
||||
oldTarget.StopScraper()
|
||||
delete(p.targetsByAddress, k)
|
||||
wg.Add(1)
|
||||
go func(k string, oldTarget Target) {
|
||||
defer wg.Done()
|
||||
glog.V(1).Infof("Stopping scraper for target %s...", k)
|
||||
oldTarget.StopScraper()
|
||||
delete(p.targetsByAddress, k)
|
||||
glog.V(1).Infof("Scraper for target %s stopped.", k)
|
||||
}(k, oldTarget)
|
||||
}
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func (p *TargetPool) Targets() []Target {
|
||||
|
|
|
@ -126,13 +126,14 @@ func (m *ruleManager) Run() {
|
|||
m.runIteration(m.results)
|
||||
iterationDuration.Observe(float64(time.Since(start) / time.Millisecond))
|
||||
case <-m.done:
|
||||
glog.Info("Rule manager exiting...")
|
||||
glog.Info("Rule manager stopped.")
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *ruleManager) Stop() {
|
||||
glog.Info("Stopping rule manager...")
|
||||
m.done <- true
|
||||
}
|
||||
|
||||
|
|
|
@ -181,6 +181,7 @@ func (s *memorySeriesStorage) Start() {
|
|||
|
||||
// Stop implements Storage.
|
||||
func (s *memorySeriesStorage) Stop() error {
|
||||
glog.Info("Stopping local storage...")
|
||||
glog.Info("Stopping maintenance loop...")
|
||||
close(s.loopStopping)
|
||||
<-s.loopStopped
|
||||
|
@ -197,6 +198,7 @@ func (s *memorySeriesStorage) Stop() error {
|
|||
if err := s.persistence.close(); err != nil {
|
||||
return err
|
||||
}
|
||||
glog.Info("Local storage stopped.")
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -122,12 +122,13 @@ func (t *TSDBQueueManager) Queue(s clientmodel.Samples) {
|
|||
// Stop stops sending samples to the TSDB and waits for pending sends to
|
||||
// complete.
|
||||
func (t *TSDBQueueManager) Stop() {
|
||||
glog.Infof("TSDB queue manager shutting down...")
|
||||
glog.Infof("Stopping remote storage...")
|
||||
close(t.queue)
|
||||
<-t.drained
|
||||
for i := 0; i < maxConcurrentSends; i++ {
|
||||
t.sendSemaphore <- true
|
||||
}
|
||||
glog.Info("Remote storage stopped.")
|
||||
}
|
||||
|
||||
// Describe implements prometheus.Collector.
|
||||
|
|
Loading…
Reference in New Issue