2017-11-25 13:13:54 +00:00
|
|
|
// Copyright 2013 The Prometheus Authors
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2018-02-01 09:55:07 +00:00
|
|
|
package scrape
|
2017-11-25 13:13:54 +00:00
|
|
|
|
|
|
|
import (
|
2019-03-12 10:46:15 +00:00
|
|
|
"encoding"
|
2019-02-13 13:24:22 +00:00
|
|
|
"fmt"
|
2019-03-12 10:46:15 +00:00
|
|
|
"hash/fnv"
|
|
|
|
"net"
|
|
|
|
"os"
|
2018-01-18 11:49:42 +00:00
|
|
|
"reflect"
|
2018-01-17 11:46:17 +00:00
|
|
|
"sync"
|
2018-09-26 09:20:56 +00:00
|
|
|
"time"
|
2017-11-25 13:13:54 +00:00
|
|
|
|
2021-06-11 16:17:59 +00:00
|
|
|
"github.com/go-kit/log"
|
|
|
|
"github.com/go-kit/log/level"
|
2019-03-25 23:01:12 +00:00
|
|
|
"github.com/pkg/errors"
|
2020-01-29 11:13:18 +00:00
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
2020-10-22 09:00:08 +00:00
|
|
|
|
2017-11-25 13:13:54 +00:00
|
|
|
"github.com/prometheus/prometheus/config"
|
Refactor SD configuration to remove `config` dependency (#3629)
* refactor: move targetGroup struct and CheckOverflow() to their own package
* refactor: move auth and security related structs to a utility package, fix import error in utility package
* refactor: Azure SD, remove SD struct from config
* refactor: DNS SD, remove SD struct from config into dns package
* refactor: ec2 SD, move SD struct from config into the ec2 package
* refactor: file SD, move SD struct from config to file discovery package
* refactor: gce, move SD struct from config to gce discovery package
* refactor: move HTTPClientConfig and URL into util/config, fix import error in httputil
* refactor: consul, move SD struct from config into consul discovery package
* refactor: marathon, move SD struct from config into marathon discovery package
* refactor: triton, move SD struct from config to triton discovery package, fix test
* refactor: zookeeper, move SD structs from config to zookeeper discovery package
* refactor: openstack, remove SD struct from config, move into openstack discovery package
* refactor: kubernetes, move SD struct from config into kubernetes discovery package
* refactor: notifier, use targetgroup package instead of config
* refactor: tests for file, marathon, triton SD - use targetgroup package instead of config.TargetGroup
* refactor: retrieval, use targetgroup package instead of config.TargetGroup
* refactor: storage, use config util package
* refactor: discovery manager, use targetgroup package instead of config.TargetGroup
* refactor: use HTTPClient and TLS config from configUtil instead of config
* refactor: tests, use targetgroup package instead of config.TargetGroup
* refactor: fix tagetgroup.Group pointers that were removed by mistake
* refactor: openstack, kubernetes: drop prefixes
* refactor: remove import aliases forced due to vscode bug
* refactor: move main SD struct out of config into discovery/config
* refactor: rename configUtil to config_util
* refactor: rename yamlUtil to yaml_config
* refactor: kubernetes, remove prefixes
* refactor: move the TargetGroup package to discovery/
* refactor: fix order of imports
2017-12-29 20:01:34 +00:00
|
|
|
"github.com/prometheus/prometheus/discovery/targetgroup"
|
2019-03-13 10:02:36 +00:00
|
|
|
"github.com/prometheus/prometheus/pkg/labels"
|
2017-11-25 13:13:54 +00:00
|
|
|
"github.com/prometheus/prometheus/storage"
|
|
|
|
)
|
|
|
|
|
2020-01-29 11:13:18 +00:00
|
|
|
// targetMetadataCache is the package-level collector for the metadata cache
// metrics. NewManager registers each Manager it creates with this collector.
var targetMetadataCache = newMetadataMetricsCollector()
|
|
|
|
|
|
|
|
// MetadataMetricsCollector is a Custom Collector for the metadata cache metrics.
|
|
|
|
type MetadataMetricsCollector struct {
|
|
|
|
CacheEntries *prometheus.Desc
|
|
|
|
CacheBytes *prometheus.Desc
|
|
|
|
|
|
|
|
scrapeManager *Manager
|
|
|
|
}
|
|
|
|
|
|
|
|
func newMetadataMetricsCollector() *MetadataMetricsCollector {
|
|
|
|
return &MetadataMetricsCollector{
|
|
|
|
CacheEntries: prometheus.NewDesc(
|
|
|
|
"prometheus_target_metadata_cache_entries",
|
|
|
|
"Total number of metric metadata entries in the cache",
|
|
|
|
[]string{"scrape_job"},
|
|
|
|
nil,
|
|
|
|
),
|
|
|
|
CacheBytes: prometheus.NewDesc(
|
|
|
|
"prometheus_target_metadata_cache_bytes",
|
|
|
|
"The number of bytes that are currently used for storing metric metadata in the cache",
|
|
|
|
[]string{"scrape_job"},
|
|
|
|
nil,
|
|
|
|
),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (mc *MetadataMetricsCollector) registerManager(m *Manager) {
|
|
|
|
mc.scrapeManager = m
|
|
|
|
}
|
|
|
|
|
|
|
|
// Describe sends the metrics descriptions to the channel.
|
|
|
|
func (mc *MetadataMetricsCollector) Describe(ch chan<- *prometheus.Desc) {
|
|
|
|
ch <- mc.CacheEntries
|
|
|
|
ch <- mc.CacheBytes
|
|
|
|
}
|
|
|
|
|
|
|
|
// Collect creates and sends the metrics for the metadata cache.
|
|
|
|
func (mc *MetadataMetricsCollector) Collect(ch chan<- prometheus.Metric) {
|
|
|
|
if mc.scrapeManager == nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
for tset, targets := range mc.scrapeManager.TargetsActive() {
|
|
|
|
var size, length int
|
|
|
|
for _, t := range targets {
|
|
|
|
size += t.MetadataSize()
|
|
|
|
length += t.MetadataLength()
|
|
|
|
}
|
|
|
|
|
|
|
|
ch <- prometheus.MustNewConstMetric(
|
|
|
|
mc.CacheEntries,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
float64(length),
|
|
|
|
tset,
|
|
|
|
)
|
|
|
|
|
|
|
|
ch <- prometheus.MustNewConstMetric(
|
|
|
|
mc.CacheBytes,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
float64(size),
|
|
|
|
tset,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-02-01 10:06:24 +00:00
|
|
|
// NewManager is the Manager constructor
|
2020-02-06 15:58:38 +00:00
|
|
|
func NewManager(logger log.Logger, app storage.Appendable) *Manager {
|
2018-09-26 09:20:56 +00:00
|
|
|
if logger == nil {
|
|
|
|
logger = log.NewNopLogger()
|
|
|
|
}
|
2020-01-29 11:13:18 +00:00
|
|
|
m := &Manager{
|
2017-11-25 13:13:54 +00:00
|
|
|
append: app,
|
|
|
|
logger: logger,
|
|
|
|
scrapeConfigs: make(map[string]*config.ScrapeConfig),
|
|
|
|
scrapePools: make(map[string]*scrapePool),
|
2017-11-26 15:15:15 +00:00
|
|
|
graceShut: make(chan struct{}),
|
2018-09-26 09:20:56 +00:00
|
|
|
triggerReload: make(chan struct{}, 1),
|
2017-11-25 13:13:54 +00:00
|
|
|
}
|
2020-01-29 11:13:18 +00:00
|
|
|
targetMetadataCache.registerManager(m)
|
|
|
|
|
|
|
|
return m
|
2017-11-25 13:13:54 +00:00
|
|
|
}
|
|
|
|
|
2018-02-01 10:06:24 +00:00
|
|
|
// Manager maintains a set of scrape pools and manages start/stop cycles
|
2017-11-25 13:13:54 +00:00
|
|
|
// when receiving new target groups form the discovery manager.
|
2018-02-01 10:06:24 +00:00
|
|
|
type Manager struct {
|
2018-04-09 14:18:25 +00:00
|
|
|
logger log.Logger
|
2020-02-06 15:58:38 +00:00
|
|
|
append storage.Appendable
|
2018-04-09 14:18:25 +00:00
|
|
|
graceShut chan struct{}
|
|
|
|
|
2019-03-12 10:46:15 +00:00
|
|
|
jitterSeed uint64 // Global jitterSeed seed is used to spread scrape workload across HA setup.
|
2018-04-09 14:18:25 +00:00
|
|
|
mtxScrape sync.Mutex // Guards the fields below.
|
2017-11-25 13:13:54 +00:00
|
|
|
scrapeConfigs map[string]*config.ScrapeConfig
|
|
|
|
scrapePools map[string]*scrapePool
|
2018-09-26 09:20:56 +00:00
|
|
|
targetSets map[string][]*targetgroup.Group
|
|
|
|
|
|
|
|
triggerReload chan struct{}
|
2017-11-25 13:13:54 +00:00
|
|
|
}
|
|
|
|
|
2018-09-26 09:20:56 +00:00
|
|
|
// Run receives and saves target set updates and triggers the scraping loops reloading.
|
|
|
|
// Reloading happens in the background so that it doesn't block receiving targets updates.
|
2018-02-01 10:06:24 +00:00
|
|
|
func (m *Manager) Run(tsets <-chan map[string][]*targetgroup.Group) error {
|
2018-09-26 09:20:56 +00:00
|
|
|
go m.reloader()
|
2017-11-25 13:13:54 +00:00
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case ts := <-tsets:
|
2018-09-26 09:20:56 +00:00
|
|
|
m.updateTsets(ts)
|
|
|
|
|
|
|
|
select {
|
|
|
|
case m.triggerReload <- struct{}{}:
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
2017-11-26 15:15:15 +00:00
|
|
|
case <-m.graceShut:
|
|
|
|
return nil
|
2017-11-25 13:13:54 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-26 09:20:56 +00:00
|
|
|
func (m *Manager) reloader() {
|
|
|
|
ticker := time.NewTicker(5 * time.Second)
|
|
|
|
defer ticker.Stop()
|
|
|
|
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-m.graceShut:
|
|
|
|
return
|
|
|
|
case <-ticker.C:
|
|
|
|
select {
|
|
|
|
case <-m.triggerReload:
|
|
|
|
m.reload()
|
|
|
|
case <-m.graceShut:
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (m *Manager) reload() {
|
|
|
|
m.mtxScrape.Lock()
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
for setName, groups := range m.targetSets {
|
2019-02-13 13:24:22 +00:00
|
|
|
if _, ok := m.scrapePools[setName]; !ok {
|
2018-09-26 09:20:56 +00:00
|
|
|
scrapeConfig, ok := m.scrapeConfigs[setName]
|
|
|
|
if !ok {
|
|
|
|
level.Error(m.logger).Log("msg", "error reloading target set", "err", "invalid config id:"+setName)
|
2018-11-23 09:23:55 +00:00
|
|
|
continue
|
2018-09-26 09:20:56 +00:00
|
|
|
}
|
2019-03-12 10:46:15 +00:00
|
|
|
sp, err := newScrapePool(scrapeConfig, m.append, m.jitterSeed, log.With(m.logger, "scrape_pool", setName))
|
2019-02-13 13:24:22 +00:00
|
|
|
if err != nil {
|
|
|
|
level.Error(m.logger).Log("msg", "error creating new scrape pool", "err", err, "scrape_pool", setName)
|
|
|
|
continue
|
|
|
|
}
|
2018-09-26 09:20:56 +00:00
|
|
|
m.scrapePools[setName] = sp
|
|
|
|
}
|
|
|
|
|
|
|
|
wg.Add(1)
|
|
|
|
// Run the sync in parallel as these take a while and at high load can't catch up.
|
|
|
|
go func(sp *scrapePool, groups []*targetgroup.Group) {
|
|
|
|
sp.Sync(groups)
|
|
|
|
wg.Done()
|
2019-02-13 13:24:22 +00:00
|
|
|
}(m.scrapePools[setName], groups)
|
2018-09-26 09:20:56 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
m.mtxScrape.Unlock()
|
|
|
|
wg.Wait()
|
|
|
|
}
|
|
|
|
|
2019-03-12 10:46:15 +00:00
|
|
|
// setJitterSeed calculates a global jitterSeed per server relying on extra label set.
|
2019-03-13 10:02:36 +00:00
|
|
|
func (m *Manager) setJitterSeed(labels labels.Labels) error {
|
2019-03-12 10:46:15 +00:00
|
|
|
h := fnv.New64a()
|
|
|
|
hostname, err := getFqdn()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if _, err := fmt.Fprintf(h, "%s%s", hostname, labels.String()); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
m.jitterSeed = h.Sum64()
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2017-11-26 15:15:15 +00:00
|
|
|
// Stop cancels all running scrape pools and blocks until all have exited.
|
2018-02-01 10:06:24 +00:00
|
|
|
func (m *Manager) Stop() {
|
2018-04-09 14:18:25 +00:00
|
|
|
m.mtxScrape.Lock()
|
|
|
|
defer m.mtxScrape.Unlock()
|
|
|
|
|
2017-11-26 15:15:15 +00:00
|
|
|
for _, sp := range m.scrapePools {
|
|
|
|
sp.stop()
|
|
|
|
}
|
|
|
|
close(m.graceShut)
|
|
|
|
}
|
|
|
|
|
2018-09-26 09:20:56 +00:00
|
|
|
func (m *Manager) updateTsets(tsets map[string][]*targetgroup.Group) {
|
|
|
|
m.mtxScrape.Lock()
|
|
|
|
m.targetSets = tsets
|
|
|
|
m.mtxScrape.Unlock()
|
|
|
|
}
|
|
|
|
|
2017-11-25 13:13:54 +00:00
|
|
|
// ApplyConfig resets the manager's target providers and job configurations as defined by the new cfg.
|
2018-02-01 10:06:24 +00:00
|
|
|
func (m *Manager) ApplyConfig(cfg *config.Config) error {
|
2018-04-09 14:18:25 +00:00
|
|
|
m.mtxScrape.Lock()
|
|
|
|
defer m.mtxScrape.Unlock()
|
|
|
|
|
2018-01-17 11:46:17 +00:00
|
|
|
c := make(map[string]*config.ScrapeConfig)
|
|
|
|
for _, scfg := range cfg.ScrapeConfigs {
|
|
|
|
c[scfg.JobName] = scfg
|
2017-11-25 13:13:54 +00:00
|
|
|
}
|
2018-01-17 11:46:17 +00:00
|
|
|
m.scrapeConfigs = c
|
2018-01-18 11:49:42 +00:00
|
|
|
|
2019-03-12 10:46:15 +00:00
|
|
|
if err := m.setJitterSeed(cfg.GlobalConfig.ExternalLabels); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2019-02-13 13:24:22 +00:00
|
|
|
// Cleanup and reload pool if the configuration has changed.
|
|
|
|
var failed bool
|
2018-01-18 11:49:42 +00:00
|
|
|
for name, sp := range m.scrapePools {
|
|
|
|
if cfg, ok := m.scrapeConfigs[name]; !ok {
|
|
|
|
sp.stop()
|
|
|
|
delete(m.scrapePools, name)
|
|
|
|
} else if !reflect.DeepEqual(sp.config, cfg) {
|
2019-02-13 13:24:22 +00:00
|
|
|
err := sp.reload(cfg)
|
|
|
|
if err != nil {
|
|
|
|
level.Error(m.logger).Log("msg", "error reloading scrape pool", "err", err, "scrape_pool", name)
|
|
|
|
failed = true
|
|
|
|
}
|
2018-01-18 11:49:42 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-02-13 13:24:22 +00:00
|
|
|
if failed {
|
2019-03-25 23:01:12 +00:00
|
|
|
return errors.New("failed to apply the new configuration")
|
2019-02-13 13:24:22 +00:00
|
|
|
}
|
2017-11-25 13:13:54 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-04-09 14:18:25 +00:00
|
|
|
// TargetsAll returns active and dropped targets grouped by job_name.
|
|
|
|
func (m *Manager) TargetsAll() map[string][]*Target {
|
2018-09-26 09:20:56 +00:00
|
|
|
m.mtxScrape.Lock()
|
|
|
|
defer m.mtxScrape.Unlock()
|
2018-01-17 11:46:17 +00:00
|
|
|
|
2018-09-26 09:20:56 +00:00
|
|
|
targets := make(map[string][]*Target, len(m.scrapePools))
|
|
|
|
for tset, sp := range m.scrapePools {
|
|
|
|
targets[tset] = append(sp.ActiveTargets(), sp.DroppedTargets()...)
|
|
|
|
}
|
|
|
|
return targets
|
2018-04-09 14:18:25 +00:00
|
|
|
}
|
2018-01-17 11:46:17 +00:00
|
|
|
|
2018-09-26 09:20:56 +00:00
|
|
|
// TargetsActive returns the active targets currently being scraped.
|
|
|
|
func (m *Manager) TargetsActive() map[string][]*Target {
|
|
|
|
m.mtxScrape.Lock()
|
|
|
|
defer m.mtxScrape.Unlock()
|
2017-11-25 13:13:54 +00:00
|
|
|
|
2019-07-29 16:08:54 +00:00
|
|
|
var (
|
|
|
|
wg sync.WaitGroup
|
|
|
|
mtx sync.Mutex
|
|
|
|
)
|
|
|
|
|
2018-09-26 09:20:56 +00:00
|
|
|
targets := make(map[string][]*Target, len(m.scrapePools))
|
2019-07-29 16:08:54 +00:00
|
|
|
wg.Add(len(m.scrapePools))
|
2018-09-26 09:20:56 +00:00
|
|
|
for tset, sp := range m.scrapePools {
|
2019-07-29 16:08:54 +00:00
|
|
|
// Running in parallel limits the blocking time of scrapePool to scrape
|
|
|
|
// interval when there's an update from SD.
|
|
|
|
go func(tset string, sp *scrapePool) {
|
|
|
|
mtx.Lock()
|
|
|
|
targets[tset] = sp.ActiveTargets()
|
|
|
|
mtx.Unlock()
|
|
|
|
wg.Done()
|
|
|
|
}(tset, sp)
|
2018-02-21 17:26:18 +00:00
|
|
|
}
|
2019-07-29 16:08:54 +00:00
|
|
|
wg.Wait()
|
2018-09-26 09:20:56 +00:00
|
|
|
return targets
|
2018-02-21 17:26:18 +00:00
|
|
|
}
|
|
|
|
|
2018-09-26 09:20:56 +00:00
|
|
|
// TargetsDropped returns the dropped targets during relabelling.
|
|
|
|
func (m *Manager) TargetsDropped() map[string][]*Target {
|
2018-04-09 14:18:25 +00:00
|
|
|
m.mtxScrape.Lock()
|
|
|
|
defer m.mtxScrape.Unlock()
|
2017-11-25 13:13:54 +00:00
|
|
|
|
2018-09-26 09:20:56 +00:00
|
|
|
targets := make(map[string][]*Target, len(m.scrapePools))
|
|
|
|
for tset, sp := range m.scrapePools {
|
|
|
|
targets[tset] = sp.DroppedTargets()
|
2018-01-14 19:42:31 +00:00
|
|
|
}
|
2018-09-26 09:20:56 +00:00
|
|
|
return targets
|
2017-11-25 13:13:54 +00:00
|
|
|
}
|
2019-03-12 10:46:15 +00:00
|
|
|
|
2019-03-15 12:02:16 +00:00
|
|
|
// getFqdn returns a FQDN if it's possible, otherwise falls back to hostname.
//
// It resolves the system hostname to its IP addresses and returns the first
// name that a reverse lookup of one of those addresses yields. The plain
// hostname is returned when the forward lookup fails or when no address
// reverse-resolves to a name. An error is returned only when the system
// hostname itself cannot be determined.
func getFqdn() (string, error) {
	hostname, err := os.Hostname()
	if err != nil {
		return "", err
	}

	ips, err := net.LookupIP(hostname)
	if err != nil {
		// Return the system hostname if we can't look up the IP address.
		return hostname, nil
	}

	// lookup reverse-resolves ip and reports the first name found. ok is
	// false when the address cannot be rendered as text, when the lookup
	// fails, or when it succeeds with no names. The last case must not be
	// mistaken for a hit, or an empty FQDN would be returned.
	lookup := func(ip encoding.TextMarshaler) (name string, ok bool) {
		text, err := ip.MarshalText()
		if err != nil {
			return "", false
		}
		hosts, err := net.LookupAddr(string(text))
		if err != nil || len(hosts) == 0 {
			return "", false
		}
		return hosts[0], true
	}

	for _, addr := range ips {
		// net.IP renders the 4-byte and 16-byte forms of an IPv4 address as
		// the same text, so one lookup per address is enough. Trying To4 and
		// then To16 would repeat a failed IPv4 lookup unchanged.
		if fqdn, ok := lookup(addr); ok {
			return fqdn, nil
		}
	}
	return hostname, nil
}
|