mirror of
https://github.com/prometheus/prometheus
synced 2024-12-27 00:53:12 +00:00
Merge pull request #7073 from csmarchbanks/fix-md5-remote-write
Fix remote write not updating when relabel configs or secrets change
This commit is contained in:
commit
cd12f0873c
@ -237,9 +237,11 @@ type QueueManager struct {
|
||||
cfg config.QueueConfig
|
||||
externalLabels labels.Labels
|
||||
relabelConfigs []*relabel.Config
|
||||
client StorageClient
|
||||
watcher *wal.Watcher
|
||||
|
||||
clientMtx sync.RWMutex
|
||||
storeClient StorageClient
|
||||
|
||||
seriesMtx sync.Mutex
|
||||
seriesLabels map[uint64]labels.Labels
|
||||
seriesSegmentIndexes map[uint64]int
|
||||
@ -283,7 +285,7 @@ func NewQueueManager(metrics *queueManagerMetrics, watcherMetrics *wal.WatcherMe
|
||||
cfg: cfg,
|
||||
externalLabels: externalLabels,
|
||||
relabelConfigs: relabelConfigs,
|
||||
client: client,
|
||||
storeClient: client,
|
||||
|
||||
seriesLabels: make(map[uint64]labels.Labels),
|
||||
seriesSegmentIndexes: make(map[uint64]int),
|
||||
@ -358,8 +360,8 @@ func (t *QueueManager) Start() {
|
||||
// Setup the QueueManagers metrics. We do this here rather than in the
|
||||
// constructor because of the ordering of creating Queue Managers's, stopping them,
|
||||
// and then starting new ones in storage/remote/storage.go ApplyConfig.
|
||||
name := t.client.Name()
|
||||
ep := t.client.Endpoint()
|
||||
name := t.client().Name()
|
||||
ep := t.client().Endpoint()
|
||||
t.highestSentTimestampMetric = &maxGauge{
|
||||
Gauge: t.metrics.queueHighestSentTimestamp.WithLabelValues(name, ep),
|
||||
}
|
||||
@ -413,8 +415,8 @@ func (t *QueueManager) Stop() {
|
||||
}
|
||||
t.seriesMtx.Unlock()
|
||||
// Delete metrics so we don't have alerts for queues that are gone.
|
||||
name := t.client.Name()
|
||||
ep := t.client.Endpoint()
|
||||
name := t.client().Name()
|
||||
ep := t.client().Endpoint()
|
||||
t.metrics.queueHighestSentTimestamp.DeleteLabelValues(name, ep)
|
||||
t.metrics.queuePendingSamples.DeleteLabelValues(name, ep)
|
||||
t.metrics.enqueueRetriesTotal.DeleteLabelValues(name, ep)
|
||||
@ -472,6 +474,20 @@ func (t *QueueManager) SeriesReset(index int) {
|
||||
}
|
||||
}
|
||||
|
||||
// SetClient updates the client used by a queue. Used when only client specific
|
||||
// fields are updated to avoid restarting the queue.
|
||||
func (t *QueueManager) SetClient(c StorageClient) {
|
||||
t.clientMtx.Lock()
|
||||
t.storeClient = c
|
||||
t.clientMtx.Unlock()
|
||||
}
|
||||
|
||||
func (t *QueueManager) client() StorageClient {
|
||||
t.clientMtx.RLock()
|
||||
defer t.clientMtx.RUnlock()
|
||||
return t.storeClient
|
||||
}
|
||||
|
||||
func internLabels(lbls labels.Labels) {
|
||||
for i, l := range lbls {
|
||||
lbls[i].Name = interner.intern(l.Name)
|
||||
@ -866,7 +882,7 @@ func (s *shards) sendSamplesWithBackoff(ctx context.Context, samples []prompb.Ti
|
||||
default:
|
||||
}
|
||||
begin := time.Now()
|
||||
err := s.qm.client.Store(ctx, req)
|
||||
err := s.qm.client().Store(ctx, req)
|
||||
|
||||
s.qm.sentBatchDuration.Observe(time.Since(begin).Seconds())
|
||||
|
||||
|
@ -17,12 +17,12 @@ import (
|
||||
"context"
|
||||
"crypto/md5"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/go-kit/kit/log"
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/common/model"
|
||||
@ -174,7 +174,7 @@ func labelsToEqualityMatchers(ls model.LabelSet) []*labels.Matcher {
|
||||
|
||||
// Used for hashing configs and diff'ing hashes in ApplyConfig.
|
||||
func toHash(data interface{}) (string, error) {
|
||||
bytes, err := json.Marshal(data)
|
||||
bytes, err := yaml.Marshal(data)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
@ -51,7 +51,7 @@ func TestStorageLifecycle(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
s.ApplyConfig(conf)
|
||||
testutil.Ok(t, s.ApplyConfig(conf))
|
||||
|
||||
// make sure remote write has a queue.
|
||||
testutil.Equals(t, 1, len(s.rws.queues))
|
||||
@ -73,13 +73,13 @@ func TestUpdateRemoteReadConfigs(t *testing.T) {
|
||||
conf := &config.Config{
|
||||
GlobalConfig: config.GlobalConfig{},
|
||||
}
|
||||
s.ApplyConfig(conf)
|
||||
testutil.Ok(t, s.ApplyConfig(conf))
|
||||
testutil.Equals(t, 0, len(s.queryables))
|
||||
|
||||
conf.RemoteReadConfigs = []*config.RemoteReadConfig{
|
||||
&config.DefaultRemoteReadConfig,
|
||||
}
|
||||
s.ApplyConfig(conf)
|
||||
testutil.Ok(t, s.ApplyConfig(conf))
|
||||
testutil.Equals(t, 1, len(s.queryables))
|
||||
|
||||
err = s.Close()
|
||||
|
@ -19,7 +19,6 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/go-kit/kit/log"
|
||||
"github.com/go-kit/kit/log/level"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
"github.com/prometheus/prometheus/config"
|
||||
@ -53,8 +52,7 @@ type WriteStorage struct {
|
||||
queueMetrics *queueManagerMetrics
|
||||
watcherMetrics *wal.WatcherMetrics
|
||||
liveReaderMetrics *wal.LiveReaderMetrics
|
||||
configHash string
|
||||
externalLabelHash string
|
||||
externalLabels labels.Labels
|
||||
walDir string
|
||||
queues map[string]*QueueManager
|
||||
samplesIn *ewmaRate
|
||||
@ -94,25 +92,10 @@ func (rws *WriteStorage) ApplyConfig(conf *config.Config) error {
|
||||
rws.mtx.Lock()
|
||||
defer rws.mtx.Unlock()
|
||||
|
||||
configHash, err := toHash(conf.RemoteWriteConfigs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
externalLabelHash, err := toHash(conf.GlobalConfig.ExternalLabels)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Remote write queues only need to change if the remote write config or
|
||||
// external labels change.
|
||||
externalLabelUnchanged := externalLabelHash == rws.externalLabelHash
|
||||
if configHash == rws.configHash && externalLabelUnchanged {
|
||||
level.Debug(rws.logger).Log("msg", "Remote write config has not changed, no need to restart QueueManagers")
|
||||
return nil
|
||||
}
|
||||
|
||||
rws.configHash = configHash
|
||||
rws.externalLabelHash = externalLabelHash
|
||||
externalLabelUnchanged := labels.Equal(conf.GlobalConfig.ExternalLabels, rws.externalLabels)
|
||||
rws.externalLabels = conf.GlobalConfig.ExternalLabels
|
||||
|
||||
newQueues := make(map[string]*QueueManager)
|
||||
newHashes := []string{}
|
||||
@ -122,6 +105,11 @@ func (rws *WriteStorage) ApplyConfig(conf *config.Config) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// Don't allow duplicate remote write configs.
|
||||
if _, ok := newQueues[hash]; ok {
|
||||
return fmt.Errorf("duplicate remote write configs are not allowed, found duplicate for URL: %s", rwConf.URL)
|
||||
}
|
||||
|
||||
// Set the queue name to the config hash if the user has not set
|
||||
// a name in their remote write config so we can still differentiate
|
||||
// between queues that have the same remote write endpoint.
|
||||
@ -130,22 +118,6 @@ func (rws *WriteStorage) ApplyConfig(conf *config.Config) error {
|
||||
name = rwConf.Name
|
||||
}
|
||||
|
||||
// Don't allow duplicate remote write configs.
|
||||
if _, ok := newQueues[hash]; ok {
|
||||
return fmt.Errorf("duplicate remote write configs are not allowed, found duplicate for URL: %s", rwConf.URL)
|
||||
}
|
||||
|
||||
var nameUnchanged bool
|
||||
queue, ok := rws.queues[hash]
|
||||
if ok {
|
||||
nameUnchanged = queue.client.Name() == name
|
||||
}
|
||||
if externalLabelUnchanged && nameUnchanged {
|
||||
newQueues[hash] = queue
|
||||
delete(rws.queues, hash)
|
||||
continue
|
||||
}
|
||||
|
||||
c, err := NewClient(name, &ClientConfig{
|
||||
URL: rwConf.URL,
|
||||
Timeout: rwConf.RemoteTimeout,
|
||||
@ -154,6 +126,16 @@ func (rws *WriteStorage) ApplyConfig(conf *config.Config) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
queue, ok := rws.queues[hash]
|
||||
if externalLabelUnchanged && ok {
|
||||
// Update the client in case any secret configuration has changed.
|
||||
queue.SetClient(c)
|
||||
newQueues[hash] = queue
|
||||
delete(rws.queues, hash)
|
||||
continue
|
||||
}
|
||||
|
||||
newQueues[hash] = NewQueueManager(
|
||||
rws.queueMetrics,
|
||||
rws.watcherMetrics,
|
||||
|
@ -24,6 +24,7 @@ import (
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/prometheus/prometheus/config"
|
||||
"github.com/prometheus/prometheus/pkg/labels"
|
||||
"github.com/prometheus/prometheus/pkg/relabel"
|
||||
"github.com/prometheus/prometheus/util/testutil"
|
||||
)
|
||||
|
||||
@ -140,14 +141,14 @@ func TestRestartOnNameChange(t *testing.T) {
|
||||
},
|
||||
}
|
||||
testutil.Ok(t, s.ApplyConfig(conf))
|
||||
testutil.Equals(t, s.queues[hash].client.Name(), cfg.Name)
|
||||
testutil.Equals(t, s.queues[hash].client().Name(), cfg.Name)
|
||||
|
||||
// Change the queues name, ensure the queue has been restarted.
|
||||
conf.RemoteWriteConfigs[0].Name = "dev-2"
|
||||
testutil.Ok(t, s.ApplyConfig(conf))
|
||||
hash, err = toHash(cfg)
|
||||
testutil.Ok(t, err)
|
||||
testutil.Equals(t, s.queues[hash].client.Name(), conf.RemoteWriteConfigs[0].Name)
|
||||
testutil.Equals(t, s.queues[hash].client().Name(), conf.RemoteWriteConfigs[0].Name)
|
||||
|
||||
err = s.Close()
|
||||
testutil.Ok(t, err)
|
||||
@ -247,10 +248,18 @@ func TestWriteStorageApplyConfigsPartialUpdate(t *testing.T) {
|
||||
c0 := &config.RemoteWriteConfig{
|
||||
RemoteTimeout: model.Duration(10 * time.Second),
|
||||
QueueConfig: config.DefaultQueueConfig,
|
||||
WriteRelabelConfigs: []*relabel.Config{
|
||||
&relabel.Config{
|
||||
Regex: relabel.MustNewRegexp(".+"),
|
||||
},
|
||||
},
|
||||
}
|
||||
c1 := &config.RemoteWriteConfig{
|
||||
RemoteTimeout: model.Duration(20 * time.Second),
|
||||
QueueConfig: config.DefaultQueueConfig,
|
||||
HTTPClientConfig: common_config.HTTPClientConfig{
|
||||
BearerToken: "foo",
|
||||
},
|
||||
}
|
||||
c2 := &config.RemoteWriteConfig{
|
||||
RemoteTimeout: model.Duration(30 * time.Second),
|
||||
@ -262,43 +271,63 @@ func TestWriteStorageApplyConfigsPartialUpdate(t *testing.T) {
|
||||
RemoteWriteConfigs: []*config.RemoteWriteConfig{c0, c1, c2},
|
||||
}
|
||||
// We need to set URL's so that metric creation doesn't panic.
|
||||
conf.RemoteWriteConfigs[0].URL = &common_config.URL{
|
||||
URL: &url.URL{
|
||||
Host: "http://test-storage.com",
|
||||
},
|
||||
for i := range conf.RemoteWriteConfigs {
|
||||
conf.RemoteWriteConfigs[i].URL = &common_config.URL{
|
||||
URL: &url.URL{
|
||||
Host: "http://test-storage.com",
|
||||
},
|
||||
}
|
||||
}
|
||||
conf.RemoteWriteConfigs[1].URL = &common_config.URL{
|
||||
URL: &url.URL{
|
||||
Host: "http://test-storage.com",
|
||||
},
|
||||
}
|
||||
conf.RemoteWriteConfigs[2].URL = &common_config.URL{
|
||||
URL: &url.URL{
|
||||
Host: "http://test-storage.com",
|
||||
},
|
||||
}
|
||||
s.ApplyConfig(conf)
|
||||
testutil.Ok(t, s.ApplyConfig(conf))
|
||||
testutil.Equals(t, 3, len(s.queues))
|
||||
|
||||
c0Hash, err := toHash(c0)
|
||||
testutil.Ok(t, err)
|
||||
c1Hash, err := toHash(c1)
|
||||
testutil.Ok(t, err)
|
||||
// q := s.queues[1]
|
||||
hashes := make([]string, len(conf.RemoteWriteConfigs))
|
||||
queues := make([]*QueueManager, len(conf.RemoteWriteConfigs))
|
||||
storeHashes := func() {
|
||||
for i := range conf.RemoteWriteConfigs {
|
||||
hash, err := toHash(conf.RemoteWriteConfigs[i])
|
||||
testutil.Ok(t, err)
|
||||
hashes[i] = hash
|
||||
queues[i] = s.queues[hash]
|
||||
}
|
||||
}
|
||||
|
||||
storeHashes()
|
||||
// Update c0 and c2.
|
||||
c0.RemoteTimeout = model.Duration(40 * time.Second)
|
||||
c0.WriteRelabelConfigs[0] = &relabel.Config{Regex: relabel.MustNewRegexp("foo")}
|
||||
c2.RemoteTimeout = model.Duration(50 * time.Second)
|
||||
conf = &config.Config{
|
||||
GlobalConfig: config.GlobalConfig{},
|
||||
RemoteWriteConfigs: []*config.RemoteWriteConfig{c0, c1, c2},
|
||||
}
|
||||
s.ApplyConfig(conf)
|
||||
testutil.Ok(t, s.ApplyConfig(conf))
|
||||
testutil.Equals(t, 3, len(s.queues))
|
||||
|
||||
_, hashExists := s.queues[c1Hash]
|
||||
_, hashExists := s.queues[hashes[0]]
|
||||
testutil.Assert(t, !hashExists, "The queue for the first remote write configuration should have been restarted because the relabel configuration has changed.")
|
||||
q, hashExists := s.queues[hashes[1]]
|
||||
testutil.Assert(t, hashExists, "Hash of unchanged queue should have remained the same")
|
||||
testutil.Assert(t, q == queues[1], "Pointer of unchanged queue should have remained the same")
|
||||
_, hashExists = s.queues[hashes[2]]
|
||||
testutil.Assert(t, !hashExists, "The queue for the third remote write configuration should have been restarted because the timeout has changed.")
|
||||
|
||||
storeHashes()
|
||||
secondClient := s.queues[hashes[1]].client()
|
||||
// Update c1.
|
||||
c1.HTTPClientConfig.BearerToken = "bar"
|
||||
err = s.ApplyConfig(conf)
|
||||
testutil.Ok(t, err)
|
||||
testutil.Equals(t, 3, len(s.queues))
|
||||
|
||||
_, hashExists = s.queues[hashes[0]]
|
||||
testutil.Assert(t, hashExists, "Pointer of unchanged queue should have remained the same")
|
||||
q, hashExists = s.queues[hashes[1]]
|
||||
testutil.Assert(t, hashExists, "Hash of queue with secret change should have remained the same")
|
||||
testutil.Assert(t, secondClient != q.client(), "Pointer of a client with a secret change should not be the same")
|
||||
_, hashExists = s.queues[hashes[2]]
|
||||
testutil.Assert(t, hashExists, "Pointer of unchanged queue should have remained the same")
|
||||
|
||||
storeHashes()
|
||||
// Delete c0.
|
||||
conf = &config.Config{
|
||||
GlobalConfig: config.GlobalConfig{},
|
||||
@ -307,8 +336,12 @@ func TestWriteStorageApplyConfigsPartialUpdate(t *testing.T) {
|
||||
s.ApplyConfig(conf)
|
||||
testutil.Equals(t, 2, len(s.queues))
|
||||
|
||||
_, hashExists = s.queues[c0Hash]
|
||||
testutil.Assert(t, !hashExists, "If the index changed, the queue should be stopped and recreated.")
|
||||
_, hashExists = s.queues[hashes[0]]
|
||||
testutil.Assert(t, !hashExists, "If a config is removed, the queue should be stopped and recreated.")
|
||||
_, hashExists = s.queues[hashes[1]]
|
||||
testutil.Assert(t, hashExists, "Pointer of unchanged queue should have remained the same")
|
||||
_, hashExists = s.queues[hashes[2]]
|
||||
testutil.Assert(t, hashExists, "Pointer of unchanged queue should have remained the same")
|
||||
|
||||
err = s.Close()
|
||||
testutil.Ok(t, err)
|
||||
|
Loading…
Reference in New Issue
Block a user