Handle secret changes in remote write ApplyConfig

Remake the http client whenever ApplyConfig is called. This allows
secrets to be updated without needing to restart an otherwise unchanged
queue.

Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>
This commit is contained in:
Chris Marchbanks 2020-03-31 03:39:29 +00:00
parent 317e73de79
commit d88a2b0261
3 changed files with 54 additions and 51 deletions

View File

@ -237,9 +237,11 @@ type QueueManager struct {
cfg config.QueueConfig
externalLabels labels.Labels
relabelConfigs []*relabel.Config
client StorageClient
watcher *wal.Watcher
clientMtx sync.RWMutex
storeClient StorageClient
seriesMtx sync.Mutex
seriesLabels map[uint64]labels.Labels
seriesSegmentIndexes map[uint64]int
@ -283,7 +285,7 @@ func NewQueueManager(metrics *queueManagerMetrics, watcherMetrics *wal.WatcherMe
cfg: cfg,
externalLabels: externalLabels,
relabelConfigs: relabelConfigs,
client: client,
storeClient: client,
seriesLabels: make(map[uint64]labels.Labels),
seriesSegmentIndexes: make(map[uint64]int),
@ -358,8 +360,8 @@ func (t *QueueManager) Start() {
// Setup the QueueManagers metrics. We do this here rather than in the
// constructor because of the ordering of creating Queue Managers's, stopping them,
// and then starting new ones in storage/remote/storage.go ApplyConfig.
name := t.client.Name()
ep := t.client.Endpoint()
name := t.client().Name()
ep := t.client().Endpoint()
t.highestSentTimestampMetric = &maxGauge{
Gauge: t.metrics.queueHighestSentTimestamp.WithLabelValues(name, ep),
}
@ -413,8 +415,8 @@ func (t *QueueManager) Stop() {
}
t.seriesMtx.Unlock()
// Delete metrics so we don't have alerts for queues that are gone.
name := t.client.Name()
ep := t.client.Endpoint()
name := t.client().Name()
ep := t.client().Endpoint()
t.metrics.queueHighestSentTimestamp.DeleteLabelValues(name, ep)
t.metrics.queuePendingSamples.DeleteLabelValues(name, ep)
t.metrics.enqueueRetriesTotal.DeleteLabelValues(name, ep)
@ -472,6 +474,20 @@ func (t *QueueManager) SeriesReset(index int) {
}
}
// SetClient updates the client used by a queue. Used when only client specific
// fields are updated to avoid restarting the queue.
func (t *QueueManager) SetClient(c StorageClient) {
t.clientMtx.Lock()
t.storeClient = c
t.clientMtx.Unlock()
}
func (t *QueueManager) client() StorageClient {
t.clientMtx.RLock()
defer t.clientMtx.RUnlock()
return t.storeClient
}
func internLabels(lbls labels.Labels) {
for i, l := range lbls {
lbls[i].Name = interner.intern(l.Name)
@ -865,7 +881,7 @@ func (s *shards) sendSamplesWithBackoff(ctx context.Context, samples []prompb.Ti
default:
}
begin := time.Now()
err := s.qm.client.Store(ctx, req)
err := s.qm.client().Store(ctx, req)
s.qm.sentBatchDuration.Observe(time.Since(begin).Seconds())

View File

@ -19,7 +19,6 @@ import (
"time"
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/prometheus/config"
@ -53,8 +52,7 @@ type WriteStorage struct {
queueMetrics *queueManagerMetrics
watcherMetrics *wal.WatcherMetrics
liveReaderMetrics *wal.LiveReaderMetrics
configHash string
externalLabelHash string
externalLabels labels.Labels
walDir string
queues map[string]*QueueManager
samplesIn *ewmaRate
@ -94,25 +92,10 @@ func (rws *WriteStorage) ApplyConfig(conf *config.Config) error {
rws.mtx.Lock()
defer rws.mtx.Unlock()
configHash, err := toHash(conf.RemoteWriteConfigs)
if err != nil {
return err
}
externalLabelHash, err := toHash(conf.GlobalConfig.ExternalLabels)
if err != nil {
return err
}
// Remote write queues only need to change if the remote write config or
// external labels change.
externalLabelUnchanged := externalLabelHash == rws.externalLabelHash
if configHash == rws.configHash && externalLabelUnchanged {
level.Debug(rws.logger).Log("msg", "Remote write config has not changed, no need to restart QueueManagers")
return nil
}
rws.configHash = configHash
rws.externalLabelHash = externalLabelHash
externalLabelUnchanged := labels.Equal(conf.GlobalConfig.ExternalLabels, rws.externalLabels)
rws.externalLabels = conf.GlobalConfig.ExternalLabels
newQueues := make(map[string]*QueueManager)
newHashes := []string{}
@ -122,6 +105,11 @@ func (rws *WriteStorage) ApplyConfig(conf *config.Config) error {
return err
}
// Don't allow duplicate remote write configs.
if _, ok := newQueues[hash]; ok {
return fmt.Errorf("duplicate remote write configs are not allowed, found duplicate for URL: %s", rwConf.URL)
}
// Set the queue name to the config hash if the user has not set
// a name in their remote write config so we can still differentiate
// between queues that have the same remote write endpoint.
@ -130,22 +118,6 @@ func (rws *WriteStorage) ApplyConfig(conf *config.Config) error {
name = rwConf.Name
}
// Don't allow duplicate remote write configs.
if _, ok := newQueues[hash]; ok {
return fmt.Errorf("duplicate remote write configs are not allowed, found duplicate for URL: %s", rwConf.URL)
}
var nameUnchanged bool
queue, ok := rws.queues[hash]
if ok {
nameUnchanged = queue.client.Name() == name
}
if externalLabelUnchanged && nameUnchanged {
newQueues[hash] = queue
delete(rws.queues, hash)
continue
}
c, err := NewClient(name, &ClientConfig{
URL: rwConf.URL,
Timeout: rwConf.RemoteTimeout,
@ -154,6 +126,16 @@ func (rws *WriteStorage) ApplyConfig(conf *config.Config) error {
if err != nil {
return err
}
queue, ok := rws.queues[hash]
if externalLabelUnchanged && ok {
// Update the client in case any secret configuration has changed.
queue.SetClient(c)
newQueues[hash] = queue
delete(rws.queues, hash)
continue
}
newQueues[hash] = NewQueueManager(
rws.queueMetrics,
rws.watcherMetrics,

View File

@ -141,14 +141,14 @@ func TestRestartOnNameChange(t *testing.T) {
},
}
testutil.Ok(t, s.ApplyConfig(conf))
testutil.Equals(t, s.queues[hash].client.Name(), cfg.Name)
testutil.Equals(t, s.queues[hash].client().Name(), cfg.Name)
// Change the queues name, ensure the queue has been restarted.
conf.RemoteWriteConfigs[0].Name = "dev-2"
testutil.Ok(t, s.ApplyConfig(conf))
hash, err = toHash(cfg)
testutil.Ok(t, err)
testutil.Equals(t, s.queues[hash].client.Name(), conf.RemoteWriteConfigs[0].Name)
testutil.Equals(t, s.queues[hash].client().Name(), conf.RemoteWriteConfigs[0].Name)
err = s.Close()
testutil.Ok(t, err)
@ -257,7 +257,7 @@ func TestWriteStorageApplyConfigsPartialUpdate(t *testing.T) {
c1 := &config.RemoteWriteConfig{
RemoteTimeout: model.Duration(20 * time.Second),
QueueConfig: config.DefaultQueueConfig,
HTTPClientConfig: config_util.HTTPClientConfig{
HTTPClientConfig: common_config.HTTPClientConfig{
BearerToken: "foo",
},
}
@ -282,11 +282,13 @@ func TestWriteStorageApplyConfigsPartialUpdate(t *testing.T) {
testutil.Equals(t, 3, len(s.queues))
hashes := make([]string, len(conf.RemoteWriteConfigs))
queues := make([]*QueueManager, len(conf.RemoteWriteConfigs))
storeHashes := func() {
for i := range conf.RemoteWriteConfigs {
hash, err := toHash(conf.RemoteWriteConfigs[i])
testutil.Ok(t, err)
hashes[i] = hash
queues[i] = s.queues[hash]
}
}
@ -303,12 +305,14 @@ func TestWriteStorageApplyConfigsPartialUpdate(t *testing.T) {
_, hashExists := s.queues[hashes[0]]
testutil.Assert(t, !hashExists, "The queue for the first remote write configuration should have been restarted because the relabel configuration has changed.")
_, hashExists = s.queues[hashes[1]]
testutil.Assert(t, hashExists, "Pointer of unchanged queue should have remained the same")
q, hashExists := s.queues[hashes[1]]
testutil.Assert(t, hashExists, "Hash of unchanged queue should have remained the same")
testutil.Assert(t, q == queues[1], "Pointer of unchanged queue should have remained the same")
_, hashExists = s.queues[hashes[2]]
testutil.Assert(t, !hashExists, "The queue for the third remote write configuration should have been restarted because the timeout has changed.")
storeHashes()
secondClient := s.queues[hashes[1]].client()
// Update c1.
c1.HTTPClientConfig.BearerToken = "bar"
err = s.ApplyConfig(conf)
@ -317,8 +321,9 @@ func TestWriteStorageApplyConfigsPartialUpdate(t *testing.T) {
_, hashExists = s.queues[hashes[0]]
testutil.Assert(t, hashExists, "Pointer of unchanged queue should have remained the same")
_, hashExists = s.queues[hashes[1]]
testutil.Assert(t, !hashExists, "The queue for the second remote write configuration should have been restarted because the HTTP configuration has changed.")
q, hashExists = s.queues[hashes[1]]
testutil.Assert(t, hashExists, "Hash of queue with secret change should have remained the same")
testutil.Assert(t, secondClient != q.client(), "Pointer of a client with a secret change should not be the same")
_, hashExists = s.queues[hashes[2]]
testutil.Assert(t, hashExists, "Pointer of unchanged queue should have remained the same")
@ -332,7 +337,7 @@ func TestWriteStorageApplyConfigsPartialUpdate(t *testing.T) {
testutil.Equals(t, 2, len(s.queues))
_, hashExists = s.queues[hashes[0]]
testutil.Assert(t, !hashExists, "If the index changed, the queue should be stopped and recreated.")
testutil.Assert(t, !hashExists, "If a config is removed, the queue should be stopped and recreated.")
_, hashExists = s.queues[hashes[1]]
testutil.Assert(t, hashExists, "Pointer of unchanged queue should have remained the same")
_, hashExists = s.queues[hashes[2]]