prometheus/storage/remote/queue_manager_test.go

331 lines
8.6 KiB
Go

// Copyright 2013 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package remote
import (
"context"
"fmt"
"reflect"
"sync"
"sync/atomic"
"testing"
"time"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/prompb"
)
const defaultFlushDeadline = 1 * time.Minute
type TestStorageClient struct {
receivedSamples map[string][]*prompb.Sample
expectedSamples map[string][]*prompb.Sample
wg sync.WaitGroup
mtx sync.Mutex
}
func NewTestStorageClient() *TestStorageClient {
return &TestStorageClient{
receivedSamples: map[string][]*prompb.Sample{},
expectedSamples: map[string][]*prompb.Sample{},
}
}
func (c *TestStorageClient) expectSamples(ss model.Samples) {
c.mtx.Lock()
defer c.mtx.Unlock()
c.expectedSamples = map[string][]*prompb.Sample{}
c.receivedSamples = map[string][]*prompb.Sample{}
for _, s := range ss {
ts := labelProtosToLabels(MetricToLabelProtos(s.Metric)).String()
c.expectedSamples[ts] = append(c.expectedSamples[ts], &prompb.Sample{
Timestamp: int64(s.Timestamp),
Value: float64(s.Value),
})
}
c.wg.Add(len(ss))
}
func (c *TestStorageClient) waitForExpectedSamples(t *testing.T) {
c.wg.Wait()
c.mtx.Lock()
defer c.mtx.Unlock()
for ts, expectedSamples := range c.expectedSamples {
if !reflect.DeepEqual(expectedSamples, c.receivedSamples[ts]) {
t.Fatalf("%s: Expected %v, got %v", ts, expectedSamples, c.receivedSamples[ts])
}
}
}
func (c *TestStorageClient) Store(_ context.Context, req *prompb.WriteRequest) error {
c.mtx.Lock()
defer c.mtx.Unlock()
count := 0
for _, ts := range req.Timeseries {
labels := labelProtosToLabels(ts.Labels).String()
for _, sample := range ts.Samples {
count++
c.receivedSamples[labels] = append(c.receivedSamples[labels], sample)
}
}
c.wg.Add(-count)
return nil
}
func (c *TestStorageClient) Name() string {
return "teststorageclient"
}
func TestSampleDelivery(t *testing.T) {
// Let's create an even number of send batches so we don't run into the
// batch timeout case.
n := config.DefaultQueueConfig.Capacity * 2
samples := make(model.Samples, 0, n)
for i := 0; i < n; i++ {
name := model.LabelValue(fmt.Sprintf("test_metric_%d", i))
samples = append(samples, &model.Sample{
Metric: model.Metric{
model.MetricNameLabel: name,
},
Value: model.SampleValue(i),
})
}
c := NewTestStorageClient()
c.expectSamples(samples[:len(samples)/2])
cfg := config.DefaultQueueConfig
cfg.MaxShards = 1
m := NewQueueManager(nil, cfg, nil, nil, c, defaultFlushDeadline)
// These should be received by the client.
for _, s := range samples[:len(samples)/2] {
m.Append(s)
}
// These will be dropped because the queue is full.
for _, s := range samples[len(samples)/2:] {
m.Append(s)
}
m.Start()
defer m.Stop()
c.waitForExpectedSamples(t)
}
func TestSampleDeliveryTimeout(t *testing.T) {
// Let's send one less sample than batch size, and wait the timeout duration
n := config.DefaultQueueConfig.Capacity - 1
samples := make(model.Samples, 0, n)
for i := 0; i < n; i++ {
name := model.LabelValue(fmt.Sprintf("test_metric_%d", i))
samples = append(samples, &model.Sample{
Metric: model.Metric{
model.MetricNameLabel: name,
},
Value: model.SampleValue(i),
})
}
c := NewTestStorageClient()
cfg := config.DefaultQueueConfig
cfg.MaxShards = 1
cfg.BatchSendDeadline = model.Duration(100 * time.Millisecond)
m := NewQueueManager(nil, cfg, nil, nil, c, defaultFlushDeadline)
m.Start()
defer m.Stop()
// Send the samples twice, waiting for the samples in the meantime.
c.expectSamples(samples)
for _, s := range samples {
m.Append(s)
}
c.waitForExpectedSamples(t)
c.expectSamples(samples)
for _, s := range samples {
m.Append(s)
}
c.waitForExpectedSamples(t)
}
func TestSampleDeliveryOrder(t *testing.T) {
ts := 10
n := config.DefaultQueueConfig.MaxSamplesPerSend * ts
samples := make(model.Samples, 0, n)
for i := 0; i < n; i++ {
name := model.LabelValue(fmt.Sprintf("test_metric_%d", i%ts))
samples = append(samples, &model.Sample{
Metric: model.Metric{
model.MetricNameLabel: name,
},
Value: model.SampleValue(i),
Timestamp: model.Time(i),
})
}
c := NewTestStorageClient()
c.expectSamples(samples)
m := NewQueueManager(nil, config.DefaultQueueConfig, nil, nil, c, defaultFlushDeadline)
// These should be received by the client.
for _, s := range samples {
m.Append(s)
}
m.Start()
defer m.Stop()
c.waitForExpectedSamples(t)
}
// TestBlockingStorageClient is a queue_manager StorageClient which will block
// on any calls to Store(), until the `block` channel is closed, at which point
// the `numCalls` property will contain a count of how many times Store() was
// called.
type TestBlockingStorageClient struct {
numCalls uint64
block chan bool
}
func NewTestBlockedStorageClient() *TestBlockingStorageClient {
return &TestBlockingStorageClient{
block: make(chan bool),
numCalls: 0,
}
}
func (c *TestBlockingStorageClient) Store(ctx context.Context, _ *prompb.WriteRequest) error {
atomic.AddUint64(&c.numCalls, 1)
select {
case <-c.block:
case <-ctx.Done():
}
return nil
}
func (c *TestBlockingStorageClient) NumCalls() uint64 {
return atomic.LoadUint64(&c.numCalls)
}
func (c *TestBlockingStorageClient) unlock() {
close(c.block)
}
func (c *TestBlockingStorageClient) Name() string {
return "testblockingstorageclient"
}
func (t *QueueManager) queueLen() int {
t.shardsMtx.Lock()
defer t.shardsMtx.Unlock()
queueLength := 0
for _, shard := range t.shards.queues {
queueLength += len(shard)
}
return queueLength
}
func TestSpawnNotMoreThanMaxConcurrentSendsGoroutines(t *testing.T) {
// Our goal is to fully empty the queue:
// `MaxSamplesPerSend*Shards` samples should be consumed by the
// per-shard goroutines, and then another `MaxSamplesPerSend`
// should be left on the queue.
n := config.DefaultQueueConfig.MaxSamplesPerSend * 2
samples := make(model.Samples, 0, n)
for i := 0; i < n; i++ {
name := model.LabelValue(fmt.Sprintf("test_metric_%d", i))
samples = append(samples, &model.Sample{
Metric: model.Metric{
model.MetricNameLabel: name,
},
Value: model.SampleValue(i),
})
}
c := NewTestBlockedStorageClient()
cfg := config.DefaultQueueConfig
cfg.MaxShards = 1
cfg.Capacity = n
m := NewQueueManager(nil, cfg, nil, nil, c, defaultFlushDeadline)
m.Start()
defer func() {
c.unlock()
m.Stop()
}()
for _, s := range samples {
m.Append(s)
}
// Wait until the runShard() loops drain the queue. If things went right, it
// should then immediately block in sendSamples(), but, in case of error,
// it would spawn too many goroutines, and thus we'd see more calls to
// client.Store()
//
// The timed wait is maybe non-ideal, but, in order to verify that we're
// not spawning too many concurrent goroutines, we have to wait on the
// Run() loop to consume a specific number of elements from the
// queue... and it doesn't signal that in any obvious way, except by
// draining the queue. We cap the waiting at 1 second -- that should give
// plenty of time, and keeps the failure fairly quick if we're not draining
// the queue properly.
for i := 0; i < 100 && m.queueLen() > 0; i++ {
time.Sleep(10 * time.Millisecond)
}
if m.queueLen() != config.DefaultQueueConfig.MaxSamplesPerSend {
t.Fatalf("Failed to drain QueueManager queue, %d elements left",
m.queueLen(),
)
}
numCalls := c.NumCalls()
if numCalls != uint64(1) {
t.Errorf("Saw %d concurrent sends, expected 1", numCalls)
}
}
func TestShutdown(t *testing.T) {
deadline := 10 * time.Second
c := NewTestBlockedStorageClient()
m := NewQueueManager(nil, config.DefaultQueueConfig, nil, nil, c, deadline)
for i := 0; i < config.DefaultQueueConfig.MaxSamplesPerSend; i++ {
m.Append(&model.Sample{
Metric: model.Metric{
model.MetricNameLabel: model.LabelValue(fmt.Sprintf("test_metric_%d", i)),
},
Value: model.SampleValue(i),
Timestamp: model.Time(i),
})
}
m.Start()
start := time.Now()
m.Stop()
duration := time.Now().Sub(start)
if duration > deadline+(deadline/10) {
t.Errorf("Took too long to shutdown: %s > %s", duration, deadline)
}
}