mirror of
https://github.com/prometheus/prometheus
synced 2024-12-24 23:42:32 +00:00
Limit maximum number of concurrent queries.
A high number of concurrent queries can slow each other down so much that none of them is reasonably responsive. This commit limits the number of queries being executed concurrently.
This commit is contained in:
parent
d59d1cb2c1
commit
9ab1f6c690
@ -31,8 +31,9 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
stalenessDelta = flag.Duration("query.staleness-delta", 300*time.Second, "Staleness delta allowance during expression evaluations.")
|
||||
defaultQueryTimeout = flag.Duration("query.timeout", 2*time.Minute, "Maximum time a query may take before being aborted.")
|
||||
stalenessDelta = flag.Duration("query.staleness-delta", 300*time.Second, "Staleness delta allowance during expression evaluations.")
|
||||
defaultQueryTimeout = flag.Duration("query.timeout", 2*time.Minute, "Maximum time a query may take before being aborted.")
|
||||
maxConcurrentQueries = flag.Int("query.max-concurrency", 20, "Maximum number of queries executed concurrently.")
|
||||
)
|
||||
|
||||
// SampleStream is a stream of Values belonging to an attached COWMetric.
|
||||
@ -215,10 +216,7 @@ func (q *query) Cancel() {
|
||||
|
||||
// Exec implements the Query interface.
|
||||
func (q *query) Exec() *Result {
|
||||
ctx, cancel := context.WithTimeout(q.ng.baseCtx, *defaultQueryTimeout)
|
||||
q.cancel = cancel
|
||||
|
||||
res, err := q.ng.exec(ctx, q)
|
||||
res, err := q.ng.exec(q)
|
||||
return &Result{Err: err, Value: res}
|
||||
}
|
||||
|
||||
@ -249,6 +247,8 @@ type Engine struct {
|
||||
// The base context for all queries and its cancellation function.
|
||||
baseCtx context.Context
|
||||
cancelQueries func()
|
||||
// The gate limiting the maximum number of concurrently executing queries.
|
||||
gate *queryGate
|
||||
}
|
||||
|
||||
// NewEngine returns a new engine.
|
||||
@ -258,6 +258,7 @@ func NewEngine(storage local.Storage) *Engine {
|
||||
storage: storage,
|
||||
baseCtx: ctx,
|
||||
cancelQueries: cancel,
|
||||
gate: newQueryGate(*maxConcurrentQueries),
|
||||
}
|
||||
}
|
||||
|
||||
@ -316,9 +317,21 @@ func (ng *Engine) newTestQuery(stmts ...Statement) Query {
|
||||
//
|
||||
// At this point per query only one EvalStmt is evaluated. Alert and record
|
||||
// statements are not handled by the Engine.
|
||||
func (ng *Engine) exec(ctx context.Context, q *query) (Value, error) {
|
||||
func (ng *Engine) exec(q *query) (Value, error) {
|
||||
const env = "query execution"
|
||||
|
||||
ctx, cancel := context.WithTimeout(q.ng.baseCtx, *defaultQueryTimeout)
|
||||
q.cancel = cancel
|
||||
|
||||
queueTimer := q.stats.GetTimer(stats.ExecQueueTime).Start()
|
||||
|
||||
if err := ng.gate.Start(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer ng.gate.Done()
|
||||
|
||||
queueTimer.Stop()
|
||||
|
||||
// Cancel when execution is done or an error was raised.
|
||||
defer q.cancel()
|
||||
|
||||
@ -1125,3 +1138,35 @@ func interpolateSamples(first, second *metric.SamplePair, timestamp clientmodel.
|
||||
Timestamp: timestamp,
|
||||
}
|
||||
}
|
||||
|
||||
// A queryGate controls the maximum number of concurrently running and waiting queries.
|
||||
type queryGate struct {
|
||||
ch chan struct{}
|
||||
}
|
||||
|
||||
// newQueryGate returns a query gate that limits the number of queries
|
||||
// being concurrently executed.
|
||||
func newQueryGate(length int) *queryGate {
|
||||
return &queryGate{
|
||||
ch: make(chan struct{}, length),
|
||||
}
|
||||
}
|
||||
|
||||
// Start blocks until the gate has a free spot or the context is done.
|
||||
func (g *queryGate) Start(ctx context.Context) error {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return contextDone(ctx, "query queue")
|
||||
case g.ch <- struct{}{}:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// Done releases a single spot in the gate.
|
||||
func (g *queryGate) Done() {
|
||||
select {
|
||||
case <-g.ch:
|
||||
default:
|
||||
panic("engine.queryGate.Done: more operations done than started")
|
||||
}
|
||||
}
|
||||
|
@ -6,14 +6,61 @@ import (
|
||||
"time"
|
||||
|
||||
"golang.org/x/net/context"
|
||||
|
||||
"github.com/prometheus/prometheus/storage/local"
|
||||
)
|
||||
|
||||
// noop is a test statement whose function ignores its context and returns nil.
var noop = testStmt(func(context.Context) error {
|
||||
return nil
|
||||
})
|
||||
|
||||
func TestQueryConcurreny(t *testing.T) {
|
||||
engine := NewEngine(nil)
|
||||
defer engine.Stop()
|
||||
|
||||
block := make(chan struct{})
|
||||
processing := make(chan struct{})
|
||||
f1 := testStmt(func(context.Context) error {
|
||||
processing <- struct{}{}
|
||||
<-block
|
||||
return nil
|
||||
})
|
||||
|
||||
for i := 0; i < *maxConcurrentQueries; i++ {
|
||||
q := engine.newTestQuery(f1)
|
||||
go q.Exec()
|
||||
select {
|
||||
case <-processing:
|
||||
// Expected.
|
||||
case <-time.After(5 * time.Millisecond):
|
||||
t.Fatalf("Query within concurrency threshold not being executed")
|
||||
}
|
||||
}
|
||||
|
||||
q := engine.newTestQuery(f1)
|
||||
go q.Exec()
|
||||
|
||||
select {
|
||||
case <-processing:
|
||||
t.Fatalf("Query above concurrency threhosld being executed")
|
||||
case <-time.After(5 * time.Millisecond):
|
||||
// Expected.
|
||||
}
|
||||
|
||||
// Terminate a running query.
|
||||
block <- struct{}{}
|
||||
|
||||
select {
|
||||
case <-processing:
|
||||
// Expected.
|
||||
case <-time.After(5 * time.Millisecond):
|
||||
t.Fatalf("Query within concurrency threshold not being executed")
|
||||
}
|
||||
|
||||
// Terminate remaining queries.
|
||||
for i := 0; i < *maxConcurrentQueries; i++ {
|
||||
block <- struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
func TestQueryTimeout(t *testing.T) {
|
||||
*defaultQueryTimeout = 5 * time.Millisecond
|
||||
defer func() {
|
||||
@ -21,10 +68,7 @@ func TestQueryTimeout(t *testing.T) {
|
||||
*defaultQueryTimeout = 2 * time.Minute
|
||||
}()
|
||||
|
||||
storage, closer := local.NewTestStorage(t, 1)
|
||||
defer closer.Close()
|
||||
|
||||
engine := NewEngine(storage)
|
||||
engine := NewEngine(nil)
|
||||
defer engine.Stop()
|
||||
|
||||
f1 := testStmt(func(context.Context) error {
|
||||
@ -46,10 +90,7 @@ func TestQueryTimeout(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestQueryCancel(t *testing.T) {
|
||||
storage, closer := local.NewTestStorage(t, 1)
|
||||
defer closer.Close()
|
||||
|
||||
engine := NewEngine(storage)
|
||||
engine := NewEngine(nil)
|
||||
defer engine.Stop()
|
||||
|
||||
// As for timeouts, cancellation is only checked at designated points. We ensure
|
||||
@ -91,10 +132,7 @@ func TestQueryCancel(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestEngineShutdown(t *testing.T) {
|
||||
storage, closer := local.NewTestStorage(t, 1)
|
||||
defer closer.Close()
|
||||
|
||||
engine := NewEngine(storage)
|
||||
engine := NewEngine(nil)
|
||||
|
||||
handlerExecutions := 0
|
||||
// Shutdown engine on first handler execution. Should handler execution ever become
|
||||
|
@ -31,7 +31,7 @@ const (
|
||||
GetValueAtTimeTime
|
||||
GetBoundaryValuesTime
|
||||
GetRangeValuesTime
|
||||
ViewQueueTime
|
||||
ExecQueueTime
|
||||
ViewDiskPreparationTime
|
||||
ViewDataExtractionTime
|
||||
ViewDiskExtractionTime
|
||||
@ -64,8 +64,8 @@ func (s QueryTiming) String() string {
|
||||
return "GetBoundaryValues() time"
|
||||
case GetRangeValuesTime:
|
||||
return "GetRangeValues() time"
|
||||
case ViewQueueTime:
|
||||
return "View queue wait time"
|
||||
case ExecQueueTime:
|
||||
return "Exec queue wait time"
|
||||
case ViewDiskPreparationTime:
|
||||
return "View building disk preparation time"
|
||||
case ViewDataExtractionTime:
|
||||
|
Loading…
Reference in New Issue
Block a user