From 7d105277feaabfcb3270e47763ce6b4daaf3df6b Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Tue, 21 Sep 2021 11:27:28 +0100 Subject: [PATCH] Optimise topk where k==1 (#9365) * Add benchmark for query_range with topk Modify sample data so values within a metric differ Signed-off-by: Bryan Boreham * Optimise topk where k==1 In this case we don't need a heap to keep track of values; just a single slot is fine. Simplify the initialization of the heap: since all cases start off as a single-item heap we can just assign the value directly. Signed-off-by: Bryan Boreham * Allow at least one slot in results for topk, quantile k isn't set for quantile, but we need space to start collecting values Signed-off-by: Bryan Boreham --- promql/bench_test.go | 5 ++++- promql/engine.go | 36 ++++++++++++++++++++++++++++-------- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/promql/bench_test.go b/promql/bench_test.go index aac2be716..4353d25bb 100644 --- a/promql/bench_test.go +++ b/promql/bench_test.go @@ -71,7 +71,7 @@ func BenchmarkRangeQuery(b *testing.B) { a := storage.Appender(context.Background()) ts := int64(s * 10000) // 10s interval. for i, metric := range metrics { - ref, _ := a.Append(refs[i], metric, ts, float64(s)) + ref, _ := a.Append(refs[i], metric, ts, float64(s)+float64(i)/float64(len(metrics))) refs[i] = ref } if err := a.Commit(); err != nil { @@ -159,6 +159,9 @@ func BenchmarkRangeQuery(b *testing.B) { { expr: "count_values('value', h_X)", }, + { + expr: "topk(1, a_X)", + }, // Combinations. { expr: "rate(a_X[1m]) + rate(b_X[1m])", diff --git a/promql/engine.go b/promql/engine.go index 44290f163..bd387f2c5 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -2210,22 +2210,24 @@ func (ev *evaluator) aggregation(op parser.ItemType, grouping []string, without resultSize := k if k > inputVecLen { resultSize = inputVecLen + } else if k == 0 { + resultSize = 1 } switch op { case parser.STDVAR, parser.STDDEV: result[groupingKey].value = 0 case parser.TOPK, parser.QUANTILE: - result[groupingKey].heap = make(vectorByValueHeap, 0, resultSize) - heap.Push(&result[groupingKey].heap, &Sample{ + result[groupingKey].heap = make(vectorByValueHeap, 1, resultSize) + result[groupingKey].heap[0] = Sample{ Point: Point{V: s.V}, Metric: s.Metric, - }) + } case parser.BOTTOMK: - result[groupingKey].reverseHeap = make(vectorByReverseValueHeap, 0, resultSize) - heap.Push(&result[groupingKey].reverseHeap, &Sample{ + result[groupingKey].reverseHeap = make(vectorByReverseValueHeap, 1, resultSize) + result[groupingKey].reverseHeap[0] = Sample{ Point: Point{V: s.V}, Metric: s.Metric, - }) + } case parser.GROUP: result[groupingKey].value = 1 } @@ -2283,6 +2285,13 @@ func (ev *evaluator) aggregation(op parser.ItemType, grouping []string, without case parser.TOPK: if int64(len(group.heap)) < k || group.heap[0].V < s.V || math.IsNaN(group.heap[0].V) { if int64(len(group.heap)) == k { + if k == 1 { // For k==1 we can replace in-situ. + group.heap[0] = Sample{ + Point: Point{V: s.V}, + Metric: s.Metric, + } + break + } heap.Pop(&group.heap) } heap.Push(&group.heap, &Sample{ @@ -2294,6 +2303,13 @@ func (ev *evaluator) aggregation(op parser.ItemType, grouping []string, without case parser.BOTTOMK: if int64(len(group.reverseHeap)) < k || group.reverseHeap[0].V > s.V || math.IsNaN(group.reverseHeap[0].V) { if int64(len(group.reverseHeap)) == k { + if k == 1 { // For k==1 we can replace in-situ. + group.reverseHeap[0] = Sample{ + Point: Point{V: s.V}, + Metric: s.Metric, + } + break + } heap.Pop(&group.reverseHeap) } heap.Push(&group.reverseHeap, &Sample{ @@ -2327,7 +2343,9 @@ func (ev *evaluator) aggregation(op parser.ItemType, grouping []string, without case parser.TOPK: // The heap keeps the lowest value on top, so reverse it. - sort.Sort(sort.Reverse(aggr.heap)) + if len(aggr.heap) > 1 { + sort.Sort(sort.Reverse(aggr.heap)) + } for _, v := range aggr.heap { enh.Out = append(enh.Out, Sample{ Metric: v.Metric, @@ -2338,7 +2356,9 @@ func (ev *evaluator) aggregation(op parser.ItemType, grouping []string, without case parser.BOTTOMK: // The heap keeps the highest value on top, so reverse it. - sort.Sort(sort.Reverse(aggr.reverseHeap)) + if len(aggr.reverseHeap) > 1 { + sort.Sort(sort.Reverse(aggr.reverseHeap)) + } for _, v := range aggr.reverseHeap { enh.Out = append(enh.Out, Sample{ Metric: v.Metric,