From b866db009ba2271edf532d50b38afef27c8469ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Rabenstein?= Date: Mon, 29 Nov 2021 10:47:56 +0100 Subject: [PATCH] storage: Fix and improve the Seek method of various iterators (#9878) There was a subtle and nasty bug in listSeriesIterator.Seek. In addition, the Seek call is defined to be a no-op if the current position of the iterator is already pointing to a suitable sample. This commit adds fast paths for this case to several potentially expensive Seek calls. Another bug was in concreteSeriesIterator.Seek. It always searched the whole series and not from the current position of the iterator. Signed-off-by: beorn7 --- storage/merge.go | 5 ++++ storage/remote/codec.go | 12 ++++++-- storage/remote/codec_test.go | 44 +++++++++++++++++++++++++++++ storage/series.go | 6 +++- storage/series_test.go | 54 ++++++++++++++++++++++++++++++++++++ tsdb/tsdbutil/buffer_test.go | 6 +++- 6 files changed, 123 insertions(+), 4 deletions(-) create mode 100644 storage/series_test.go diff --git a/storage/merge.go b/storage/merge.go index 01e667751..f7246c7c8 100644 --- a/storage/merge.go +++ b/storage/merge.go @@ -457,6 +457,11 @@ func NewChainSampleIterator(iterators []chunkenc.Iterator) chunkenc.Iterator { } func (c *chainSampleIterator) Seek(t int64) bool { + // No-op check + if c.curr != nil && c.lastt >= t { + return true + } + c.h = samplesIteratorHeap{} for _, iter := range c.iterators { if iter.Seek(t) { diff --git a/storage/remote/codec.go b/storage/remote/codec.go index a9f6af0f3..fa033b589 100644 --- a/storage/remote/codec.go +++ b/storage/remote/codec.go @@ -357,8 +357,16 @@ func newConcreteSeriersIterator(series *concreteSeries) chunkenc.Iterator { // Seek implements storage.SeriesIterator. func (c *concreteSeriesIterator) Seek(t int64) bool { - c.cur = sort.Search(len(c.series.samples), func(n int) bool { - return c.series.samples[n].Timestamp >= t + if c.cur == -1 { + c.cur = 0 + } + // No-op check. + if s := c.series.samples[c.cur]; s.Timestamp >= t { + return true + } + // Do binary search between current position and end. + c.cur += sort.Search(len(c.series.samples)-c.cur, func(n int) bool { + return c.series.samples[n+c.cur].Timestamp >= t }) return c.cur < len(c.series.samples) } diff --git a/storage/remote/codec_test.go b/storage/remote/codec_test.go index b5949cd31..1432736e1 100644 --- a/storage/remote/codec_test.go +++ b/storage/remote/codec_test.go @@ -191,6 +191,50 @@ func TestConcreteSeriesClonesLabels(t *testing.T) { require.Equal(t, lbls, gotLabels) } +func TestConcreteSeriesIterator(t *testing.T) { + series := &concreteSeries{ + labels: labels.FromStrings("foo", "bar"), + samples: []prompb.Sample{ + {Value: 1, Timestamp: 1}, + {Value: 1.5, Timestamp: 1}, + {Value: 2, Timestamp: 2}, + {Value: 3, Timestamp: 3}, + {Value: 4, Timestamp: 4}, + }, + } + it := series.Iterator() + + // Seek to the first sample with ts=1. + require.True(t, it.Seek(1)) + ts, v := it.At() + require.Equal(t, int64(1), ts) + require.Equal(t, 1., v) + + // Seek one further, next sample still has ts=1. + require.True(t, it.Next()) + ts, v = it.At() + require.Equal(t, int64(1), ts) + require.Equal(t, 1.5, v) + + // Seek again to 1 and make sure we stay where we are. + require.True(t, it.Seek(1)) + ts, v = it.At() + require.Equal(t, int64(1), ts) + require.Equal(t, 1.5, v) + + // Another seek. + require.True(t, it.Seek(3)) + ts, v = it.At() + require.Equal(t, int64(3), ts) + require.Equal(t, 3., v) + + // And we don't go back. + require.True(t, it.Seek(2)) + ts, v = it.At() + require.Equal(t, int64(3), ts) + require.Equal(t, 3., v) +} + func TestFromQueryResultWithDuplicates(t *testing.T) { ts1 := prompb.TimeSeries{ Labels: []prompb.Label{ diff --git a/storage/series.go b/storage/series.go index 3a5790c2a..024b0fc72 100644 --- a/storage/series.go +++ b/storage/series.go @@ -99,8 +99,12 @@ func (it *listSeriesIterator) Seek(t int64) bool { if it.idx == -1 { it.idx = 0 } + // No-op check. + if s := it.samples.Get(it.idx); s.T() >= t { + return true + } // Do binary search between current position and end. - it.idx = sort.Search(it.samples.Len()-it.idx, func(i int) bool { + it.idx += sort.Search(it.samples.Len()-it.idx, func(i int) bool { s := it.samples.Get(i + it.idx) return s.T() >= t }) diff --git a/storage/series_test.go b/storage/series_test.go new file mode 100644 index 000000000..384009de4 --- /dev/null +++ b/storage/series_test.go @@ -0,0 +1,54 @@ +// Copyright 2021 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestListSeriesIterator(t *testing.T) { + it := NewListSeriesIterator(samples{sample{0, 0}, sample{1, 1}, sample{1, 1.5}, sample{2, 2}, sample{3, 3}}) + + // Seek to the first sample with ts=1. + require.True(t, it.Seek(1)) + ts, v := it.At() + require.Equal(t, int64(1), ts) + require.Equal(t, 1., v) + + // Seek one further, next sample still has ts=1. + require.True(t, it.Next()) + ts, v = it.At() + require.Equal(t, int64(1), ts) + require.Equal(t, 1.5, v) + + // Seek again to 1 and make sure we stay where we are. + require.True(t, it.Seek(1)) + ts, v = it.At() + require.Equal(t, int64(1), ts) + require.Equal(t, 1.5, v) + + // Another seek. + require.True(t, it.Seek(3)) + ts, v = it.At() + require.Equal(t, int64(3), ts) + require.Equal(t, 3., v) + + // And we don't go back. + require.True(t, it.Seek(2)) + ts, v = it.At() + require.Equal(t, int64(3), ts) + require.Equal(t, 3., v) +} diff --git a/tsdb/tsdbutil/buffer_test.go b/tsdb/tsdbutil/buffer_test.go index a66786b62..baae8266b 100644 --- a/tsdb/tsdbutil/buffer_test.go +++ b/tsdb/tsdbutil/buffer_test.go @@ -159,8 +159,12 @@ func (it *listSeriesIterator) Seek(t int64) bool { if it.idx == -1 { it.idx = 0 } + // No-op check. + if s := it.list[it.idx]; s.T() >= t { + return true + } // Do binary search between current position and end. - it.idx = sort.Search(len(it.list)-it.idx, func(i int) bool { + it.idx += sort.Search(len(it.list)-it.idx, func(i int) bool { s := it.list[i+it.idx] return s.t >= t })