storage: Fix and improve the Seek method of various iterators (#9878)

There was a subtle and nasty bug in listSeriesIterator.Seek.

In addition, the Seek call is defined to be a no-op if the current
position of the iterator is already pointing to a suitable
sample. This commit adds fast paths for this case to several
potentially expensive Seek calls.

Another bug was in concreteSeriesIterator.Seek. It always searched the
whole series and not from the current position of the iterator.

Signed-off-by: beorn7 <beorn@grafana.com>
This commit is contained in:
Björn Rabenstein 2021-11-29 10:47:56 +01:00 committed by GitHub
parent 0e82a96e2f
commit b866db009b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 123 additions and 4 deletions

View File

@ -457,6 +457,11 @@ func NewChainSampleIterator(iterators []chunkenc.Iterator) chunkenc.Iterator {
}
func (c *chainSampleIterator) Seek(t int64) bool {
// No-op check
if c.curr != nil && c.lastt >= t {
return true
}
c.h = samplesIteratorHeap{}
for _, iter := range c.iterators {
if iter.Seek(t) {

View File

@ -357,8 +357,16 @@ func newConcreteSeriersIterator(series *concreteSeries) chunkenc.Iterator {
// Seek implements storage.SeriesIterator.
func (c *concreteSeriesIterator) Seek(t int64) bool {
c.cur = sort.Search(len(c.series.samples), func(n int) bool {
return c.series.samples[n].Timestamp >= t
if c.cur == -1 {
c.cur = 0
}
// No-op check.
if s := c.series.samples[c.cur]; s.Timestamp >= t {
return true
}
// Do binary search between current position and end.
c.cur += sort.Search(len(c.series.samples)-c.cur, func(n int) bool {
return c.series.samples[n+c.cur].Timestamp >= t
})
return c.cur < len(c.series.samples)
}

View File

@ -191,6 +191,50 @@ func TestConcreteSeriesClonesLabels(t *testing.T) {
require.Equal(t, lbls, gotLabels)
}
func TestConcreteSeriesIterator(t *testing.T) {
series := &concreteSeries{
labels: labels.FromStrings("foo", "bar"),
samples: []prompb.Sample{
{Value: 1, Timestamp: 1},
{Value: 1.5, Timestamp: 1},
{Value: 2, Timestamp: 2},
{Value: 3, Timestamp: 3},
{Value: 4, Timestamp: 4},
},
}
it := series.Iterator()
// Seek to the first sample with ts=1.
require.True(t, it.Seek(1))
ts, v := it.At()
require.Equal(t, int64(1), ts)
require.Equal(t, 1., v)
// Seek one further, next sample still has ts=1.
require.True(t, it.Next())
ts, v = it.At()
require.Equal(t, int64(1), ts)
require.Equal(t, 1.5, v)
// Seek again to 1 and make sure we stay where we are.
require.True(t, it.Seek(1))
ts, v = it.At()
require.Equal(t, int64(1), ts)
require.Equal(t, 1.5, v)
// Another seek.
require.True(t, it.Seek(3))
ts, v = it.At()
require.Equal(t, int64(3), ts)
require.Equal(t, 3., v)
// And we don't go back.
require.True(t, it.Seek(2))
ts, v = it.At()
require.Equal(t, int64(3), ts)
require.Equal(t, 3., v)
}
func TestFromQueryResultWithDuplicates(t *testing.T) {
ts1 := prompb.TimeSeries{
Labels: []prompb.Label{

View File

@ -99,8 +99,12 @@ func (it *listSeriesIterator) Seek(t int64) bool {
if it.idx == -1 {
it.idx = 0
}
// No-op check.
if s := it.samples.Get(it.idx); s.T() >= t {
return true
}
// Do binary search between current position and end.
it.idx = sort.Search(it.samples.Len()-it.idx, func(i int) bool {
it.idx += sort.Search(it.samples.Len()-it.idx, func(i int) bool {
s := it.samples.Get(i + it.idx)
return s.T() >= t
})

54
storage/series_test.go Normal file
View File

@ -0,0 +1,54 @@
// Copyright 2021 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storage
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestListSeriesIterator(t *testing.T) {
it := NewListSeriesIterator(samples{sample{0, 0}, sample{1, 1}, sample{1, 1.5}, sample{2, 2}, sample{3, 3}})
// Seek to the first sample with ts=1.
require.True(t, it.Seek(1))
ts, v := it.At()
require.Equal(t, int64(1), ts)
require.Equal(t, 1., v)
// Seek one further, next sample still has ts=1.
require.True(t, it.Next())
ts, v = it.At()
require.Equal(t, int64(1), ts)
require.Equal(t, 1.5, v)
// Seek again to 1 and make sure we stay where we are.
require.True(t, it.Seek(1))
ts, v = it.At()
require.Equal(t, int64(1), ts)
require.Equal(t, 1.5, v)
// Another seek.
require.True(t, it.Seek(3))
ts, v = it.At()
require.Equal(t, int64(3), ts)
require.Equal(t, 3., v)
// And we don't go back.
require.True(t, it.Seek(2))
ts, v = it.At()
require.Equal(t, int64(3), ts)
require.Equal(t, 3., v)
}

View File

@ -159,8 +159,12 @@ func (it *listSeriesIterator) Seek(t int64) bool {
if it.idx == -1 {
it.idx = 0
}
// No-op check.
if s := it.list[it.idx]; s.T() >= t {
return true
}
// Do binary search between current position and end.
it.idx = sort.Search(len(it.list)-it.idx, func(i int) bool {
it.idx += sort.Search(len(it.list)-it.idx, func(i int) bool {
s := it.list[i+it.idx]
return s.t >= t
})