Mirror of https://github.com/prometheus/prometheus, synced 2025-02-03 13:43:24 +00:00
b08f82fa4e
This adds interval metadata to indexed chunks. The queried interval is used to filter chunks when they are looked up from the index, which avoids unnecessary accesses of the chunks file. This is especially relevant for series that come and go often, and for larger files.
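For illustration, the filtering works roughly like the sketch below, which condenses the chunk-selection loop in indexReader.Series from the file that follows; mint and maxt are the bounds of the queried interval, and chunkMetas and selected are placeholder names:

	for _, c := range chunkMetas {
		// Chunk metadata is sorted by start time, so nothing after this chunk can overlap the query.
		if c.MinTime > maxt {
			break
		}
		// The chunk ends before the queried interval begins; skip it.
		if c.MaxTime < mint {
			continue
		}
		selected = append(selected, c)
	}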
package tsdb

import (
	"encoding/binary"
	"fmt"
	"strings"

	"github.com/fabxc/tsdb/chunks"
)

// SeriesReader provides reading access of serialized time series data.
type SeriesReader interface {
	// Chunk returns the series data chunk with the given reference.
	Chunk(ref uint32) (chunks.Chunk, error)
}

// seriesReader implements a SeriesReader for a serialized byte stream
// of series data.
type seriesReader struct {
	// The underlying byte slice holding the encoded series data.
	b []byte
}

func newSeriesReader(b []byte) (*seriesReader, error) {
	// Verify magic number.
	if m := binary.BigEndian.Uint32(b[:4]); m != MagicSeries {
		return nil, fmt.Errorf("invalid magic number %x", m)
	}
	return &seriesReader{b: b}, nil
}

func (s *seriesReader) Chunk(offset uint32) (chunks.Chunk, error) {
	b := s.b[offset:]

	// A chunk is stored as a uvarint data length, a one-byte encoding flag,
	// and the encoded data itself.
	l, n := binary.Uvarint(b)
	if n < 0 {
		return nil, fmt.Errorf("reading chunk length failed")
	}
	b = b[n:]
	enc := chunks.Encoding(b[0])

	c, err := chunks.FromData(enc, b[1:1+l])
	if err != nil {
		return nil, err
	}
	return c, nil
}

// IndexReader provides reading access of serialized index data.
type IndexReader interface {
	// Stats returns statistics about the indexed data.
	Stats() (BlockStats, error)

	// LabelValues returns the possible label values for the given label names.
	LabelValues(names ...string) (StringTuples, error)

	// Postings returns the postings list iterator for the label pair.
	Postings(name, value string) (Postings, error)

	// Series returns the series for the given reference.
	Series(ref uint32, mint, maxt int64) (Series, error)
}

// StringTuples provides access to a sorted list of string tuples.
type StringTuples interface {
	// Len returns the total number of tuples in the list.
	Len() int
	// At returns the tuple at position i.
	At(i int) ([]string, error)
}
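
// As a rough usage sketch (errors ignored; ir stands for an IndexReader), the
// values of a single label can be listed through LabelValues and StringTuples:
//
//	tpls, _ := ir.LabelValues("job")
//	for i := 0; i < tpls.Len(); i++ {
//		t, _ := tpls.At(i) // one entry per requested label name
//		fmt.Println(t[0])
//	}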

type indexReader struct {
	series SeriesReader

	// The underlying byte slice holding the encoded index data.
	b []byte

	// Cached hashmaps of section offsets.
	labels   map[string]uint32
	postings map[string]uint32
}

var (
	errInvalidSize = fmt.Errorf("invalid size")
	errInvalidFlag = fmt.Errorf("invalid flag")
	errNotFound    = fmt.Errorf("not found")
)

func newIndexReader(s SeriesReader, b []byte) (*indexReader, error) {
	if len(b) < 16 {
		return nil, errInvalidSize
	}
	r := &indexReader{
		series: s,
		b:      b,
	}

	// Verify magic number.
	if m := binary.BigEndian.Uint32(b[:4]); m != MagicIndex {
		return nil, fmt.Errorf("invalid magic number %x", m)
	}

	var err error
	// The last two 4-byte values hold the offsets of the label and postings hashmaps.
	loff := binary.BigEndian.Uint32(b[len(b)-8 : len(b)-4])
	poff := binary.BigEndian.Uint32(b[len(b)-4:])

	if r.labels, err = readHashmap(r.section(loff)); err != nil {
		return nil, err
	}
	if r.postings, err = readHashmap(r.section(poff)); err != nil {
		return nil, err
	}

	return r, nil
}
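
// A minimal wiring sketch, assuming seriesData and indexData hold the raw
// contents of a block's series and index files (placeholder names, errors
// ignored):
//
//	sr, _ := newSeriesReader(seriesData)
//	ir, _ := newIndexReader(sr, indexData)
//	stats, _ := ir.Stats()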

func readHashmap(flag byte, b []byte, err error) (map[string]uint32, error) {
	if err != nil {
		return nil, err
	}
	if flag != flagStd {
		return nil, errInvalidFlag
	}
	h := make(map[string]uint32, 512)

	for len(b) > 0 {
		l, n := binary.Uvarint(b)
		if n < 1 {
			return nil, errInvalidSize
		}
		b = b[n:]

		if len(b) < int(l) {
			return nil, errInvalidSize
		}
		s := string(b[:l])
		b = b[l:]

		o, n := binary.Uvarint(b)
		if n < 1 {
			return nil, errInvalidSize
		}
		b = b[n:]

		h[s] = uint32(o)
	}

	return h, nil
}

func (r *indexReader) section(o uint32) (byte, []byte, error) {
	b := r.b[o:]

	if len(b) < 5 {
		return 0, nil, errInvalidSize
	}

	flag := b[0]
	l := binary.BigEndian.Uint32(b[1:5])

	b = b[5:]

	// b must have the given length plus 4 bytes for the CRC32 checksum.
	if len(b) < int(l)+4 {
		return 0, nil, errInvalidSize
	}
	return flag, b[:l], nil
}

func (r *indexReader) lookupSymbol(o uint32) ([]byte, error) {
	l, n := binary.Uvarint(r.b[o:])
	if n < 0 {
		return nil, fmt.Errorf("reading symbol length failed")
	}

	end := int(o) + n + int(l)
	if end > len(r.b) {
		return nil, fmt.Errorf("invalid length")
	}

	return r.b[int(o)+n : end], nil
}

func (r *indexReader) Stats() (BlockStats, error) {
	flag, b, err := r.section(8)
	if err != nil {
		return BlockStats{}, err
	}
	if flag != flagStd {
		return BlockStats{}, errInvalidFlag
	}

	if len(b) != 64 {
		return BlockStats{}, errInvalidSize
	}

	return BlockStats{
		MinTime:     int64(binary.BigEndian.Uint64(b)),
		MaxTime:     int64(binary.BigEndian.Uint64(b[8:])),
		SeriesCount: binary.BigEndian.Uint32(b[16:]),
		ChunkCount:  binary.BigEndian.Uint32(b[20:]),
		SampleCount: binary.BigEndian.Uint64(b[24:]),
	}, nil
}

func (r *indexReader) LabelValues(names ...string) (StringTuples, error) {
	key := strings.Join(names, string(sep))
	off, ok := r.labels[key]
	if !ok {
		return nil, fmt.Errorf("label index doesn't exist")
	}

	flag, b, err := r.section(off)
	if err != nil {
		return nil, fmt.Errorf("section: %s", err)
	}
	if flag != flagStd {
		return nil, errInvalidFlag
	}
	l, n := binary.Uvarint(b)
	if n < 1 {
		return nil, errInvalidSize
	}

	st := &serializedStringTuples{
		l:      int(l),
		b:      b[n:],
		lookup: r.lookupSymbol,
	}
	return st, nil
}

func (r *indexReader) Series(ref uint32, mint, maxt int64) (Series, error) {
	k, n := binary.Uvarint(r.b[ref:])
	if n < 1 {
		return nil, errInvalidSize
	}

	b := r.b[int(ref)+n:]
	offsets := make([]uint32, 0, k)

	for i := 0; i < int(k); i++ {
		o, n := binary.Uvarint(b)
		if n < 1 {
			return nil, errInvalidSize
		}
		offsets = append(offsets, uint32(o))

		b = b[n:]
	}
	// Symbol offsets must occur in pairs representing name and value.
	if len(offsets)&1 != 0 {
		return nil, errInvalidSize
	}

	// TODO(fabxc): Fully materialize series symbols for now. Figure out later
	// whether it makes sense to decode them lazily.
	// If we use unsafe strings there'll be no copy overhead.
	//
	// The references are expected to be sorted and match the order of
	// the underlying strings.
	labels := make(Labels, 0, k)

	for i := 0; i < int(k); i += 2 {
		n, err := r.lookupSymbol(offsets[i])
		if err != nil {
			return nil, err
		}
		v, err := r.lookupSymbol(offsets[i+1])
		if err != nil {
			return nil, err
		}
		labels = append(labels, Label{
			Name:  string(n),
			Value: string(v),
		})
	}

	// Read the chunk metadata.
	k, n = binary.Uvarint(b)
	if n < 1 {
		return nil, errInvalidSize
	}

	b = b[n:]
	chunks := make([]ChunkMeta, 0, k)

	for i := 0; i < int(k); i++ {
		firstTime, n := binary.Varint(b)
		if n < 1 {
			return nil, errInvalidSize
		}
		b = b[n:]

		// Terminate early if we exceeded the queried time range.
		if firstTime > maxt {
			break
		}

		lastTime, n := binary.Varint(b)
		if n < 1 {
			return nil, errInvalidSize
		}
		b = b[n:]

		o, n := binary.Uvarint(b)
		if n < 1 {
			return nil, errInvalidSize
		}
		b = b[n:]

		// Skip the chunk if it ends before the queried time range starts.
		if lastTime < mint {
			continue
		}

		chunks = append(chunks, ChunkMeta{
			Ref:     uint32(o),
			MinTime: firstTime,
			MaxTime: lastTime,
		})
	}
	// If no chunks applicable to the time range were found, the series
	// can be skipped.
	if len(chunks) == 0 {
		return nil, nil
	}

	return &series{
		labels: labels,
		chunks: chunks,
		chunk:  r.series.Chunk,
	}, nil
}
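
// The chunk references collected above are resolved lazily: the returned series
// keeps r.series.Chunk as its chunk callback, so the data for a selected
// ChunkMeta can later be fetched roughly as follows (sketch, errors ignored;
// meta is one of the ChunkMeta values gathered above):
//
//	c, _ := r.series.Chunk(meta.Ref)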

func (r *indexReader) Postings(name, value string) (Postings, error) {
	key := name + string(sep) + value

	off, ok := r.postings[key]
	if !ok {
		return nil, errNotFound
	}

	flag, b, err := r.section(off)
	if err != nil {
		return nil, err
	}

	if flag != flagStd {
		return nil, errInvalidFlag
	}

	// TODO(fabxc): just read into memory as an intermediate solution.
	// Add an iterator over the serialized data.
	var l []uint32

	for len(b) > 0 {
		if len(b) < 4 {
			return nil, errInvalidSize
		}
		l = append(l, binary.BigEndian.Uint32(b[:4]))

		b = b[4:]
	}

	return &listIterator{list: l, idx: -1}, nil
}

type stringTuples struct {
	l int      // tuple length
	s []string // flattened tuple entries
}

func newStringTuples(s []string, l int) (*stringTuples, error) {
	if len(s)%l != 0 {
		return nil, errInvalidSize
	}
	return &stringTuples{s: s, l: l}, nil
}

func (t *stringTuples) Len() int                   { return len(t.s) / t.l }
func (t *stringTuples) At(i int) ([]string, error) { return t.s[i : i+t.l], nil }

func (t *stringTuples) Swap(i, j int) {
	c := make([]string, t.l)
	copy(c, t.s[i:i+t.l])

	for k := 0; k < t.l; k++ {
		t.s[i+k] = t.s[j+k]
		t.s[j+k] = c[k]
	}
}

func (t *stringTuples) Less(i, j int) bool {
	for k := 0; k < t.l; k++ {
		d := strings.Compare(t.s[i+k], t.s[j+k])

		if d < 0 {
			return true
		}
		if d > 0 {
			return false
		}
	}
	return false
}

type serializedStringTuples struct {
	l      int
	b      []byte
	lookup func(uint32) ([]byte, error)
}

func (t *serializedStringTuples) Len() int {
	// TODO(fabxc): Cache this?
	return len(t.b) / (4 * t.l)
}

func (t *serializedStringTuples) At(i int) ([]string, error) {
	if len(t.b) < (i+t.l)*4 {
		return nil, errInvalidSize
	}
	res := make([]string, 0, t.l)

	for k := 0; k < t.l; k++ {
		// Each tuple entry is a 4-byte offset into the symbol table.
		offset := binary.BigEndian.Uint32(t.b[(i+k)*4:])

		b, err := t.lookup(offset)
		if err != nil {
			return nil, fmt.Errorf("lookup: %s", err)
		}
		res = append(res, string(b))
	}

	return res, nil
}