prometheus/head.go

package tsdb

import (
	"math"
	"os"
	"sort"
	"sync"

	"github.com/fabxc/tsdb/chunks"
	"github.com/fabxc/tsdb/labels"
)

// HeadBlock handles reads and writes of time series data within a time window.
// It keeps one chunk descriptor per series in memory, together with the
// in-memory index data (label values and postings) that get builds up as new
// series are created.
type HeadBlock struct {
	// NOTE(review): mtx is not taken by any method visible in this file;
	// presumably callers synchronize access externally — confirm.
	mtx sync.RWMutex

	// descs holds all chunk descs for the head block. Each chunk implicitly
	// is assigned the index as its ID.
	descs []*chunkDesc
	// hashes contains a collision map of label set hashes of chunks
	// to their position in the chunk desc slice.
	hashes map[uint64][]int

	// In-memory index data, extended by get whenever a new series is added.
	// NOTE(review): symbols is neither written nor read by the methods
	// visible in this file — confirm whether it is still needed.
	symbols  []string             // all seen strings
	values   map[string]stringset // label names to possible values
	postings *memPostings         // postings lists for terms

	// stats tracks the block's min/max time and its series, chunk and sample
	// counts. MinTime is set on construction; the rest is updated by get and
	// append.
	stats BlockStats
}

// NewHeadBlock returns an empty head block. baseTime is recorded as the
// MinTime of the block's stats; all other stats start at their zero value.
func NewHeadBlock(baseTime int64) *HeadBlock {
	var stats BlockStats
	stats.MinTime = baseTime

	return &HeadBlock{
		descs:    make([]*chunkDesc, 0),
		hashes:   make(map[uint64][]int),
		values:   make(map[string]stringset),
		postings: &memPostings{m: map[term][]uint32{}},
		stats:    stats,
	}
}

// Querier returns a querier over the head block restricted to the time
// range given by mint and maxt.
func (h *HeadBlock) Querier(mint, maxt int64) Querier {
	// The head block is handed to newBlockQuerier for both of its leading
	// arguments.
	q := newBlockQuerier(h, h, mint, maxt)
	return q
}

// Chunk looks up the chunk stored under the given reference, which is the
// position of its descriptor in the head block. It returns errNotFound when
// the reference is beyond the known descriptors.
func (h *HeadBlock) Chunk(ref uint32) (chunks.Chunk, error) {
	idx := int(ref)
	if idx < len(h.descs) {
		return h.descs[idx].chunk, nil
	}
	return nil, errNotFound
}

// interval reports the minimum and maximum time recorded in the block stats,
// in that order.
func (h *HeadBlock) interval() (int64, int64) {
	lo, hi := h.stats.MinTime, h.stats.MaxTime
	return lo, hi
}

// Stats returns a copy of the statistics about the indexed data. The error
// is always nil.
func (h *HeadBlock) Stats() (BlockStats, error) {
	snapshot := h.stats
	return snapshot, nil
}

// LabelValues returns the sorted values seen for a single label name.
// Exactly one name must be given; any other count yields errInvalidSize.
func (h *HeadBlock) LabelValues(names ...string) (StringTuples, error) {
	if len(names) != 1 {
		return nil, errInvalidSize
	}
	name := names[0]

	// Collect the value set into a slice; it stays nil when the label name
	// has no recorded values.
	var vals []string
	for v := range h.values[name] {
		vals = append(vals, v)
	}
	sort.Strings(vals)

	return &stringTuples{l: len(names), s: vals}, nil
}

// Postings returns the postings list for the term formed by the given label
// name and value. The error is always nil.
func (h *HeadBlock) Postings(name, value string) (Postings, error) {
	key := term{name: name, value: value}
	return h.postings.get(key), nil
}

// Series returns the series stored under the given reference.
//
// It returns errNotFound when ref is beyond the known descriptors, and a nil
// series with a nil error when the descriptor's timestamp range does not
// overlap [mint, maxt] according to intervalOverlap.
func (h *HeadBlock) Series(ref uint32, mint, maxt int64) (Series, error) {
	if int(ref) >= len(h.descs) {
		return nil, errNotFound
	}
	desc := h.descs[ref]
	if !intervalOverlap(desc.firsTimestamp, desc.lastTimestamp, mint, maxt) {
		return nil, nil
	}

	// The series exposes a single chunk entry that carries the block's
	// MinTime and reference 0.
	meta := ChunkMeta{MinTime: h.stats.MinTime, Ref: 0}

	// The chunk lookup ignores the reference it is given and always yields
	// the descriptor's chunk.
	lookup := func(uint32) (chunks.Chunk, error) {
		return desc.chunk, nil
	}

	return &chunkSeries{
		labels: desc.lset,
		chunks: []ChunkMeta{meta},
		chunk:  lookup,
	}, nil
}

// get returns the chunk descriptor for the series identified by hash and
// lset. If no descriptor with an equal label set is registered under the
// hash, a new one is created, indexed, and counted in the block stats.
func (h *HeadBlock) get(hash uint64, lset labels.Labels) *chunkDesc {
	// Several label sets may share a hash; compare the label sets to find
	// the matching descriptor.
	candidates := h.hashes[hash]
	for _, idx := range candidates {
		existing := h.descs[idx]
		if existing.lset.Equals(lset) {
			return existing
		}
	}

	// No match: register a fresh descriptor. Its ID is its position in descs.
	// NOTE(review): int(math.MaxInt64) is a constant conversion that does not
	// fit int on 32-bit targets — confirm those are out of scope.
	created := &chunkDesc{
		lset:  lset,
		chunk: chunks.NewXORChunk(int(math.MaxInt64)),
	}
	id := len(h.descs)
	h.descs = append(h.descs, created)
	h.hashes[hash] = append(candidates, id)

	// Record every label pair as a term for the postings index and remember
	// the value under its label name.
	terms := make([]term, 0, len(lset))
	for _, pair := range lset {
		terms = append(terms, term{name: pair.Name, value: pair.Value})

		known, found := h.values[pair.Name]
		if !found {
			known = stringset{}
			h.values[pair.Name] = known
		}
		known.set(pair.Value)
	}
	h.postings.add(uint32(id), terms...)

	// The head block holds exactly one chunk per series, so both counters
	// advance together.
	h.stats.ChunkCount++
	h.stats.SeriesCount++

	return created
}

// appendBatch appends every sample in samples to the head block. A failing
// sample does not stop the batch; all errors are collected and returned
// together via MultiError.
func (h *HeadBlock) appendBatch(samples []hashedSample) error {
	var errs MultiError

	for i := range samples {
		smpl := &samples[i]
		errs.Add(h.append(smpl.hash, smpl.labels, smpl.t, smpl.v))
	}

	return errs.Err()
}

// append adds a single sample to the series identified by hash and lset,
// creating the series if it does not exist yet. On success the sample count
// is incremented and the block's MaxTime is raised to ts if ts is larger.
func (h *HeadBlock) append(hash uint64, lset labels.Labels, ts int64, v float64) error {
	cd := h.get(hash, lset)
	if err := cd.append(ts, v); err != nil {
		return err
	}

	h.stats.SampleCount++
	if h.stats.MaxTime < ts {
		h.stats.MaxTime = ts
	}
	return nil
}

// persist writes the head block to disk. It creates the files named by
// chunksFileName(p) and indexFileName(p) and writes every series, the block
// stats, one label index per label name, and one postings list per term.
//
// It returns the sum of the sizes reported by the index writer and the series
// writer, or the first error encountered. Label indexes and postings are
// written in map iteration order, which is not deterministic.
func (h *HeadBlock) persist(p string) (int64, error) {
	sf, err := os.Create(chunksFileName(p))
	if err != nil {
		return 0, err
	}
	xf, err := os.Create(indexFileName(p))
	if err != nil {
		// The chunks file is already open at this point; close it so the
		// descriptor is not leaked. The create error is the one worth
		// reporting, so the close error is deliberately dropped.
		sf.Close()
		return 0, err
	}

	iw := newIndexWriter(xf)
	sw := newSeriesWriter(sf, iw, h.stats.MinTime)

	// Deferred calls run in reverse order: the index writer is closed
	// before the series writer.
	defer sw.Close()
	defer iw.Close()

	// Each series is written with its single chunk; the position in descs
	// serves as the series reference.
	for ref, cd := range h.descs {
		if err := sw.WriteSeries(uint32(ref), cd.lset, []*chunkDesc{cd}); err != nil {
			return 0, err
		}
	}

	if err := iw.WriteStats(h.stats); err != nil {
		return 0, err
	}
	for n, v := range h.values {
		s := make([]string, 0, len(v))
		for x := range v {
			s = append(s, x)
		}

		if err := iw.WriteLabelIndex([]string{n}, s); err != nil {
			return 0, err
		}
	}

	for t := range h.postings.m {
		if err := iw.WritePostings(t.name, t.value, h.postings.get(t)); err != nil {
			return 0, err
		}
	}

	return iw.Size() + sw.Size(), nil
}
Add new interfaces and skeleton 2016-12-04 12:16:11 +00:00			`package tsdb`

			`import (`
			`"math"`
Properly close files before reopening 2016-12-19 21:37:03 +00:00			`"os"`
Misc fixes for initial Prometheus integration 2016-12-14 17:38:46 +00:00			`"sort"`
Add new interfaces and skeleton 2016-12-04 12:16:11 +00:00			`"sync"`

			`"github.com/fabxc/tsdb/chunks"`
Extract labels package 2016-12-21 08:39:01 +00:00			`"github.com/fabxc/tsdb/labels"`
Add new interfaces and skeleton 2016-12-04 12:16:11 +00:00			`)`

			`// HeadBlock handles reads and writes of time series data within a time window.`
			`type HeadBlock struct {`
Consolidate mem index into HeadBlock 2016-12-22 00:12:28 +00:00			`mtx sync.RWMutex`

			`// descs holds all chunk descs for the head block. Each chunk implicitly`
			`// is assigned the index as its ID.`
			`descs []*chunkDesc`
			`// hashes contains a collision map of label set hashes of chunks`
			`// to their position in the chunk desc slice.`
			`hashes map[uint64][]int`

			`symbols []string // all seen strings`
			`values map[string]stringset // label names to possible values`
			`postings *memPostings // postings lists for terms`
Bucket samples before appending. This pre-sorts samples into buckets before appending them to reduce locking of shards. 2016-12-07 16:10:49 +00:00
Add stats serialization, load querier of all blocks 2016-12-15 15:14:33 +00:00			`stats BlockStats`
Add new interfaces and skeleton 2016-12-04 12:16:11 +00:00			`}`

Persist blocks periodically 2016-12-09 12:41:38 +00:00			`// NewHeadBlock creates a new empty head block.`
			`func NewHeadBlock(baseTime int64) *HeadBlock {`
Add stats serialization, load querier of all blocks 2016-12-15 15:14:33 +00:00			`b := &HeadBlock{`
Consolidate mem index into HeadBlock 2016-12-22 00:12:28 +00:00			`descs: []*chunkDesc{},`
			`hashes: map[uint64][]int{},`
			`values: map[string]stringset{},`
			`postings: &memPostings{m: make(map[term][]uint32)},`
Move sub-indexes into single index structure 2016-12-09 09:41:51 +00:00			`}`
Add stats serialization, load querier of all blocks 2016-12-15 15:14:33 +00:00			`b.stats.MinTime = baseTime`

			`return b`
Move sub-indexes into single index structure 2016-12-09 09:41:51 +00:00			`}`

Misc fixes for initial Prometheus integration 2016-12-14 17:38:46 +00:00			`// Querier returns a new querier over the head block.`
			`func (h *HeadBlock) Querier(mint, maxt int64) Querier {`
			`return newBlockQuerier(h, h, mint, maxt)`
			`}`

Add stats serialization, load querier of all blocks 2016-12-15 15:14:33 +00:00			`// Chunk returns the chunk for the reference number.`
Misc fixes for initial Prometheus integration 2016-12-14 17:38:46 +00:00			`func (h *HeadBlock) Chunk(ref uint32) (chunks.Chunk, error) {`
Consolidate mem index into HeadBlock 2016-12-22 00:12:28 +00:00			`if int(ref) >= len(h.descs) {`
Misc fixes for initial Prometheus integration 2016-12-14 17:38:46 +00:00			`return nil, errNotFound`
			`}`
Consolidate mem index into HeadBlock 2016-12-22 00:12:28 +00:00			`return h.descs[int(ref)].chunk, nil`
Misc fixes for initial Prometheus integration 2016-12-14 17:38:46 +00:00			`}`

Add stats serialization, load querier of all blocks 2016-12-15 15:14:33 +00:00			`func (h *HeadBlock) interval() (int64, int64) {`
			`return h.stats.MinTime, h.stats.MaxTime`
			`}`

Misc fixes for initial Prometheus integration 2016-12-14 17:38:46 +00:00			`// Stats returns statisitics about the indexed data.`
Add stats serialization, load querier of all blocks 2016-12-15 15:14:33 +00:00			`func (h *HeadBlock) Stats() (BlockStats, error) {`
			`return h.stats, nil`
Misc fixes for initial Prometheus integration 2016-12-14 17:38:46 +00:00			`}`

			`// LabelValues returns the possible label values`
			`func (h *HeadBlock) LabelValues(names ...string) (StringTuples, error) {`
			`if len(names) != 1 {`
			`return nil, errInvalidSize`
			`}`
			`var sl []string`

Consolidate mem index into HeadBlock 2016-12-22 00:12:28 +00:00			`for s := range h.values[names[0]] {`
Misc fixes for initial Prometheus integration 2016-12-14 17:38:46 +00:00			`sl = append(sl, s)`
			`}`
			`sort.Strings(sl)`

			`t := &stringTuples{`
			`l: len(names),`
			`s: sl,`
			`}`
			`return t, nil`
			`}`

			`// Postings returns the postings list iterator for the label pair.`
Rename Iterator to Postings 2016-12-14 20:58:29 +00:00			`func (h *HeadBlock) Postings(name, value string) (Postings, error) {`
Consolidate mem index into HeadBlock 2016-12-22 00:12:28 +00:00			`return h.postings.get(term{name: name, value: value}), nil`
Misc fixes for initial Prometheus integration 2016-12-14 17:38:46 +00:00			`}`

			`// Series returns the series for the given reference.`
Pre-select relevant chunks on series access. This adds interval metadata to indexed chunks. The queried interval is used to filter chunks when queried from the index to save unnecessary accesses of the chunks file. This is especially relevant for series that come and go often and larger files. 2016-12-16 11:13:17 +00:00			`func (h *HeadBlock) Series(ref uint32, mint, maxt int64) (Series, error) {`
Consolidate mem index into HeadBlock 2016-12-22 00:12:28 +00:00			`if int(ref) >= len(h.descs) {`
Misc fixes for initial Prometheus integration 2016-12-14 17:38:46 +00:00			`return nil, errNotFound`
			`}`
Consolidate mem index into HeadBlock 2016-12-22 00:12:28 +00:00			`cd := h.descs[ref]`

Pre-select relevant chunks on series access. This adds interval metadata to indexed chunks. The queried interval is used to filter chunks when queried from the index to save unnecessary accesses of the chunks file. This is especially relevant for series that come and go often and larger files. 2016-12-16 11:13:17 +00:00			`if !intervalOverlap(cd.firsTimestamp, cd.lastTimestamp, mint, maxt) {`
			`return nil, nil`
			`}`
Implement label value queries in all layers. 2016-12-19 11:26:25 +00:00			`s := &chunkSeries{`
Misc fixes for initial Prometheus integration 2016-12-14 17:38:46 +00:00			`labels: cd.lset,`
Pre-select relevant chunks on series access. This adds interval metadata to indexed chunks. The queried interval is used to filter chunks when queried from the index to save unnecessary accesses of the chunks file. This is especially relevant for series that come and go often and larger files. 2016-12-16 11:13:17 +00:00			`chunks: []ChunkMeta{`
			`{MinTime: h.stats.MinTime, Ref: 0},`
Misc fixes for initial Prometheus integration 2016-12-14 17:38:46 +00:00			`},`
			`chunk: func(ref uint32) (chunks.Chunk, error) {`
			`return cd.chunk, nil`
			`},`
			`}`
			`return s, nil`
			`}`

Add new interfaces and skeleton 2016-12-04 12:16:11 +00:00			`// get retrieves the chunk with the hash and label set and creates`
			`// a new one if it doesn't exist yet.`
Extract labels package 2016-12-21 08:39:01 +00:00			`func (h HeadBlock) get(hash uint64, lset labels.Labels) chunkDesc {`
Consolidate mem index into HeadBlock 2016-12-22 00:12:28 +00:00			`refs := h.hashes[hash]`

			`for _, ref := range refs {`
			`if cd := h.descs[ref]; cd.lset.Equals(lset) {`
misc 2016-12-09 09:00:14 +00:00			`return cd`
Add new interfaces and skeleton 2016-12-04 12:16:11 +00:00			`}`
			`}`
			`// None of the given chunks was for the series, create a new one.`
			`cd := &chunkDesc{`
			`lset: lset,`
			`chunk: chunks.NewXORChunk(int(math.MaxInt64)),`
			`}`
Consolidate mem index into HeadBlock 2016-12-22 00:12:28 +00:00			`// Index the new chunk.`
			`ref := len(h.descs)`

			`h.descs = append(h.descs, cd)`
			`h.hashes[hash] = append(refs, ref)`

			`// Add each label pair as a term to the inverted index.`
			`terms := make([]term, 0, len(lset))`

			`for _, l := range lset {`
			`terms = append(terms, term{name: l.Name, value: l.Value})`

			`valset, ok := h.values[l.Name]`
			`if !ok {`
			`valset = stringset{}`
			`h.values[l.Name] = valset`
			`}`
			`valset.set(l.Value)`
			`}`
			`h.postings.add(uint32(ref), terms...)`
Add new interfaces and skeleton 2016-12-04 12:16:11 +00:00
Add stats serialization, load querier of all blocks 2016-12-15 15:14:33 +00:00			`// For the head block there's exactly one chunk per series.`
			`h.stats.ChunkCount++`
			`h.stats.SeriesCount++`

misc 2016-12-09 09:00:14 +00:00			`return cd`
Add new interfaces and skeleton 2016-12-04 12:16:11 +00:00			`}`

Consolidate mem index into HeadBlock 2016-12-22 00:12:28 +00:00			`func (h *HeadBlock) appendBatch(samples []hashedSample) error {`
			`var merr MultiError`

			`for _, s := range samples {`
			`merr.Add(h.append(s.hash, s.labels, s.t, s.v))`
			`}`

			`return merr.Err()`
			`}`

misc 2016-12-09 09:00:14 +00:00			`// append adds the sample to the headblock.`
Extract labels package 2016-12-21 08:39:01 +00:00			`func (h *HeadBlock) append(hash uint64, lset labels.Labels, ts int64, v float64) error {`
misc 2016-12-09 09:00:14 +00:00			`if err := h.get(hash, lset).append(ts, v); err != nil {`
Bucket samples before appending. This pre-sorts samples into buckets before appending them to reduce locking of shards. 2016-12-07 16:10:49 +00:00			`return err`
			`}`
Add initial seriailization of block data 2016-12-08 16:43:10 +00:00
Add stats serialization, load querier of all blocks 2016-12-15 15:14:33 +00:00			`h.stats.SampleCount++`
cleanup and switching removal of unsafe calls. 2016-12-10 17:08:50 +00:00
Add stats serialization, load querier of all blocks 2016-12-15 15:14:33 +00:00			`if ts > h.stats.MaxTime {`
			`h.stats.MaxTime = ts`
misc 2016-12-09 09:00:14 +00:00			`}`
Add stats serialization, load querier of all blocks 2016-12-15 15:14:33 +00:00
			`return nil`
Add initial seriailization of block data 2016-12-08 16:43:10 +00:00			`}`
Extract head serialization into Head method 2016-12-18 13:43:27 +00:00
Properly close files before reopening 2016-12-19 21:37:03 +00:00			`func (h *HeadBlock) persist(p string) (int64, error) {`
			`sf, err := os.Create(chunksFileName(p))`
			`if err != nil {`
			`return 0, err`
			`}`
			`xf, err := os.Create(indexFileName(p))`
			`if err != nil {`
			`return 0, err`
			`}`

			`iw := newIndexWriter(xf)`
			`sw := newSeriesWriter(sf, iw, h.stats.MinTime)`

			`defer sw.Close()`
			`defer iw.Close()`

Consolidate mem index into HeadBlock 2016-12-22 00:12:28 +00:00			`for ref, cd := range h.descs {`
			`if err := sw.WriteSeries(uint32(ref), cd.lset, []*chunkDesc{cd}); err != nil {`
Properly close files before reopening 2016-12-19 21:37:03 +00:00			`return 0, err`
Extract head serialization into Head method 2016-12-18 13:43:27 +00:00			`}`
			`}`

			`if err := iw.WriteStats(h.stats); err != nil {`
Properly close files before reopening 2016-12-19 21:37:03 +00:00			`return 0, err`
Extract head serialization into Head method 2016-12-18 13:43:27 +00:00			`}`
Consolidate mem index into HeadBlock 2016-12-22 00:12:28 +00:00			`for n, v := range h.values {`
Extract head serialization into Head method 2016-12-18 13:43:27 +00:00			`s := make([]string, 0, len(v))`
			`for x := range v {`
			`s = append(s, x)`
			`}`

			`if err := iw.WriteLabelIndex([]string{n}, s); err != nil {`
Properly close files before reopening 2016-12-19 21:37:03 +00:00			`return 0, err`
Extract head serialization into Head method 2016-12-18 13:43:27 +00:00			`}`
			`}`

Consolidate mem index into HeadBlock 2016-12-22 00:12:28 +00:00			`for t := range h.postings.m {`
			`if err := iw.WritePostings(t.name, t.value, h.postings.get(t)); err != nil {`
Properly close files before reopening 2016-12-19 21:37:03 +00:00			`return 0, err`
Extract head serialization into Head method 2016-12-18 13:43:27 +00:00			`}`
			`}`

Properly close files before reopening 2016-12-19 21:37:03 +00:00			`return iw.Size() + sw.Size(), nil`
Extract head serialization into Head method 2016-12-18 13:43:27 +00:00			`}`