prometheus/head.go

package tsdb

import (
	"errors"
	"math"
	"sort"
	"sync"
	"time"

	"github.com/bradfitz/slice"
	"github.com/fabxc/tsdb/chunks"
	"github.com/fabxc/tsdb/labels"
	"github.com/go-kit/kit/log"
)

// HeadBlock handles reads and writes of time series data within a time window.
// It keeps one in-memory chunk per series, indexes the series by their labels
// and records incoming series and samples in a write-ahead log.
type HeadBlock struct {
	// NOTE(review): mtx is not acquired by any method in this part of the
	// file; presumably callers are expected to synchronize access — confirm.
	mtx sync.RWMutex
	// d is the directory the block and its write-ahead log were opened from.
	d   string

	// descs holds all chunk descs for the head block. Each chunk implicitly
	// is assigned the index as its ID.
	descs []*chunkDesc
	// mapper maps a series ID to its position in a list of all series
	// ordered by label set. It is rebuilt from scratch by rewriteMapping.
	mapper *positionMapper
	// hashes contains a collision map of label set hashes of chunks
	// to their chunk descs.
	hashes map[uint64][]*chunkDesc

	values   map[string]stringset // label names to possible values
	postings *memPostings         // postings lists for terms

	// wal is the write-ahead log that series and samples are written to
	// before they are applied to the in-memory state.
	wal *WAL

	// bstats tracks the time range and the series, chunk and sample counts
	// of the block.
	bstats BlockStats
}

// OpenHeadBlock opens the head block stored in dir. It opens the write-ahead
// log in that directory and replays every series and sample found in it to
// rebuild the in-memory state, so the returned block is empty only if the
// WAL is.
//
// An error is returned if the WAL cannot be opened or read, or if it contains
// a sample that references a series that was not replayed before it. When the
// replay fails, the WAL is closed again before returning.
func OpenHeadBlock(dir string, l log.Logger) (*HeadBlock, error) {
	wal, err := OpenWAL(dir, log.NewContext(l).With("component", "wal"), 15*time.Second)
	if err != nil {
		return nil, err
	}

	b := &HeadBlock{
		d:        dir,
		descs:    []*chunkDesc{},
		hashes:   map[uint64][]*chunkDesc{},
		values:   map[string]stringset{},
		postings: &memPostings{m: make(map[term][]uint32)},
		wal:      wal,
	}

	// Start from an inverted interval so that the first sample sets both
	// bounds.
	b.bstats.MinTime = math.MaxInt64
	b.bstats.MaxTime = math.MinInt64

	// The handler callbacks have no error return, so a problem detected
	// during the replay is recorded here and reported once ReadAll is done.
	var replayErr error

	err = wal.ReadAll(&walHandler{
		series: func(lset labels.Labels) {
			b.create(lset.Hash(), lset)
		},
		sample: func(s hashedSample) {
			// A sample must reference a series that was replayed before it.
			// Indexing blindly would panic on a damaged WAL.
			if int(s.ref) >= len(b.descs) {
				if replayErr == nil {
					replayErr = errors.New("sample in WAL references unknown series")
				}
				return
			}
			cd := b.descs[s.ref]

			// The bookkeeping below parallels the in-memory part of
			// appendBatch – TODO(fabxc): deduplicate?
			//
			// Skip a sample that carries the same timestamp as the latest
			// sample of its series: it is either an exact duplicate or an
			// amendment, and neither must be appended to the chunk again.
			if cd.lastTimestamp == s.t {
				return
			}
			cd.append(s.t, s.v)

			if s.t > b.bstats.MaxTime {
				b.bstats.MaxTime = s.t
			}
			if s.t < b.bstats.MinTime {
				b.bstats.MinTime = s.t
			}
			b.bstats.SampleCount++
		},
	})
	if err == nil {
		err = replayErr
	}
	if err != nil {
		// Do not leak the opened WAL. The replay error is the one worth
		// reporting, so a failure to close is deliberately ignored.
		_ = wal.Close()
		return nil, err
	}

	b.rewriteMapping()

	return b, nil
}

// Close closes the write-ahead log of the head block and reports the error
// of that operation, if any.
func (h *HeadBlock) Close() error {
	err := h.wal.Close()
	return err
}

// dir returns the directory the head block was opened from.
func (h *HeadBlock) dir() string {
	return h.d
}

// persisted always reports false for a head block.
func (h *HeadBlock) persisted() bool {
	return false
}

// index returns the head block itself as the reader for its index.
func (h *HeadBlock) index() IndexReader {
	return h
}

// series returns the head block itself as the reader for its series data.
func (h *HeadBlock) series() SeriesReader {
	return h
}

// stats returns a copy of the block's current statistics.
func (h *HeadBlock) stats() BlockStats {
	return h.bstats
}

// Chunk returns the chunk of the series with the given reference number.
// It returns errNotFound if no series with that reference exists.
func (h *HeadBlock) Chunk(ref uint32) (chunks.Chunk, error) {
	if idx := int(ref); idx < len(h.descs) {
		return h.descs[idx].chunk, nil
	}
	return nil, errNotFound
}

// interval returns the minimum and maximum sample timestamp recorded in the
// block's statistics, in that order.
func (h *HeadBlock) interval() (int64, int64) {
	mint, maxt := h.bstats.MinTime, h.bstats.MaxTime
	return mint, maxt
}

// Stats returns a copy of the statistics about the indexed data.
// The returned error is always nil.
func (h *HeadBlock) Stats() (BlockStats, error) {
	current := h.bstats
	return current, nil
}

// LabelValues returns the sorted values known for a label name. Exactly one
// name must be given; any other number of names yields errInvalidSize.
func (h *HeadBlock) LabelValues(names ...string) (StringTuples, error) {
	if len(names) != 1 {
		return nil, errInvalidSize
	}
	name := names[0]

	// The value set has no defined iteration order, so collect and sort.
	var vals []string
	for v := range h.values[name] {
		vals = append(vals, v)
	}
	sort.Strings(vals)

	return &stringTuples{l: 1, s: vals}, nil
}

// Postings returns the postings list iterator for the given label name and
// value pair. The returned error is always nil.
func (h *HeadBlock) Postings(name, value string) (Postings, error) {
	key := term{name: name, value: value}
	return h.postings.get(key), nil
}

// remapPostings reorders the given postings from ID order into the order of
// the label sets of the series they reference, as defined by h.mapper.
// The result is no longer monotonic in its IDs and MUST NOT be fed into
// regular postings set operations such as intersect and merge.
// If p cannot be expanded, postings carrying that error are returned.
func (h *HeadBlock) remapPostings(p Postings) Postings {
	refs, err := expandPostings(p)
	if err != nil {
		return errPostings{err: err}
	}

	// Order the references by the sorted position of their series.
	byPosition := func(a, b int) bool {
		return h.mapper.fw[refs[a]] < h.mapper.fw[refs[b]]
	}
	slice.Sort(refs, byPosition)

	return newListPostings(refs)
}

// Series returns the label set and the chunk metadata of the series with the
// given reference. It returns errNotFound if no such series exists.
func (h *HeadBlock) Series(ref uint32) (labels.Labels, []ChunkMeta, error) {
	idx := int(ref)
	if idx >= len(h.descs) {
		return nil, nil, errNotFound
	}
	cd := h.descs[idx]

	// A single meta entry is produced per series; it spans the series'
	// first to last timestamp and shares the reference of the series.
	metas := []ChunkMeta{{
		MinTime: cd.firstTimestamp,
		MaxTime: cd.lastTimestamp,
		Ref:     ref,
	}}
	return cd.lset, metas, nil
}

// LabelIndices returns one single-element tuple for every label name that
// has values in the head block. The order of the tuples is unspecified as
// it follows map iteration. The returned error is always nil.
func (h *HeadBlock) LabelIndices() ([][]string, error) {
	indices := make([][]string, 0, len(h.values))

	for name := range h.values {
		indices = append(indices, []string{name})
	}
	return indices, nil
}

// get looks up the chunk desc of the series with the given hash and label
// set. It returns nil if the series is unknown; it never creates one.
func (h *HeadBlock) get(hash uint64, lset labels.Labels) *chunkDesc {
	// Different label sets may share a hash, so every candidate registered
	// under the hash is compared by its labels.
	candidates := h.hashes[hash]

	for i := range candidates {
		if candidates[i].lset.Equals(lset) {
			return candidates[i]
		}
	}
	return nil
}

// create registers a new series with the given label set under the given
// hash and returns its chunk desc. The series receives the next free index
// in h.descs as its reference and is added to the hash map, the label value
// sets and the postings lists. It does not check whether the series already
// exists. It panics if no appender can be obtained for the new chunk.
func (h *HeadBlock) create(hash uint64, lset labels.Labels) *chunkDesc {
	chk := chunks.NewXORChunk()

	app, err := chk.Appender()
	if err != nil {
		// Obtaining an appender for a brand-new chunk is expected to never
		// fail; treat a failure as a broken invariant.
		panic(err)
	}

	cd := &chunkDesc{
		lset:          lset,
		chunk:         chk,
		app:           app,
		lastTimestamp: math.MinInt64,
		// The position in h.descs doubles as the reference of the series.
		ref: uint32(len(h.descs)),
	}

	h.descs = append(h.descs, cd)
	h.hashes[hash] = append(h.hashes[hash], cd)

	// Index every label pair of the series.
	for _, lbl := range lset {
		vals, known := h.values[lbl.Name]
		if !known {
			vals = stringset{}
			h.values[lbl.Name] = vals
		}
		vals.set(lbl.Value)

		h.postings.add(cd.ref, term{name: lbl.Name, value: lbl.Value})
	}

	// Every series is additionally listed under the empty term.
	h.postings.add(cd.ref, term{})

	// For the head block there's exactly one chunk per series.
	h.bstats.SeriesCount++
	h.bstats.ChunkCount++

	return cd
}

var (
	// ErrOutOfOrderSample is returned if an appended sample has a
	// timestamp smaller than the most recent sample of its series.
	ErrOutOfOrderSample = errors.New("out of order sample")

	// ErrAmendSample is returned if an appended sample has the same timestamp
	// as the most recent sample but a different value.
	ErrAmendSample = errors.New("amending sample")
)

// appendBatch appends a batch of samples to the head block.
//
// It first resolves the series of every sample, reserving a new reference
// for each series not seen before, and stores the reference in the sample's
// ref field (the samples slice is modified in place). The new series and all
// samples are then written to the WAL and, only if that succeeds, applied to
// the in-memory state.
//
// It returns ErrOutOfOrderSample if a sample is older than the latest sample
// of its existing series, ErrAmendSample if it has the same timestamp as the
// latest sample but a different value, or the error of the WAL write. In all
// of these cases nothing is added to the in-memory block, although the ref
// fields of some samples may already have been overwritten.
func (h *HeadBlock) appendBatch(samples []hashedSample) error {
	// Find head chunks for all samples and allocate new IDs/refs for
	// ones we haven't seen before.
	var (
		newSeries []labels.Labels
		newHashes []uint64
		// reserved maps a label set hash to the refs handed out in this
		// batch to new series with that hash. Distinct label sets may share
		// a hash, hence a list of refs rather than a single one.
		reserved = map[uint64][]uint32{}
		// base is the ref the first new series of this batch receives.
		base = uint32(len(h.descs))
	)

	for i := range samples {
		s := &samples[i]

		if cd := h.get(s.hash, s.labels); cd != nil {
			// Samples must only occur in order.
			if s.t < cd.lastTimestamp {
				return ErrOutOfOrderSample
			}
			if cd.lastTimestamp == s.t && cd.lastValue != s.v {
				return ErrAmendSample
			}
			// TODO(fabxc): sample refs are only scoped within a block for
			// now and we ignore any previously set value
			s.ref = cd.ref
			continue
		}

		// There may be several samples for a new series in a batch.
		// We don't want to reserve a new space for each. The label sets are
		// compared as well so that colliding hashes of different series
		// do not merge them into one.
		known := false
		for _, ref := range reserved[s.hash] {
			if newSeries[ref-base].Equals(s.labels) {
				s.ref = ref
				known = true
				break
			}
		}
		if known {
			continue
		}

		s.ref = base + uint32(len(newSeries))
		reserved[s.hash] = append(reserved[s.hash], s.ref)

		newSeries = append(newSeries, s.labels)
		newHashes = append(newHashes, s.hash)
	}

	// Write all new series and samples to the WAL and add it to the
	// in-mem database on success.
	if err := h.wal.Log(newSeries, samples); err != nil {
		return err
	}

	// After the samples were successfully written to the WAL, there may
	// be no further failures. The series are created in the order their
	// refs were reserved, so every series ends up at its reserved index.
	for i, s := range newSeries {
		h.create(newHashes[i], s)
	}
	// TODO(fabxc): just mark as dirty instead and trigger a remapping
	// periodically and upon querying.
	if len(newSeries) > 0 {
		h.rewriteMapping()
	}

	for _, s := range samples {
		cd := h.descs[s.ref]
		// Skip duplicate samples. A sample with the timestamp of the latest
		// sample of its series is either an exact duplicate or an amendment
		// within this batch; neither must be appended to the chunk again.
		if cd.lastTimestamp == s.t {
			continue
		}
		cd.append(s.t, s.v)

		if s.t > h.bstats.MaxTime {
			h.bstats.MaxTime = s.t
		}
		if s.t < h.bstats.MinTime {
			h.bstats.MinTime = s.t
		}
		h.bstats.SampleCount++
	}

	return nil
}

// rewriteMapping rebuilds h.mapper from scratch. It orders a copy of the
// series list by label set, leaving h.descs itself in ID order, and stores
// the position mapping that results from that sort.
func (h *HeadBlock) rewriteMapping() {
	sorted := make([]*chunkDesc, len(h.descs))
	copy(sorted, h.descs)

	byLabels := func(a, b int) bool {
		return labels.Compare(sorted[a].lset, sorted[b].lset) < 0
	}

	h.mapper = newPositionMapper(slice.SortInterface(sorted, byLabels))
}

// positionMapper records how sorting permutes a sortable collection.
// After construction, iv[i] is the original index of the element that ended
// up at sorted position i, and fw[k] is the sorted position of the element
// that was originally at index k.
type positionMapper struct {
	sortable sort.Interface
	iv, fw   []int
}

// newPositionMapper sorts s in place and returns the mapping between the
// original and the sorted positions of its elements.
func newPositionMapper(s sort.Interface) *positionMapper {
	n := s.Len()

	// Begin with the identity permutation; Swap keeps it in step with s.
	inverse := make([]int, n)
	for pos := 0; pos < n; pos++ {
		inverse[pos] = pos
	}

	pm := &positionMapper{
		sortable: s,
		iv:       inverse,
		fw:       make([]int, n),
	}
	sort.Sort(pm)

	// Invert the recorded permutation to obtain the forward mapping.
	for sortedPos, origPos := range pm.iv {
		pm.fw[origPos] = sortedPos
	}

	return pm
}

// Len returns the length of the wrapped collection.
func (m *positionMapper) Len() int {
	return m.sortable.Len()
}

// Less defers to the ordering of the wrapped collection.
func (m *positionMapper) Less(i, j int) bool {
	return m.sortable.Less(i, j)
}

// Swap swaps the elements in the wrapped collection and mirrors the swap in
// iv so that the original index of every element stays known.
func (m *positionMapper) Swap(i, j int) {
	m.sortable.Swap(i, j)

	left, right := m.iv[i], m.iv[j]
	m.iv[i], m.iv[j] = right, left
}
-												Add new interfaces and skeleton

											
										
										
											2016-12-04 12:16:11 +00:00
+								package tsdb
 								import (
-												Modify IndexReader API to accomodate compaction

This changes the IndexReader API to expose plain labels
and chunk meta information instead of a Series interface.
Dropping of irrelevant chunks is moved into the querier.

A LabelIndices method is added to query for existing label
value indices.

											
										
										
											2016-12-31 14:35:08 +00:00
+									"errors"
-												Fix last timestamp initialization

This initializes the chunkDesc's last timestamp to the minimum
value so initial samples with a timestamp of 0 (e.g. in tests)
are not accidentally dropped.

											
										
										
											2017-01-04 13:06:40 +00:00
+									"math"
-												Misc fixes for initial Prometheus integration

											
										
										
											2016-12-14 17:38:46 +00:00
+									"sort"
-												Add new interfaces and skeleton

											
										
										
											2016-12-04 12:16:11 +00:00
+									"sync"
-												Periodically fsync WAL, make head cut async

											
										
										
											2017-01-06 14:18:06 +00:00
+									"time"
-												Add new interfaces and skeleton

											
										
										
											2016-12-04 12:16:11 +00:00
-												Add postings wrapper that emits head postings in label set order

This adds a position mapper that takes series from a head block
in the order they were appended and creates a mapping representing
them in order of their label sets.

Write-repair of the postings list would cause very expensive writing.
Hence, we keep them as they are and only apply the position mapping
at the very end, after a postings list has been sufficiently reduced
through intersections etc.

											
										
										
											2017-01-05 14:13:01 +00:00
+									"github.com/bradfitz/slice"
-												Add new interfaces and skeleton

											
										
										
											2016-12-04 12:16:11 +00:00
+									"github.com/fabxc/tsdb/chunks"
-												Extract labels package

											
										
										
											2016-12-21 08:39:01 +00:00
+									"github.com/fabxc/tsdb/labels"
-												Periodically fsync WAL, make head cut async

											
										
										
											2017-01-06 14:18:06 +00:00
+									"github.com/go-kit/kit/log"
-												Add new interfaces and skeleton

											
										
										
											2016-12-04 12:16:11 +00:00
+								)
 								// HeadBlock handles reads and writes of time series data within a time window.
 								type HeadBlock struct {
-												Consolidate mem index into HeadBlock

											
										
										
											2016-12-22 00:12:28 +00:00
+									mtx sync.RWMutex
-												Lock mmapped files

											
										
										
											2017-01-03 09:09:20 +00:00
+									d   string
-												Consolidate mem index into HeadBlock

											
										
										
											2016-12-22 00:12:28 +00:00
 									// descs holds all chunk descs for the head block. Each chunk implicitly
 									// is assigned the index as its ID.
 									descs []*chunkDesc
-												Add postings wrapper that emits head postings in label set order

This adds a position mapper that takes series from a head block
in the order they were appended and creates a mapping representing
them in order of their label sets.

Write-repair of the postings list would cause very expensive writing.
Hence, we keep them as they are and only apply the position mapping
at the very end, after a postings list has been sufficiently reduced
through intersections etc.

											
										
										
											2017-01-05 14:13:01 +00:00
+									// mapping maps a series ID to its position in an ordered list
 									// of all series. The orderDirty flag indicates that it has gone stale.
 									mapper *positionMapper
-												Consolidate mem index into HeadBlock

											
										
										
											2016-12-22 00:12:28 +00:00
+									// hashes contains a collision map of label set hashes of chunks
-												Remove double-reference in chunk hashmap

											
										
										
											2016-12-31 09:19:02 +00:00
+									// to their chunk descs.
 									hashes map[uint64][]*chunkDesc
-												Consolidate mem index into HeadBlock

											
										
										
											2016-12-22 00:12:28 +00:00
 									values   map[string]stringset // label names to possible values
 									postings *memPostings         // postings lists for terms
-												Bucket samples before appending.

This pre-sorts samples into buckets before appending them to reduce
locking of shards.

											
										
										
											2016-12-07 16:10:49 +00:00
-												Add initial WAL writing

											
										
										
											2016-12-22 11:05:24 +00:00
+									wal *WAL
-												Consolidate persistence and compaction

											
										
										
											2017-01-03 14:43:26 +00:00
+									bstats BlockStats
-												Add new interfaces and skeleton

											
										
										
											2016-12-04 12:16:11 +00:00
+								}
-												Rename to OpenHeadBlock

											
										
										
											2016-12-22 19:00:24 +00:00
+								// OpenHeadBlock creates a new empty head block.
-												Periodically fsync WAL, make head cut async

											
										
										
											2017-01-06 14:18:06 +00:00
+								func OpenHeadBlock(dir string, l log.Logger) (*HeadBlock, error) {
 									wal, err := OpenWAL(dir, log.NewContext(l).With("component", "wal"), 15*time.Second)
-												Add initial WAL writing

											
										
										
											2016-12-22 11:05:24 +00:00
+									if err != nil {
 										return nil, err
 									}
-												Add stats serialization, load querier of all blocks

											
										
										
											2016-12-15 15:14:33 +00:00
+									b := &HeadBlock{
-												Lock mmapped files

											
										
										
											2017-01-03 09:09:20 +00:00
+										d:        dir,
-												Consolidate mem index into HeadBlock

											
										
										
											2016-12-22 00:12:28 +00:00
+										descs:    []*chunkDesc{},
-												Remove double-reference in chunk hashmap

											
										
										
											2016-12-31 09:19:02 +00:00
+										hashes:   map[uint64][]*chunkDesc{},
-												Consolidate mem index into HeadBlock

											
										
										
											2016-12-22 00:12:28 +00:00
+										values:   map[string]stringset{},
 										postings: &memPostings{m: make(map[term][]uint32)},
-												Add initial WAL writing

											
										
										
											2016-12-22 11:05:24 +00:00
+										wal:      wal,
-												Move sub-indexes into single index structure

											
										
										
											2016-12-09 09:41:51 +00:00
+									}
-												Add stats serialization, load querier of all blocks

											
										
										
											2016-12-15 15:14:33 +00:00
-												Handle compaction trigger and reinitializing in DB

											
										
										
											2017-01-06 11:37:28 +00:00
+									b.bstats.MinTime = math.MaxInt64
 									b.bstats.MaxTime = math.MinInt64
-												Add WAL decoder+loading and benchmarks

											
										
										
											2016-12-22 14:18:33 +00:00
+									err = wal.ReadAll(&walHandler{
 										series: func(lset labels.Labels) {
 											b.create(lset.Hash(), lset)
 										},
 										sample: func(s hashedSample) {
-												Switch to sequential block names

This changes block directory names from the int64 timestamp
to sequential numbering.

											
										
										
											2017-01-06 08:26:39 +00:00
+											cd := b.descs[s.ref]
 											// Duplicated from appendBatch – TODO(fabxc): deduplicate?
 											if cd.lastTimestamp == s.t && cd.lastValue != s.v {
 												return
 											}
 											cd.append(s.t, s.v)
 											if s.t > b.bstats.MaxTime {
 												b.bstats.MaxTime = s.t
 											}
 											if s.t < b.bstats.MinTime {
 												b.bstats.MinTime = s.t
 											}
-												Consolidate persistence and compaction

											
										
										
											2017-01-03 14:43:26 +00:00
+											b.bstats.SampleCount++
-												Add WAL decoder+loading and benchmarks

											
										
										
											2016-12-22 14:18:33 +00:00
+										},
 									})
 									if err != nil {
 										return nil, err
 									}
-												Add postings wrapper that emits head postings in label set order

This adds a position mapper that takes series from a head block
in the order they were appended and creates a mapping representing
them in order of their label sets.

Write-repair of the postings list would cause very expensive writing.
Hence, we keep them as they are and only apply the position mapping
at the very end, after a postings list has been sufficiently reduced
through intersections etc.

											
										
										
											2017-01-05 14:13:01 +00:00
+									b.rewriteMapping()
-												Add initial WAL writing

											
										
										
											2016-12-22 11:05:24 +00:00
+									return b, nil
 								}
 								// Close syncs all data and closes underlying resources of the head block.
 								func (h *HeadBlock) Close() error {
 									return h.wal.Close()
-												Move sub-indexes into single index structure

											
										
										
											2016-12-09 09:41:51 +00:00
+								}
-												Lock mmapped files

											
										
										
											2017-01-03 09:09:20 +00:00
+								func (h *HeadBlock) dir() string          { return h.d }
-												Consolidate persistence and compaction

											
										
										
											2017-01-03 14:43:26 +00:00
+								func (h *HeadBlock) persisted() bool      { return false }
-												Lock mmapped files

											
										
										
											2017-01-03 09:09:20 +00:00
+								func (h *HeadBlock) index() IndexReader   { return h }
 								func (h *HeadBlock) series() SeriesReader { return h }
-												Consolidate persistence and compaction

											
										
										
											2017-01-03 14:43:26 +00:00
+								func (h *HeadBlock) stats() BlockStats    { return h.bstats }
-												Misc fixes for initial Prometheus integration

											
										
										
											2016-12-14 17:38:46 +00:00
-												Add stats serialization, load querier of all blocks

											
										
										
											2016-12-15 15:14:33 +00:00
+								// Chunk returns the chunk for the reference number.
-												Misc fixes for initial Prometheus integration

											
										
										
											2016-12-14 17:38:46 +00:00
+								func (h *HeadBlock) Chunk(ref uint32) (chunks.Chunk, error) {
-												Consolidate mem index into HeadBlock

											
										
										
											2016-12-22 00:12:28 +00:00
+									if int(ref) >= len(h.descs) {
-												Misc fixes for initial Prometheus integration

											
										
										
											2016-12-14 17:38:46 +00:00
+										return nil, errNotFound
 									}
-												Consolidate mem index into HeadBlock

											
										
										
											2016-12-22 00:12:28 +00:00
+									return h.descs[int(ref)].chunk, nil
-												Misc fixes for initial Prometheus integration

											
										
										
											2016-12-14 17:38:46 +00:00
+								}
-												Add stats serialization, load querier of all blocks

											
										
										
											2016-12-15 15:14:33 +00:00
+								func (h *HeadBlock) interval() (int64, int64) {
-												Consolidate persistence and compaction

											
										
										
											2017-01-03 14:43:26 +00:00
+									return h.bstats.MinTime, h.bstats.MaxTime
-												Add stats serialization, load querier of all blocks

											
										
										
											2016-12-15 15:14:33 +00:00
+								}
-												Misc fixes for initial Prometheus integration

											
										
										
											2016-12-14 17:38:46 +00:00
+								// Stats returns statisitics about the indexed data.
-												Add stats serialization, load querier of all blocks

											
										
										
											2016-12-15 15:14:33 +00:00
+								func (h *HeadBlock) Stats() (BlockStats, error) {
-												Consolidate persistence and compaction

											
										
										
											2017-01-03 14:43:26 +00:00
+									return h.bstats, nil
-												Misc fixes for initial Prometheus integration

											
										
										
											2016-12-14 17:38:46 +00:00
+								}
 								// LabelValues returns the possible label values
 								func (h *HeadBlock) LabelValues(names ...string) (StringTuples, error) {
 									if len(names) != 1 {
 										return nil, errInvalidSize
 									}
 									var sl []string
-												Consolidate mem index into HeadBlock

											
										
										
											2016-12-22 00:12:28 +00:00
+									for s := range h.values[names[0]] {
-												Misc fixes for initial Prometheus integration

											
										
										
											2016-12-14 17:38:46 +00:00
+										sl = append(sl, s)
 									}
 									sort.Strings(sl)
-												Add postings wrapper that emits head postings in label set order

This adds a position mapper that takes series from a head block
in the order they were appended and creates a mapping representing
them in order of their label sets.

Write-repair of the postings list would cause very expensive writing.
Hence, we keep them as they are and only apply the position mapping
at the very end, after a postings list has been sufficiently reduced
through intersections etc.

											
										
										
											2017-01-05 14:13:01 +00:00
+									return &stringTuples{l: len(names), s: sl}, nil
-												Misc fixes for initial Prometheus integration

											
										
										
											2016-12-14 17:38:46 +00:00
+								}
 								// Postings returns the postings list iterator for the label pair.
-												Rename Iterator to Postings

											
										
										
											2016-12-14 20:58:29 +00:00
+								func (h *HeadBlock) Postings(name, value string) (Postings, error) {
-												Consolidate mem index into HeadBlock

											
										
										
											2016-12-22 00:12:28 +00:00
+									return h.postings.get(term{name: name, value: value}), nil
-												Misc fixes for initial Prometheus integration

											
										
										
											2016-12-14 17:38:46 +00:00
+								}
-												Add postings wrapper that emits head postings in label set order

This adds a position mapper that takes series from a head block
in the order they were appended and creates a mapping representing
them in order of their label sets.

Write-repair of the postings list would cause very expensive writing.
Hence, we keep them as they are and only apply the position mapping
at the very end, after a postings list has been sufficiently reduced
through intersections etc.

											
										
										
											2017-01-05 14:13:01 +00:00
+								// remapPostings changes the order of the postings from their ID to the ordering
 								// of the series they reference.
 								// Returned postings have no longer monotonic IDs and MUST NOT be used for regular
 								// postings set operations, i.e. intersect and merge.
 								func (h *HeadBlock) remapPostings(p Postings) Postings {
 									list, err := expandPostings(p)
 									if err != nil {
 										return errPostings{err: err}
 									}
 									slice.Sort(list, func(i, j int) bool {
 										return h.mapper.fw[list[i]] < h.mapper.fw[list[j]]
 									})
 									return newListPostings(list)
 								}
-												Misc fixes for initial Prometheus integration

											
										
										
											2016-12-14 17:38:46 +00:00
+								// Series returns the series for the given reference.
-												Modify IndexReader API to accomodate compaction

This changes the IndexReader API to expose plain labels
and chunk meta information instead of a Series interface.
Dropping of irrelevant chunks is moved into the querier.

A LabelIndices method is added to query for existing label
value indices.

											
										
										
											2016-12-31 14:35:08 +00:00
+								func (h *HeadBlock) Series(ref uint32) (labels.Labels, []ChunkMeta, error) {
-												Consolidate mem index into HeadBlock

											
										
										
											2016-12-22 00:12:28 +00:00
+									if int(ref) >= len(h.descs) {
-												Modify IndexReader API to accomodate compaction

This changes the IndexReader API to expose plain labels
and chunk meta information instead of a Series interface.
Dropping of irrelevant chunks is moved into the querier.

A LabelIndices method is added to query for existing label
value indices.

											
										
										
											2016-12-31 14:35:08 +00:00
+										return nil, nil, errNotFound
-												Misc fixes for initial Prometheus integration

											
										
										
											2016-12-14 17:38:46 +00:00
+									}
-												Consolidate mem index into HeadBlock

											
										
										
											2016-12-22 00:12:28 +00:00
+									cd := h.descs[ref]
-												Consolidate persistence and compaction

											
										
										
											2017-01-03 14:43:26 +00:00
+									meta := ChunkMeta{
-												Fix last timestamp initialization

This initializes the chunkDesc's last timestamp to the minimum
value so initial samples with a timestamp of 0 (e.g. in tests)
are not accidentally dropped.

											
										
										
											2017-01-04 13:06:40 +00:00
+										MinTime: cd.firstTimestamp,
-												Consolidate persistence and compaction

											
										
										
											2017-01-03 14:43:26 +00:00
+										MaxTime: cd.lastTimestamp,
 										Ref:     ref,
 									}
 									return cd.lset, []ChunkMeta{meta}, nil
-												Modify IndexReader API to accomodate compaction

This changes the IndexReader API to expose plain labels
and chunk meta information instead of a Series interface.
Dropping of irrelevant chunks is moved into the querier.

A LabelIndices method is added to query for existing label
value indices.

											
										
										
											2016-12-31 14:35:08 +00:00
+								}
 								func (h *HeadBlock) LabelIndices() ([][]string, error) {
 									res := [][]string{}
 									for s := range h.values {
 										res = append(res, []string{s})
-												Misc fixes for initial Prometheus integration

											
										
										
											2016-12-14 17:38:46 +00:00
+									}
-												Modify IndexReader API to accomodate compaction

This changes the IndexReader API to expose plain labels
and chunk meta information instead of a Series interface.
Dropping of irrelevant chunks is moved into the querier.

A LabelIndices method is added to query for existing label
value indices.

											
										
										
											2016-12-31 14:35:08 +00:00
+									return res, nil
-												Misc fixes for initial Prometheus integration

											
										
										
											2016-12-14 17:38:46 +00:00
+								}
-												Add new interfaces and skeleton

											
										
										
											2016-12-04 12:16:11 +00:00
+								// get retrieves the chunk with the hash and label set and creates
 								// a new one if it doesn't exist yet.
-												Remove double-reference in chunk hashmap

											
										
										
											2016-12-31 09:19:02 +00:00
+								func (h *HeadBlock) get(hash uint64, lset labels.Labels) *chunkDesc {
 									cds := h.hashes[hash]
-												Consolidate mem index into HeadBlock

											
										
										
											2016-12-22 00:12:28 +00:00
-												Remove double-reference in chunk hashmap

											
										
										
											2016-12-31 09:19:02 +00:00
+									for _, cd := range cds {
 										if cd.lset.Equals(lset) {
 											return cd
-												Add new interfaces and skeleton

											
										
										
											2016-12-04 12:16:11 +00:00
+										}
 									}
-												Remove double-reference in chunk hashmap

											
										
										
											2016-12-31 09:19:02 +00:00
+									return nil
-												Add initial WAL writing

											
										
										
											2016-12-22 11:05:24 +00:00
+								}
 								func (h *HeadBlock) create(hash uint64, lset labels.Labels) *chunkDesc {
-												Add new interfaces and skeleton

											
										
										
											2016-12-04 12:16:11 +00:00
+									cd := &chunkDesc{
-												Fix last timestamp initialization

This initializes the chunkDesc's last timestamp to the minimum
value so initial samples with a timestamp of 0 (e.g. in tests)
are not accidentally dropped.

											
										
										
											2017-01-04 13:06:40 +00:00
+										lset:          lset,
 										chunk:         chunks.NewXORChunk(),
 										lastTimestamp: math.MinInt64,
-												Append to chunks cannot error

											
										
										
											2016-12-31 09:10:27 +00:00
+									}
-												Fix last timestamp initialization

This initializes the chunkDesc's last timestamp to the minimum
value so initial samples with a timestamp of 0 (e.g. in tests)
are not accidentally dropped.

											
										
										
											2017-01-04 13:06:40 +00:00
-												Switch to sequential block names

This changes block directory names from the int64 timestamp
to sequential numbering.

											
										
										
											2017-01-06 08:26:39 +00:00
+									var err error
-												Append to chunks cannot error

											
										
										
											2016-12-31 09:10:27 +00:00
+									cd.app, err = cd.chunk.Appender()
 									if err != nil {
 										// Getting an Appender for a new chunk must not panic.
 										panic(err)
-												Add new interfaces and skeleton

											
										
										
											2016-12-04 12:16:11 +00:00
+									}
-												Consolidate mem index into HeadBlock

											
										
										
											2016-12-22 00:12:28 +00:00
+									// Index the new chunk.
-												Remove double-reference in chunk hashmap

											
										
										
											2016-12-31 09:19:02 +00:00
+									cd.ref = uint32(len(h.descs))
-												Consolidate mem index into HeadBlock

											
										
										
											2016-12-22 00:12:28 +00:00
 									h.descs = append(h.descs, cd)
-												Remove double-reference in chunk hashmap

											
										
										
											2016-12-31 09:19:02 +00:00
+									h.hashes[hash] = append(h.hashes[hash], cd)
-												Consolidate mem index into HeadBlock

											
										
										
											2016-12-22 00:12:28 +00:00
 									for _, l := range lset {
 										valset, ok := h.values[l.Name]
 										if !ok {
 											valset = stringset{}
 											h.values[l.Name] = valset
 										}
 										valset.set(l.Value)
-												Consolidate persistence and compaction

											
										
										
											2017-01-03 14:43:26 +00:00
 										h.postings.add(cd.ref, term{name: l.Name, value: l.Value})
-												Consolidate mem index into HeadBlock

											
										
										
											2016-12-22 00:12:28 +00:00
+									}
-												Consolidate persistence and compaction

											
										
										
											2017-01-03 14:43:26 +00:00
 									h.postings.add(cd.ref, term{})
-												Add new interfaces and skeleton

											
										
										
											2016-12-04 12:16:11 +00:00
-												Add stats serialization, load querier of all blocks

											
										
										
											2016-12-15 15:14:33 +00:00
+									// For the head block there's exactly one chunk per series.
-												Consolidate persistence and compaction

											
										
										
											2017-01-03 14:43:26 +00:00
+									h.bstats.ChunkCount++
 									h.bstats.SeriesCount++
-												Add stats serialization, load querier of all blocks

											
										
										
											2016-12-15 15:14:33 +00:00
-												misc

											
										
										
											2016-12-09 09:00:14 +00:00
+									return cd
-												Add new interfaces and skeleton

											
										
										
											2016-12-04 12:16:11 +00:00
+								}
-												Modify IndexReader API to accommodate compaction

This changes the IndexReader API to expose plain labels
and chunk meta information instead of a Series interface.
Dropping of irrelevant chunks is moved into the querier.

A LabelIndices method is added to query for existing label
value indices.

											
										
										
											2016-12-31 14:35:08 +00:00
var (
	// ErrOutOfOrderSample is returned if an appended sample has a
	// timestamp smaller than the most recent sample of its series.
	ErrOutOfOrderSample = errors.New("out of order sample")

	// ErrAmendSample is returned if an appended sample has the same timestamp
	// as the most recent sample but a different value.
	ErrAmendSample = errors.New("amending sample")
)
-												Remove double-reference in chunk hashmap

											
										
										
											2016-12-31 09:19:02 +00:00
+								func (h *HeadBlock) appendBatch(samples []hashedSample) error {
-												Add initial WAL writing

											
										
										
											2016-12-22 11:05:24 +00:00
+									// Find head chunks for all samples and allocate new IDs/refs for
 									// ones we haven't seen before.
 									var (
-												Fix uniqueness of new series

											
										
										
											2016-12-26 15:55:32 +00:00
+										newSeries    []labels.Labels
 										newHashes    []uint64
 										uniqueHashes = map[uint64]uint32{}
-												Add initial WAL writing

											
										
										
											2016-12-22 11:05:24 +00:00
+									)
-												Consolidate mem index into HeadBlock

											
										
										
											2016-12-22 00:12:28 +00:00
-												Fix erroneous value assignments

											
										
										
											2016-12-22 19:57:00 +00:00
+									for i := range samples {
 										s := &samples[i]
-												Remove double-reference in chunk hashmap

											
										
										
											2016-12-31 09:19:02 +00:00
+										cd := h.get(s.hash, s.labels)
-												Add initial WAL writing

											
										
										
											2016-12-22 11:05:24 +00:00
+										if cd != nil {
-												Modify IndexReader API to accomodate compaction

This changes the IndexReader API to expose plain labels
and chunk meta information instead of a Series interface.
Dropping of irrelevant chunks is moved into the querier.

A LabelIndices method is added to query for existing label
value indices.

											
										
										
											2016-12-31 14:35:08 +00:00
+											// Samples must only occur in order.
 											if s.t < cd.lastTimestamp {
 												return ErrOutOfOrderSample
 											}
 											if cd.lastTimestamp == s.t && cd.lastValue != s.v {
 												return ErrAmendSample
 											}
-												Add initial WAL writing

											
										
										
											2016-12-22 11:05:24 +00:00
+											// TODO(fabxc): sample refs are only scoped within a block for
 											// now and we ignore any previously set value
-												Remove double-reference in chunk hashmap

											
										
										
											2016-12-31 09:19:02 +00:00
+											s.ref = cd.ref
-												Add initial WAL writing

											
										
										
											2016-12-22 11:05:24 +00:00
+											continue
 										}
-												Fix uniqueness of new series

											
										
										
											2016-12-26 15:55:32 +00:00
 										// There may be several samples for a new series in a batch.
 										// We don't want to reserve a new space for each.
 										if ref, ok := uniqueHashes[s.hash]; ok {
 											s.ref = ref
 											continue
 										}
-												Add initial WAL writing

											
										
										
											2016-12-22 11:05:24 +00:00
+										s.ref = uint32(len(h.descs) + len(newSeries))
-												Fix uniqueness of new series

											
										
										
											2016-12-26 15:55:32 +00:00
+										uniqueHashes[s.hash] = s.ref
-												Fix erroneous value assignments

											
										
										
											2016-12-22 19:57:00 +00:00
-												Add initial WAL writing

											
										
										
											2016-12-22 11:05:24 +00:00
+										newSeries = append(newSeries, s.labels)
 										newHashes = append(newHashes, s.hash)
 									}
-												Consolidate mem index into HeadBlock

											
										
										
											2016-12-22 00:12:28 +00:00
-												Add initial WAL writing

											
										
										
											2016-12-22 11:05:24 +00:00
+									// Write all new series and samples to the WAL and add it to the
 									// in-mem database on success.
 									if err := h.wal.Log(newSeries, samples); err != nil {
-												Bucket samples before appending.

This pre-sorts samples into buckets before appending them to reduce
locking of shards.

											
										
										
											2016-12-07 16:10:49 +00:00
+										return err
 									}
-												Add initial seriailization of block data

											
										
										
											2016-12-08 16:43:10 +00:00
-												Remove double-reference in chunk hashmap

											
										
										
											2016-12-31 09:19:02 +00:00
+									// After the samples were successfully written to the WAL, there may
 									// be no further failures.
-												Add initial WAL writing

											
										
										
											2016-12-22 11:05:24 +00:00
+									for i, s := range newSeries {
 										h.create(newHashes[i], s)
 									}
-												Add postings wrapper that emits head postings in label set order

This adds a position mapper that takes series from a head block
in the order they were appended and creates a mapping representing
them in order of their label sets.

Write-repair of the postings list would cause very expensive writing.
Hence, we keep them as they are and only apply the postition mapping
at the very end, after a postings list has been sufficienctly reduced
through intersections etc.

											
										
										
											2017-01-05 14:13:01 +00:00
+									// TODO(fabxc): just mark as dirty instead and trigger a remapping
 									// periodically and upon querying.
 									if len(newSeries) > 0 {
 										h.rewriteMapping()
 									}
-												cleanup and switching removal of unsafe calls.

											
										
										
											2016-12-10 17:08:50 +00:00
-												Add initial WAL writing

											
										
										
											2016-12-22 11:05:24 +00:00
+									for _, s := range samples {
-												Modify IndexReader API to accomodate compaction

This changes the IndexReader API to expose plain labels
and chunk meta information instead of a Series interface.
Dropping of irrelevant chunks is moved into the querier.

A LabelIndices method is added to query for existing label
value indices.

											
										
										
											2016-12-31 14:35:08 +00:00
+										cd := h.descs[s.ref]
 										// Skip duplicate samples.
 										if cd.lastTimestamp == s.t && cd.lastValue != s.v {
 											continue
 										}
 										cd.append(s.t, s.v)
-												Add initial WAL writing

											
										
										
											2016-12-22 11:05:24 +00:00
-												Consolidate persistence and compaction

											
										
										
											2017-01-03 14:43:26 +00:00
+										if s.t > h.bstats.MaxTime {
 											h.bstats.MaxTime = s.t
-												Add initial WAL writing

											
										
										
											2016-12-22 11:05:24 +00:00
+										}
-												Switch to sequential block names

This changes block directory names from the int64 timestamp
to sequential numbering.

											
										
										
											2017-01-06 08:26:39 +00:00
+										if s.t < h.bstats.MinTime {
 											h.bstats.MinTime = s.t
 										}
-												Consolidate persistence and compaction

											
										
										
											2017-01-03 14:43:26 +00:00
+										h.bstats.SampleCount++
-												misc

											
										
										
											2016-12-09 09:00:14 +00:00
+									}
-												Add stats serialization, load querier of all blocks

											
										
										
											2016-12-15 15:14:33 +00:00
-												Append to chunks cannot error

											
										
										
											2016-12-31 09:10:27 +00:00
+									return nil
-												Add initial seriailization of block data

											
										
										
											2016-12-08 16:43:10 +00:00
+								}
-												Add postings wrapper that emits head postings in label set order

This adds a position mapper that takes series from a head block
in the order they were appended and creates a mapping representing
them in order of their label sets.

Write-repair of the postings list would cause very expensive writing.
Hence, we keep them as they are and only apply the postition mapping
at the very end, after a postings list has been sufficienctly reduced
through intersections etc.

											
										
										
											2017-01-05 14:13:01 +00:00
 								// rewriteMapping rebuilds h.mapper from the series currently held in
 								// h.descs. The sort by label set operates on a copy of h.descs.
 								func (h *HeadBlock) rewriteMapping() {
 									sorted := make([]*chunkDesc, len(h.descs))
 									copy(sorted, h.descs)

 									byLabels := func(a, b int) bool {
 										return labels.Compare(sorted[a].lset, sorted[b].lset) < 0
 									}
 									h.mapper = newPositionMapper(slice.SortInterface(sorted, byLabels))
 								}
 								// positionMapper stores a position mapping from unsorted to
 								// sorted indices of a sortable collection.
 								type positionMapper struct {
 									sortable sort.Interface
 									// iv[i] is the original index of the element at sorted position i.
 									iv []int
 									// fw[k] is the sorted position of the element originally at index k.
 									fw []int
 								}

 								// newPositionMapper sorts s and returns a mapper that records where
 								// each element of s ended up.
 								func newPositionMapper(s sort.Interface) *positionMapper {
 									n := s.Len()
 									m := &positionMapper{
 										sortable: s,
 										iv:       make([]int, n),
 										fw:       make([]int, n),
 									}
 									// Start from the identity permutation. Swap keeps iv in step with
 									// the collection while it is being sorted.
 									for pos := 0; pos < n; pos++ {
 										m.iv[pos] = pos
 									}
 									sort.Sort(m)

 									// Invert iv to obtain the original -> sorted direction.
 									for sortedPos, origPos := range m.iv {
 										m.fw[origPos] = sortedPos
 									}
 									return m
 								}

 								// Len returns the length of the wrapped collection.
 								func (m *positionMapper) Len() int {
 									return m.sortable.Len()
 								}

 								// Less reports the ordering of the wrapped collection.
 								func (m *positionMapper) Less(i, j int) bool {
 									return m.sortable.Less(i, j)
 								}

 								// Swap swaps the elements of the wrapped collection and mirrors the
 								// swap in iv so that the original indices travel with their elements.
 								func (m *positionMapper) Swap(i, j int) {
 									m.sortable.Swap(i, j)
 									m.iv[i], m.iv[j] = m.iv[j], m.iv[i]
 								}