TSDB: Use a dedicated head chunk reference type (#9501)

* Use dedicated Ref type Throughout the code base, there are reference types masked as regular integers. Let's use dedicated types. They are equivalent, but clearer semantically. This also makes it trivial to find where they are used, and from uses, find the centralized docs. Signed-off-by: Dieter Plaetinck <dieter@grafana.com> * postpone some work until after possible return Signed-off-by: Dieter Plaetinck <dieter@grafana.com> * clarify Signed-off-by: Dieter Plaetinck <dieter@grafana.com> * rename feedback Signed-off-by: Dieter Plaetinck <dieter@grafana.com> * skip header is up to caller Signed-off-by: Dieter Plaetinck <dieter@grafana.com>
2021-10-13 14:14:32 +02:00 · 2021-10-13 14:14:32 +02:00 · d5afe0a577
parent 054fc4ba19
commit d5afe0a577
4 changed files with 47 additions and 41 deletions
--- a/tsdb/chunks/head_chunks.go
+++ b/tsdb/chunks/head_chunks.go
@ -70,6 +70,22 @@ const (
 	DefaultWriteBufferSize = 4 * 1024 * 1024 // 4 MiB.
 )

+// ChunkDiskMapperRef represents the location of a head chunk on disk.
+// The upper 4 bytes hold the index of the head chunk file and
+// the lower 4 bytes hold the byte offset in the head chunk file where the chunk starts.
+type ChunkDiskMapperRef uint64
+
+func newChunkDiskMapperRef(seq, offset uint64) ChunkDiskMapperRef {
+	return ChunkDiskMapperRef((seq << 32) | offset)
+}
+
+func (ref ChunkDiskMapperRef) Unpack() (sgmIndex, chkStart int) {
+	sgmIndex = int(ref >> 32)
+	chkStart = int((ref << 32) >> 32)
+	return sgmIndex, chkStart
+
+}
+
 // CorruptionErr is an error that's returned when corruption is encountered.
 type CorruptionErr struct {
 	Dir       string
@ -272,7 +288,7 @@ func repairLastChunkFile(files map[int]string) (_ map[int]string, returnErr erro

 // WriteChunk writes the chunk to the disk.
 // The returned chunk ref is the reference from where the chunk encoding starts for the chunk.
-func (cdm *ChunkDiskMapper) WriteChunk(seriesRef uint64, mint, maxt int64, chk chunkenc.Chunk) (chkRef uint64, err error) {
+func (cdm *ChunkDiskMapper) WriteChunk(seriesRef uint64, mint, maxt int64, chk chunkenc.Chunk) (chkRef ChunkDiskMapperRef, err error) {
 	cdm.writePathMtx.Lock()
 	defer cdm.writePathMtx.Unlock()

@ -297,9 +313,7 @@ func (cdm *ChunkDiskMapper) WriteChunk(seriesRef uint64, mint, maxt int64, chk c
 	cdm.crc32.Reset()
 	bytesWritten := 0

-	// The upper 4 bytes are for the head chunk file index and
-	// the lower 4 bytes are for the head chunk file offset where to start reading this chunk.
-	chkRef = chunkRef(uint64(cdm.curFileSequence), uint64(cdm.curFileSize()))
+	chkRef = newChunkDiskMapperRef(uint64(cdm.curFileSequence), uint64(cdm.curFileSize()))

 	binary.BigEndian.PutUint64(cdm.byteBuf[bytesWritten:], seriesRef)
 	bytesWritten += SeriesRefSize
@ -339,10 +353,6 @@ func (cdm *ChunkDiskMapper) WriteChunk(seriesRef uint64, mint, maxt int64, chk c
 	return chkRef, nil
 }

-func chunkRef(seq, offset uint64) (chunkRef uint64) {
-	return (seq << 32) | offset
-}
-
 // shouldCutNewFile decides the cutting of a new file based on time and size retention.
 // Size retention: because depending on the system architecture, there is a limit on how big of a file we can m-map.
 // Time retention: so that we can delete old chunks with some time guarantee in low load environments.
@ -456,28 +466,22 @@ func (cdm *ChunkDiskMapper) flushBuffer() error {
 }

 // Chunk returns a chunk from a given reference.
-func (cdm *ChunkDiskMapper) Chunk(ref uint64) (chunkenc.Chunk, error) {
+func (cdm *ChunkDiskMapper) Chunk(ref ChunkDiskMapperRef) (chunkenc.Chunk, error) {
 	cdm.readPathMtx.RLock()
 	// We hold this read lock for the entire duration because if the Close()
 	// is called, the data in the byte slice will get corrupted as the mmapped
 	// file will be closed.
 	defer cdm.readPathMtx.RUnlock()

-	var (
-		// Get the upper 4 bytes.
-		// These contain the head chunk file index.
-		sgmIndex = int(ref >> 32)
-		// Get the lower 4 bytes.
-		// These contain the head chunk file offset where the chunk starts.
-		// We skip the series ref and the mint/maxt beforehand.
-		chkStart = int((ref<<32)>>32) + SeriesRefSize + (2 * MintMaxtSize)
-		chkCRC32 = newCRC32()
-	)
-
 	if cdm.closed {
 		return nil, ErrChunkDiskMapperClosed
 	}

+	sgmIndex, chkStart := ref.Unpack()
+	// We skip the series ref and the mint/maxt beforehand.
+	chkStart += SeriesRefSize + (2 * MintMaxtSize)
+	chkCRC32 := newCRC32()
+
 	// If it is the current open file, then the chunks can be in the buffer too.
 	if sgmIndex == cdm.curFileSequence {
 		chunk := cdm.chunkBuffer.get(ref)
@ -578,7 +582,7 @@ func (cdm *ChunkDiskMapper) Chunk(ref uint64) (chunkenc.Chunk, error) {
 // and runs the provided function on each chunk. It returns on the first error encountered.
 // NOTE: This method needs to be called at least once after creating ChunkDiskMapper
 // to set the maxt of all the file.
-func (cdm *ChunkDiskMapper) IterateAllChunks(f func(seriesRef, chunkRef uint64, mint, maxt int64, numSamples uint16) error) (err error) {
+func (cdm *ChunkDiskMapper) IterateAllChunks(f func(seriesRef uint64, chunkRef ChunkDiskMapperRef, mint, maxt int64, numSamples uint16) error) (err error) {
 	cdm.writePathMtx.Lock()
 	defer cdm.writePathMtx.Unlock()

@ -623,7 +627,7 @@ func (cdm *ChunkDiskMapper) IterateAllChunks(f func(seriesRef, chunkRef uint64,
 				}
 			}
 			chkCRC32.Reset()
-			chunkRef := chunkRef(uint64(segID), uint64(idx))
+			chunkRef := newChunkDiskMapperRef(uint64(segID), uint64(idx))

 			startIdx := idx
 			seriesRef := binary.BigEndian.Uint64(mmapFile.byteSlice.Range(idx, idx+SeriesRefSize))
@ -826,19 +830,19 @@ const inBufferShards = 128 // 128 is a randomly chosen number.

 // chunkBuffer is a thread safe buffer for chunks.
 type chunkBuffer struct {
-	inBufferChunks     [inBufferShards]map[uint64]chunkenc.Chunk
+	inBufferChunks     [inBufferShards]map[ChunkDiskMapperRef]chunkenc.Chunk
 	inBufferChunksMtxs [inBufferShards]sync.RWMutex
 }

 func newChunkBuffer() *chunkBuffer {
 	cb := &chunkBuffer{}
 	for i := 0; i < inBufferShards; i++ {
-		cb.inBufferChunks[i] = make(map[uint64]chunkenc.Chunk)
+		cb.inBufferChunks[i] = make(map[ChunkDiskMapperRef]chunkenc.Chunk)
 	}
 	return cb
 }

-func (cb *chunkBuffer) put(ref uint64, chk chunkenc.Chunk) {
+func (cb *chunkBuffer) put(ref ChunkDiskMapperRef, chk chunkenc.Chunk) {
 	shardIdx := ref % inBufferShards

 	cb.inBufferChunksMtxs[shardIdx].Lock()
@ -846,7 +850,7 @@ func (cb *chunkBuffer) put(ref uint64, chk chunkenc.Chunk) {
 	cb.inBufferChunksMtxs[shardIdx].Unlock()
 }

-func (cb *chunkBuffer) get(ref uint64) chunkenc.Chunk {
+func (cb *chunkBuffer) get(ref ChunkDiskMapperRef) chunkenc.Chunk {
 	shardIdx := ref % inBufferShards

 	cb.inBufferChunksMtxs[shardIdx].RLock()
@ -858,7 +862,7 @@ func (cb *chunkBuffer) get(ref uint64) chunkenc.Chunk {
 func (cb *chunkBuffer) clear() {
 	for i := 0; i < inBufferShards; i++ {
 		cb.inBufferChunksMtxs[i].Lock()
-		cb.inBufferChunks[i] = make(map[uint64]chunkenc.Chunk)
+		cb.inBufferChunks[i] = make(map[ChunkDiskMapperRef]chunkenc.Chunk)
 		cb.inBufferChunksMtxs[i].Unlock()
 	}
 }
--- a/tsdb/chunks/head_chunks_test.go
+++ b/tsdb/chunks/head_chunks_test.go
@ -38,7 +38,8 @@ func TestChunkDiskMapper_WriteChunk_Chunk_IterateChunks(t *testing.T) {
 	chkCRC32 := newCRC32()

 	type expectedDataType struct {
-		seriesRef, chunkRef uint64
+		seriesRef  uint64
+		chunkRef   ChunkDiskMapperRef
 		mint, maxt int64
 		numSamples uint16
 		chunk      chunkenc.Chunk
@ -69,7 +70,7 @@ func TestChunkDiskMapper_WriteChunk_Chunk_IterateChunks(t *testing.T) {

 				// Calculating expected bytes written on disk for first file.
 				firstFileName = hrw.curFile.Name()
-				require.Equal(t, chunkRef(1, nextChunkOffset), chkRef)
+				require.Equal(t, newChunkDiskMapperRef(1, nextChunkOffset), chkRef)

 				bytesWritten := 0
 				chkCRC32.Reset()
@ -132,7 +133,7 @@ func TestChunkDiskMapper_WriteChunk_Chunk_IterateChunks(t *testing.T) {
 	require.NoError(t, err)

 	idx := 0
-	require.NoError(t, hrw.IterateAllChunks(func(seriesRef, chunkRef uint64, mint, maxt int64, numSamples uint16) error {
+	require.NoError(t, hrw.IterateAllChunks(func(seriesRef uint64, chunkRef ChunkDiskMapperRef, mint, maxt int64, numSamples uint16) error {
 		t.Helper()

 		expData := expectedData[idx]
@ -220,7 +221,7 @@ func TestChunkDiskMapper_Truncate(t *testing.T) {
 	require.NoError(t, err)

 	require.False(t, hrw.fileMaxtSet)
-	require.NoError(t, hrw.IterateAllChunks(func(_, _ uint64, _, _ int64, _ uint16) error { return nil }))
+	require.NoError(t, hrw.IterateAllChunks(func(_ uint64, _ ChunkDiskMapperRef, _, _ int64, _ uint16) error { return nil }))
 	require.True(t, hrw.fileMaxtSet)

 	verifyFiles([]int{3, 4, 5, 6, 7, 8})
@ -334,7 +335,7 @@ func TestHeadReadWriter_TruncateAfterFailedIterateChunks(t *testing.T) {
 	require.NoError(t, err)

 	// Forcefully failing IterateAllChunks.
-	require.Error(t, hrw.IterateAllChunks(func(_, _ uint64, _, _ int64, _ uint16) error {
+	require.Error(t, hrw.IterateAllChunks(func(_ uint64, _ ChunkDiskMapperRef, _, _ int64, _ uint16) error {
 		return errors.New("random error")
 	}))

@ -390,7 +391,7 @@ func TestHeadReadWriter_ReadRepairOnEmptyLastFile(t *testing.T) {
 	hrw, err = NewChunkDiskMapper(dir, chunkenc.NewPool(), DefaultWriteBufferSize)
 	require.NoError(t, err)
 	require.False(t, hrw.fileMaxtSet)
-	require.NoError(t, hrw.IterateAllChunks(func(_, _ uint64, _, _ int64, _ uint16) error { return nil }))
+	require.NoError(t, hrw.IterateAllChunks(func(_ uint64, _ ChunkDiskMapperRef, _, _ int64, _ uint16) error { return nil }))
 	require.True(t, hrw.fileMaxtSet)

 	// Removed from memory.
@ -421,7 +422,7 @@ func testChunkDiskMapper(t *testing.T) *ChunkDiskMapper {
 	hrw, err := NewChunkDiskMapper(tmpdir, chunkenc.NewPool(), DefaultWriteBufferSize)
 	require.NoError(t, err)
 	require.False(t, hrw.fileMaxtSet)
-	require.NoError(t, hrw.IterateAllChunks(func(_, _ uint64, _, _ int64, _ uint16) error { return nil }))
+	require.NoError(t, hrw.IterateAllChunks(func(_ uint64, _ ChunkDiskMapperRef, _, _ int64, _ uint16) error { return nil }))
 	require.True(t, hrw.fileMaxtSet)
 	return hrw
 }
@ -437,7 +438,7 @@ func randomChunk(t *testing.T) chunkenc.Chunk {
 	return chunk
 }

-func createChunk(t *testing.T, idx int, hrw *ChunkDiskMapper) (seriesRef uint64, chunkRef uint64, mint, maxt int64, chunk chunkenc.Chunk) {
+func createChunk(t *testing.T, idx int, hrw *ChunkDiskMapper) (seriesRef uint64, chunkRef ChunkDiskMapperRef, mint, maxt int64, chunk chunkenc.Chunk) {
 	var err error
 	seriesRef = uint64(rand.Int63())
 	mint = int64((idx)*1000 + 1)
--- a/tsdb/head.go
+++ b/tsdb/head.go
@ -605,7 +605,7 @@ func (h *Head) Init(minValidTime int64) error {

 func (h *Head) loadMmappedChunks(refSeries map[uint64]*memSeries) (map[uint64][]*mmappedChunk, error) {
 	mmappedChunks := map[uint64][]*mmappedChunk{}
-	if err := h.chunkDiskMapper.IterateAllChunks(func(seriesRef, chunkRef uint64, mint, maxt int64, numSamples uint16) error {
+	if err := h.chunkDiskMapper.IterateAllChunks(func(seriesRef uint64, chunkRef chunks.ChunkDiskMapperRef, mint, maxt int64, numSamples uint16) error {
 		if maxt < h.minValidTime.Load() {
 			return nil
 		}
@ -1563,8 +1563,9 @@ func overlapsClosedInterval(mint1, maxt1, mint2, maxt2 int64) bool {
 	return mint1 <= maxt2 && mint2 <= maxt1
 }

+// mappedChunks describes chunk data on disk that can be mmapped
 type mmappedChunk struct {
-	ref              uint64
+	ref              chunks.ChunkDiskMapperRef
 	numSamples       uint16
 	minTime, maxTime int64
 }
--- a/tsdb/head_test.go
+++ b/tsdb/head_test.go
@ -63,7 +63,7 @@ func newTestHead(t testing.TB, chunkRange int64, compressWAL bool) (*Head, *wal.
 	h, err := NewHead(nil, nil, wlog, opts, nil)
 	require.NoError(t, err)

-	require.NoError(t, h.chunkDiskMapper.IterateAllChunks(func(_, _ uint64, _, _ int64, _ uint16) error { return nil }))
+	require.NoError(t, h.chunkDiskMapper.IterateAllChunks(func(_ uint64, _ chunks.ChunkDiskMapperRef, _, _ int64, _ uint16) error { return nil }))

 	t.Cleanup(func() {
 		require.NoError(t, os.RemoveAll(dir))