TSDB: Use a dedicated head chunk reference type (#9501)

* Use dedicated Ref type

Throughout the code base, reference types are masked as plain
integers. Let's use dedicated types instead: they are equivalent,
but semantically clearer. This also makes it trivial to find where
they are used and, from those uses, to locate the centralized docs.

Signed-off-by: Dieter Plaetinck <dieter@grafana.com>
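
For illustration, a minimal stand-alone sketch of the pattern (the ExampleRef name and the lookup helper are hypothetical; the real type added in this change is ChunkDiskMapperRef): a dedicated type has the same representation as the integer it wraps, but it cannot be mixed up with unrelated integers without an explicit conversion, and every use site is trivially greppable.

package main

import "fmt"

// ExampleRef is a hypothetical dedicated reference type. It is stored
// exactly like a uint64, but the compiler keeps it distinct from other
// integers unless an explicit conversion is written.
type ExampleRef uint64

// lookup only accepts ExampleRef, not arbitrary uint64 values.
func lookup(ref ExampleRef) string {
	return fmt.Sprintf("resolving ref %d", uint64(ref))
}

func main() {
	var raw uint64 = 42

	// lookup(raw) would fail to compile:
	//   cannot use raw (variable of type uint64) as type ExampleRef
	fmt.Println(lookup(ExampleRef(raw)))
}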

* postpone some work until after possible return

Signed-off-by: Dieter Plaetinck <dieter@grafana.com>

* clarify

Signed-off-by: Dieter Plaetinck <dieter@grafana.com>

* rename feedback

Signed-off-by: Dieter Plaetinck <dieter@grafana.com>

* skip header is up to caller

Signed-off-by: Dieter Plaetinck <dieter@grafana.com>
Dieter Plaetinck 2021-10-13 14:14:32 +02:00 committed by GitHub
parent 054fc4ba19
commit d5afe0a577
4 changed files with 47 additions and 41 deletions

View File

@@ -70,6 +70,22 @@ const (
 	DefaultWriteBufferSize = 4 * 1024 * 1024 // 4 MiB.
 )
 
+// ChunkDiskMapperRef represents the location of a head chunk on disk.
+// The upper 4 bytes hold the index of the head chunk file and
+// the lower 4 bytes hold the byte offset in the head chunk file where the chunk starts.
+type ChunkDiskMapperRef uint64
+
+func newChunkDiskMapperRef(seq, offset uint64) ChunkDiskMapperRef {
+	return ChunkDiskMapperRef((seq << 32) | offset)
+}
+
+func (ref ChunkDiskMapperRef) Unpack() (sgmIndex, chkStart int) {
+	sgmIndex = int(ref >> 32)
+	chkStart = int((ref << 32) >> 32)
+	return sgmIndex, chkStart
+}
+
 // CorruptionErr is an error that's returned when corruption is encountered.
 type CorruptionErr struct {
 	Dir       string
@@ -272,7 +288,7 @@ func repairLastChunkFile(files map[int]string) (_ map[int]string, returnErr erro
 
 // WriteChunk writes the chunk to the disk.
 // The returned chunk ref is the reference from where the chunk encoding starts for the chunk.
-func (cdm *ChunkDiskMapper) WriteChunk(seriesRef uint64, mint, maxt int64, chk chunkenc.Chunk) (chkRef uint64, err error) {
+func (cdm *ChunkDiskMapper) WriteChunk(seriesRef uint64, mint, maxt int64, chk chunkenc.Chunk) (chkRef ChunkDiskMapperRef, err error) {
 	cdm.writePathMtx.Lock()
 	defer cdm.writePathMtx.Unlock()
@@ -297,9 +313,7 @@ func (cdm *ChunkDiskMapper) WriteChunk(seriesRef uint64, mint, maxt int64, chk c
 	cdm.crc32.Reset()
 	bytesWritten := 0
 
-	// The upper 4 bytes are for the head chunk file index and
-	// the lower 4 bytes are for the head chunk file offset where to start reading this chunk.
-	chkRef = chunkRef(uint64(cdm.curFileSequence), uint64(cdm.curFileSize()))
+	chkRef = newChunkDiskMapperRef(uint64(cdm.curFileSequence), uint64(cdm.curFileSize()))
 
 	binary.BigEndian.PutUint64(cdm.byteBuf[bytesWritten:], seriesRef)
 	bytesWritten += SeriesRefSize
@@ -339,10 +353,6 @@ func (cdm *ChunkDiskMapper) WriteChunk(seriesRef uint64, mint, maxt int64, chk c
 	return chkRef, nil
 }
 
-func chunkRef(seq, offset uint64) (chunkRef uint64) {
-	return (seq << 32) | offset
-}
-
 // shouldCutNewFile decides the cutting of a new file based on time and size retention.
 // Size retention: because depending on the system architecture, there is a limit on how big of a file we can m-map.
 // Time retention: so that we can delete old chunks with some time guarantee in low load environments.
@@ -456,28 +466,22 @@ func (cdm *ChunkDiskMapper) flushBuffer() error {
 }
 
 // Chunk returns a chunk from a given reference.
-func (cdm *ChunkDiskMapper) Chunk(ref uint64) (chunkenc.Chunk, error) {
+func (cdm *ChunkDiskMapper) Chunk(ref ChunkDiskMapperRef) (chunkenc.Chunk, error) {
 	cdm.readPathMtx.RLock()
 	// We hold this read lock for the entire duration because if the Close()
 	// is called, the data in the byte slice will get corrupted as the mmapped
 	// file will be closed.
 	defer cdm.readPathMtx.RUnlock()
 
-	var (
-		// Get the upper 4 bytes.
-		// These contain the head chunk file index.
-		sgmIndex = int(ref >> 32)
-		// Get the lower 4 bytes.
-		// These contain the head chunk file offset where the chunk starts.
-		// We skip the series ref and the mint/maxt beforehand.
-		chkStart = int((ref<<32)>>32) + SeriesRefSize + (2 * MintMaxtSize)
-		chkCRC32 = newCRC32()
-	)
-
 	if cdm.closed {
 		return nil, ErrChunkDiskMapperClosed
 	}
 
+	sgmIndex, chkStart := ref.Unpack()
+	// We skip the series ref and the mint/maxt beforehand.
+	chkStart += SeriesRefSize + (2 * MintMaxtSize)
+	chkCRC32 := newCRC32()
+
 	// If it is the current open file, then the chunks can be in the buffer too.
 	if sgmIndex == cdm.curFileSequence {
 		chunk := cdm.chunkBuffer.get(ref)
@@ -578,7 +582,7 @@ func (cdm *ChunkDiskMapper) Chunk(ref uint64) (chunkenc.Chunk, error) {
 // and runs the provided function on each chunk. It returns on the first error encountered.
 // NOTE: This method needs to be called at least once after creating ChunkDiskMapper
 // to set the maxt of all the file.
-func (cdm *ChunkDiskMapper) IterateAllChunks(f func(seriesRef, chunkRef uint64, mint, maxt int64, numSamples uint16) error) (err error) {
+func (cdm *ChunkDiskMapper) IterateAllChunks(f func(seriesRef uint64, chunkRef ChunkDiskMapperRef, mint, maxt int64, numSamples uint16) error) (err error) {
 	cdm.writePathMtx.Lock()
 	defer cdm.writePathMtx.Unlock()
@@ -623,7 +627,7 @@ func (cdm *ChunkDiskMapper) IterateAllChunks(f func(seriesRef, chunkRef uint64,
 			}
 		}
 		chkCRC32.Reset()
-		chunkRef := chunkRef(uint64(segID), uint64(idx))
+		chunkRef := newChunkDiskMapperRef(uint64(segID), uint64(idx))
 
 		startIdx := idx
 		seriesRef := binary.BigEndian.Uint64(mmapFile.byteSlice.Range(idx, idx+SeriesRefSize))
@@ -826,19 +830,19 @@ const inBufferShards = 128 // 128 is a randomly chosen number.
 
 // chunkBuffer is a thread safe buffer for chunks.
 type chunkBuffer struct {
-	inBufferChunks     [inBufferShards]map[uint64]chunkenc.Chunk
+	inBufferChunks     [inBufferShards]map[ChunkDiskMapperRef]chunkenc.Chunk
 	inBufferChunksMtxs [inBufferShards]sync.RWMutex
 }
 
 func newChunkBuffer() *chunkBuffer {
 	cb := &chunkBuffer{}
 	for i := 0; i < inBufferShards; i++ {
-		cb.inBufferChunks[i] = make(map[uint64]chunkenc.Chunk)
+		cb.inBufferChunks[i] = make(map[ChunkDiskMapperRef]chunkenc.Chunk)
 	}
 	return cb
 }
 
-func (cb *chunkBuffer) put(ref uint64, chk chunkenc.Chunk) {
+func (cb *chunkBuffer) put(ref ChunkDiskMapperRef, chk chunkenc.Chunk) {
 	shardIdx := ref % inBufferShards
 
 	cb.inBufferChunksMtxs[shardIdx].Lock()
@@ -846,7 +850,7 @@ func (cb *chunkBuffer) put(ref uint64, chk chunkenc.Chunk) {
 	cb.inBufferChunksMtxs[shardIdx].Unlock()
 }
 
-func (cb *chunkBuffer) get(ref uint64) chunkenc.Chunk {
+func (cb *chunkBuffer) get(ref ChunkDiskMapperRef) chunkenc.Chunk {
 	shardIdx := ref % inBufferShards
 
 	cb.inBufferChunksMtxs[shardIdx].RLock()
@@ -858,7 +862,7 @@ func (cb *chunkBuffer) get(ref uint64) chunkenc.Chunk {
 func (cb *chunkBuffer) clear() {
 	for i := 0; i < inBufferShards; i++ {
 		cb.inBufferChunksMtxs[i].Lock()
-		cb.inBufferChunks[i] = make(map[uint64]chunkenc.Chunk)
+		cb.inBufferChunks[i] = make(map[ChunkDiskMapperRef]chunkenc.Chunk)
 		cb.inBufferChunksMtxs[i].Unlock()
 	}
 }
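
As a sanity check on the 32/32-bit packing introduced above, here is a small stand-alone round-trip sketch. The type and helpers are re-declared outside the chunks package purely for illustration, and the example values are arbitrary:

package main

import "fmt"

// Re-declared here only for illustration; the real definitions live in
// the chunks package as shown in the diff above.
type ChunkDiskMapperRef uint64

func newChunkDiskMapperRef(seq, offset uint64) ChunkDiskMapperRef {
	return ChunkDiskMapperRef((seq << 32) | offset)
}

func (ref ChunkDiskMapperRef) Unpack() (sgmIndex, chkStart int) {
	sgmIndex = int(ref >> 32)
	chkStart = int((ref << 32) >> 32)
	return sgmIndex, chkStart
}

func main() {
	// Head chunk file 3, record starting at byte offset 65536 in that file.
	ref := newChunkDiskMapperRef(3, 65536)
	fmt.Println(uint64(ref)) // 12884967424, i.e. 3<<32 | 65536

	seq, start := ref.Unpack()
	fmt.Println(seq, start) // 3 65536
}

Note that WriteChunk takes the ref at the current file size before writing anything, so the ref points at the start of the on-disk record (series ref, mint, maxt, then the chunk data); that is why the read path in Chunk() adds SeriesRefSize + 2*MintMaxtSize before reading the chunk encoding.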

View File

@@ -38,10 +38,11 @@ func TestChunkDiskMapper_WriteChunk_Chunk_IterateChunks(t *testing.T) {
 	chkCRC32 := newCRC32()
 
 	type expectedDataType struct {
-		seriesRef, chunkRef uint64
-		mint, maxt          int64
-		numSamples          uint16
-		chunk               chunkenc.Chunk
+		seriesRef  uint64
+		chunkRef   ChunkDiskMapperRef
+		mint, maxt int64
+		numSamples uint16
+		chunk      chunkenc.Chunk
 	}
 
 	expectedData := []expectedDataType{}
@@ -69,7 +70,7 @@ func TestChunkDiskMapper_WriteChunk_Chunk_IterateChunks(t *testing.T) {
 
 			// Calculating expected bytes written on disk for first file.
 			firstFileName = hrw.curFile.Name()
-			require.Equal(t, chunkRef(1, nextChunkOffset), chkRef)
+			require.Equal(t, newChunkDiskMapperRef(1, nextChunkOffset), chkRef)
 
 			bytesWritten := 0
 			chkCRC32.Reset()
@@ -132,7 +133,7 @@ func TestChunkDiskMapper_WriteChunk_Chunk_IterateChunks(t *testing.T) {
 	require.NoError(t, err)
 
 	idx := 0
-	require.NoError(t, hrw.IterateAllChunks(func(seriesRef, chunkRef uint64, mint, maxt int64, numSamples uint16) error {
+	require.NoError(t, hrw.IterateAllChunks(func(seriesRef uint64, chunkRef ChunkDiskMapperRef, mint, maxt int64, numSamples uint16) error {
 		t.Helper()
 
 		expData := expectedData[idx]
@@ -220,7 +221,7 @@ func TestChunkDiskMapper_Truncate(t *testing.T) {
 		require.NoError(t, err)
 		require.False(t, hrw.fileMaxtSet)
-		require.NoError(t, hrw.IterateAllChunks(func(_, _ uint64, _, _ int64, _ uint16) error { return nil }))
+		require.NoError(t, hrw.IterateAllChunks(func(_ uint64, _ ChunkDiskMapperRef, _, _ int64, _ uint16) error { return nil }))
		require.True(t, hrw.fileMaxtSet)
 
 		verifyFiles([]int{3, 4, 5, 6, 7, 8})
@@ -334,7 +335,7 @@ func TestHeadReadWriter_TruncateAfterFailedIterateChunks(t *testing.T) {
 	require.NoError(t, err)
 
 	// Forcefully failing IterateAllChunks.
-	require.Error(t, hrw.IterateAllChunks(func(_, _ uint64, _, _ int64, _ uint16) error {
+	require.Error(t, hrw.IterateAllChunks(func(_ uint64, _ ChunkDiskMapperRef, _, _ int64, _ uint16) error {
 		return errors.New("random error")
 	}))
@@ -390,7 +391,7 @@ func TestHeadReadWriter_ReadRepairOnEmptyLastFile(t *testing.T) {
 	hrw, err = NewChunkDiskMapper(dir, chunkenc.NewPool(), DefaultWriteBufferSize)
 	require.NoError(t, err)
 	require.False(t, hrw.fileMaxtSet)
-	require.NoError(t, hrw.IterateAllChunks(func(_, _ uint64, _, _ int64, _ uint16) error { return nil }))
+	require.NoError(t, hrw.IterateAllChunks(func(_ uint64, _ ChunkDiskMapperRef, _, _ int64, _ uint16) error { return nil }))
 	require.True(t, hrw.fileMaxtSet)
 
 	// Removed from memory.
@@ -421,7 +422,7 @@ func testChunkDiskMapper(t *testing.T) *ChunkDiskMapper {
 	hrw, err := NewChunkDiskMapper(tmpdir, chunkenc.NewPool(), DefaultWriteBufferSize)
 	require.NoError(t, err)
 	require.False(t, hrw.fileMaxtSet)
-	require.NoError(t, hrw.IterateAllChunks(func(_, _ uint64, _, _ int64, _ uint16) error { return nil }))
+	require.NoError(t, hrw.IterateAllChunks(func(_ uint64, _ ChunkDiskMapperRef, _, _ int64, _ uint16) error { return nil }))
 	require.True(t, hrw.fileMaxtSet)
 	return hrw
 }
@@ -437,7 +438,7 @@ func randomChunk(t *testing.T) chunkenc.Chunk {
 	return chunk
 }
 
-func createChunk(t *testing.T, idx int, hrw *ChunkDiskMapper) (seriesRef uint64, chunkRef uint64, mint, maxt int64, chunk chunkenc.Chunk) {
+func createChunk(t *testing.T, idx int, hrw *ChunkDiskMapper) (seriesRef uint64, chunkRef ChunkDiskMapperRef, mint, maxt int64, chunk chunkenc.Chunk) {
 	var err error
 	seriesRef = uint64(rand.Int63())
 	mint = int64((idx)*1000 + 1)

View File

@@ -605,7 +605,7 @@ func (h *Head) Init(minValidTime int64) error {
 
 func (h *Head) loadMmappedChunks(refSeries map[uint64]*memSeries) (map[uint64][]*mmappedChunk, error) {
 	mmappedChunks := map[uint64][]*mmappedChunk{}
-	if err := h.chunkDiskMapper.IterateAllChunks(func(seriesRef, chunkRef uint64, mint, maxt int64, numSamples uint16) error {
+	if err := h.chunkDiskMapper.IterateAllChunks(func(seriesRef uint64, chunkRef chunks.ChunkDiskMapperRef, mint, maxt int64, numSamples uint16) error {
 		if maxt < h.minValidTime.Load() {
 			return nil
 		}
@@ -1563,8 +1563,9 @@ func overlapsClosedInterval(mint1, maxt1, mint2, maxt2 int64) bool {
 	return mint1 <= maxt2 && mint2 <= maxt1
 }
 
+// mmappedChunk describes chunk data on disk that can be mmapped.
 type mmappedChunk struct {
-	ref              uint64
+	ref              chunks.ChunkDiskMapperRef
 	numSamples       uint16
 	minTime, maxTime int64
 }
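
From a caller's point of view, the updated IterateAllChunks signature can be exercised as in the sketch below. The constructor call mirrors the one in the tests above; the directory path and the counting logic are illustrative assumptions rather than code from this change:

package main

import (
	"fmt"
	"log"

	"github.com/prometheus/prometheus/tsdb/chunkenc"
	"github.com/prometheus/prometheus/tsdb/chunks"
)

func main() {
	// Open a head-chunks directory the same way the tests above do.
	cdm, err := chunks.NewChunkDiskMapper("data/chunks_head", chunkenc.NewPool(), chunks.DefaultWriteBufferSize)
	if err != nil {
		log.Fatal(err)
	}
	defer cdm.Close()

	// IterateAllChunks must run once after construction (see the NOTE in the
	// diff); its callback now receives a typed chunks.ChunkDiskMapperRef
	// instead of a bare uint64.
	count := 0
	err = cdm.IterateAllChunks(func(seriesRef uint64, chunkRef chunks.ChunkDiskMapperRef, mint, maxt int64, numSamples uint16) error {
		count++
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("m-mapped head chunks on disk:", count)
}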

View File

@@ -63,7 +63,7 @@ func newTestHead(t testing.TB, chunkRange int64, compressWAL bool) (*Head, *wal.
 	h, err := NewHead(nil, nil, wlog, opts, nil)
 	require.NoError(t, err)
 
-	require.NoError(t, h.chunkDiskMapper.IterateAllChunks(func(_, _ uint64, _, _ int64, _ uint16) error { return nil }))
+	require.NoError(t, h.chunkDiskMapper.IterateAllChunks(func(_ uint64, _ chunks.ChunkDiskMapperRef, _, _ int64, _ uint16) error { return nil }))
 
 	t.Cleanup(func() {
 		require.NoError(t, os.RemoveAll(dir))