Replay WAL concurrently without blocking (#10973)

* Replay WAL concurrently without blocking

Signed-off-by: Xiaochao Dong (@damnever) <the.xcdong@gmail.com>

* Resolve review comments

Signed-off-by: Xiaochao Dong (@damnever) <the.xcdong@gmail.com>

Signed-off-by: Xiaochao Dong (@damnever) <the.xcdong@gmail.com>
Xiaochao Dong 2022-08-17 21:53:57 +08:00 committed by GitHub
parent 3196c98bc2
commit 09187fb0cc
1 changed file with 60 additions and 78 deletions
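The change, in outline: the old replay path had to pause a worker (processors[idx].waitUntilIdle()) and take a per-subset mutex whenever a series record required resetting a series, which stalled WAL reading. After this commit the reset is sent through the same per-worker input channel as the sample batches, so all work for a given series happens on the single goroutine that owns its shard (chosen by series ref modulo the worker count) and no lock or idle-wait is needed. The standalone Go sketch below models that pattern only; inputItem, refSample, and memSeries here are simplified stand-ins, not the real tsdb types.

package main

import (
	"fmt"
	"sync"
)

// Simplified stand-ins for the real tsdb types.
type memSeries struct{ ref uint64 }

type refSample struct {
	Ref uint64
	T   int64
	V   float64
}

// inputItem mirrors the role of walSubsetProcessorInputItem: it carries either
// a batch of samples or an existing series whose m-mapped chunks must be reset.
type inputItem struct {
	samples        []refSample
	existingSeries *memSeries
}

// worker owns one shard of the series ID space; because resets and samples for
// a series arrive on the same channel, no mutex or idle-waiting is required.
func worker(input <-chan inputItem, wg *sync.WaitGroup) {
	defer wg.Done()
	for in := range input {
		if in.existingSeries != nil {
			fmt.Println("reset series", in.existingSeries.ref)
			continue
		}
		for _, s := range in.samples {
			fmt.Println("append", s.Ref, s.T, s.V)
		}
	}
}

func main() {
	input := make(chan inputItem, 4)
	var wg sync.WaitGroup
	wg.Add(1)
	go worker(input, &wg)

	input <- inputItem{samples: []refSample{{Ref: 1, T: 100, V: 1.5}}}
	input <- inputItem{existingSeries: &memSeries{ref: 1}} // duplicate series record
	input <- inputItem{samples: []refSample{{Ref: 1, T: 200, V: 2.5}}}
	close(input)
	wg.Wait()
}

Because resets and sample batches for a series travel on one ordered channel, the worker applies them in exactly the order the WAL reader emitted them.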


@@ -49,7 +49,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[chunks.HeadSeriesRef]chunks.H
 	var unknownExemplarRefs atomic.Uint64
 	var unknownMetadataRefs atomic.Uint64
 	// Track number of series records that had overlapping m-map chunks.
-	var mmapOverlappingChunks uint64
+	var mmapOverlappingChunks atomic.Uint64
 
 	// Start workers that each process samples for a partition of the series ID space.
 	var (
@@ -107,8 +107,9 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[chunks.HeadSeriesRef]chunks.H
 		processors[i].setup()
 
 		go func(wp *walSubsetProcessor) {
-			unknown := wp.processWALSamples(h)
+			unknown, overlapping := wp.processWALSamples(h, mmappedChunks)
 			unknownRefs.Add(unknown)
+			mmapOverlappingChunks.Add(overlapping)
 			wg.Done()
 		}(&processors[i])
 	}
@@ -224,56 +225,12 @@ Outer:
 				if chunks.HeadSeriesRef(h.lastSeriesID.Load()) < walSeries.Ref {
 					h.lastSeriesID.Store(uint64(walSeries.Ref))
 				}
 
+				if !created {
+					multiRef[walSeries.Ref] = mSeries.ref
+				}
+
 				idx := uint64(mSeries.ref) % uint64(n)
-				// It is possible that some old sample is being processed in processWALSamples that
-				// could cause race below. So we wait for the goroutine to empty input the buffer and finish
-				// processing all old samples after emptying the buffer.
-				processors[idx].waitUntilIdle()
-				// Lock the subset so we can modify the series object
-				processors[idx].mx.Lock()
-
-				mmc := mmappedChunks[walSeries.Ref]
-
-				if created {
-					// This is the first WAL series record for this series.
-					h.resetSeriesWithMMappedChunks(mSeries, mmc)
-					processors[idx].mx.Unlock()
-					continue
-				}
-
-				// There's already a different ref for this series.
-				// A duplicate series record is only possible when the old samples were already compacted into a block.
-				// Hence we can discard all the samples and m-mapped chunks replayed till now for this series.
-				multiRef[walSeries.Ref] = mSeries.ref
-
-				// Checking if the new m-mapped chunks overlap with the already existing ones.
-				if len(mSeries.mmappedChunks) > 0 && len(mmc) > 0 {
-					if overlapsClosedInterval(
-						mSeries.mmappedChunks[0].minTime,
-						mSeries.mmappedChunks[len(mSeries.mmappedChunks)-1].maxTime,
-						mmc[0].minTime,
-						mmc[len(mmc)-1].maxTime,
-					) {
-						mmapOverlappingChunks++
-						level.Debug(h.logger).Log(
-							"msg", "M-mapped chunks overlap on a duplicate series record",
-							"series", mSeries.lset.String(),
-							"oldref", mSeries.ref,
-							"oldmint", mSeries.mmappedChunks[0].minTime,
-							"oldmaxt", mSeries.mmappedChunks[len(mSeries.mmappedChunks)-1].maxTime,
-							"newref", walSeries.Ref,
-							"newmint", mmc[0].minTime,
-							"newmaxt", mmc[len(mmc)-1].maxTime,
-						)
-					}
-				}
-
-				// Replacing m-mapped chunks with the new ones (could be empty).
-				h.resetSeriesWithMMappedChunks(mSeries, mmc)
-				processors[idx].mx.Unlock()
+				processors[idx].input <- walSubsetProcessorInputItem{walSeriesRef: walSeries.Ref, existingSeries: mSeries}
 			}
 
 			//nolint:staticcheck // Ignore SA6002 relax staticcheck verification.
 			seriesPool.Put(v)
@@ -299,7 +256,7 @@ Outer:
 					shards[mod] = append(shards[mod], sam)
 				}
 				for i := 0; i < n; i++ {
-					processors[i].input <- shards[i]
+					processors[i].input <- walSubsetProcessorInputItem{samples: shards[i]}
 				}
 				samples = samples[m:]
 			}
@@ -370,14 +327,38 @@ Outer:
 	if unknownRefs.Load() > 0 || unknownExemplarRefs.Load() > 0 || unknownMetadataRefs.Load() > 0 {
 		level.Warn(h.logger).Log("msg", "Unknown series references", "samples", unknownRefs.Load(), "exemplars", unknownExemplarRefs.Load(), "metadata", unknownMetadataRefs.Load())
 	}
-	if mmapOverlappingChunks > 0 {
-		level.Info(h.logger).Log("msg", "Overlapping m-map chunks on duplicate series records", "count", mmapOverlappingChunks)
+	if count := mmapOverlappingChunks.Load(); count > 0 {
+		level.Info(h.logger).Log("msg", "Overlapping m-map chunks on duplicate series records", "count", count)
 	}
 	return nil
 }
 
 // resetSeriesWithMMappedChunks is only used during the WAL replay.
-func (h *Head) resetSeriesWithMMappedChunks(mSeries *memSeries, mmc []*mmappedChunk) {
+func (h *Head) resetSeriesWithMMappedChunks(mSeries *memSeries, mmc []*mmappedChunk, walSeriesRef chunks.HeadSeriesRef) (overlapped bool) {
+	if mSeries.ref != walSeriesRef {
+		// Checking if the new m-mapped chunks overlap with the already existing ones.
+		if len(mSeries.mmappedChunks) > 0 && len(mmc) > 0 {
+			if overlapsClosedInterval(
+				mSeries.mmappedChunks[0].minTime,
+				mSeries.mmappedChunks[len(mSeries.mmappedChunks)-1].maxTime,
+				mmc[0].minTime,
+				mmc[len(mmc)-1].maxTime,
+			) {
+				level.Debug(h.logger).Log(
+					"msg", "M-mapped chunks overlap on a duplicate series record",
+					"series", mSeries.lset.String(),
+					"oldref", mSeries.ref,
+					"oldmint", mSeries.mmappedChunks[0].minTime,
+					"oldmaxt", mSeries.mmappedChunks[len(mSeries.mmappedChunks)-1].maxTime,
+					"newref", walSeriesRef,
+					"newmint", mmc[0].minTime,
+					"newmaxt", mmc[len(mmc)-1].maxTime,
+				)
+				overlapped = true
+			}
+		}
+	}
+
 	h.metrics.chunksCreated.Add(float64(len(mmc)))
 	h.metrics.chunksRemoved.Add(float64(len(mSeries.mmappedChunks)))
 	h.metrics.chunks.Add(float64(len(mmc) - len(mSeries.mmappedChunks)))
@@ -394,17 +375,23 @@ func (h *Head) resetSeriesWithMMappedChunks(mSeries *memSeries, mmc []*mmappedCh
 	mSeries.nextAt = 0
 	mSeries.headChunk = nil
 	mSeries.app = nil
+	return
 }
 
 type walSubsetProcessor struct {
-	mx     sync.Mutex // Take this lock while modifying series in the subset.
-	input  chan []record.RefSample
+	input  chan walSubsetProcessorInputItem
 	output chan []record.RefSample
 }
 
+type walSubsetProcessorInputItem struct {
+	samples        []record.RefSample
+	existingSeries *memSeries
+	walSeriesRef   chunks.HeadSeriesRef
+}
+
 func (wp *walSubsetProcessor) setup() {
 	wp.output = make(chan []record.RefSample, 300)
-	wp.input = make(chan []record.RefSample, 300)
+	wp.input = make(chan walSubsetProcessorInputItem, 300)
 }
 
 func (wp *walSubsetProcessor) closeAndDrain() {
@@ -426,15 +413,22 @@ func (wp *walSubsetProcessor) reuseBuf() []record.RefSample {
 // processWALSamples adds the samples it receives to the head and passes
 // the buffer received to an output channel for reuse.
 // Samples before the minValidTime timestamp are discarded.
-func (wp *walSubsetProcessor) processWALSamples(h *Head) (unknownRefs uint64) {
+func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks map[chunks.HeadSeriesRef][]*mmappedChunk) (unknownRefs, mmapOverlappingChunks uint64) {
 	defer close(wp.output)
 
 	minValidTime := h.minValidTime.Load()
 	mint, maxt := int64(math.MaxInt64), int64(math.MinInt64)
 
-	for samples := range wp.input {
-		wp.mx.Lock()
-		for _, s := range samples {
+	for in := range wp.input {
+		if in.existingSeries != nil {
+			mmc := mmappedChunks[in.walSeriesRef]
+			if h.resetSeriesWithMMappedChunks(in.existingSeries, mmc, in.walSeriesRef) {
+				mmapOverlappingChunks++
+			}
+			continue
+		}
+
+		for _, s := range in.samples {
 			if s.T < minValidTime {
 				continue
 			}
@@ -457,26 +451,14 @@ func (wp *walSubsetProcessor) processWALSamples(h *Head) (unknownRefs uint64) {
 				mint = s.T
 			}
 		}
-		wp.mx.Unlock()
-		wp.output <- samples
+		select {
+		case wp.output <- in.samples:
+		default:
+		}
 	}
 	h.updateMinMaxTime(mint, maxt)
 
-	return unknownRefs
-}
-
-func (wp *walSubsetProcessor) waitUntilIdle() {
-	select {
-	case <-wp.output: // Allow output side to drain to avoid deadlock.
-	default:
-	}
-	wp.input <- []record.RefSample{}
-	for len(wp.input) != 0 {
-		select {
-		case <-wp.output: // Allow output side to drain to avoid deadlock.
-		case <-time.After(10 * time.Microsecond):
-		}
-	}
+	return unknownRefs, mmapOverlappingChunks
 }
 
 const (
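A side effect of routing series records through the input channel is that input and output items no longer pair up one-for-one, so the worker now returns sample buffers with a non-blocking send (the select with an empty default) instead of the previous unconditional wp.output <- samples, presumably so it never stalls when the reader is not waiting for a recycled buffer; a buffer that cannot be handed back is simply left for the GC. A minimal sketch of that idiom, using a hypothetical bufPool channel rather than the real walSubsetProcessor:

package main

import "fmt"

// recycle hands buf back to bufPool if a slot is free; otherwise it drops the
// buffer rather than block, mirroring the select/default send on wp.output.
func recycle(bufPool chan []int, buf []int) {
	select {
	case bufPool <- buf[:0]:
	default:
	}
}

func main() {
	bufPool := make(chan []int, 1)
	recycle(bufPool, make([]int, 0, 8)) // slot free: buffer kept for reuse
	recycle(bufPool, make([]int, 0, 8)) // pool full: dropped without blocking
	fmt.Println("buffers available for reuse:", len(bufPool))
}

The trade-off is an occasional extra allocation in exchange for a worker that never blocks on the buffer-recycling path.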