Merge pull request #6337 from cstyan/rw-log-replay
Log the start and end of the WAL replay within the WAL watcher.
commit 5830e03691
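
In short: the Watcher now records the replay start both as a time.Time and as a Prometheus millisecond timestamp, logs "replaying WAL" when Run begins, and logs "done replaying WAL" with the elapsed duration the first time it sees a sample newer than that start time. Below is a minimal, self-contained sketch of that pattern; the replayer type, processSample, the timestamp arithmetic, and the go-kit logger wiring are illustrative stand-ins for the Watcher code in the diff, not part of the change itself.

package main

import (
    "os"
    "time"

    "github.com/go-kit/kit/log"
    "github.com/go-kit/kit/log/level"
)

type replayer struct {
    logger         log.Logger
    startTime      time.Time // wall-clock time the replay began
    startTimestamp int64     // the same instant as a Prometheus millisecond timestamp
    sendSamples    bool      // flips to true once the first post-start sample is seen
}

// setStartTime mirrors Watcher.SetStartTime: keep both representations in sync.
func (r *replayer) setStartTime(t time.Time) {
    r.startTime = t
    r.startTimestamp = t.Unix()*1000 + int64(t.Nanosecond())/int64(time.Millisecond)
}

// processSample mirrors the readSegment change: samples at or before the start
// timestamp are replayed silently; the first newer sample ends the replay and
// the elapsed wall-clock duration is logged exactly once.
func (r *replayer) processSample(ts int64) {
    if ts > r.startTimestamp {
        if !r.sendSamples {
            r.sendSamples = true
            level.Info(r.logger).Log("msg", "done replaying WAL", "duration", time.Since(r.startTime))
        }
        // ... forward the sample to remote write ...
    }
}

func main() {
    r := &replayer{logger: log.NewLogfmtLogger(os.Stderr)}
    r.setStartTime(time.Now())
    level.Info(r.logger).Log("msg", "replaying WAL", "queue", "example")

    r.processSample(r.startTimestamp - 1000)                                          // old sample: replayed, nothing logged
    r.processSample(time.Now().Add(time.Second).UnixNano() / int64(time.Millisecond)) // first live sample: logs "done replaying WAL"
}

The actual change lives on the Watcher itself, as the hunks below show; the sketch only isolates the start-time bookkeeping and the two log points.
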
@@ -35,6 +35,7 @@ import (
     "github.com/prometheus/common/model"
     "github.com/prometheus/prometheus/config"
     "github.com/prometheus/prometheus/pkg/labels"
+    "github.com/prometheus/prometheus/pkg/timestamp"
     "github.com/prometheus/prometheus/prompb"
     "github.com/prometheus/prometheus/tsdb/record"
     "github.com/prometheus/prometheus/util/testutil"

@@ -531,7 +532,7 @@ func BenchmarkStartup(b *testing.B) {
     m := NewQueueManager(nil, logger, dir,
         newEWMARate(ewmaWeight, shardUpdateDuration),
         config.DefaultQueueConfig, nil, nil, c, 1*time.Minute)
-    m.watcher.StartTime = math.MaxInt64
+    m.watcher.SetStartTime(timestamp.Time(math.MaxInt64))
     m.watcher.MaxSegment = segments[len(segments)-2]
     err := m.watcher.Run()
     testutil.Ok(b, err)

@@ -68,7 +68,9 @@ type Watcher struct {
     metrics       *WatcherMetrics
     readerMetrics *liveReaderMetrics

-    StartTime int64
+    startTime      time.Time
+    startTimestamp int64 // the start time as a Prometheus timestamp
+    sendSamples    bool

     recordsReadMetric       *prometheus.CounterVec
     recordDecodeFailsMetric prometheus.Counter

@@ -191,7 +193,7 @@ func (w *Watcher) loop() {

     // We may encounter failures processing the WAL; we should wait and retry.
     for !isClosed(w.quit) {
-        w.StartTime = timestamp.FromTime(time.Now())
+        w.SetStartTime(time.Now())
         if err := w.Run(); err != nil {
             level.Error(w.logger).Log("msg", "error tailing WAL", "err", err)
         }

@@ -212,6 +214,12 @@ func (w *Watcher) Run() error {
         return errors.Wrap(err, "wal.Segments")
     }

+    // We want to ensure this is false across iterations since
+    // Run will be called again if there was a failure to read the WAL.
+    w.sendSamples = false
+
+    level.Info(w.logger).Log("msg", "replaying WAL", "queue", w.name)
+
     // Backfill from the checkpoint first if it exists.
     lastCheckpoint, checkpointIndex, err := LastCheckpoint(w.walDir)
     if err != nil && err != record.ErrNotFound {

@@ -456,7 +464,6 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error {
         samples []record.RefSample
         send    []record.RefSample
     )
-
     for r.Next() && !isClosed(w.quit) {
         rec := r.Record()
         w.recordsReadMetric.WithLabelValues(recordType(dec.Type(rec))).Inc()

@@ -482,7 +489,12 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error {
                 return err
             }
             for _, s := range samples {
-                if s.T > w.StartTime {
+                if s.T > w.startTimestamp {
+                    if !w.sendSamples {
+                        w.sendSamples = true
+                        duration := time.Since(w.startTime)
+                        level.Info(w.logger).Log("msg", "done replaying WAL", "duration", duration)
+                    }
                     send = append(send, s)
                 }
             }
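
Taken together, the two new log lines bracket the replay: "replaying WAL" is emitted when Run starts reading, and "done replaying WAL" is emitted once per Run invocation, the first time a sample newer than the recorded start timestamp is reached. With the default logfmt output the pair would look roughly like the following; the timestamps, duration, and queue name are illustrative, and any extra context keys carried by the watcher's logger are omitted:

level=info ts=2019-11-26T10:00:00.000Z msg="replaying WAL" queue=foo
level=info ts=2019-11-26T10:01:23.456Z msg="done replaying WAL" duration=1m23.456s
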
@@ -505,6 +517,11 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error {
     return r.Err()
 }

+func (w *Watcher) SetStartTime(t time.Time) {
+    w.startTime = t
+    w.startTimestamp = timestamp.FromTime(t)
+}
+
 func recordType(rt record.Type) string {
     switch rt {
     case record.Invalid:
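
The new SetStartTime helper keeps the two representations in sync: startTime feeds the human-readable duration in the "done replaying WAL" line, while startTimestamp is what sample timestamps (record.RefSample.T) are compared against in readSegment. As a reference point, here is a sketch of the millisecond conversion that timestamp.FromTime performs; the helper name fromTime is illustrative, assuming the usual seconds-plus-nanoseconds formula:

// fromTime converts a time.Time into a Prometheus-style int64 timestamp,
// i.e. milliseconds since the Unix epoch, matching what record.RefSample.T carries.
func fromTime(t time.Time) int64 {
    return t.Unix()*1000 + int64(t.Nanosecond())/int64(time.Millisecond)
}
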
@@ -139,7 +139,7 @@ func TestTailSamples(t *testing.T) {

     wt := newWriteToMock()
     watcher := NewWatcher(nil, wMetrics, nil, "", wt, dir)
-    watcher.StartTime = now.UnixNano()
+    watcher.SetStartTime(now)

     // Set the Watcher's metrics so they're not nil pointers.
     watcher.setMetrics()