diff --git a/tsdb/head_append.go b/tsdb/head_append.go index 785e99db0..be53a4f3f 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -751,6 +751,12 @@ func (a *headAppender) Commit() (err error) { // No errors logging to WAL, so pass the exemplars along to the in memory storage. for _, e := range a.exemplars { s := a.head.series.getByID(chunks.HeadSeriesRef(e.ref)) + if s == nil { + // This is very unlikely to happen, but we have seen it in the wild. + // It means that the series was truncated between AppendExemplar and Commit. + // See TestHeadCompactionWhileAppendAndCommitExemplar. + continue + } // We don't instrument exemplar appends here, all is instrumented by storage. if err := a.head.exemplars.AddExemplar(s.lset, e.exemplar); err != nil { if err == storage.ErrOutOfOrderExemplar { diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 253f92d61..f2325039a 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -5514,3 +5514,31 @@ func TestWALSampleAndExemplarOrder(t *testing.T) { }) } } + +// TestHeadCompactionWhileAppendAndCommitExemplar simulates a use case where +// a series is removed from the head while an exemplar is being appended to it. +// This can happen in theory by compacting the head at the right time due to +// a series being idle. +// The test cheats a little bit by not appending a sample with the exemplar. +// If you also add a sample and run Truncate in a concurrent goroutine and run +// the test around a million(!) times, you can get +// `unknown HeadSeriesRef when trying to add exemplar: 1` error on push. +// It is likely that running the test for much longer and with more time variations +// would trigger the +// `signal SIGSEGV: segmentation violation code=0x1 addr=0x20 pc=0xbb03d1` +// panic, that we have seen in the wild once. 
+func TestHeadCompactionWhileAppendAndCommitExemplar(t *testing.T) {
+	h, _ := newTestHead(t, DefaultBlockDuration, wlog.CompressionNone, false)
+	defer func() { require.NoError(t, h.Close()) }() // Close the head even if an assertion below aborts the test.
+	app := h.Appender(context.Background())
+	lbls := labels.FromStrings("foo", "bar")
+	ref, err := app.Append(0, lbls, 1, 1)
+	require.NoError(t, err)
+	require.NoError(t, app.Commit())
+	// Not adding a sample here to trigger the fault.
+	app = h.Appender(context.Background())
+	_, err = app.AppendExemplar(ref, lbls, exemplar.Exemplar{Value: 1, Ts: 20})
+	require.NoError(t, err)
+	require.NoError(t, h.Truncate(10)) // Removes the series between AppendExemplar and Commit.
+	require.NoError(t, app.Commit())
+}