mirror of
https://github.com/prometheus/prometheus
synced 2024-12-23 23:13:11 +00:00
Fix RWLock memory storage deadlock.
This fixes https://github.com/prometheus/prometheus/issues/390 The cause for the deadlock was a lock semantic in Go that wasn't obvious to me when introducing this bug: http://golang.org/pkg/sync/#RWMutex.Lock Key phrase: "To ensure that the lock eventually becomes available, a blocked Lock call excludes new readers from acquiring the lock." In the memory series storage, we have one function (GetFingerprintsForLabelMatchers) acquiring an RLock(), which calls another function also acquiring the same RLock() (GetLabelValuesForLabelName). That normally doesn't deadlock, unless a Lock() call from another goroutine happens right in between the two RLock() calls, blocking both the Lock() and the second RLock() call from ever completing. GoRoutine 1 GoRoutine 2 ====================================== RLock() ... Lock() [DEADLOCK] RLock() [DEADLOCK] Unlock() RUnlock() RUnlock() Testing deadlocks is tricky, but the regression test I added does reliably detect the deadlock in the original code on my machine within a normal concurrent reader/writer run duration of 250ms. Change-Id: Ib34c2bb8df1a80af44550cc2bf5007055cdef413
This commit is contained in:
parent
01f652cb4c
commit
1b29975865
@ -367,7 +367,7 @@ func (s *memorySeriesStorage) GetFingerprintsForLabelMatchers(labelMatchers metr
|
||||
}
|
||||
sets = append(sets, set)
|
||||
default:
|
||||
values, err := s.GetLabelValuesForLabelName(matcher.Name)
|
||||
values, err := s.getLabelValuesForLabelName(matcher.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -414,6 +414,10 @@ func (s *memorySeriesStorage) GetLabelValuesForLabelName(labelName clientmodel.L
|
||||
s.RLock()
|
||||
defer s.RUnlock()
|
||||
|
||||
return s.getLabelValuesForLabelName(labelName)
|
||||
}
|
||||
|
||||
func (s *memorySeriesStorage) getLabelValuesForLabelName(labelName clientmodel.LabelName) (clientmodel.LabelValues, error) {
|
||||
set, ok := s.labelNameToLabelValues[labelName]
|
||||
if !ok {
|
||||
return nil, nil
|
||||
|
@ -16,6 +16,7 @@ package tiered
|
||||
import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@ -154,3 +155,65 @@ func TestDroppedSeriesIndexRegression(t *testing.T) {
|
||||
t.Fatalf("Got %d fingerprints, expected 1", len(fps))
|
||||
}
|
||||
}
|
||||
|
||||
func TestReaderWriterDeadlockRegression(t *testing.T) {
|
||||
mp := runtime.GOMAXPROCS(2)
|
||||
defer func(mp int) {
|
||||
runtime.GOMAXPROCS(mp)
|
||||
}(mp)
|
||||
|
||||
s := NewMemorySeriesStorage(MemorySeriesOptions{})
|
||||
lms := metric.LabelMatchers{}
|
||||
|
||||
for i := 0; i < 100; i++ {
|
||||
lm, err := metric.NewLabelMatcher(metric.NotEqual, clientmodel.MetricNameLabel, "testmetric")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
lms = append(lms, lm)
|
||||
}
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(2)
|
||||
|
||||
start := time.Now()
|
||||
runDuration := 250 * time.Millisecond
|
||||
|
||||
writer := func() {
|
||||
for time.Since(start) < runDuration {
|
||||
s.AppendSamples(clientmodel.Samples{
|
||||
&clientmodel.Sample{
|
||||
Metric: clientmodel.Metric{
|
||||
clientmodel.MetricNameLabel: "testmetric",
|
||||
},
|
||||
Value: 1,
|
||||
Timestamp: 0,
|
||||
},
|
||||
})
|
||||
}
|
||||
wg.Done()
|
||||
}
|
||||
|
||||
reader := func() {
|
||||
for time.Since(start) < runDuration {
|
||||
s.GetFingerprintsForLabelMatchers(lms)
|
||||
}
|
||||
wg.Done()
|
||||
}
|
||||
|
||||
go reader()
|
||||
go writer()
|
||||
|
||||
allDone := make(chan struct{})
|
||||
go func() {
|
||||
wg.Wait()
|
||||
allDone <- struct{}{}
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-allDone:
|
||||
break
|
||||
case <-time.NewTimer(5 * time.Second).C:
|
||||
t.Fatalf("Deadlock timeout")
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user