Fix RWLock memory storage deadlock.

This fixes https://github.com/prometheus/prometheus/issues/390

The cause of the deadlock was a locking semantic of Go's sync.RWMutex
that wasn't obvious to me when I introduced this bug:

http://golang.org/pkg/sync/#RWMutex.Lock

Key phrase: "To ensure that the lock eventually becomes available, a
blocked Lock call excludes new readers from acquiring the lock."

In the memory series storage, we have one function
(GetFingerprintsForLabelMatchers) acquiring an RLock(), which calls
another function also acquiring the same RLock()
(GetLabelValuesForLabelName). That normally doesn't deadlock, unless a
Lock() call from another goroutine happens right in between the two
RLock() calls, blocking both the Lock() and the second RLock() call from
ever completing.

  Goroutine 1          Goroutine 2
  ======================================
  RLock()
  ...                  Lock() [DEADLOCK]
  RLock() [DEADLOCK]   Unlock()
  RUnlock()
  RUnlock()
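
For illustration, here is a minimal, self-contained sketch (not part of
this commit; all names are made up) that reproduces the same
interleaving:

  package main

  import (
      "sync"
      "time"
  )

  var mu sync.RWMutex

  func inner() {
      mu.RLock() // blocks forever: a writer is queued, so new readers are excluded
      defer mu.RUnlock()
  }

  func outer() {
      mu.RLock()
      defer mu.RUnlock()
      time.Sleep(10 * time.Millisecond) // widen the window for the writer to queue up
      inner()                           // second RLock() on the same goroutine
  }

  func main() {
      go outer()
      time.Sleep(5 * time.Millisecond) // let outer() take its first RLock()
      mu.Lock()                        // blocks until all readers are gone
      mu.Unlock()
  }

Run with "go run", this doesn't hang silently but panics with "fatal
error: all goroutines are asleep - deadlock!", because every goroutine
ends up blocked on a sync primitive.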

Testing deadlocks is tricky, but the regression test I added reliably
detects the deadlock in the original code on my machine within its
normal concurrent reader/writer run duration of 250ms.

Change-Id: Ib34c2bb8df1a80af44550cc2bf5007055cdef413
Author: Julius Volz
Date:   2014-04-17 12:28:50 +02:00
Parent: 01f652cb4c
Commit: 1b29975865
2 changed files with 68 additions and 1 deletion


@@ -367,7 +367,7 @@ func (s *memorySeriesStorage) GetFingerprintsForLabelMatchers(labelMatchers metr
 			}
 			sets = append(sets, set)
 		default:
-			values, err := s.GetLabelValuesForLabelName(matcher.Name)
+			values, err := s.getLabelValuesForLabelName(matcher.Name)
 			if err != nil {
 				return nil, err
 			}
@@ -414,6 +414,10 @@ func (s *memorySeriesStorage) GetLabelValuesForLabelName(labelName clientmodel.L
 	s.RLock()
 	defer s.RUnlock()
+	return s.getLabelValuesForLabelName(labelName)
+}
+
+func (s *memorySeriesStorage) getLabelValuesForLabelName(labelName clientmodel.LabelName) (clientmodel.LabelValues, error) {
 	set, ok := s.labelNameToLabelValues[labelName]
 	if !ok {
 		return nil, nil
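
The shape of the fix is a common Go convention: the exported method
owns the lock, and an unexported variant assumes the caller already
holds it, so internal callers never re-acquire the same lock. A minimal
sketch of that convention on a hypothetical type (names are
illustrative, not from the Prometheus codebase):

  package store

  import "sync"

  type store struct {
      sync.RWMutex
      valuesByName map[string][]string
  }

  // Values is the public entry point; it manages the lock itself.
  func (s *store) Values(name string) []string {
      s.RLock()
      defer s.RUnlock()
      return s.valuesLocked(name)
  }

  // valuesLocked must only be called with s.RLock() (or s.Lock()) held.
  func (s *store) valuesLocked(name string) []string {
      return s.valuesByName[name]
  }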


@@ -16,6 +16,7 @@ package tiered
 import (
 	"fmt"
 	"runtime"
+	"sync"
 	"testing"
 	"time"
@@ -154,3 +155,65 @@ func TestDroppedSeriesIndexRegression(t *testing.T) {
 		t.Fatalf("Got %d fingerprints, expected 1", len(fps))
 	}
 }
+
+func TestReaderWriterDeadlockRegression(t *testing.T) {
+	mp := runtime.GOMAXPROCS(2)
+	defer func(mp int) {
+		runtime.GOMAXPROCS(mp)
+	}(mp)
+
+	s := NewMemorySeriesStorage(MemorySeriesOptions{})
+
+	lms := metric.LabelMatchers{}
+	for i := 0; i < 100; i++ {
+		lm, err := metric.NewLabelMatcher(metric.NotEqual, clientmodel.MetricNameLabel, "testmetric")
+		if err != nil {
+			t.Fatal(err)
+		}
+		lms = append(lms, lm)
+	}
+
+	wg := sync.WaitGroup{}
+	wg.Add(2)
+
+	start := time.Now()
+	runDuration := 250 * time.Millisecond
+
+	writer := func() {
+		for time.Since(start) < runDuration {
+			s.AppendSamples(clientmodel.Samples{
+				&clientmodel.Sample{
+					Metric: clientmodel.Metric{
+						clientmodel.MetricNameLabel: "testmetric",
+					},
+					Value:     1,
+					Timestamp: 0,
+				},
+			})
+		}
+		wg.Done()
+	}
+
+	reader := func() {
+		for time.Since(start) < runDuration {
+			s.GetFingerprintsForLabelMatchers(lms)
+		}
+		wg.Done()
+	}
+
+	go reader()
+	go writer()
+
+	allDone := make(chan struct{})
+	go func() {
+		wg.Wait()
+		allDone <- struct{}{}
+	}()
+
+	select {
+	case <-allDone:
+		break
+	case <-time.NewTimer(5 * time.Second).C:
+		t.Fatalf("Deadlock timeout")
+	}
+}
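
The timeout-guarded wg.Wait() above is a reusable way to turn a hang
into a test failure. A hypothetical standalone helper (not part of this
commit; assumes the usual "sync" and "time" imports):

  // waitTimeout reports whether wg finished before the timeout elapsed.
  func waitTimeout(wg *sync.WaitGroup, d time.Duration) bool {
      done := make(chan struct{})
      go func() {
          wg.Wait()
          close(done)
      }()
      select {
      case <-done:
          return true
      case <-time.After(d):
          return false
      }
  }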