Avoid having contended mutexes on same cacheline

CPUs have to serialise write access to a single cache line,
effectively reducing the achievable level of parallelism. Placing
the mutexes on different cache lines avoids this problem.

Most gains will be seen on NUMA servers, where CPU interconnect
traffic is especially expensive.

Before:
go test . -run none -bench BenchmarkFingerprintLocker
BenchmarkFingerprintLockerParallel-4   	 2000000	       932 ns/op
BenchmarkFingerprintLockerSerial-4     	30000000	        49.6 ns/op

After:
go test . -run none -bench BenchmarkFingerprintLocker
BenchmarkFingerprintLockerParallel-4   	 3000000	       569 ns/op
BenchmarkFingerprintLockerSerial-4     	30000000	        51.0 ns/op
This commit is contained in:
Maxim Ivanov 2016-09-18 08:37:55 +01:00
parent 5f5a78e807
commit bdc53098fc

View File

@ -15,10 +15,21 @@ package local
import (
"sync"
"unsafe"
"github.com/prometheus/common/model"
)
const (
	// cacheLineSize is the assumed CPU cache line size in bytes.
	// 64 is correct for contemporary x86-64 and most ARM server CPUs;
	// NOTE(review): some platforms (e.g. Apple M-series) use 128 — confirm
	// if this code targets them.
	cacheLineSize = 64
)
// paddedMutex is a sync.Mutex padded out to occupy a full cache line.
// Avoid false sharing when using an array of mutexes: without padding,
// several adjacent mutexes in a slice would share one cache line, so
// contended Lock/Unlock on distinct mutexes would still force CPUs to
// serialise writes to that line.
type paddedMutex struct {
	sync.Mutex
	// pad fills the rest of the cache line after the embedded Mutex.
	// The array length is a compile-time constant expression
	// (unsafe.Sizeof is evaluated at compile time), so this stays a
	// valid fixed-size array.
	pad [cacheLineSize - unsafe.Sizeof(sync.Mutex{})]byte
}
// fingerprintLocker allows locking individual fingerprints. To limit the number
// of mutexes needed for that, only a fixed number of mutexes are
// allocated. Fingerprints to be locked are assigned to those pre-allocated
@ -30,7 +41,7 @@ import (
// fingerprint at the same time. (In that case a collision would try to acquire
// the same mutex twice).
type fingerprintLocker struct {
fpMtxs []sync.Mutex
fpMtxs []paddedMutex
numFpMtxs uint
}
@ -41,7 +52,7 @@ func newFingerprintLocker(preallocatedMutexes int) *fingerprintLocker {
preallocatedMutexes = 1024
}
return &fingerprintLocker{
make([]sync.Mutex, preallocatedMutexes),
make([]paddedMutex, preallocatedMutexes),
uint(preallocatedMutexes),
}
}