mirror of
https://github.com/prometheus/prometheus
synced 2025-01-28 10:23:18 +00:00
bdc53098fc
CPUs have to serialise write access to a single cache line, effectively reducing the level of possible parallelism. Placing mutexes on different cache lines avoids this problem. Most gains will be seen on NUMA servers, where CPU interconnect traffic is especially expensive.
Before: go test . -run none -bench BenchmarkFingerprintLocker
BenchmarkFingerprintLockerParallel-4 2000000 932 ns/op
BenchmarkFingerprintLockerSerial-4 30000000 49.6 ns/op
After: go test . -run none -bench BenchmarkFingerprintLocker
BenchmarkFingerprintLockerParallel-4 3000000 569 ns/op
BenchmarkFingerprintLockerSerial-4 30000000 51.0 ns/op
80 lines
2.8 KiB
Go
80 lines
2.8 KiB
Go
// Copyright 2016 The Prometheus Authors
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package local
|
|
|
|
import (
|
|
"sync"
|
|
"unsafe"
|
|
|
|
"github.com/prometheus/common/model"
|
|
)
|
|
|
|
// cacheLineSize is the assumed size, in bytes, of one CPU cache line. It is
// the total size that a padded mutex is stretched to.
const cacheLineSize = 64
|
|
|
|
// Avoid false sharing when using array of mutexes.
|
|
type paddedMutex struct {
|
|
sync.Mutex
|
|
pad [cacheLineSize - unsafe.Sizeof(sync.Mutex{})]byte
|
|
}
|
|
|
|
// fingerprintLocker allows locking individual fingerprints. To limit the number
|
|
// of mutexes needed for that, only a fixed number of mutexes are
|
|
// allocated. Fingerprints to be locked are assigned to those pre-allocated
|
|
// mutexes by their value. Collisions are not detected. If two fingerprints get
|
|
// assigned to the same mutex, only one of them can be locked at the same
|
|
// time. As long as the number of pre-allocated mutexes is much larger than the
|
|
// number of goroutines requiring a fingerprint lock concurrently, the loss in
|
|
// efficiency is small. However, a goroutine must never lock more than one
|
|
// fingerprint at the same time. (In that case a collision would try to acquire
|
|
// the same mutex twice).
|
|
type fingerprintLocker struct {
|
|
fpMtxs []paddedMutex
|
|
numFpMtxs uint
|
|
}
|
|
|
|
// newFingerprintLocker returns a new fingerprintLocker ready for use. At least
|
|
// 1024 preallocated mutexes are used, even if preallocatedMutexes is lower.
|
|
func newFingerprintLocker(preallocatedMutexes int) *fingerprintLocker {
|
|
if preallocatedMutexes < 1024 {
|
|
preallocatedMutexes = 1024
|
|
}
|
|
return &fingerprintLocker{
|
|
make([]paddedMutex, preallocatedMutexes),
|
|
uint(preallocatedMutexes),
|
|
}
|
|
}
|
|
|
|
// Lock locks the given fingerprint.
|
|
func (l *fingerprintLocker) Lock(fp model.Fingerprint) {
|
|
l.fpMtxs[hashFP(fp)%l.numFpMtxs].Lock()
|
|
}
|
|
|
|
// Unlock unlocks the given fingerprint.
|
|
func (l *fingerprintLocker) Unlock(fp model.Fingerprint) {
|
|
l.fpMtxs[hashFP(fp)%l.numFpMtxs].Unlock()
|
|
}
|
|
|
|
// hashFP simply moves entropy from the most significant 48 bits of the
|
|
// fingerprint into the least significant 16 bits (by XORing) so that a simple
|
|
// MOD on the result can be used to pick a mutex while still making use of
|
|
// changes in more significant bits of the fingerprint. (The fast fingerprinting
|
|
// function we use is prone to only change a few bits for similar metrics. We
|
|
// really want to make use of every change in the fingerprint to vary mutex
|
|
// selection.)
|
|
func hashFP(fp model.Fingerprint) uint {
|
|
return uint(fp ^ (fp >> 32) ^ (fp >> 16))
|
|
}
|