Add separate head mutex
Introduce a seperate mutex for the head blocks to avoid a race where a post-compaction reload may run between switching the DB's base mutex to create a new head block in an appender.
This commit is contained in:
parent
3e569bc964
commit
55a9b5428a
131
db.go
131
db.go
|
@ -87,17 +87,24 @@ const sep = '\xff'
|
||||||
// DB handles reads and writes of time series falling into
|
// DB handles reads and writes of time series falling into
|
||||||
// a hashed partition of a seriedb.
|
// a hashed partition of a seriedb.
|
||||||
type DB struct {
|
type DB struct {
|
||||||
dir string
|
dir string
|
||||||
lockf lockfile.Lockfile
|
lockf lockfile.Lockfile
|
||||||
|
|
||||||
logger log.Logger
|
logger log.Logger
|
||||||
metrics *dbMetrics
|
metrics *dbMetrics
|
||||||
opts *Options
|
opts *Options
|
||||||
|
|
||||||
|
// Mutex for that must be held when modifying the general
|
||||||
|
// block layout.
|
||||||
mtx sync.RWMutex
|
mtx sync.RWMutex
|
||||||
persisted []*persistedBlock
|
persisted []*persistedBlock
|
||||||
heads []*headBlock
|
|
||||||
seqBlocks map[int]Block
|
seqBlocks map[int]Block
|
||||||
headGen uint8
|
|
||||||
|
// Mutex that must be held when modifying just the head blocks
|
||||||
|
// or the general layout.
|
||||||
|
headmtx sync.RWMutex
|
||||||
|
heads []*headBlock
|
||||||
|
headGen uint8
|
||||||
|
|
||||||
compactor Compactor
|
compactor Compactor
|
||||||
|
|
||||||
|
@ -200,7 +207,15 @@ func (db *DB) run() {
|
||||||
case <-db.compactc:
|
case <-db.compactc:
|
||||||
db.metrics.compactionsTriggered.Inc()
|
db.metrics.compactionsTriggered.Inc()
|
||||||
|
|
||||||
if err := db.compact(); err != nil {
|
var merr MultiError
|
||||||
|
|
||||||
|
changes, err := db.compact()
|
||||||
|
merr.Add(err)
|
||||||
|
|
||||||
|
if changes {
|
||||||
|
merr.Add(db.reloadBlocks())
|
||||||
|
}
|
||||||
|
if err := merr.Err(); err != nil {
|
||||||
db.logger.Log("msg", "compaction failed", "err", err)
|
db.logger.Log("msg", "compaction failed", "err", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -210,12 +225,17 @@ func (db *DB) run() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (db *DB) compact() error {
|
func (db *DB) compact() (changes bool, err error) {
|
||||||
changes := false
|
|
||||||
// Check whether we have pending head blocks that are ready to be persisted.
|
// Check whether we have pending head blocks that are ready to be persisted.
|
||||||
// They have the highest priority.
|
// They have the highest priority.
|
||||||
db.mtx.RLock()
|
db.headmtx.RLock()
|
||||||
|
|
||||||
|
var singles []*headBlock
|
||||||
|
|
||||||
|
// Collect head blocks that are ready for compaction. Write them after
|
||||||
|
// returning the lock to not block Appenders.
|
||||||
|
// Selected blocks are semantically ensured to not be written to afterwards
|
||||||
|
// by appendable().
|
||||||
if len(db.heads) > db.opts.AppendableBlocks {
|
if len(db.heads) > db.opts.AppendableBlocks {
|
||||||
for _, h := range db.heads[:len(db.heads)-db.opts.AppendableBlocks] {
|
for _, h := range db.heads[:len(db.heads)-db.opts.AppendableBlocks] {
|
||||||
// Blocks that won't be appendable when instantiating a new appender
|
// Blocks that won't be appendable when instantiating a new appender
|
||||||
|
@ -224,36 +244,38 @@ func (db *DB) compact() error {
|
||||||
if atomic.LoadUint64(&h.activeWriters) > 0 {
|
if atomic.LoadUint64(&h.activeWriters) > 0 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
singles = append(singles, h)
|
||||||
db.logger.Log("msg", "write head", "seq", h.Meta().Sequence)
|
|
||||||
|
|
||||||
select {
|
|
||||||
case <-db.stopc:
|
|
||||||
db.mtx.RUnlock()
|
|
||||||
return nil
|
|
||||||
default:
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := db.compactor.Write(h.Dir(), h); err != nil {
|
|
||||||
db.mtx.RUnlock()
|
|
||||||
return errors.Wrap(err, "persist head block")
|
|
||||||
}
|
|
||||||
changes = true
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
db.mtx.RUnlock()
|
db.headmtx.RUnlock()
|
||||||
|
|
||||||
|
Loop:
|
||||||
|
for _, h := range singles {
|
||||||
|
db.logger.Log("msg", "write head", "seq", h.Meta().Sequence)
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-db.stopc:
|
||||||
|
break Loop
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = db.compactor.Write(h.Dir(), h); err != nil {
|
||||||
|
return changes, errors.Wrap(err, "persist head block")
|
||||||
|
}
|
||||||
|
changes = true
|
||||||
|
}
|
||||||
|
|
||||||
// Check for compactions of multiple blocks.
|
// Check for compactions of multiple blocks.
|
||||||
for {
|
for {
|
||||||
plans, err := db.compactor.Plan(db.dir)
|
plans, err := db.compactor.Plan(db.dir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return errors.Wrap(err, "plan compaction")
|
return changes, errors.Wrap(err, "plan compaction")
|
||||||
}
|
}
|
||||||
|
|
||||||
select {
|
select {
|
||||||
case <-db.stopc:
|
case <-db.stopc:
|
||||||
return nil
|
return false, nil
|
||||||
default:
|
default:
|
||||||
}
|
}
|
||||||
// We just execute compactions sequentially to not cause too extreme
|
// We just execute compactions sequentially to not cause too extreme
|
||||||
|
@ -264,7 +286,7 @@ func (db *DB) compact() error {
|
||||||
db.logger.Log("msg", "compact blocks", "seq", fmt.Sprintf("%v", p))
|
db.logger.Log("msg", "compact blocks", "seq", fmt.Sprintf("%v", p))
|
||||||
|
|
||||||
if err := db.compactor.Compact(p...); err != nil {
|
if err := db.compactor.Compact(p...); err != nil {
|
||||||
return errors.Wrapf(err, "compact %s", p)
|
return changes, errors.Wrapf(err, "compact %s", p)
|
||||||
}
|
}
|
||||||
changes = true
|
changes = true
|
||||||
}
|
}
|
||||||
|
@ -274,10 +296,7 @@ func (db *DB) compact() error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if changes {
|
return changes, nil
|
||||||
return errors.Wrap(db.reloadBlocks(), "reload blocks")
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// func (db *DB) retentionCutoff() error {
|
// func (db *DB) retentionCutoff() error {
|
||||||
|
@ -381,10 +400,11 @@ func (db *DB) Close() error {
|
||||||
close(db.stopc)
|
close(db.stopc)
|
||||||
<-db.donec
|
<-db.donec
|
||||||
|
|
||||||
var merr MultiError
|
// Lock mutex and leave it locked so we panic if there's a bug causing
|
||||||
|
// the block to be used afterwards.
|
||||||
db.mtx.Lock()
|
db.mtx.Lock()
|
||||||
defer db.mtx.Unlock()
|
|
||||||
|
var merr MultiError
|
||||||
|
|
||||||
for _, pb := range db.persisted {
|
for _, pb := range db.persisted {
|
||||||
merr.Add(pb.Close())
|
merr.Add(pb.Close())
|
||||||
|
@ -403,9 +423,14 @@ func (db *DB) Appender() Appender {
|
||||||
db.mtx.RLock()
|
db.mtx.RLock()
|
||||||
a := &dbAppender{db: db}
|
a := &dbAppender{db: db}
|
||||||
|
|
||||||
|
db.headmtx.RLock()
|
||||||
|
|
||||||
for _, b := range db.appendable() {
|
for _, b := range db.appendable() {
|
||||||
a.heads = append(a.heads, b.Appender().(*headAppender))
|
a.heads = append(a.heads, b.Appender().(*headAppender))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
db.headmtx.RUnlock()
|
||||||
|
|
||||||
return a
|
return a
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -468,15 +493,12 @@ func (a *dbAppender) AddFast(ref uint64, t int64, v float64) error {
|
||||||
func (a *dbAppender) appenderFor(t int64) (*headAppender, error) {
|
func (a *dbAppender) appenderFor(t int64) (*headAppender, error) {
|
||||||
// If there's no fitting head block for t, ensure it gets created.
|
// If there's no fitting head block for t, ensure it gets created.
|
||||||
if len(a.heads) == 0 || t >= a.heads[len(a.heads)-1].meta.MaxTime {
|
if len(a.heads) == 0 || t >= a.heads[len(a.heads)-1].meta.MaxTime {
|
||||||
a.db.mtx.RUnlock()
|
a.db.headmtx.Lock()
|
||||||
|
|
||||||
if err := a.db.ensureHead(t); err != nil {
|
if err := a.db.ensureHead(t); err != nil {
|
||||||
a.db.mtx.RLock()
|
a.db.headmtx.Unlock()
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
a.db.mtx.RLock()
|
|
||||||
|
|
||||||
if len(a.heads) == 0 {
|
if len(a.heads) == 0 {
|
||||||
for _, b := range a.db.appendable() {
|
for _, b := range a.db.appendable() {
|
||||||
a.heads = append(a.heads, b.Appender().(*headAppender))
|
a.heads = append(a.heads, b.Appender().(*headAppender))
|
||||||
|
@ -489,6 +511,8 @@ func (a *dbAppender) appenderFor(t int64) (*headAppender, error) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
a.db.headmtx.Unlock()
|
||||||
}
|
}
|
||||||
for i := len(a.heads) - 1; i >= 0; i-- {
|
for i := len(a.heads) - 1; i >= 0; i-- {
|
||||||
if h := a.heads[i]; t >= h.meta.MinTime {
|
if h := a.heads[i]; t >= h.meta.MinTime {
|
||||||
|
@ -500,8 +524,8 @@ func (a *dbAppender) appenderFor(t int64) (*headAppender, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (db *DB) ensureHead(t int64) error {
|
func (db *DB) ensureHead(t int64) error {
|
||||||
db.mtx.Lock()
|
// db.mtx.Lock()
|
||||||
defer db.mtx.Unlock()
|
// defer db.mtx.Unlock()
|
||||||
|
|
||||||
// Initial case for a new database: we must create the first
|
// Initial case for a new database: we must create the first
|
||||||
// AppendableBlocks-1 front padding heads.
|
// AppendableBlocks-1 front padding heads.
|
||||||
|
@ -557,31 +581,6 @@ func (db *DB) appendable() []*headBlock {
|
||||||
return db.heads[len(db.heads)-db.opts.AppendableBlocks:]
|
return db.heads[len(db.heads)-db.opts.AppendableBlocks:]
|
||||||
}
|
}
|
||||||
|
|
||||||
func (db *DB) compactable() []Block {
|
|
||||||
db.mtx.RLock()
|
|
||||||
defer db.mtx.RUnlock()
|
|
||||||
|
|
||||||
var blocks []Block
|
|
||||||
for _, pb := range db.persisted {
|
|
||||||
blocks = append(blocks, pb)
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(db.heads) <= db.opts.AppendableBlocks {
|
|
||||||
return blocks
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, h := range db.heads[:len(db.heads)-db.opts.AppendableBlocks] {
|
|
||||||
// Blocks that won't be appendable when instantiating a new appender
|
|
||||||
// might still have active appenders on them.
|
|
||||||
// Abort at the first one we encounter.
|
|
||||||
if atomic.LoadUint64(&h.activeWriters) > 0 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
blocks = append(blocks, h)
|
|
||||||
}
|
|
||||||
return blocks
|
|
||||||
}
|
|
||||||
|
|
||||||
func intervalOverlap(amin, amax, bmin, bmax int64) bool {
|
func intervalOverlap(amin, amax, bmin, bmax int64) bool {
|
||||||
if bmin >= amin && bmin <= amax {
|
if bmin >= amin && bmin <= amax {
|
||||||
return true
|
return true
|
||||||
|
|
4
head.go
4
head.go
|
@ -145,6 +145,10 @@ func (h *headBlock) inBounds(t int64) bool {
|
||||||
|
|
||||||
// Close syncs all data and closes underlying resources of the head block.
|
// Close syncs all data and closes underlying resources of the head block.
|
||||||
func (h *headBlock) Close() error {
|
func (h *headBlock) Close() error {
|
||||||
|
// Lock mutex and leave it locked so we panic if there's a bug causing
|
||||||
|
// the block to be used afterwards.
|
||||||
|
h.mtx.Lock()
|
||||||
|
|
||||||
if err := h.wal.Close(); err != nil {
|
if err := h.wal.Close(); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -47,7 +47,9 @@ type querier struct {
|
||||||
func (s *DB) Querier(mint, maxt int64) Querier {
|
func (s *DB) Querier(mint, maxt int64) Querier {
|
||||||
s.mtx.RLock()
|
s.mtx.RLock()
|
||||||
|
|
||||||
|
s.headmtx.RLock()
|
||||||
blocks := s.blocksForInterval(mint, maxt)
|
blocks := s.blocksForInterval(mint, maxt)
|
||||||
|
s.headmtx.RUnlock()
|
||||||
|
|
||||||
sq := &querier{
|
sq := &querier{
|
||||||
blocks: make([]Querier, 0, len(blocks)),
|
blocks: make([]Querier, 0, len(blocks)),
|
||||||
|
|
3
wal.go
3
wal.go
|
@ -265,8 +265,9 @@ func (w *WAL) Close() error {
|
||||||
close(w.stopc)
|
close(w.stopc)
|
||||||
<-w.donec
|
<-w.donec
|
||||||
|
|
||||||
|
// Lock mutex and leave it locked so we panic if there's a bug causing
|
||||||
|
// the block to be used afterwards.
|
||||||
w.mtx.Lock()
|
w.mtx.Lock()
|
||||||
defer w.mtx.Unlock()
|
|
||||||
|
|
||||||
if err := w.sync(); err != nil {
|
if err := w.sync(); err != nil {
|
||||||
return err
|
return err
|
||||||
|
|
Loading…
Reference in New Issue