os/bluestore: release wal_cleaning extents in order

We need to order the freelist updates so that they match the
commit order of the actual transactions.  Otherwise we might, say,
set a key here, delete it in _txc_update_fm, but commit the two
updates in the wrong order and end up with the key surviving.

Signed-off-by: Sage Weil <sage@redhat.com>
This commit is contained in:
Sage Weil 2016-03-25 11:03:28 -04:00
parent 5fe8e941dc
commit 8b45b0d7d7

View File

@ -3920,9 +3920,41 @@ void BlueStore::_kv_sync_thread()
dout(30) << __func__ << " committing txc " << kv_committing << dendl;
dout(30) << __func__ << " wal_cleaning txc " << wal_cleaning << dendl;
// one transaction to force a sync
alloc->commit_start();
// flush/barrier on block device
bdev->flush();
if (!g_conf->bluestore_sync_transaction &&
!g_conf->bluestore_sync_submit_transaction) {
for (std::deque<TransContext *>::iterator it = kv_committing.begin();
it != kv_committing.end();
++it) {
_txc_update_fm((*it));
db->submit_transaction((*it)->t);
}
}
// one final transaction to force a sync
KeyValueDB::Transaction t = db->get_transaction();
vector<bluestore_extent_t> bluefs_gift_extents;
if (bluefs) {
int r = _balance_bluefs_freespace(&bluefs_gift_extents, t);
assert(r >= 0);
if (r > 0) {
for (auto& p : bluefs_gift_extents) {
fm->allocate(p.offset, p.length, t);
bluefs_extents.insert(p.offset, p.length);
}
bufferlist bl;
::encode(bluefs_extents, bl);
dout(10) << __func__ << " bluefs_extents now " << bluefs_extents
<< dendl;
t->set(PREFIX_SUPER, "bluefs_extents", bl);
}
}
// allocations and deallocations
for (std::deque<TransContext *>::iterator it = wal_cleaning.begin();
it != wal_cleaning.end();
@ -3945,37 +3977,6 @@ void BlueStore::_kv_sync_thread()
}
}
vector<bluestore_extent_t> bluefs_gift_extents;
if (bluefs) {
int r = _balance_bluefs_freespace(&bluefs_gift_extents, t);
assert(r >= 0);
if (r > 0) {
for (auto& p : bluefs_gift_extents) {
fm->allocate(p.offset, p.length, t);
bluefs_extents.insert(p.offset, p.length);
}
bufferlist bl;
::encode(bluefs_extents, bl);
dout(10) << __func__ << " bluefs_extents now " << bluefs_extents
<< dendl;
t->set(PREFIX_SUPER, "bluefs_extents", bl);
}
}
alloc->commit_start();
// flush/barrier on block device
bdev->flush();
if (!g_conf->bluestore_sync_transaction && !g_conf->bluestore_sync_submit_transaction) {
for (std::deque<TransContext *>::iterator it = kv_committing.begin();
it != kv_committing.end();
++it) {
_txc_update_fm((*it));
db->submit_transaction((*it)->t);
}
}
// cleanup sync wal keys
for (std::deque<TransContext *>::iterator it = wal_cleaning.begin();
it != wal_cleaning.end();
@ -3997,6 +3998,7 @@ void BlueStore::_kv_sync_thread()
t->rmkey(PREFIX_WAL, key);
}
db->submit_transaction_sync(t);
utime_t finish = ceph_clock_now(NULL);
utime_t dur = finish - start;
dout(20) << __func__ << " committed " << kv_committing.size()