Merge pull request #17610 from liewegas/wip-bluestore-fm-mas

os/bluestore: set bitmap freelist resolution to min_alloc_size

Reviewed-by: xie xingguo <xie.xingguo@zte.com.cn>
Reviewed-by: Varada Kari <varada.kari@gmail.com>
This commit is contained in:
Sage Weil 2017-09-14 09:18:41 -05:00 committed by GitHub
commit 1587f87b6b
4 changed files with 51 additions and 41 deletions

View File

@ -58,9 +58,11 @@ BitmapFreelistManager::BitmapFreelistManager(CephContext* cct,
{
}
int BitmapFreelistManager::create(uint64_t new_size, KeyValueDB::Transaction txn)
int BitmapFreelistManager::create(uint64_t new_size, uint64_t min_alloc_size,
KeyValueDB::Transaction txn)
{
bytes_per_block = cct->_conf->bdev_block_size;
bytes_per_block = std::max(cct->_conf->bdev_block_size,
(int64_t)min_alloc_size);
assert(ISP2(bytes_per_block));
size = P2ALIGN(new_size, bytes_per_block);
blocks_per_key = cct->_conf->bluestore_freelist_blocks_per_key;

View File

@ -51,7 +51,8 @@ public:
static void setup_merge_operator(KeyValueDB *db, string prefix);
int create(uint64_t size, KeyValueDB::Transaction txn) override;
int create(uint64_t size, uint64_t min_alloc_size,
KeyValueDB::Transaction txn) override;
int init() override;
void shutdown() override;

View File

@ -4169,18 +4169,19 @@ int BlueStore::_open_fm(bool create)
bl.append(freelist_type);
t->set(PREFIX_SUPER, "freelist_type", bl);
}
fm->create(bdev->get_size(), t);
fm->create(bdev->get_size(), min_alloc_size, t);
// allocate superblock reserved space. note that we do not mark
// bluefs space as allocated in the freelist; we instead rely on
// bluefs_extents.
fm->allocate(0, SUPER_RESERVED, t);
uint64_t reserved = ROUND_UP_TO(MAX(SUPER_RESERVED, min_alloc_size),
min_alloc_size);
fm->allocate(0, reserved, t);
uint64_t reserved = 0;
if (cct->_conf->bluestore_bluefs) {
assert(bluefs_extents.num_intervals() == 1);
interval_set<uint64_t>::iterator p = bluefs_extents.begin();
reserved = p.get_start() + p.get_len();
reserved = ROUND_UP_TO(p.get_start() + p.get_len(), min_alloc_size);
dout(20) << __func__ << " reserved 0x" << std::hex << reserved << std::dec
<< " for bluefs" << dendl;
bufferlist bl;
@ -4188,8 +4189,6 @@ int BlueStore::_open_fm(bool create)
t->set(PREFIX_SUPER, "bluefs_extents", bl);
dout(20) << __func__ << " bluefs_extents 0x" << std::hex << bluefs_extents
<< std::dec << dendl;
} else {
reserved = SUPER_RESERVED;
}
if (cct->_conf->bluestore_debug_prefill > 0) {
@ -4555,6 +4554,13 @@ int BlueStore::_open_db(bool create)
bdev->get_size() * (cct->_conf->bluestore_bluefs_min_ratio +
cct->_conf->bluestore_bluefs_gift_ratio);
initial = MAX(initial, cct->_conf->bluestore_bluefs_min);
if (cct->_conf->bluefs_alloc_size % min_alloc_size) {
derr << __func__ << " bluefs_alloc_size 0x" << std::hex
<< cct->_conf->bluefs_alloc_size << " is not a multiple of "
<< "min_alloc_size 0x" << min_alloc_size << std::dec << dendl;
r = -EINVAL;
goto free_bluefs;
}
// align to bluefs's alloc_size
initial = P2ROUNDUP(initial, cct->_conf->bluefs_alloc_size);
// put bluefs in the middle of the device in case it is an HDD
@ -5160,6 +5166,28 @@ int BlueStore::mkfs()
if (r < 0)
goto out_close_fsid;
// choose min_alloc_size
if (cct->_conf->bluestore_min_alloc_size) {
min_alloc_size = cct->_conf->bluestore_min_alloc_size;
} else {
assert(bdev);
if (bdev->is_rotational()) {
min_alloc_size = cct->_conf->bluestore_min_alloc_size_hdd;
} else {
min_alloc_size = cct->_conf->bluestore_min_alloc_size_ssd;
}
}
// make sure min_alloc_size is power of 2 aligned.
if (!ISP2(min_alloc_size)) {
derr << __func__ << " min_alloc_size 0x"
<< std::hex << min_alloc_size << std::dec
<< " is not power of 2 aligned!"
<< dendl;
r = -EINVAL;
goto out_close_bdev;
}
r = _open_db(true);
if (r < 0)
goto out_close_bdev;
@ -5177,28 +5205,6 @@ int BlueStore::mkfs()
t->set(PREFIX_SUPER, "blobid_max", bl);
}
// choose min_alloc_size
if (cct->_conf->bluestore_min_alloc_size) {
min_alloc_size = cct->_conf->bluestore_min_alloc_size;
} else {
assert(bdev);
if (bdev->is_rotational()) {
min_alloc_size = cct->_conf->bluestore_min_alloc_size_hdd;
} else {
min_alloc_size = cct->_conf->bluestore_min_alloc_size_ssd;
}
}
// make sure min_alloc_size is power of 2 aligned.
if (!ISP2(min_alloc_size)) {
derr << __func__ << " min_alloc_size 0x"
<< std::hex << min_alloc_size << std::dec
<< " is not power of 2 aligned!"
<< dendl;
r = -EINVAL;
goto out_close_fm;
}
{
bufferlist bl;
::encode((uint64_t)min_alloc_size, bl);
@ -5454,7 +5460,7 @@ int BlueStore::_fsck_check_extents(
}
bool already = false;
apply(
e.offset, e.length, block_size, used_blocks,
e.offset, e.length, min_alloc_size, used_blocks,
[&](uint64_t pos, mempool_dynamic_bitset &bs) {
if (bs.test(pos))
already = true;
@ -5556,9 +5562,9 @@ int BlueStore::fsck(bool deep)
if (r < 0)
goto out_scan;
used_blocks.resize(bdev->get_size() / block_size);
used_blocks.resize(bdev->get_size() / min_alloc_size);
apply(
0, SUPER_RESERVED, block_size, used_blocks,
0, MAX(min_alloc_size, SUPER_RESERVED), min_alloc_size, used_blocks,
[&](uint64_t pos, mempool_dynamic_bitset &bs) {
bs.set(pos);
}
@ -5567,7 +5573,7 @@ int BlueStore::fsck(bool deep)
if (bluefs) {
for (auto e = bluefs_extents.begin(); e != bluefs_extents.end(); ++e) {
apply(
e.get_start(), e.get_len(), block_size, used_blocks,
e.get_start(), e.get_len(), min_alloc_size, used_blocks,
[&](uint64_t pos, mempool_dynamic_bitset &bs) {
bs.set(pos);
}
@ -5966,7 +5972,7 @@ int BlueStore::fsck(bool deep)
<< " released 0x" << std::hex << wt.released << std::dec << dendl;
for (auto e = wt.released.begin(); e != wt.released.end(); ++e) {
apply(
e.get_start(), e.get_len(), block_size, used_blocks,
e.get_start(), e.get_len(), min_alloc_size, used_blocks,
[&](uint64_t pos, mempool_dynamic_bitset &bs) {
bs.set(pos);
}
@ -5981,7 +5987,7 @@ int BlueStore::fsck(bool deep)
// know they are allocated.
for (auto e = bluefs_extents.begin(); e != bluefs_extents.end(); ++e) {
apply(
e.get_start(), e.get_len(), block_size, used_blocks,
e.get_start(), e.get_len(), min_alloc_size, used_blocks,
[&](uint64_t pos, mempool_dynamic_bitset &bs) {
bs.reset(pos);
}
@ -5992,7 +5998,7 @@ int BlueStore::fsck(bool deep)
while (fm->enumerate_next(&offset, &length)) {
bool intersects = false;
apply(
offset, length, block_size, used_blocks,
offset, length, min_alloc_size, used_blocks,
[&](uint64_t pos, mempool_dynamic_bitset &bs) {
if (bs.test(pos)) {
intersects = true;
@ -6021,8 +6027,8 @@ int BlueStore::fsck(bool deep)
size_t next = used_blocks.find_next(cur);
if (next != cur + 1) {
derr << __func__ << " error: leaked extent 0x" << std::hex
<< ((uint64_t)start * block_size) << "~"
<< ((cur + 1 - start) * block_size) << std::dec
<< ((uint64_t)start * min_alloc_size) << "~"
<< ((cur + 1 - start) * min_alloc_size) << std::dec
<< dendl;
start = next;
break;

View File

@ -24,7 +24,8 @@ public:
static void setup_merge_operators(KeyValueDB *db);
virtual int create(uint64_t size, KeyValueDB::Transaction txn) = 0;
virtual int create(uint64_t size, uint64_t min_alloc_size,
KeyValueDB::Transaction txn) = 0;
virtual int init() = 0;
virtual void shutdown() = 0;