Merge pull request #41615 from tchaikov/wip-avl-alloc-ff

os/bluestore/AvlAllocator: introduce bluestore_avl_alloc_ff_max_* options

Reviewed-by: Igor Fedotov <ifedotov@suse.com>
Reviewed-by: Adam Kupczyk <akupczyk@redhat.com>
This commit is contained in:
Kefu Chai 2021-06-25 17:01:11 +08:00 committed by GitHub
commit 74df5af8e2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 61 additions and 24 deletions

View File

@ -5029,6 +5029,18 @@ options:
- hdd
- ssd
with_legacy: true
- name: bluestore_avl_alloc_ff_max_search_count
type: uint
level: dev
desc: Search for this many ranges in first-fit mode before switching over to
best-fit mode. 0 to iterate through all ranges for required chunk.
default: 100
- name: bluestore_avl_alloc_ff_max_search_bytes
type: size
level: dev
desc: Maximum distance to search in first-fit mode before switching over to
best-fit mode. 0 to iterate through all ranges for required chunk.
default: 16_M
- name: bluestore_avl_alloc_bf_threshold
type: uint
level: dev

View File

@ -34,6 +34,8 @@ uint64_t AvlAllocator::_pick_block_after(uint64_t *cursor,
uint64_t align)
{
const auto compare = range_tree.key_comp();
uint32_t search_count = 0;
uint64_t search_bytes = 0;
auto rs_start = range_tree.lower_bound(range_t{*cursor, size}, compare);
for (auto rs = rs_start; rs != range_tree.end(); ++rs) {
uint64_t offset = p2roundup(rs->start, align);
@ -41,6 +43,13 @@ uint64_t AvlAllocator::_pick_block_after(uint64_t *cursor,
*cursor = offset + size;
return offset;
}
if (max_search_count > 0 && ++search_count > max_search_count) {
return -1ULL;
}
if (search_bytes = rs->start - rs_start->start;
max_search_bytes > 0 && search_bytes > max_search_bytes) {
return -1ULL;
}
}
if (*cursor == 0) {
// If we already started from beginning, don't bother with searching from beginning
@ -53,6 +62,12 @@ uint64_t AvlAllocator::_pick_block_after(uint64_t *cursor,
*cursor = offset + size;
return offset;
}
if (max_search_count > 0 && ++search_count > max_search_count) {
return -1ULL;
}
if (max_search_bytes > 0 && search_bytes + rs->start > max_search_bytes) {
return -1ULL;
}
}
return -1ULL;
}
@ -240,13 +255,27 @@ int AvlAllocator::_allocate(
const int free_pct = num_free * 100 / device_size;
uint64_t start = 0;
/*
* If we're running low on space switch to using the size
* sorted AVL tree (best-fit).
*/
// If we're running low on space, find a range by size by looking up in the size
// sorted tree (best-fit), instead of searching in the area pointed by cursor
if (force_range_size_alloc ||
max_size < range_size_alloc_threshold ||
free_pct < range_size_alloc_free_pct) {
start = -1ULL;
} else {
/*
* Find the largest power of 2 block size that evenly divides the
* requested size. This is used to try to allocate blocks with similar
* alignment from the same area (i.e. same cursor bucket), but it does
* not guarantee that allocations of other sizes will not exist in the
* same region.
*/
uint64_t align = size & -size;
ceph_assert(align != 0);
uint64_t* cursor = &lbas[cbits(align) - 1];
start = _pick_block_after(cursor, size, unit);
dout(20) << __func__ << " first fit=" << start << " size=" << size << dendl;
}
if (start == -1ULL) {
do {
start = _pick_block_fits(size, unit);
dout(20) << __func__ << " best fit=" << start << " size=" << size << dendl;
@ -257,25 +286,6 @@ int AvlAllocator::_allocate(
// that large block due to misaligned extents
size = p2align(size >> 1, unit);
} while (size >= unit);
} else {
do {
/*
* Find the largest power of 2 block size that evenly divides the
* requested size. This is used to try to allocate blocks with similar
* alignment from the same area (i.e. same cursor bucket) but it does
* not guarantee that other allocations sizes may exist in the same
* region.
*/
uint64_t* cursor = &lbas[cbits(size) - 1];
start = _pick_block_after(cursor, size, unit);
dout(20) << __func__ << " first fit=" << start << " size=" << size << dendl;
if (start != uint64_t(-1ULL)) {
break;
}
// try to collect smaller extents as we could fail to retrieve
// that large block due to misaligned extents
size = p2align(size >> 1, unit);
} while (size >= unit);
}
if (start == -1ULL) {
return -ENOSPC;
@ -328,6 +338,10 @@ AvlAllocator::AvlAllocator(CephContext* cct,
cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_bf_threshold")),
range_size_alloc_free_pct(
cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_bf_free_pct")),
max_search_count(
cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_ff_max_search_count")),
max_search_bytes(
cct->_conf.get_val<Option::size_t>("bluestore_avl_alloc_ff_max_search_bytes")),
range_count_cap(max_mem / sizeof(range_seg_t)),
cct(cct)
{}

View File

@ -158,7 +158,18 @@ private:
* switch to using best-fit allocations.
*/
int range_size_alloc_free_pct = 0;
/*
* Maximum number of segments to check in first-fit mode. Without this
* limit, a fragmented device can see lots of iterations, and the first-fit
* search in _pick_block_after() becomes the performance-limiting factor on
* high-performance storage.
*/
const uint32_t max_search_count;
/*
* Maximum distance to search forward from the last offset. Without this
* limit, a fragmented device can see lots of iterations, and the first-fit
* search in _pick_block_after() becomes the performance-limiting factor on
* high-performance storage.
*/
const uint32_t max_search_bytes;
/*
* Max amount of range entries allowed. 0 - unlimited
*/