mirror of
https://github.com/ceph/ceph
synced 2025-02-23 19:17:37 +00:00
Merge pull request #41615 from tchaikov/wip-avl-alloc-ff
os/bluestore/AvlAllocator: introduce bluestore_avl_alloc_ff_max_* options Reviewed-by: Igor Fedotov <ifedotov@suse.com> Reviewed-by: Adam Kupczyk <akupczyk@redhat.com>
This commit is contained in:
commit
74df5af8e2
@ -5029,6 +5029,18 @@ options:
|
||||
- hdd
|
||||
- ssd
|
||||
with_legacy: true
|
||||
- name: bluestore_avl_alloc_ff_max_search_count
|
||||
type: uint
|
||||
level: dev
|
||||
desc: Search for this many ranges in first-fit mode before switching over to
|
||||
best-fit mode. 0 to iterate through all ranges for required chunk.
|
||||
default: 100
|
||||
- name: bluestore_avl_alloc_ff_max_search_bytes
|
||||
type: size
|
||||
level: dev
|
||||
desc: Maximum distance to search in first-fit mode before switching over to
|
||||
best-fit mode. 0 to iterate through all ranges for required chunk.
|
||||
default: 16_M
|
||||
- name: bluestore_avl_alloc_bf_threshold
|
||||
type: uint
|
||||
level: dev
|
||||
|
@ -34,6 +34,8 @@ uint64_t AvlAllocator::_pick_block_after(uint64_t *cursor,
|
||||
uint64_t align)
|
||||
{
|
||||
const auto compare = range_tree.key_comp();
|
||||
uint32_t search_count = 0;
|
||||
uint64_t search_bytes = 0;
|
||||
auto rs_start = range_tree.lower_bound(range_t{*cursor, size}, compare);
|
||||
for (auto rs = rs_start; rs != range_tree.end(); ++rs) {
|
||||
uint64_t offset = p2roundup(rs->start, align);
|
||||
@ -41,6 +43,13 @@ uint64_t AvlAllocator::_pick_block_after(uint64_t *cursor,
|
||||
*cursor = offset + size;
|
||||
return offset;
|
||||
}
|
||||
if (max_search_count > 0 && ++search_count > max_search_count) {
|
||||
return -1ULL;
|
||||
}
|
||||
if (search_bytes = rs->start - rs_start->start;
|
||||
max_search_bytes > 0 && search_bytes > max_search_bytes) {
|
||||
return -1ULL;
|
||||
}
|
||||
}
|
||||
if (*cursor == 0) {
|
||||
// If we already started from beginning, don't bother with searching from beginning
|
||||
@ -53,6 +62,12 @@ uint64_t AvlAllocator::_pick_block_after(uint64_t *cursor,
|
||||
*cursor = offset + size;
|
||||
return offset;
|
||||
}
|
||||
if (max_search_count > 0 && ++search_count > max_search_count) {
|
||||
return -1ULL;
|
||||
}
|
||||
if (max_search_bytes > 0 && search_bytes + rs->start > max_search_bytes) {
|
||||
return -1ULL;
|
||||
}
|
||||
}
|
||||
return -1ULL;
|
||||
}
|
||||
@ -240,13 +255,27 @@ int AvlAllocator::_allocate(
|
||||
|
||||
const int free_pct = num_free * 100 / device_size;
|
||||
uint64_t start = 0;
|
||||
/*
|
||||
* If we're running low on space switch to using the size
|
||||
* sorted AVL tree (best-fit).
|
||||
*/
|
||||
// If we're running low on space, find a range by size by looking up in the size
|
||||
// sorted tree (best-fit), instead of searching in the area pointed by cursor
|
||||
if (force_range_size_alloc ||
|
||||
max_size < range_size_alloc_threshold ||
|
||||
free_pct < range_size_alloc_free_pct) {
|
||||
start = -1ULL;
|
||||
} else {
|
||||
/*
|
||||
* Find the largest power of 2 block size that evenly divides the
|
||||
* requested size. This is used to try to allocate blocks with similar
|
||||
* alignment from the same area (i.e. same cursor bucket) but it does
|
||||
* not guarantee that other allocations sizes may exist in the same
|
||||
* region.
|
||||
*/
|
||||
uint64_t align = size & -size;
|
||||
ceph_assert(align != 0);
|
||||
uint64_t* cursor = &lbas[cbits(align) - 1];
|
||||
start = _pick_block_after(cursor, size, unit);
|
||||
dout(20) << __func__ << " first fit=" << start << " size=" << size << dendl;
|
||||
}
|
||||
if (start == -1ULL) {
|
||||
do {
|
||||
start = _pick_block_fits(size, unit);
|
||||
dout(20) << __func__ << " best fit=" << start << " size=" << size << dendl;
|
||||
@ -257,25 +286,6 @@ int AvlAllocator::_allocate(
|
||||
// that large block due to misaligned extents
|
||||
size = p2align(size >> 1, unit);
|
||||
} while (size >= unit);
|
||||
} else {
|
||||
do {
|
||||
/*
|
||||
* Find the largest power of 2 block size that evenly divides the
|
||||
* requested size. This is used to try to allocate blocks with similar
|
||||
* alignment from the same area (i.e. same cursor bucket) but it does
|
||||
* not guarantee that other allocations sizes may exist in the same
|
||||
* region.
|
||||
*/
|
||||
uint64_t* cursor = &lbas[cbits(size) - 1];
|
||||
start = _pick_block_after(cursor, size, unit);
|
||||
dout(20) << __func__ << " first fit=" << start << " size=" << size << dendl;
|
||||
if (start != uint64_t(-1ULL)) {
|
||||
break;
|
||||
}
|
||||
// try to collect smaller extents as we could fail to retrieve
|
||||
// that large block due to misaligned extents
|
||||
size = p2align(size >> 1, unit);
|
||||
} while (size >= unit);
|
||||
}
|
||||
if (start == -1ULL) {
|
||||
return -ENOSPC;
|
||||
@ -328,6 +338,10 @@ AvlAllocator::AvlAllocator(CephContext* cct,
|
||||
cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_bf_threshold")),
|
||||
range_size_alloc_free_pct(
|
||||
cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_bf_free_pct")),
|
||||
max_search_count(
|
||||
cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_ff_max_search_count")),
|
||||
max_search_bytes(
|
||||
cct->_conf.get_val<Option::size_t>("bluestore_avl_alloc_ff_max_search_bytes")),
|
||||
range_count_cap(max_mem / sizeof(range_seg_t)),
|
||||
cct(cct)
|
||||
{}
|
||||
|
@ -158,7 +158,18 @@ private:
|
||||
* switch to using best-fit allocations.
|
||||
*/
|
||||
int range_size_alloc_free_pct = 0;
|
||||
|
||||
/*
|
||||
* Maximum number of segments to check in the first-fit mode, without this
|
||||
* limit, fragmented device can see lots of iterations and _pick_block_after()
|
||||
* becomes the performance limiting factor on high-performance storage.
|
||||
*/
|
||||
const uint32_t max_search_count;
|
||||
/*
|
||||
* Maximum distance to search forward from the last offset, without this
|
||||
* limit, fragmented device can see lots of iterations and _pick_block_after()
|
||||
* becomes the performance limiting factor on high-performance storage.
|
||||
*/
|
||||
const uint32_t max_search_bytes;
|
||||
/*
|
||||
* Max amount of range entries allowed. 0 - unlimited
|
||||
*/
|
||||
|
Loading…
Reference in New Issue
Block a user