mirror of
https://github.com/ceph/ceph
synced 2025-01-01 08:32:24 +00:00
Merge pull request #14434 from ifed01/wip-bluestore-params
os/bluestore: make bluestore_max_blob_size parameter hdd/ssd case dependent. Reviewed-by: Sage Weil <sage@redhat.com>
This commit is contained in:
commit
adada74873
@ -1068,8 +1068,12 @@ OPTION(bluestore_prefer_deferred_size_hdd, OPT_U32, 32768)
|
||||
OPTION(bluestore_prefer_deferred_size_ssd, OPT_U32, 0)
|
||||
OPTION(bluestore_compression_mode, OPT_STR, "none") // force|aggressive|passive|none
|
||||
OPTION(bluestore_compression_algorithm, OPT_STR, "snappy")
|
||||
OPTION(bluestore_compression_min_blob_size, OPT_U32, 128*1024)
|
||||
OPTION(bluestore_compression_max_blob_size, OPT_U32, 512*1024)
|
||||
OPTION(bluestore_compression_min_blob_size, OPT_U32, 0)
|
||||
OPTION(bluestore_compression_min_blob_size_hdd, OPT_U32, 128*1024)
|
||||
OPTION(bluestore_compression_min_blob_size_ssd, OPT_U32, 8*1024)
|
||||
// 0 = unset: BlueStore::_set_compression() then uses the _hdd value for a
// rotational block device and the _ssd value otherwise.
OPTION(bluestore_compression_max_blob_size, OPT_U32, 0)
|
||||
OPTION(bluestore_compression_max_blob_size_hdd, OPT_U32, 512*1024)
|
||||
OPTION(bluestore_compression_max_blob_size_ssd, OPT_U32, 64*1024)
|
||||
/*
|
||||
* Specifies minimum expected amount of saved allocation units
|
||||
* per single blob to enable compressed blobs garbage collection
|
||||
@ -1083,7 +1087,9 @@ OPTION(bluestore_gc_enable_blob_threshold, OPT_INT, 0)
|
||||
*/
|
||||
OPTION(bluestore_gc_enable_total_threshold, OPT_INT, 0)
|
||||
|
||||
OPTION(bluestore_max_blob_size, OPT_U32, 512*1024)
|
||||
// 0 = unset: BlueStore::_set_blob_size() then uses the _hdd value for a
// rotational block device and the _ssd value otherwise.
OPTION(bluestore_max_blob_size, OPT_U32, 0)
|
||||
OPTION(bluestore_max_blob_size_hdd, OPT_U32, 512*1024)
|
||||
OPTION(bluestore_max_blob_size_ssd, OPT_U32, 64*1024)
|
||||
/*
|
||||
* Require the net gain of compression at least to be at this ratio,
|
||||
* otherwise we don't compress.
|
||||
|
@ -3249,7 +3249,11 @@ const char **BlueStore::get_tracked_conf_keys() const
|
||||
"bluestore_compression_mode",
|
||||
"bluestore_compression_algorithm",
|
||||
"bluestore_compression_min_blob_size",
|
||||
"bluestore_compression_min_blob_size_ssd",
|
||||
"bluestore_compression_min_blob_size_hdd",
|
||||
"bluestore_compression_max_blob_size",
|
||||
"bluestore_compression_max_blob_size_ssd",
|
||||
"bluestore_compression_max_blob_size_hdd",
|
||||
"bluestore_max_alloc_size",
|
||||
"bluestore_prefer_deferred_size",
|
||||
"bluestore_deferred_batch_ops",
|
||||
@ -3259,6 +3263,9 @@ const char **BlueStore::get_tracked_conf_keys() const
|
||||
"bluestore_max_bytes",
|
||||
"bluestore_deferred_max_ops",
|
||||
"bluestore_deferred_max_bytes",
|
||||
"bluestore_max_blob_size",
|
||||
"bluestore_max_blob_size_ssd",
|
||||
"bluestore_max_blob_size_hdd",
|
||||
NULL
|
||||
};
|
||||
return KEYS;
|
||||
@ -3274,7 +3281,17 @@ void BlueStore::handle_conf_change(const struct md_config_t *conf,
|
||||
changed.count("bluestore_compression_algorithm") ||
|
||||
changed.count("bluestore_compression_min_blob_size") ||
|
||||
changed.count("bluestore_compression_max_blob_size")) {
|
||||
_set_compression();
|
||||
if (bdev) {
|
||||
_set_compression();
|
||||
}
|
||||
}
|
||||
if (changed.count("bluestore_max_blob_size") ||
|
||||
changed.count("bluestore_max_blob_size_ssd") ||
|
||||
changed.count("bluestore_max_blob_size_hdd")) {
|
||||
if (bdev) {
|
||||
// only after startup
|
||||
_set_blob_size();
|
||||
}
|
||||
}
|
||||
if (changed.count("bluestore_prefer_deferred_size") ||
|
||||
changed.count("bluestore_max_alloc_size") ||
|
||||
@ -3306,8 +3323,27 @@ void BlueStore::handle_conf_change(const struct md_config_t *conf,
|
||||
|
||||
void BlueStore::_set_compression()
|
||||
{
|
||||
comp_min_blob_size = cct->_conf->bluestore_compression_min_blob_size;
|
||||
comp_max_blob_size = cct->_conf->bluestore_compression_max_blob_size;
|
||||
// Minimum compressed blob size: an explicit non-zero
// bluestore_compression_min_blob_size wins; when it is 0 fall back to the
// hdd- or ssd-specific value according to the block device type.
if (cct->_conf->bluestore_compression_min_blob_size) {
  comp_min_blob_size = cct->_conf->bluestore_compression_min_blob_size;
} else {
  // the device type is only known once the block device is open
  assert(bdev);
  if (bdev->is_rotational()) {
    comp_min_blob_size = cct->_conf->bluestore_compression_min_blob_size_hdd;
  } else {
    comp_min_blob_size = cct->_conf->bluestore_compression_min_blob_size_ssd;
  }
}
|
||||
|
||||
if (cct->_conf->bluestore_compression_max_blob_size) {
|
||||
comp_max_blob_size = cct->_conf->bluestore_compression_max_blob_size;
|
||||
} else {
|
||||
assert(bdev);
|
||||
if (bdev->is_rotational()) {
|
||||
comp_max_blob_size = cct->_conf->bluestore_compression_max_blob_size_hdd;
|
||||
} else {
|
||||
comp_max_blob_size = cct->_conf->bluestore_compression_max_blob_size_ssd;
|
||||
}
|
||||
}
|
||||
|
||||
auto m = Compressor::get_comp_mode_type(cct->_conf->bluestore_compression_mode);
|
||||
if (m) {
|
||||
@ -3364,6 +3400,21 @@ void BlueStore::_set_throttle_params()
|
||||
dout(10) << __func__ << " throttle_cost_per_io " << throttle_cost_per_io
|
||||
<< dendl;
|
||||
}
|
||||
void BlueStore::_set_blob_size()
|
||||
{
|
||||
if (cct->_conf->bluestore_max_blob_size) {
|
||||
max_blob_size = cct->_conf->bluestore_max_blob_size;
|
||||
} else {
|
||||
assert(bdev);
|
||||
if (bdev->is_rotational()) {
|
||||
max_blob_size = cct->_conf->bluestore_max_blob_size_hdd;
|
||||
} else {
|
||||
max_blob_size = cct->_conf->bluestore_max_blob_size_ssd;
|
||||
}
|
||||
}
|
||||
dout(10) << __func__ << " max_blob_size 0x" << std::hex << max_blob_size
|
||||
<< std::dec << dendl;
|
||||
}
|
||||
|
||||
void BlueStore::_init_logger()
|
||||
{
|
||||
@ -4848,8 +4899,6 @@ int BlueStore::mount()
|
||||
|
||||
mempool_thread.init();
|
||||
|
||||
_set_csum();
|
||||
_set_compression();
|
||||
|
||||
mounted = true;
|
||||
return 0;
|
||||
@ -7120,6 +7169,10 @@ int BlueStore::_open_super_meta()
|
||||
_set_alloc_sizes();
|
||||
_set_throttle_params();
|
||||
|
||||
_set_csum();
|
||||
_set_compression();
|
||||
_set_blob_size();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -7208,9 +7261,10 @@ void BlueStore::_txc_calc_cost(TransContext *txc)
|
||||
for (auto& p : txc->ioc.pending_aios) {
|
||||
ios += p.iov.size();
|
||||
}
|
||||
txc->cost = ios * throttle_cost_per_io + txc->bytes;
|
||||
auto cost = throttle_cost_per_io.load();
|
||||
txc->cost = ios * cost + txc->bytes;
|
||||
dout(10) << __func__ << " " << txc << " cost " << txc->cost << " ("
|
||||
<< ios << " ios * " << throttle_cost_per_io << " + " << txc->bytes
|
||||
<< ios << " ios * " << cost << " + " << txc->bytes
|
||||
<< " bytes)" << dendl;
|
||||
}
|
||||
|
||||
@ -9305,8 +9359,9 @@ int BlueStore::_do_alloc_write(
|
||||
|
||||
AllocExtentVector extents;
|
||||
extents.reserve(4); // 4 should be (more than) enough for most allocations
|
||||
int64_t got = alloc->allocate(final_length, min_alloc_size, max_alloc_size,
|
||||
hint, &extents);
|
||||
int64_t got = alloc->allocate(final_length, min_alloc_size,
|
||||
max_alloc_size.load(),
|
||||
hint, &extents);
|
||||
assert(got == (int64_t)final_length);
|
||||
need -= got;
|
||||
txc->statfs_delta.allocated() += got;
|
||||
@ -9352,7 +9407,7 @@ int BlueStore::_do_alloc_write(
|
||||
|
||||
// queue io
|
||||
if (!g_conf->bluestore_debug_omit_block_device_write) {
|
||||
if (l->length() <= prefer_deferred_size) {
|
||||
if (l->length() <= prefer_deferred_size.load()) {
|
||||
dout(20) << __func__ << " deferring small 0x" << std::hex
|
||||
<< l->length() << std::dec << " write via deferred" << dendl;
|
||||
bluestore_deferred_op_t *op = _get_deferred_op(txc, o);
|
||||
@ -9557,11 +9612,12 @@ int BlueStore::_do_write(
|
||||
CEPH_OSD_ALLOC_HINT_FLAG_APPEND_ONLY)) &&
|
||||
(alloc_hints & CEPH_OSD_ALLOC_HINT_FLAG_RANDOM_WRITE) == 0) {
|
||||
dout(20) << __func__ << " will prefer large blob and csum sizes" << dendl;
|
||||
auto order = min_alloc_size_order.load();
|
||||
if (o->onode.expected_write_size) {
|
||||
wctx.csum_order = std::max(min_alloc_size_order,
|
||||
wctx.csum_order = std::max(order,
|
||||
(size_t)ctzl(o->onode.expected_write_size));
|
||||
} else {
|
||||
wctx.csum_order = min_alloc_size_order;
|
||||
wctx.csum_order = order;
|
||||
}
|
||||
|
||||
if (wctx.compress) {
|
||||
@ -9592,9 +9648,9 @@ int BlueStore::_do_write(
|
||||
);
|
||||
}
|
||||
}
|
||||
if (wctx.target_blob_size == 0 ||
|
||||
wctx.target_blob_size > cct->_conf->bluestore_max_blob_size) {
|
||||
wctx.target_blob_size = cct->_conf->bluestore_max_blob_size;
|
||||
uint64_t max_bsize = max_blob_size.load();
|
||||
if (wctx.target_blob_size == 0 || wctx.target_blob_size > max_bsize) {
|
||||
wctx.target_blob_size = max_bsize;
|
||||
}
|
||||
// set the min blob size floor at 2x the min_alloc_size, or else we
|
||||
// won't be able to allocate a smaller extent for the compressed
|
||||
|
@ -1792,19 +1792,27 @@ private:
|
||||
size_t block_size_order = 0; ///< bits to shift to get block size
|
||||
|
||||
uint64_t min_alloc_size = 0; ///< minimum allocation unit (power of 2)
|
||||
size_t min_alloc_size_order = 0; ///< bits for min_alloc_size
|
||||
uint64_t prefer_deferred_size = 0; ///< size threshold for forced deferred writes
|
||||
int deferred_batch_ops = 0; ///< deferred batch size
|
||||
|
||||
uint64_t max_alloc_size = 0; ///< maximum allocation unit (power of 2)
|
||||
///< bits for min_alloc_size
|
||||
std::atomic<size_t> min_alloc_size_order = {0};
|
||||
|
||||
///< size threshold for forced deferred writes
|
||||
std::atomic<uint64_t> prefer_deferred_size = {0};
|
||||
|
||||
uint64_t throttle_cost_per_io = 0; ///< approx cost per io, in bytes
|
||||
///< maximum allocation unit (power of 2)
|
||||
std::atomic<uint64_t> max_alloc_size = {0};
|
||||
|
||||
///< approx cost per io, in bytes
|
||||
std::atomic<uint64_t> throttle_cost_per_io = {0};
|
||||
|
||||
std::atomic<Compressor::CompressionMode> comp_mode = {Compressor::COMP_NONE}; ///< compression mode
|
||||
CompressorRef compressor;
|
||||
std::atomic<uint64_t> comp_min_blob_size = {0};
|
||||
std::atomic<uint64_t> comp_max_blob_size = {0};
|
||||
|
||||
std::atomic<uint64_t> max_blob_size = {0}; ///< maximum blob size
|
||||
|
||||
// cache trim control
|
||||
|
||||
// note that these update in a racy way, but we don't *really* care if
|
||||
@ -1858,6 +1866,8 @@ private:
|
||||
int _write_fsid();
|
||||
void _close_fsid();
|
||||
void _set_alloc_sizes();
|
||||
void _set_blob_size();
|
||||
|
||||
int _open_bdev(bool create);
|
||||
void _close_bdev();
|
||||
int _open_db(bool create);
|
||||
|
@ -1549,9 +1549,10 @@ TEST_P(StoreTestSpecificAUSize, BluestoreStatFSTest) {
|
||||
g_ceph_context->_conf->apply_changes(NULL);
|
||||
}
|
||||
|
||||
TEST_P(StoreTest, BluestoreFragmentedBlobTest) {
|
||||
TEST_P(StoreTestSpecificAUSize, BluestoreFragmentedBlobTest) {
|
||||
if(string(GetParam()) != "bluestore")
|
||||
return;
|
||||
StartDeferred(0x10000);
|
||||
|
||||
ObjectStore::Sequencer osr("test");
|
||||
int r;
|
||||
@ -5665,9 +5666,12 @@ void doMany4KWritesTest(boost::scoped_ptr<ObjectStore>& store,
|
||||
test_obj.shutdown();
|
||||
}
|
||||
|
||||
TEST_P(StoreTest, Many4KWritesTest) {
|
||||
TEST_P(StoreTestSpecificAUSize, Many4KWritesTest) {
|
||||
if (string(GetParam()) != "bluestore")
|
||||
return;
|
||||
|
||||
StartDeferred(0x10000);
|
||||
|
||||
store_statfs_t res_stat;
|
||||
unsigned max_object = 4*1024*1024;
|
||||
|
||||
@ -5677,9 +5681,10 @@ TEST_P(StoreTest, Many4KWritesTest) {
|
||||
ASSERT_EQ(res_stat.allocated, max_object);
|
||||
}
|
||||
|
||||
TEST_P(StoreTest, Many4KWritesNoCSumTest) {
|
||||
TEST_P(StoreTestSpecificAUSize, Many4KWritesNoCSumTest) {
|
||||
if (string(GetParam()) != "bluestore")
|
||||
return;
|
||||
StartDeferred(0x10000);
|
||||
g_conf->set_val("bluestore_csum_type", "none");
|
||||
g_ceph_context->_conf->apply_changes(NULL);
|
||||
store_statfs_t res_stat;
|
||||
@ -5692,9 +5697,10 @@ TEST_P(StoreTest, Many4KWritesNoCSumTest) {
|
||||
g_conf->set_val("bluestore_csum_type", "crc32c");
|
||||
}
|
||||
|
||||
TEST_P(StoreTest, TooManyBlobsTest) {
|
||||
TEST_P(StoreTestSpecificAUSize, TooManyBlobsTest) {
|
||||
if (string(GetParam()) != "bluestore")
|
||||
return;
|
||||
StartDeferred(0x10000);
|
||||
store_statfs_t res_stat;
|
||||
unsigned max_object = 4*1024*1024;
|
||||
doMany4KWritesTest(store, 1, 1000, max_object, 4*1024, 0, &res_stat);
|
||||
@ -5991,7 +5997,7 @@ TEST_P(StoreTestSpecificAUSize, BlobReuseOnOverwrite) {
|
||||
r = apply_transaction(store, &osr, std::move(t));
|
||||
ASSERT_EQ(r, 0);
|
||||
}
|
||||
g_conf->set_val("bluestore_max_blob_size", "524288");
|
||||
g_conf->set_val("bluestore_max_blob_size", "0");
|
||||
|
||||
}
|
||||
|
||||
@ -6174,7 +6180,7 @@ TEST_P(StoreTestSpecificAUSize, BlobReuseOnOverwriteReverse) {
|
||||
r = apply_transaction(store, &osr, std::move(t));
|
||||
ASSERT_EQ(r, 0);
|
||||
}
|
||||
g_conf->set_val("bluestore_max_blob_size", "524288");
|
||||
g_conf->set_val("bluestore_max_blob_size", "0");
|
||||
}
|
||||
|
||||
TEST_P(StoreTestSpecificAUSize, BlobReuseOnSmallOverwrite) {
|
||||
@ -6248,7 +6254,7 @@ TEST_P(StoreTestSpecificAUSize, BlobReuseOnSmallOverwrite) {
|
||||
r = apply_transaction(store, &osr, std::move(t));
|
||||
ASSERT_EQ(r, 0);
|
||||
}
|
||||
g_conf->set_val("bluestore_max_blob_size", "524288");
|
||||
g_conf->set_val("bluestore_max_blob_size", "0");
|
||||
}
|
||||
|
||||
// The test case to reproduce an issue when write happens
|
||||
@ -6452,6 +6458,7 @@ TEST_P(StoreTestSpecificAUSize, garbageCollection) {
|
||||
|
||||
StartDeferred(65536);
|
||||
|
||||
g_conf->set_val("bluestore_compression_max_blob_size", "524288");
|
||||
g_conf->set_val("bluestore_compression_min_blob_size", "262144");
|
||||
g_conf->set_val("bluestore_compression_mode", "force");
|
||||
g_conf->apply_changes(NULL);
|
||||
@ -6585,7 +6592,8 @@ TEST_P(StoreTestSpecificAUSize, garbageCollection) {
|
||||
}
|
||||
}
|
||||
g_conf->set_val("bluestore_gc_enable_total_threshold", "0");
|
||||
g_conf->set_val("bluestore_compression_min_blob_size", "131072");
|
||||
g_conf->set_val("bluestore_compression_min_blob_size", "0");
|
||||
g_conf->set_val("bluestore_compression_max_blob_size", "0");
|
||||
g_conf->set_val("bluestore_compression_mode", "none");
|
||||
g_conf->apply_changes(NULL);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user