Merge pull request #14434 from ifed01/wip-bluestore-params

os/bluestore: make bluestore_max_blob_size parameter hdd/ssd case dependent

Reviewed-by: Sage Weil <sage@redhat.com>
Merged by Sage Weil on 2017-04-20 15:13:18 -05:00 (committed via GitHub)
commit adada74873
4 changed files with 110 additions and 30 deletions

src/common/config_opts.h

@@ -1068,8 +1068,12 @@ OPTION(bluestore_prefer_deferred_size_hdd, OPT_U32, 32768)
 OPTION(bluestore_prefer_deferred_size_ssd, OPT_U32, 0)
 OPTION(bluestore_compression_mode, OPT_STR, "none") // force|aggressive|passive|none
 OPTION(bluestore_compression_algorithm, OPT_STR, "snappy")
-OPTION(bluestore_compression_min_blob_size, OPT_U32, 128*1024)
-OPTION(bluestore_compression_max_blob_size, OPT_U32, 512*1024)
+OPTION(bluestore_compression_min_blob_size, OPT_U32, 0)
+OPTION(bluestore_compression_min_blob_size_hdd, OPT_U32, 128*1024)
+OPTION(bluestore_compression_min_blob_size_ssd, OPT_U32, 8*1024)
+OPTION(bluestore_compression_max_blob_size, OPT_U32, 0)
+OPTION(bluestore_compression_max_blob_size_hdd, OPT_U32, 512*1024)
+OPTION(bluestore_compression_max_blob_size_ssd, OPT_U32, 64*1024)
 /*
  * Specifies minimum expected amount of saved allocation units
  * per single blob to enable compressed blobs garbage collection
@@ -1083,7 +1087,9 @@ OPTION(bluestore_gc_enable_blob_threshold, OPT_INT, 0)
  */
 OPTION(bluestore_gc_enable_total_threshold, OPT_INT, 0)
-OPTION(bluestore_max_blob_size, OPT_U32, 512*1024)
+OPTION(bluestore_max_blob_size, OPT_U32, 0)
+OPTION(bluestore_max_blob_size_hdd, OPT_U32, 512*1024)
+OPTION(bluestore_max_blob_size_ssd, OPT_U32, 64*1024)
 /*
  * Require the net gain of compression at least to be at this ratio,
  * otherwise we don't compress.
  */
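Editorial note: the pattern introduced above gives each tunable a device-neutral base option defaulting to 0, plus explicit _hdd/_ssd variants; a base value of 0 means "pick the variant matching the backing device", while a non-zero value overrides both. A minimal C++ sketch of that resolution logic, using a hypothetical resolve_opt() helper that is not part of this PR:

#include <cstdint>

// Hypothetical helper illustrating the option pattern above: an explicit
// (non-zero) base value wins; otherwise fall back to the per-device default.
uint64_t resolve_opt(uint64_t base, uint64_t hdd_val, uint64_t ssd_val,
                     bool rotational)
{
  if (base)
    return base;                          // operator set an explicit value
  return rotational ? hdd_val : ssd_val;  // auto-select by device type
}

// With the defaults above, resolve_opt(0, 512*1024, 64*1024, false) == 64*1024.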

src/os/bluestore/BlueStore.cc

@@ -3249,7 +3249,11 @@ const char **BlueStore::get_tracked_conf_keys() const
     "bluestore_compression_mode",
     "bluestore_compression_algorithm",
     "bluestore_compression_min_blob_size",
+    "bluestore_compression_min_blob_size_ssd",
+    "bluestore_compression_min_blob_size_hdd",
     "bluestore_compression_max_blob_size",
+    "bluestore_compression_max_blob_size_ssd",
+    "bluestore_compression_max_blob_size_hdd",
     "bluestore_max_alloc_size",
     "bluestore_prefer_deferred_size",
     "bluestore_deferred_batch_ops",
@@ -3259,6 +3263,9 @@ const char **BlueStore::get_tracked_conf_keys() const
     "bluestore_max_bytes",
     "bluestore_deferred_max_ops",
     "bluestore_deferred_max_bytes",
+    "bluestore_max_blob_size",
+    "bluestore_max_blob_size_ssd",
+    "bluestore_max_blob_size_hdd",
     NULL
   };
   return KEYS;
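Editorial note: these key lists feed Ceph's config-observer mechanism; handle_conf_change() below is invoked only for keys returned by get_tracked_conf_keys(), which is why every new _hdd/_ssd variant must be registered here. A simplified sketch of the observer contract BlueStore implements (signatures as they appear in this diff; the struct name is illustrative):

#include <set>
#include <string>

struct md_config_t;  // Ceph's config store, opaque for this sketch

// The config subsystem intersects changed option names with the observer's
// tracked-key list and calls handle_conf_change() with the matching subset.
struct config_observer_sketch {
  virtual const char **get_tracked_conf_keys() const = 0;
  virtual void handle_conf_change(const md_config_t *conf,
                                  const std::set<std::string> &changed) = 0;
  virtual ~config_observer_sketch() = default;
};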
@@ -3274,7 +3281,17 @@ void BlueStore::handle_conf_change(const struct md_config_t *conf,
       changed.count("bluestore_compression_algorithm") ||
       changed.count("bluestore_compression_min_blob_size") ||
       changed.count("bluestore_compression_max_blob_size")) {
-    _set_compression();
+    if (bdev) {
+      _set_compression();
+    }
+  }
+  if (changed.count("bluestore_max_blob_size") ||
+      changed.count("bluestore_max_blob_size_ssd") ||
+      changed.count("bluestore_max_blob_size_hdd")) {
+    if (bdev) {
+      // only after startup
+      _set_blob_size();
+    }
   }
   if (changed.count("bluestore_prefer_deferred_size") ||
       changed.count("bluestore_max_alloc_size") ||
@@ -3306,8 +3323,27 @@ void BlueStore::handle_conf_change(const struct md_config_t *conf,
 void BlueStore::_set_compression()
 {
-  comp_min_blob_size = cct->_conf->bluestore_compression_min_blob_size;
-  comp_max_blob_size = cct->_conf->bluestore_compression_max_blob_size;
+  if (cct->_conf->bluestore_compression_min_blob_size) {
+    comp_min_blob_size = cct->_conf->bluestore_compression_min_blob_size;
+  } else {
+    assert(bdev);
+    if (bdev->is_rotational()) {
+      comp_min_blob_size = cct->_conf->bluestore_compression_min_blob_size_hdd;
+    } else {
+      comp_min_blob_size = cct->_conf->bluestore_compression_min_blob_size_ssd;
+    }
+  }
+  if (cct->_conf->bluestore_compression_max_blob_size) {
+    comp_max_blob_size = cct->_conf->bluestore_compression_max_blob_size;
+  } else {
+    assert(bdev);
+    if (bdev->is_rotational()) {
+      comp_max_blob_size = cct->_conf->bluestore_compression_max_blob_size_hdd;
+    } else {
+      comp_max_blob_size = cct->_conf->bluestore_compression_max_blob_size_ssd;
+    }
+  }
 
   auto m = Compressor::get_comp_mode_type(cct->_conf->bluestore_compression_mode);
   if (m) {
@@ -3364,6 +3400,21 @@ void BlueStore::_set_throttle_params()
   dout(10) << __func__ << " throttle_cost_per_io " << throttle_cost_per_io
            << dendl;
 }
 
+void BlueStore::_set_blob_size()
+{
+  if (cct->_conf->bluestore_max_blob_size) {
+    max_blob_size = cct->_conf->bluestore_max_blob_size;
+  } else {
+    assert(bdev);
+    if (bdev->is_rotational()) {
+      max_blob_size = cct->_conf->bluestore_max_blob_size_hdd;
+    } else {
+      max_blob_size = cct->_conf->bluestore_max_blob_size_ssd;
+    }
+  }
+  dout(10) << __func__ << " max_blob_size 0x" << std::hex << max_blob_size
+           << std::dec << dendl;
+}
+
 void BlueStore::_init_logger()
 {
@@ -4848,8 +4899,6 @@ int BlueStore::mount()
   mempool_thread.init();
 
-  _set_csum();
-  _set_compression();
-
   mounted = true;
   return 0;
@@ -7120,6 +7169,10 @@ int BlueStore::_open_super_meta()
   _set_alloc_sizes();
   _set_throttle_params();
   _set_csum();
+  _set_compression();
+  _set_blob_size();
 
   return 0;
 }
@@ -7208,9 +7261,10 @@ void BlueStore::_txc_calc_cost(TransContext *txc)
   for (auto& p : txc->ioc.pending_aios) {
     ios += p.iov.size();
   }
-  txc->cost = ios * throttle_cost_per_io + txc->bytes;
+  auto cost = throttle_cost_per_io.load();
+  txc->cost = ios * cost + txc->bytes;
   dout(10) << __func__ << " " << txc << " cost " << txc->cost << " ("
-           << ios << " ios * " << throttle_cost_per_io << " + " << txc->bytes
+           << ios << " ios * " << cost << " + " << txc->bytes
            << " bytes)" << dendl;
 }
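Editorial note: throttle_cost_per_io is now a std::atomic, so the function loads it once into a local; computing the cost and printing it from two separate loads could mix values if another thread applies a config change in between. A standalone sketch of the load-once idea (names hypothetical):

#include <atomic>
#include <cstdint>
#include <iostream>

std::atomic<uint64_t> cost_per_io{670000};  // hypothetical tunable

uint64_t calc_cost(uint64_t ios, uint64_t bytes)
{
  // Load once: the computed cost and the logged cost are guaranteed to
  // come from the same value even under concurrent config updates.
  uint64_t cost = cost_per_io.load();
  uint64_t total = ios * cost + bytes;
  std::cout << ios << " ios * " << cost << " + " << bytes << " bytes\n";
  return total;
}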
@@ -9305,8 +9359,9 @@ int BlueStore::_do_alloc_write(
   AllocExtentVector extents;
   extents.reserve(4); // 4 should be (more than) enough for most allocations
-  int64_t got = alloc->allocate(final_length, min_alloc_size, max_alloc_size,
-                                hint, &extents);
+  int64_t got = alloc->allocate(final_length, min_alloc_size,
+                                max_alloc_size.load(),
+                                hint, &extents);
   assert(got == (int64_t)final_length);
   need -= got;
   txc->statfs_delta.allocated() += got;
@@ -9352,7 +9407,7 @@ int BlueStore::_do_alloc_write(
 
     // queue io
     if (!g_conf->bluestore_debug_omit_block_device_write) {
-      if (l->length() <= prefer_deferred_size) {
+      if (l->length() <= prefer_deferred_size.load()) {
         dout(20) << __func__ << " deferring small 0x" << std::hex
                  << l->length() << std::dec << " write via deferred" << dendl;
         bluestore_deferred_op_t *op = _get_deferred_op(txc, o);
@@ -9557,11 +9612,12 @@ int BlueStore::_do_write(
                CEPH_OSD_ALLOC_HINT_FLAG_APPEND_ONLY)) &&
       (alloc_hints & CEPH_OSD_ALLOC_HINT_FLAG_RANDOM_WRITE) == 0) {
     dout(20) << __func__ << " will prefer large blob and csum sizes" << dendl;
+    auto order = min_alloc_size_order.load();
     if (o->onode.expected_write_size) {
-      wctx.csum_order = std::max(min_alloc_size_order,
+      wctx.csum_order = std::max(order,
                                  (size_t)ctzl(o->onode.expected_write_size));
     } else {
-      wctx.csum_order = min_alloc_size_order;
+      wctx.csum_order = order;
     }
 
     if (wctx.compress) {
@@ -9592,9 +9648,9 @@ int BlueStore::_do_write(
         );
       }
     }
-  if (wctx.target_blob_size == 0 ||
-      wctx.target_blob_size > cct->_conf->bluestore_max_blob_size) {
-    wctx.target_blob_size = cct->_conf->bluestore_max_blob_size;
+  uint64_t max_bsize = max_blob_size.load();
+  if (wctx.target_blob_size == 0 || wctx.target_blob_size > max_bsize) {
+    wctx.target_blob_size = max_bsize;
   }
 
   // set the min blob size floor at 2x the min_alloc_size, or else we
   // won't be able to allocate a smaller extent for the compressed

src/os/bluestore/BlueStore.h

@@ -1792,19 +1792,27 @@ private:
   size_t block_size_order = 0; ///< bits to shift to get block size
   uint64_t min_alloc_size = 0; ///< minimum allocation unit (power of 2)
-  size_t min_alloc_size_order = 0; ///< bits for min_alloc_size
-  uint64_t prefer_deferred_size = 0; ///< size threshold for forced deferred writes
+  ///< bits for min_alloc_size
+  std::atomic<size_t> min_alloc_size_order = {0};
+  ///< size threshold for forced deferred writes
+  std::atomic<uint64_t> prefer_deferred_size = {0};
   int deferred_batch_ops = 0; ///< deferred batch size
-  uint64_t max_alloc_size = 0; ///< maximum allocation unit (power of 2)
-  uint64_t throttle_cost_per_io = 0; ///< approx cost per io, in bytes
+  ///< maximum allocation unit (power of 2)
+  std::atomic<uint64_t> max_alloc_size = {0};
+  ///< approx cost per io, in bytes
+  std::atomic<uint64_t> throttle_cost_per_io = {0};
 
   std::atomic<Compressor::CompressionMode> comp_mode = {Compressor::COMP_NONE}; ///< compression mode
   CompressorRef compressor;
   std::atomic<uint64_t> comp_min_blob_size = {0};
   std::atomic<uint64_t> comp_max_blob_size = {0};
+  std::atomic<uint64_t> max_blob_size = {0}; ///< maximum blob size
 
   // cache trim control
   // note that these update in a racy way, but we don't *really* care if
@@ -1858,6 +1866,8 @@ private:
   int _write_fsid();
   void _close_fsid();
   void _set_alloc_sizes();
+  void _set_blob_size();
+
   int _open_bdev(bool create);
   void _close_bdev();
   int _open_db(bool create);
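Editorial note: the tunables touched from handle_conf_change() become std::atomic members so the config thread can update them while I/O threads read them without data races. The "= {0}" initializer is deliberate: std::atomic is neither copyable nor movable, so in-class defaults must use brace initialization. A minimal standalone sketch (type and function names hypothetical):

#include <atomic>
#include <cstdint>

struct tunables {
  // Brace-initialized: plain "= 0" copy-initialization would be
  // ill-formed for std::atomic before C++17 (deleted copy constructor).
  std::atomic<uint64_t> max_blob_size = {0};
};

// Config thread stores a new value; I/O threads load it concurrently.
void apply_change(tunables &t, uint64_t v) { t.max_blob_size.store(v); }
uint64_t current(const tunables &t) { return t.max_blob_size.load(); }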

src/test/objectstore/store_test.cc

@@ -1549,9 +1549,10 @@ TEST_P(StoreTestSpecificAUSize, BluestoreStatFSTest) {
   g_ceph_context->_conf->apply_changes(NULL);
 }
 
-TEST_P(StoreTest, BluestoreFragmentedBlobTest) {
+TEST_P(StoreTestSpecificAUSize, BluestoreFragmentedBlobTest) {
   if(string(GetParam()) != "bluestore")
     return;
+  StartDeferred(0x10000);
 
   ObjectStore::Sequencer osr("test");
   int r;
@@ -5665,9 +5666,12 @@ void doMany4KWritesTest(boost::scoped_ptr<ObjectStore>& store,
   test_obj.shutdown();
 }
 
-TEST_P(StoreTest, Many4KWritesTest) {
+TEST_P(StoreTestSpecificAUSize, Many4KWritesTest) {
   if (string(GetParam()) != "bluestore")
     return;
+  StartDeferred(0x10000);
+
   store_statfs_t res_stat;
   unsigned max_object = 4*1024*1024;
@@ -5677,9 +5681,10 @@ TEST_P(StoreTest, Many4KWritesTest) {
   ASSERT_EQ(res_stat.allocated, max_object);
 }
 
-TEST_P(StoreTest, Many4KWritesNoCSumTest) {
+TEST_P(StoreTestSpecificAUSize, Many4KWritesNoCSumTest) {
   if (string(GetParam()) != "bluestore")
     return;
+  StartDeferred(0x10000);
   g_conf->set_val("bluestore_csum_type", "none");
   g_ceph_context->_conf->apply_changes(NULL);
   store_statfs_t res_stat;
@@ -5692,9 +5697,10 @@ TEST_P(StoreTest, Many4KWritesNoCSumTest) {
   g_conf->set_val("bluestore_csum_type", "crc32c");
 }
 
-TEST_P(StoreTest, TooManyBlobsTest) {
+TEST_P(StoreTestSpecificAUSize, TooManyBlobsTest) {
   if (string(GetParam()) != "bluestore")
     return;
+  StartDeferred(0x10000);
   store_statfs_t res_stat;
   unsigned max_object = 4*1024*1024;
   doMany4KWritesTest(store, 1, 1000, max_object, 4*1024, 0, &res_stat);
@@ -5991,7 +5997,7 @@ TEST_P(StoreTestSpecificAUSize, BlobReuseOnOverwrite) {
     r = apply_transaction(store, &osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
-  g_conf->set_val("bluestore_max_blob_size", "524288");
+  g_conf->set_val("bluestore_max_blob_size", "0");
 }
@@ -6174,7 +6180,7 @@ TEST_P(StoreTestSpecificAUSize, BlobReuseOnOverwriteReverse) {
     r = apply_transaction(store, &osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
-  g_conf->set_val("bluestore_max_blob_size", "524288");
+  g_conf->set_val("bluestore_max_blob_size", "0");
 }
 
 TEST_P(StoreTestSpecificAUSize, BlobReuseOnSmallOverwrite) {
@@ -6248,7 +6254,7 @@ TEST_P(StoreTestSpecificAUSize, BlobReuseOnSmallOverwrite) {
     r = apply_transaction(store, &osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
-  g_conf->set_val("bluestore_max_blob_size", "524288");
+  g_conf->set_val("bluestore_max_blob_size", "0");
 }
 
 // The test case to reproduce an issue when write happens
@@ -6452,6 +6458,7 @@ TEST_P(StoreTestSpecificAUSize, garbageCollection) {
   StartDeferred(65536);
+  g_conf->set_val("bluestore_compression_max_blob_size", "524288");
   g_conf->set_val("bluestore_compression_min_blob_size", "262144");
   g_conf->set_val("bluestore_compression_mode", "force");
   g_conf->apply_changes(NULL);
@@ -6585,7 +6592,8 @@ TEST_P(StoreTestSpecificAUSize, garbageCollection) {
     }
   }
   g_conf->set_val("bluestore_gc_enable_total_threshold", "0");
-  g_conf->set_val("bluestore_compression_min_blob_size", "131072");
+  g_conf->set_val("bluestore_compression_min_blob_size", "0");
+  g_conf->set_val("bluestore_compression_max_blob_size", "0");
   g_conf->set_val("bluestore_compression_mode", "none");
   g_conf->apply_changes(NULL);
}
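Editorial note: these tests move from the plain StoreTest fixture to StoreTestSpecificAUSize because blob-size defaults now depend on the device type, and the tests' statfs expectations depend on the allocation unit; StartDeferred(0x10000) pins a 64 KiB allocation unit before the store mounts so the numbers stay deterministic. A simplified gtest-style sketch of the deferred-start fixture pattern (members and behavior are illustrative, not the real fixture):

#include <gtest/gtest.h>
#include <string>

// Parameterized fixture whose store startup is deferred so each test can
// pin configuration (e.g. the allocation unit) before mounting.
class StoreTestSpecificAUSizeSketch
  : public ::testing::TestWithParam<const char*> {
protected:
  void StartDeferred(size_t au_size) {
    // A real fixture would set bluestore_min_alloc_size to au_size and
    // then create and mount the ObjectStore; this sketch just records it.
    au_size_ = au_size;
    started_ = true;
  }
  size_t au_size_ = 0;
  bool started_ = false;
};

TEST_P(StoreTestSpecificAUSizeSketch, Example) {
  if (std::string(GetParam()) != "bluestore")
    return;                // only meaningful for the bluestore backend
  StartDeferred(0x10000);  // 64 KiB allocation unit, then start the store
  ASSERT_TRUE(started_);
}

INSTANTIATE_TEST_CASE_P(ObjectStore, StoreTestSpecificAUSizeSketch,
                        ::testing::Values("bluestore"));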