mirror of
https://github.com/ceph/ceph
synced 2025-02-24 11:37:37 +00:00
Merge pull request #25697 from Aran85/fix-onode-trim
os/bluestore: more aggressive deferred submit when onode trim skipping
Reviewed-by: xie xingguo <xie.xingguo@zte.com.cn>
Reviewed-by: Igor Fedotov <ifedotov@suse.com>
This commit is contained in:
commit
263a78c3dd
@ -1035,6 +1035,7 @@ OPTION(bluestore_freelist_blocks_per_key, OPT_INT)
|
||||
OPTION(bluestore_bitmapallocator_blocks_per_zone, OPT_INT) // must be power of 2 aligned, e.g., 512, 1024, 2048...
|
||||
OPTION(bluestore_bitmapallocator_span_size, OPT_INT) // must be power of 2 aligned, e.g., 512, 1024, 2048...
|
||||
OPTION(bluestore_max_deferred_txc, OPT_U64)
|
||||
OPTION(bluestore_max_defer_interval, OPT_DOUBLE) // floating-point: the option is declared Option::TYPE_FLOAT and read via get_val<double>()
|
||||
OPTION(bluestore_rocksdb_options, OPT_STR)
|
||||
OPTION(bluestore_fsck_on_mount, OPT_BOOL)
|
||||
OPTION(bluestore_fsck_on_mount_deep, OPT_BOOL)
|
||||
|
@ -4700,6 +4700,10 @@ std::vector<Option> get_global_options() {
|
||||
.set_default(32)
|
||||
.set_description("Max transactions with deferred writes that can accumulate before we force flush deferred writes"),
|
||||
|
||||
Option("bluestore_max_defer_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
|
||||
.set_default(3)
|
||||
.set_description("max duration to force deferred submit"),
|
||||
|
||||
Option("bluestore_rocksdb_options", Option::TYPE_STR, Option::LEVEL_ADVANCED)
|
||||
.set_default("compression=kNoCompression,max_write_buffer_number=4,min_write_buffer_number_to_merge=1,recycle_log_file_num=4,write_buffer_size=268435456,writable_file_max_buffer_size=0,compaction_readahead_size=2097152,max_background_compactions=2")
|
||||
.set_description("Rocksdb options"),
|
||||
|
@ -3759,12 +3759,14 @@ void *BlueStore::MempoolThread::entry()
|
||||
|
||||
utime_t next_balance = ceph_clock_now();
|
||||
utime_t next_resize = ceph_clock_now();
|
||||
utime_t next_deferred_force_submit = ceph_clock_now();
|
||||
|
||||
bool interval_stats_trim = false;
|
||||
while (!stop) {
|
||||
// Before we trim, check and see if it's time to rebalance/resize.
|
||||
double autotune_interval = store->cache_autotune_interval;
|
||||
double resize_interval = store->osd_memory_cache_resize_interval;
|
||||
double max_defer_interval = store->max_defer_interval;
|
||||
|
||||
if (autotune_interval > 0 && next_balance < ceph_clock_now()) {
|
||||
_adjust_cache_settings();
|
||||
@ -3787,6 +3789,16 @@ void *BlueStore::MempoolThread::entry()
|
||||
next_resize += resize_interval;
|
||||
}
|
||||
|
||||
if (max_defer_interval > 0 &&
|
||||
next_deferred_force_submit < ceph_clock_now()) {
|
||||
if (store->get_deferred_last_submitted() + max_defer_interval <
|
||||
ceph_clock_now()) {
|
||||
store->deferred_try_submit();
|
||||
}
|
||||
next_deferred_force_submit = ceph_clock_now();
|
||||
next_deferred_force_submit += max_defer_interval/3;
|
||||
}
|
||||
|
||||
// Now Resize the shards
|
||||
_resize_shards(interval_stats_trim);
|
||||
interval_stats_trim = false;
|
||||
@ -4136,6 +4148,7 @@ const char **BlueStore::get_tracked_conf_keys() const
|
||||
"bluestore_no_per_pool_stats_tolerance",
|
||||
"bluestore_warn_on_legacy_statfs",
|
||||
"bluestore_warn_on_no_per_pool_omap",
|
||||
"bluestore_max_defer_interval",
|
||||
NULL
|
||||
};
|
||||
return KEYS;
|
||||
@ -4199,6 +4212,11 @@ void BlueStore::handle_conf_change(const ConfigProxy& conf,
|
||||
throttle_deferred_bytes.reset_max(
|
||||
conf->bluestore_throttle_bytes + conf->bluestore_throttle_deferred_bytes);
|
||||
}
|
||||
if (changed.count("bluestore_max_defer_interval")) {
|
||||
if (bdev) {
|
||||
_set_max_defer_interval();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BlueStore::_set_compression()
|
||||
@ -4774,6 +4792,7 @@ int BlueStore::_open_bdev(bool create)
|
||||
block_mask = ~(block_size - 1);
|
||||
block_size_order = ctz(block_size);
|
||||
ceph_assert(block_size == 1u << block_size_order);
|
||||
_set_max_defer_interval();
|
||||
// and set cache_size based on device type
|
||||
r = _set_cache_sizes();
|
||||
if (r < 0) {
|
||||
@ -11195,6 +11214,8 @@ void BlueStore::deferred_try_submit()
|
||||
dout(20) << __func__ << " osr " << osr << " has no pending" << dendl;
|
||||
}
|
||||
}
|
||||
|
||||
deferred_last_submitted = ceph_clock_now();
|
||||
}
|
||||
|
||||
void BlueStore::_deferred_submit_unlock(OpSequencer *osr)
|
||||
|
@ -152,6 +152,10 @@ public:
|
||||
void _set_compression();
|
||||
void _set_throttle_params();
|
||||
int _set_cache_sizes();
|
||||
/// Reload the cached max_defer_interval member from the
/// "bluestore_max_defer_interval" configuration option (read as a double).
void _set_max_defer_interval() {
  const auto configured_interval =
    cct->_conf.get_val<double>("bluestore_max_defer_interval");
  max_defer_interval = configured_interval;
}
|
||||
|
||||
class TransContext;
|
||||
|
||||
@ -1812,6 +1816,7 @@ private:
|
||||
int deferred_queue_size = 0; ///< num txc's queued across all osrs
|
||||
atomic_int deferred_aggressive = {0}; ///< aggressive wakeup of kv thread
|
||||
Finisher deferred_finisher, finisher;
|
||||
utime_t deferred_last_submitted = utime_t();
|
||||
|
||||
KVSyncThread kv_sync_thread;
|
||||
ceph::mutex kv_lock = ceph::make_mutex("BlueStore::kv_lock");
|
||||
@ -1893,6 +1898,7 @@ private:
|
||||
double osd_memory_expected_fragmentation = 0; ///< expected memory fragmentation
|
||||
uint64_t osd_memory_cache_min = 0; ///< Min memory to assign when autotuning cache
|
||||
double osd_memory_cache_resize_interval = 0; ///< Time to wait between cache resizing
|
||||
double max_defer_interval = 0; ///< Time to wait between last deferred submit
|
||||
|
||||
typedef map<uint64_t, volatile_statfs> osd_pools_map;
|
||||
|
||||
@ -2105,6 +2111,11 @@ private:
|
||||
bool create);
|
||||
|
||||
public:
|
||||
/// Return a copy of deferred_last_submitted, taken while holding
/// deferred_lock.
utime_t get_deferred_last_submitted() {
  std::lock_guard guard(deferred_lock);
  // Copy the timestamp while the guard is still alive.
  utime_t last_submit = deferred_last_submitted;
  return last_submit;
}
|
||||
|
||||
static int _write_bdev_label(CephContext* cct,
|
||||
string path, bluestore_bdev_label_t label);
|
||||
static int _read_bdev_label(CephContext* cct, string path,
|
||||
|
Loading…
Reference in New Issue
Block a user