From b12780667f7b038f615d198c225f761ad7191ea9 Mon Sep 17 00:00:00 2001 From: Sridhar Seshasayee Date: Thu, 27 Oct 2022 10:32:46 +0530 Subject: [PATCH] osd: Restore defaults of mClock built-in profiles upon modification The QoS parameters (res, wgt, lim) of built-in mClock profiles are not allowed to be modified by users using commands like "config set" or via the admin socket. handle_conf_change() does not allow changes to any built-in mClock profile at the mClock scheduler. But the config subsystem showed the change as expected for the built-in mClock profile QoS parameters. This misled the user into thinking that the change was made at the mClock server when it was not the case. The above issue is the result of the config "levels" used by the config subsystem. The initial built-in QoS params are set at the CONF_DEFAULT level. This allows the user to modify the built-in QoS params using the "config set" command, which sets values at the CONF_MON level, which has higher priority than the CONF_DEFAULT level. The new value is persisted on the mon store and therefore the config subsystem shows the change when the "config show" command is issued. To prevent the above, this commit adds changes to restore the defaults set for the built-in profiles by removing the new config changes from the MON store. This causes the original defaults to come back into effect and maintains a consistent view of the built-in profile across all levels. To accomplish this, the mClock scheduler is provided with additional information like the OSD id, shard id and a pointer to the MonClient using which the Mon store command to remove the option is executed. A standalone test is added to verify that built-in params cannot be modified and the original profile params are retained. 
Fixes: https://tracker.ceph.com/issues/57533 Signed-off-by: Sridhar Seshasayee --- qa/standalone/misc/mclock-config.sh | 48 ++++++++++++++++++ src/osd/OSD.cc | 4 +- src/osd/scheduler/OpScheduler.cc | 7 +-- src/osd/scheduler/OpScheduler.h | 5 +- src/osd/scheduler/mClockScheduler.cc | 73 ++++++++++++++++++++++++---- src/osd/scheduler/mClockScheduler.h | 6 ++- src/test/osd/TestMClockScheduler.cc | 8 ++- 7 files changed, 132 insertions(+), 19 deletions(-) diff --git a/qa/standalone/misc/mclock-config.sh b/qa/standalone/misc/mclock-config.sh index 702c5096756..17260b100ae 100755 --- a/qa/standalone/misc/mclock-config.sh +++ b/qa/standalone/misc/mclock-config.sh @@ -258,6 +258,54 @@ function TEST_backfill_limit_adjustment_mclock() { teardown $dir || return 1 } +function TEST_profile_disallow_builtin_params_modify() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1 + + # Verify that the default mclock profile is set on the OSD + local mclock_profile=$(ceph config get osd.0 osd_mclock_profile) + test "$mclock_profile" = "high_client_ops" || return 1 + + declare -a options=("osd_mclock_scheduler_background_recovery_res" + "osd_mclock_scheduler_client_res") + + for opt in "${options[@]}" + do + # Try and change a mclock config param and confirm that no change occurred + local opt_val_orig=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get $opt | jq .$opt | bc) + local opt_val_new=$(expr $opt_val_orig + 10) + ceph config set osd.0 $opt $opt_val_new || return 1 + sleep 2 # Allow time for changes to take effect + + # Check configuration value on Mon store (or the default) for the osd + local res=$(ceph config get osd.0 $opt) || return 1 + echo "Mon db (or default): osd.0 $opt = $res" + test $res -ne $opt_val_new || return 1 + + # Check running configuration value using "config show" cmd + res=$(ceph config show osd.0 | grep $opt |\ + awk 
'{ print $2 }' | bc ) || return 1 + echo "Running config: osd.0 $opt = $res" + test $res -ne $opt_val_new || return 1 + test $res -eq $opt_val_orig || return 1 + + # Check value in the in-memory 'values' map is unmodified + res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \ + osd.0) config get $opt | jq .$opt | bc) + echo "Values map: osd.0 $opt = $res" + test $res -ne $opt_val_new || return 1 + test $res -eq $opt_val_orig || return 1 + done + + teardown $dir || return 1 +} + main mclock-config "$@" # Local Variables: diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 9a7e04a7d2d..33113575040 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -10567,8 +10567,8 @@ OSDShard::OSDShard( shard_lock_name(shard_name + "::shard_lock"), shard_lock{make_mutex(shard_lock_name)}, scheduler(ceph::osd::scheduler::make_scheduler( - cct, osd->num_shards, osd->store->is_rotational(), - osd->store->get_type())), + cct, osd->whoami, osd->num_shards, id, osd->store->is_rotational(), + osd->store->get_type(), osd->monc)), context_queue(sdata_wait_lock, sdata_cond) { dout(0) << "using op scheduler " << *scheduler << dendl; diff --git a/src/osd/scheduler/OpScheduler.cc b/src/osd/scheduler/OpScheduler.cc index b0d14b496b5..cb5ef13b6f6 100644 --- a/src/osd/scheduler/OpScheduler.cc +++ b/src/osd/scheduler/OpScheduler.cc @@ -22,8 +22,8 @@ namespace ceph::osd::scheduler { OpSchedulerRef make_scheduler( - CephContext *cct, uint32_t num_shards, - bool is_rotational, std::string_view osd_objectstore) + CephContext *cct, int whoami, uint32_t num_shards, int shard_id, + bool is_rotational, std::string_view osd_objectstore, MonClient *monc) { const std::string *type = &cct->_conf->osd_op_queue; if (*type == "debug_random") { @@ -45,7 +45,8 @@ OpSchedulerRef make_scheduler( ); } else if (*type == "mclock_scheduler") { // default is 'mclock_scheduler' - return std::make_unique(cct, num_shards, is_rotational); + return std::make_unique< + mClockScheduler>(cct, whoami, num_shards, 
shard_id, is_rotational, monc); } else { ceph_assert("Invalid choice of wq" == 0); } diff --git a/src/osd/scheduler/OpScheduler.h b/src/osd/scheduler/OpScheduler.h index dc524314f25..1575bcae4f6 100644 --- a/src/osd/scheduler/OpScheduler.h +++ b/src/osd/scheduler/OpScheduler.h @@ -18,6 +18,7 @@ #include #include "common/ceph_context.h" +#include "mon/MonClient.h" #include "osd/scheduler/OpSchedulerItem.h" namespace ceph::osd::scheduler { @@ -61,8 +62,8 @@ std::ostream &operator<<(std::ostream &lhs, const OpScheduler &); using OpSchedulerRef = std::unique_ptr; OpSchedulerRef make_scheduler( - CephContext *cct, uint32_t num_shards, bool is_rotational, - std::string_view osd_objectstore); + CephContext *cct, int whoami, uint32_t num_shards, int shard_id, + bool is_rotational, std::string_view osd_objectstore, MonClient *monc); /** * Implements OpScheduler in terms of OpQueue diff --git a/src/osd/scheduler/mClockScheduler.cc b/src/osd/scheduler/mClockScheduler.cc index be6e00444bc..4b388bd5217 100644 --- a/src/osd/scheduler/mClockScheduler.cc +++ b/src/osd/scheduler/mClockScheduler.cc @@ -31,11 +31,17 @@ using namespace std::placeholders; namespace ceph::osd::scheduler { mClockScheduler::mClockScheduler(CephContext *cct, + int whoami, uint32_t num_shards, - bool is_rotational) + int shard_id, + bool is_rotational, + MonClient *monc) : cct(cct), + whoami(whoami), num_shards(num_shards), + shard_id(shard_id), is_rotational(is_rotational), + monc(monc), scheduler( std::bind(&mClockScheduler::ClientRegistry::get_info, &client_registry, @@ -339,6 +345,11 @@ void mClockScheduler::enable_mclock_profile_settings() void mClockScheduler::set_profile_config() { + // Let only a single osd shard (id:0) set the profile configs + if (shard_id > 0) { + return; + } + ClientAllocs client = client_allocs[ static_cast(op_scheduler_class::client)]; ClientAllocs rec = client_allocs[ @@ -378,6 +389,9 @@ void mClockScheduler::set_profile_config() dout(10) << __func__ << " Best effort QoS 
params: " << "[" << best_effort.res << "," << best_effort.wgt << "," << best_effort.lim << "]" << dendl; + + // Apply the configuration changes + update_configuration(); } int mClockScheduler::calc_scaled_cost(int item_cost) @@ -544,17 +558,56 @@ void mClockScheduler::handle_conf_change( client_registry.update_from_config(conf); } } - if (changed.count("osd_mclock_scheduler_client_res") || - changed.count("osd_mclock_scheduler_client_wgt") || - changed.count("osd_mclock_scheduler_client_lim") || - changed.count("osd_mclock_scheduler_background_recovery_res") || - changed.count("osd_mclock_scheduler_background_recovery_wgt") || - changed.count("osd_mclock_scheduler_background_recovery_lim") || - changed.count("osd_mclock_scheduler_background_best_effort_res") || - changed.count("osd_mclock_scheduler_background_best_effort_wgt") || - changed.count("osd_mclock_scheduler_background_best_effort_lim")) { + + auto get_changed_key = [&changed]() -> std::optional { + static const std::vector qos_params = { + "osd_mclock_scheduler_client_res", + "osd_mclock_scheduler_client_wgt", + "osd_mclock_scheduler_client_lim", + "osd_mclock_scheduler_background_recovery_res", + "osd_mclock_scheduler_background_recovery_wgt", + "osd_mclock_scheduler_background_recovery_lim", + "osd_mclock_scheduler_background_best_effort_res", + "osd_mclock_scheduler_background_best_effort_wgt", + "osd_mclock_scheduler_background_best_effort_lim" + }; + + for (auto &qp : qos_params) { + if (changed.count(qp)) { + return qp; + } + } + return std::nullopt; + }; + + if (auto key = get_changed_key(); key.has_value()) { if (mclock_profile == "custom") { client_registry.update_from_config(conf); + } else { + // Attempt to change QoS parameter for a built-in profile. Restore the + // profile defaults by making one of the OSD shards remove the key from + // config monitor store. Note: monc is included in the check since the + // mock unit test currently doesn't initialize it. 
+ if (shard_id == 0 && monc) { + static const std::vector osds = { + "osd", + "osd." + std::to_string(whoami) + }; + + for (auto osd : osds) { + std::string cmd = + "{" + "\"prefix\": \"config rm\", " + "\"who\": \"" + osd + "\", " + "\"name\": \"" + *key + "\"" + "}"; + std::vector vcmd{cmd}; + + dout(10) << __func__ << " Removing Key: " << *key + << " for " << osd << " from Mon db" << dendl; + monc->start_mon_command(vcmd, {}, nullptr, nullptr, nullptr); + } + } } } } diff --git a/src/osd/scheduler/mClockScheduler.h b/src/osd/scheduler/mClockScheduler.h index c3b79dba44b..dbacc348e6f 100644 --- a/src/osd/scheduler/mClockScheduler.h +++ b/src/osd/scheduler/mClockScheduler.h @@ -72,8 +72,11 @@ struct scheduler_id_t { class mClockScheduler : public OpScheduler, md_config_obs_t { CephContext *cct; + const int whoami; const uint32_t num_shards; + const int shard_id; bool is_rotational; + MonClient *monc; double max_osd_capacity; double osd_mclock_cost_per_io; double osd_mclock_cost_per_byte; @@ -144,7 +147,8 @@ class mClockScheduler : public OpScheduler, md_config_obs_t { } public: - mClockScheduler(CephContext *cct, uint32_t num_shards, bool is_rotational); + mClockScheduler(CephContext *cct, int whoami, uint32_t num_shards, + int shard_id, bool is_rotational, MonClient *monc); ~mClockScheduler() override; // Set the max osd capacity in iops diff --git a/src/test/osd/TestMClockScheduler.cc b/src/test/osd/TestMClockScheduler.cc index 0feb427ec10..7e1971b1359 100644 --- a/src/test/osd/TestMClockScheduler.cc +++ b/src/test/osd/TestMClockScheduler.cc @@ -27,8 +27,11 @@ int main(int argc, char **argv) { class mClockSchedulerTest : public testing::Test { public: + int whoami; uint32_t num_shards; + int shard_id; bool is_rotational; + MonClient *monc; mClockScheduler q; uint64_t client1; @@ -36,9 +39,12 @@ public: uint64_t client3; mClockSchedulerTest() : + whoami(0), num_shards(1), + shard_id(0), is_rotational(false), - q(g_ceph_context, num_shards, is_rotational), + 
monc(nullptr), + q(g_ceph_context, whoami, num_shards, shard_id, is_rotational, monc), client1(1001), client2(9999), client3(100000001)