osd: Restore defaults of mClock built-in profiles upon modification

The QoS parameters (res, wgt, lim) of mClock profiles are not allowed to
be modified by users using commands like "config set" or via admin socket.
handle_conf_change() does not allow changes to any built-in mClock profile
at the mClock scheduler. But the config subsystem showed the change as
expected for the built-in mClock profile QoS parameters. This misled the
user into thinking that the change was made at the mClock server when
it was not the case.

The above issue is the result of the config "levels" used by the config
subsystem. The initial built-in QoS params are set at the CONF_DEFAULT
level. This allows the user to modify the built-in QoS params using
"config set" command which sets values at CONF_MON level which has higher
priority than CONF_DEFAULT level. The new value is persisted on the mon
store and therefore the config subsystem shows the change when "config
show" command is issued.

To prevent the above, this commit adds changes to restore the defaults set
for the built-in profiles by removing the new config changes from the MON
store. This results in the original defaults coming back into effect and
maintain a consistent view of the built-in profile across all levels.

To accomplish this, the mClock scheduler is provided with additional
information like the OSD id, shard id and a pointer to the MonClient
using which the Mon store command to remove the option is executed.

A standalone test is added to verify that built-in params cannot be
modified and the original profile params are retained.

Fixes: https://tracker.ceph.com/issues/57533
Signed-off-by: Sridhar Seshasayee <sseshasa@redhat.com>
This commit is contained in:
Sridhar Seshasayee 2022-10-27 10:32:46 +05:30
parent 1650525670
commit b12780667f
7 changed files with 132 additions and 19 deletions

View File

@ -258,6 +258,54 @@ function TEST_backfill_limit_adjustment_mclock() {
teardown $dir || return 1
}
# Verify that the QoS params (res, wgt, lim) of a built-in mClock profile
# cannot be modified via "config set": the OSD must reject the change and
# the original defaults must remain in effect at every config level
# (Mon store, running config, and the in-memory 'values' map).
function TEST_profile_disallow_builtin_params_modify() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1

    # Verify that the default mclock profile is set on the OSD
    local mclock_profile
    mclock_profile=$(ceph config get osd.0 osd_mclock_profile)
    test "$mclock_profile" = "high_client_ops" || return 1

    declare -a options=("osd_mclock_scheduler_background_recovery_res"
      "osd_mclock_scheduler_client_res")

    for opt in "${options[@]}"
    do
      # Try and change a mclock config param and confirm that no change occurred.
      # NOTE: declare locals separately from the assignment — with
      # 'local var=$(cmd) || return 1' the exit status tested is that of the
      # 'local' builtin (always 0), so a failing command would go unnoticed.
      local opt_val_orig
      opt_val_orig=$(CEPH_ARGS='' ceph --format=json daemon \
        $(get_asok_path osd.0) config get "$opt" | jq ".$opt" | bc)
      local opt_val_new
      opt_val_new=$(expr "$opt_val_orig" + 10)
      ceph config set osd.0 "$opt" "$opt_val_new" || return 1
      sleep 2 # Allow time for changes to take effect

      # Check configuration value on Mon store (or the default) for the osd
      local res
      res=$(ceph config get osd.0 "$opt") || return 1
      echo "Mon db (or default): osd.0 $opt = $res"
      test "$res" -ne "$opt_val_new" || return 1

      # Check running configuration value using "config show" cmd
      res=$(ceph config show osd.0 | grep "$opt" |\
        awk '{ print $2 }' | bc ) || return 1
      echo "Running config: osd.0 $opt = $res"
      test "$res" -ne "$opt_val_new" || return 1
      test "$res" -eq "$opt_val_orig" || return 1

      # Check value in the in-memory 'values' map is unmodified
      res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \
        osd.0) config get "$opt" | jq ".$opt" | bc)
      echo "Values map: osd.0 $opt = $res"
      test "$res" -ne "$opt_val_new" || return 1
      test "$res" -eq "$opt_val_orig" || return 1
    done

    teardown $dir || return 1
}
main mclock-config "$@"
# Local Variables:

View File

@ -10567,8 +10567,8 @@ OSDShard::OSDShard(
shard_lock_name(shard_name + "::shard_lock"),
shard_lock{make_mutex(shard_lock_name)},
scheduler(ceph::osd::scheduler::make_scheduler(
cct, osd->num_shards, osd->store->is_rotational(),
osd->store->get_type())),
cct, osd->whoami, osd->num_shards, id, osd->store->is_rotational(),
osd->store->get_type(), osd->monc)),
context_queue(sdata_wait_lock, sdata_cond)
{
dout(0) << "using op scheduler " << *scheduler << dendl;

View File

@ -22,8 +22,8 @@
namespace ceph::osd::scheduler {
OpSchedulerRef make_scheduler(
CephContext *cct, uint32_t num_shards,
bool is_rotational, std::string_view osd_objectstore)
CephContext *cct, int whoami, uint32_t num_shards, int shard_id,
bool is_rotational, std::string_view osd_objectstore, MonClient *monc)
{
const std::string *type = &cct->_conf->osd_op_queue;
if (*type == "debug_random") {
@ -45,7 +45,8 @@ OpSchedulerRef make_scheduler(
);
} else if (*type == "mclock_scheduler") {
// default is 'mclock_scheduler'
return std::make_unique<mClockScheduler>(cct, num_shards, is_rotational);
return std::make_unique<
mClockScheduler>(cct, whoami, num_shards, shard_id, is_rotational, monc);
} else {
ceph_assert("Invalid choice of wq" == 0);
}

View File

@ -18,6 +18,7 @@
#include <variant>
#include "common/ceph_context.h"
#include "mon/MonClient.h"
#include "osd/scheduler/OpSchedulerItem.h"
namespace ceph::osd::scheduler {
@ -61,8 +62,8 @@ std::ostream &operator<<(std::ostream &lhs, const OpScheduler &);
using OpSchedulerRef = std::unique_ptr<OpScheduler>;
OpSchedulerRef make_scheduler(
CephContext *cct, uint32_t num_shards, bool is_rotational,
std::string_view osd_objectstore);
CephContext *cct, int whoami, uint32_t num_shards, int shard_id,
bool is_rotational, std::string_view osd_objectstore, MonClient *monc);
/**
* Implements OpScheduler in terms of OpQueue

View File

@ -31,11 +31,17 @@ using namespace std::placeholders;
namespace ceph::osd::scheduler {
mClockScheduler::mClockScheduler(CephContext *cct,
int whoami,
uint32_t num_shards,
bool is_rotational)
int shard_id,
bool is_rotational,
MonClient *monc)
: cct(cct),
whoami(whoami),
num_shards(num_shards),
shard_id(shard_id),
is_rotational(is_rotational),
monc(monc),
scheduler(
std::bind(&mClockScheduler::ClientRegistry::get_info,
&client_registry,
@ -339,6 +345,11 @@ void mClockScheduler::enable_mclock_profile_settings()
void mClockScheduler::set_profile_config()
{
// Let only a single osd shard (id:0) set the profile configs
if (shard_id > 0) {
return;
}
ClientAllocs client = client_allocs[
static_cast<size_t>(op_scheduler_class::client)];
ClientAllocs rec = client_allocs[
@ -378,6 +389,9 @@ void mClockScheduler::set_profile_config()
dout(10) << __func__ << " Best effort QoS params: " << "["
<< best_effort.res << "," << best_effort.wgt << "," << best_effort.lim
<< "]" << dendl;
// Apply the configuration changes
update_configuration();
}
int mClockScheduler::calc_scaled_cost(int item_cost)
@ -544,17 +558,56 @@ void mClockScheduler::handle_conf_change(
client_registry.update_from_config(conf);
}
}
if (changed.count("osd_mclock_scheduler_client_res") ||
changed.count("osd_mclock_scheduler_client_wgt") ||
changed.count("osd_mclock_scheduler_client_lim") ||
changed.count("osd_mclock_scheduler_background_recovery_res") ||
changed.count("osd_mclock_scheduler_background_recovery_wgt") ||
changed.count("osd_mclock_scheduler_background_recovery_lim") ||
changed.count("osd_mclock_scheduler_background_best_effort_res") ||
changed.count("osd_mclock_scheduler_background_best_effort_wgt") ||
changed.count("osd_mclock_scheduler_background_best_effort_lim")) {
auto get_changed_key = [&changed]() -> std::optional<std::string> {
static const std::vector<std::string> qos_params = {
"osd_mclock_scheduler_client_res",
"osd_mclock_scheduler_client_wgt",
"osd_mclock_scheduler_client_lim",
"osd_mclock_scheduler_background_recovery_res",
"osd_mclock_scheduler_background_recovery_wgt",
"osd_mclock_scheduler_background_recovery_lim",
"osd_mclock_scheduler_background_best_effort_res",
"osd_mclock_scheduler_background_best_effort_wgt",
"osd_mclock_scheduler_background_best_effort_lim"
};
for (auto &qp : qos_params) {
if (changed.count(qp)) {
return qp;
}
}
return std::nullopt;
};
if (auto key = get_changed_key(); key.has_value()) {
if (mclock_profile == "custom") {
client_registry.update_from_config(conf);
} else {
// Attempt to change QoS parameter for a built-in profile. Restore the
// profile defaults by making one of the OSD shards remove the key from
// config monitor store. Note: monc is included in the check since the
// mock unit test currently doesn't initialize it.
if (shard_id == 0 && monc) {
static const std::vector<std::string> osds = {
"osd",
"osd." + std::to_string(whoami)
};
for (auto osd : osds) {
std::string cmd =
"{"
"\"prefix\": \"config rm\", "
"\"who\": \"" + osd + "\", "
"\"name\": \"" + *key + "\""
"}";
std::vector<std::string> vcmd{cmd};
dout(10) << __func__ << " Removing Key: " << *key
<< " for " << osd << " from Mon db" << dendl;
monc->start_mon_command(vcmd, {}, nullptr, nullptr, nullptr);
}
}
}
}
}

View File

@ -72,8 +72,11 @@ struct scheduler_id_t {
class mClockScheduler : public OpScheduler, md_config_obs_t {
CephContext *cct;
const int whoami;
const uint32_t num_shards;
const int shard_id;
bool is_rotational;
MonClient *monc;
double max_osd_capacity;
double osd_mclock_cost_per_io;
double osd_mclock_cost_per_byte;
@ -144,7 +147,8 @@ class mClockScheduler : public OpScheduler, md_config_obs_t {
}
public:
mClockScheduler(CephContext *cct, uint32_t num_shards, bool is_rotational);
mClockScheduler(CephContext *cct, int whoami, uint32_t num_shards,
int shard_id, bool is_rotational, MonClient *monc);
~mClockScheduler() override;
// Set the max osd capacity in iops

View File

@ -27,8 +27,11 @@ int main(int argc, char **argv) {
class mClockSchedulerTest : public testing::Test {
public:
int whoami;
uint32_t num_shards;
int shard_id;
bool is_rotational;
MonClient *monc;
mClockScheduler q;
uint64_t client1;
@ -36,9 +39,12 @@ public:
uint64_t client3;
mClockSchedulerTest() :
whoami(0),
num_shards(1),
shard_id(0),
is_rotational(false),
q(g_ceph_context, num_shards, is_rotational),
monc(nullptr),
q(g_ceph_context, whoami, num_shards, shard_id, is_rotational, monc),
client1(1001),
client2(9999),
client3(100000001)