mds: add balance_automate fs setting

To turn off the automatic ("default") balancer in multiple-MDS clusters. The
new default is "off" as the balancer is a constant source of problems and
surprise for administrators trying multiple actives. Instead, it should be a
deliberate decision to turn it on, usually together with customization such as
the "bal_rank_mask" setting or pinning.

Fixes: https://tracker.ceph.com/issues/61378
Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
This commit is contained in:
Patrick Donnelly 2023-06-23 17:01:00 -04:00
parent ef620825f0
commit 47342a8b3c
No known key found for this signature in database
GPG Key ID: FA47FD0B0367D313
7 changed files with 57 additions and 7 deletions

View File

@ -95,6 +95,10 @@ CephFS: Disallow delegating preallocated inode ranges to clients. Config
mirroring policies between RGW and AWS, you may wish to set
"rgw policy reject invalid principals" to "false". This affects only newly set
policies, not policies that are already in place.
* The CephFS automatic metadata load (sometimes called "default") balancer is
now disabled by default. The new file system flag `balance_automate`
can be used to toggle it on or off via
`ceph fs set <fs_name> balance_automate <bool>`.
* RGW's default backend for `rgw_enable_ops_log` changed from RADOS to file.
The default value of `rgw_ops_log_rados` is now false, and `rgw_ops_log_file_path`
defaults to "/var/log/ceph/ops-log-$cluster-$name.log".

View File

@ -292,6 +292,7 @@ struct ceph_mon_subscribe_ack {
request */
#define CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS (1<<7) /* fs is forbidden to use standby
for another fs */
#define CEPH_MDSMAP_BALANCE_AUTOMATE (1<<8) /* automate metadata balancing */
#define CEPH_MDSMAP_DEFAULTS (CEPH_MDSMAP_ALLOW_SNAPS | \
CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS)

View File

@ -230,6 +230,7 @@ void MDBalancer::handle_export_pins(void)
void MDBalancer::tick()
{
static int num_bal_times = g_conf()->mds_bal_max;
bool balance_automate = mds->mdsmap->allows_balance_automate();
auto bal_interval = g_conf().get_val<int64_t>("mds_bal_interval");
auto bal_max_until = g_conf().get_val<int64_t>("mds_bal_max_until");
time now = clock::now();
@ -248,7 +249,8 @@ void MDBalancer::tick()
// We can use duration_cast below, although the result is an int,
// because the values from g_conf are also integers.
// balance?
if (mds->get_nodeid() == 0
if (balance_automate
&& mds->get_nodeid() == 0
&& mds->is_active()
&& bal_interval > 0
&& chrono::duration_cast<chrono::seconds>(now - last_heartbeat).count() >= bal_interval

View File

@ -239,6 +239,7 @@ void MDSMap::dump_flags_state(Formatter *f) const
f->dump_bool(flag_display.at(CEPH_MDSMAP_ALLOW_STANDBY_REPLAY), allows_standby_replay());
f->dump_bool(flag_display.at(CEPH_MDSMAP_REFUSE_CLIENT_SESSION), test_flag(CEPH_MDSMAP_REFUSE_CLIENT_SESSION));
f->dump_bool(flag_display.at(CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS), test_flag(CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS));
f->dump_bool(flag_display.at(CEPH_MDSMAP_BALANCE_AUTOMATE), test_flag(CEPH_MDSMAP_BALANCE_AUTOMATE));
f->close_section();
}
@ -383,6 +384,8 @@ void MDSMap::print_flags(std::ostream& out) const {
out << " " << flag_display.at(CEPH_MDSMAP_REFUSE_CLIENT_SESSION);
if (test_flag(CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS))
out << " " << flag_display.at(CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS);
if (test_flag(CEPH_MDSMAP_BALANCE_AUTOMATE))
out << " " << flag_display.at(CEPH_MDSMAP_BALANCE_AUTOMATE);
}
void MDSMap::get_health(list<pair<health_status_t,string> >& summary,

View File

@ -244,6 +244,15 @@ public:
bool allows_standby_replay() const { return test_flag(CEPH_MDSMAP_ALLOW_STANDBY_REPLAY); }
bool was_standby_replay_ever_allowed() const { return ever_allowed_features & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY; }
// Enable the automatic metadata balancer for this file system.
// Also records the decision in both feature masks so the map remembers
// that the balancer was ever / explicitly allowed by an administrator.
void set_balance_automate() {
set_flag(CEPH_MDSMAP_BALANCE_AUTOMATE);
ever_allowed_features |= CEPH_MDSMAP_BALANCE_AUTOMATE;
explicitly_allowed_features |= CEPH_MDSMAP_BALANCE_AUTOMATE;
}
// Disable the automatic balancer. Only the live flag is cleared; the
// "ever allowed" history in ever_allowed_features is intentionally kept.
void clear_balance_automate() { clear_flag(CEPH_MDSMAP_BALANCE_AUTOMATE); }
// True when the automatic balancer is currently enabled (checked by
// MDBalancer::tick() before running a rebalance pass).
bool allows_balance_automate() const { return test_flag(CEPH_MDSMAP_BALANCE_AUTOMATE); }
// True when the balancer was enabled at any point in this map's history.
bool was_balance_automate_ever_allowed() const { return ever_allowed_features & CEPH_MDSMAP_BALANCE_AUTOMATE; }
void set_multimds_snaps_allowed() {
set_flag(CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS);
ever_allowed_features |= CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS;
@ -676,7 +685,8 @@ private:
{CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS, "allow_multimds_snaps"},
{CEPH_MDSMAP_ALLOW_STANDBY_REPLAY, "allow_standby_replay"},
{CEPH_MDSMAP_REFUSE_CLIENT_SESSION, "refuse_client_session"},
{CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS, "refuse_standby_for_another_fs"}
{CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS, "refuse_standby_for_another_fs"},
{CEPH_MDSMAP_BALANCE_AUTOMATE, "balance_automate"}
};
};
WRITE_CLASS_ENCODER_FEATURES(MDSMap::mds_info_t)

View File

@ -665,6 +665,21 @@ public:
}
};
fsmap.modify_filesystem(fsp->get_fscid(), std::move(f));
} else if (var == "balance_automate") {
bool allow = false;
int r = parse_bool(val, &allow, ss);
if (r != 0) {
return r;
}
auto f = [allow](auto&& fs) {
if (allow) {
fs.get_mds_map().set_balance_automate();
} else {
fs.get_mds_map().clear_balance_automate();
}
};
fsmap.modify_filesystem(fsp->get_fscid(), std::move(f));
} else if (var == "min_compat_client") {
auto vno = ceph_release_from_name(val.c_str());
if (!vno) {

View File

@ -374,11 +374,26 @@ COMMAND("fs get name=fs_name,type=CephString",
"fs", "r")
COMMAND("fs set "
"name=fs_name,type=CephString "
"name=var,type=CephChoices,strings=max_mds|max_file_size"
"|allow_new_snaps|inline_data|cluster_down|allow_dirfrags|balancer"
"|standby_count_wanted|session_timeout|session_autoclose"
"|allow_standby_replay|down|joinable|min_compat_client|bal_rank_mask"
"|refuse_client_session|max_xattr_size|refuse_standby_for_another_fs "
"name=var,type=CephChoices,strings=max_mds"
"|allow_dirfrags"
"|allow_new_snaps"
"|allow_standby_replay"
"|bal_rank_mask"
"|balance_automate"
"|balancer"
"|cluster_down"
"|down"
"|inline_data"
"|joinable"
"|max_file_size"
"|max_xattr_size"
"|min_compat_client"
"|refuse_client_session"
"|refuse_standby_for_another_fs"
"|session_autoclose"
"|session_timeout"
"|standby_count_wanted"
" "
"name=val,type=CephString "
"name=yes_i_really_mean_it,type=CephBool,req=false "
"name=yes_i_really_really_mean_it,type=CephBool,req=false",