mirror of
https://github.com/ceph/ceph
synced 2025-02-24 19:47:44 +00:00
mds: recall caps from quiescent sessions
This introduces two new config options [1,2] that dictate when a session is considered quiescent by the MDS. (Options are documented fully in options.cc.) When a session is quiescent, the MDS will preemptively recall caps to reduce the number of outstanding capabilities, which reduces the work required during failover. [1] mds_session_cache_liveness_magnitude [2] mds_session_cache_liveness_decay_rate Fixes: https://tracker.ceph.com/issues/22446 Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
This commit is contained in:
parent
63f9448947
commit
740f6f99a1
@ -7823,6 +7823,18 @@ std::vector<Option> get_mds_options() {
|
||||
.set_default(60.0)
|
||||
.set_description("decay rate for warning on slow session cap recall"),
|
||||
|
||||
Option("mds_session_cache_liveness_decay_rate", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
|
||||
.add_see_also("mds_session_cache_liveness_magnitude")
|
||||
.set_default(5_min)
|
||||
.set_description("decay rate for session liveness leading to preemptive cap recall")
|
||||
.set_long_description("This determines how long a session needs to be quiescent before the MDS begins preemptively recalling capabilities. The default of 5 minutes will cause 10 halvings of the decay counter after 1 hour, or 1/1024. The default magnitude of 10 (2^10 or 1024) is chosen so that the MDS considers a previously chatty session (approximately) to be quiescent after 1 hour."),
|
||||
|
||||
Option("mds_session_cache_liveness_magnitude", Option::TYPE_SIZE, Option::LEVEL_ADVANCED)
|
||||
.add_see_also("mds_session_cache_liveness_decay_rate")
|
||||
.set_default(10)
|
||||
.set_description("decay magnitude for preemptively recalling caps on quiet client")
|
||||
.set_long_description("This is the order of magnitude difference (in base 2) of the internal liveness decay counter and the number of capabilities the session holds. When this difference occurs, the MDS treats the session as quiescent and begins recalling capabilities."),
|
||||
|
||||
Option("mds_freeze_tree_timeout", Option::TYPE_FLOAT, Option::LEVEL_DEV)
|
||||
.set_default(30)
|
||||
.set_description(""),
|
||||
|
@ -175,7 +175,8 @@ MDCache::MDCache(MDSRank *m, PurgeQueue &purge_queue_) :
|
||||
trim_client_leases();
|
||||
trim();
|
||||
check_memory_usage();
|
||||
mds->server->recall_client_state(nullptr, Server::RecallFlags::ENFORCE_MAX);
|
||||
auto flags = Server::RecallFlags::ENFORCE_MAX|Server::RecallFlags::ENFORCE_LIVENESS;
|
||||
mds->server->recall_client_state(nullptr, flags);
|
||||
upkeep_last_trim = clock::now();
|
||||
} else {
|
||||
dout(10) << "cache not ready for trimming" << dendl;
|
||||
|
@ -3671,6 +3671,7 @@ const char** MDSRankDispatcher::get_tracked_conf_keys() const
|
||||
"mds_recall_max_decay_rate",
|
||||
"mds_recall_warning_decay_rate",
|
||||
"mds_request_load_average_decay_rate",
|
||||
"mds_session_cache_liveness_decay_rate",
|
||||
NULL
|
||||
};
|
||||
return KEYS;
|
||||
|
@ -1570,6 +1570,7 @@ std::pair<bool, uint64_t> Server::recall_client_state(MDSGatherBuilder* gather,
|
||||
const auto now = clock::now();
|
||||
const bool steady = !!(flags&RecallFlags::STEADY);
|
||||
const bool enforce_max = !!(flags&RecallFlags::ENFORCE_MAX);
|
||||
const bool enforce_liveness = !!(flags&RecallFlags::ENFORCE_LIVENESS);
|
||||
const bool trim = !!(flags&RecallFlags::TRIM);
|
||||
|
||||
const auto max_caps_per_client = g_conf().get_val<uint64_t>("mds_max_caps_per_client");
|
||||
@ -1577,6 +1578,7 @@ std::pair<bool, uint64_t> Server::recall_client_state(MDSGatherBuilder* gather,
|
||||
const auto recall_global_max_decay_threshold = g_conf().get_val<Option::size_t>("mds_recall_global_max_decay_threshold");
|
||||
const auto recall_max_caps = g_conf().get_val<Option::size_t>("mds_recall_max_caps");
|
||||
const auto recall_max_decay_threshold = g_conf().get_val<Option::size_t>("mds_recall_max_decay_threshold");
|
||||
const auto cache_liveness_magnitude = g_conf().get_val<Option::size_t>("mds_session_cache_liveness_magnitude");
|
||||
|
||||
dout(7) << __func__ << ":"
|
||||
<< " min=" << min_caps_per_client
|
||||
@ -1587,9 +1589,10 @@ std::pair<bool, uint64_t> Server::recall_client_state(MDSGatherBuilder* gather,
|
||||
|
||||
/* trim caps of sessions with the most caps first */
|
||||
std::multimap<uint64_t, Session*> caps_session;
|
||||
auto f = [&caps_session, enforce_max, trim, max_caps_per_client](auto& s) {
|
||||
auto f = [&caps_session, enforce_max, enforce_liveness, trim, max_caps_per_client, cache_liveness_magnitude](auto& s) {
|
||||
auto num_caps = s->caps.size();
|
||||
if (trim || (enforce_max && num_caps > max_caps_per_client)) {
|
||||
auto cache_liveness = s->get_session_cache_liveness();
|
||||
if (trim || (enforce_max && num_caps > max_caps_per_client) || (enforce_liveness && cache_liveness < (num_caps>>cache_liveness_magnitude))) {
|
||||
caps_session.emplace(std::piecewise_construct, std::forward_as_tuple(num_caps), std::forward_as_tuple(s));
|
||||
}
|
||||
};
|
||||
|
@ -174,6 +174,7 @@ public:
|
||||
STEADY = (1<<0),
|
||||
ENFORCE_MAX = (1<<1),
|
||||
TRIM = (1<<2),
|
||||
ENFORCE_LIVENESS = (1<<3),
|
||||
};
|
||||
std::pair<bool, uint64_t> recall_client_state(MDSGatherBuilder* gather, RecallFlags=RecallFlags::NONE);
|
||||
void force_clients_readonly();
|
||||
|
@ -588,6 +588,7 @@ void Session::dump(Formatter *f) const
|
||||
f->dump_object("release_caps", release_caps);
|
||||
f->dump_object("recall_caps_throttle", recall_caps_throttle);
|
||||
f->dump_object("recall_caps_throttle2o", recall_caps_throttle2o);
|
||||
f->dump_object("session_cache_liveness", session_cache_liveness);
|
||||
info.dump(f);
|
||||
}
|
||||
|
||||
@ -1072,6 +1073,14 @@ void SessionMap::handle_conf_change(const std::set<std::string>& changed)
|
||||
};
|
||||
apply_to_open_sessions(mut);
|
||||
}
|
||||
if (changed.count("mds_session_cache_liveness_decay_rate")) {
|
||||
auto d = g_conf().get_val<double>("mds_session_cache_liveness_decay_rate");
|
||||
auto mut = [d](auto s) {
|
||||
s->session_cache_liveness = DecayCounter(d);
|
||||
s->session_cache_liveness.hit(s->caps.size()); /* so the MDS doesn't immediately start trimming a new session */
|
||||
};
|
||||
apply_to_open_sessions(mut);
|
||||
}
|
||||
}
|
||||
|
||||
void SessionMap::update_average_session_age() {
|
||||
|
@ -128,6 +128,9 @@ private:
|
||||
// New limit in SESSION_RECALL
|
||||
uint32_t recall_limit = 0;
|
||||
|
||||
// session caps liveness
|
||||
DecayCounter session_cache_liveness;
|
||||
|
||||
// session start time -- used to track average session time
|
||||
// note that this is initialized in the constructor rather
|
||||
// than at the time of adding a session to the sessionmap
|
||||
@ -204,6 +207,9 @@ public:
|
||||
auto get_release_caps() const {
|
||||
return release_caps.get();
|
||||
}
|
||||
// Accessor for the session's cache-liveness counter: returns whatever
// session_cache_liveness.get() reports at the time of the call.
auto get_session_cache_liveness() const {
|
||||
return session_cache_liveness.get();
|
||||
}
|
||||
|
||||
inodeno_t next_ino() const {
|
||||
if (info.prealloc_inos.empty())
|
||||
@ -306,14 +312,17 @@ public:
|
||||
}
|
||||
|
||||
// Registers one hit (1.0) on session_cache_liveness, then pushes the
// capability's item_session_caps link onto the front of this session's
// caps list.
void touch_cap(Capability *cap) {
|
||||
session_cache_liveness.hit(1.0);
|
||||
caps.push_front(&cap->item_session_caps);
|
||||
}
|
||||
|
||||
// Registers one hit (1.0) on session_cache_liveness, then pushes the
// capability's item_session_caps link onto the back of this session's
// caps list (the opposite end from touch_cap).
void touch_cap_bottom(Capability *cap) {
|
||||
session_cache_liveness.hit(1.0);
|
||||
caps.push_back(&cap->item_session_caps);
|
||||
}
|
||||
|
||||
// Registers one hit (1.0) on session_cache_liveness, then appends the
// lease's item_session_lease link to the back of this session's leases list.
void touch_lease(ClientLease *r) {
|
||||
session_cache_liveness.hit(1.0);
|
||||
leases.push_back(&r->item_session_lease);
|
||||
}
|
||||
|
||||
@ -412,6 +421,7 @@ public:
|
||||
release_caps(g_conf().get_val<double>("mds_recall_warning_decay_rate")),
|
||||
recall_caps_throttle(g_conf().get_val<double>("mds_recall_max_decay_rate")),
|
||||
recall_caps_throttle2o(0.5),
|
||||
session_cache_liveness(g_conf().get_val<double>("mds_session_cache_liveness_decay_rate")),
|
||||
birth_time(clock::now()),
|
||||
auth_caps(g_ceph_context),
|
||||
item_session_list(this),
|
||||
|
Loading…
Reference in New Issue
Block a user