mirror of
https://github.com/ceph/ceph
synced 2025-04-11 04:02:04 +00:00
Merge pull request #11373 from ukernel/wip-mds-recall-warning
mds: fix false "failing to respond to cache pressure" warning

Reviewed-by: Greg Farnum <gfarnum@redhat.com>
Reviewed-by: John Spray <john.spray@redhat.com>
This commit is contained in:
commit
c2d0235ce5
@ -385,8 +385,10 @@ void Beacon::notify_health(MDSRank const *mds)
|
|||||||
{
|
{
|
||||||
set<Session*> sessions;
|
set<Session*> sessions;
|
||||||
mds->sessionmap.get_client_session_set(sessions);
|
mds->sessionmap.get_client_session_set(sessions);
|
||||||
|
|
||||||
utime_t cutoff = ceph_clock_now(g_ceph_context);
|
utime_t cutoff = ceph_clock_now(g_ceph_context);
|
||||||
cutoff -= g_conf->mds_recall_state_timeout;
|
cutoff -= g_conf->mds_recall_state_timeout;
|
||||||
|
utime_t last_recall = mds->mdcache->last_recall_state;
|
||||||
|
|
||||||
std::list<MDSHealthMetric> late_recall_metrics;
|
std::list<MDSHealthMetric> late_recall_metrics;
|
||||||
std::list<MDSHealthMetric> large_completed_requests_metrics;
|
std::list<MDSHealthMetric> large_completed_requests_metrics;
|
||||||
@ -396,7 +398,10 @@ void Beacon::notify_health(MDSRank const *mds)
|
|||||||
dout(20) << "Session servicing RECALL " << session->info.inst
|
dout(20) << "Session servicing RECALL " << session->info.inst
|
||||||
<< ": " << session->recalled_at << " " << session->recall_release_count
|
<< ": " << session->recalled_at << " " << session->recall_release_count
|
||||||
<< "/" << session->recall_count << dendl;
|
<< "/" << session->recall_count << dendl;
|
||||||
if (session->recalled_at < cutoff) {
|
if (last_recall < cutoff || session->last_recall_sent < last_recall) {
|
||||||
|
dout(20) << " no longer recall" << dendl;
|
||||||
|
session->clear_recalled_at();
|
||||||
|
} else if (session->recalled_at < cutoff) {
|
||||||
dout(20) << " exceeded timeout " << session->recalled_at << " vs. " << cutoff << dendl;
|
dout(20) << " exceeded timeout " << session->recalled_at << " vs. " << cutoff << dendl;
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
oss << "Client " << session->get_human_name() << " failing to respond to cache pressure";
|
oss << "Client " << session->get_human_name() << " failing to respond to cache pressure";
|
||||||
|
@ -7334,10 +7334,11 @@ void MDCache::check_memory_usage()
|
|||||||
|
|
||||||
if (num_inodes_with_caps > g_conf->mds_cache_size) {
|
if (num_inodes_with_caps > g_conf->mds_cache_size) {
|
||||||
float ratio = (float)g_conf->mds_cache_size * .9 / (float)num_inodes_with_caps;
|
float ratio = (float)g_conf->mds_cache_size * .9 / (float)num_inodes_with_caps;
|
||||||
if (ratio < 1.0)
|
if (ratio < 1.0) {
|
||||||
|
last_recall_state = ceph_clock_now(g_ceph_context);
|
||||||
mds->server->recall_client_state(ratio);
|
mds->server->recall_client_state(ratio);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -693,6 +693,8 @@ public:
|
|||||||
void trim_client_leases();
|
void trim_client_leases();
|
||||||
void check_memory_usage();
|
void check_memory_usage();
|
||||||
|
|
||||||
|
utime_t last_recall_state;
|
||||||
|
|
||||||
// shutdown
|
// shutdown
|
||||||
void shutdown_start();
|
void shutdown_start();
|
||||||
void shutdown_check();
|
void shutdown_check();
|
||||||
|
@ -789,11 +789,8 @@ void Session::notify_cap_release(size_t n_caps)
|
|||||||
{
|
{
|
||||||
if (!recalled_at.is_zero()) {
|
if (!recalled_at.is_zero()) {
|
||||||
recall_release_count += n_caps;
|
recall_release_count += n_caps;
|
||||||
if (recall_release_count >= recall_count) {
|
if (recall_release_count >= recall_count)
|
||||||
recalled_at = utime_t();
|
clear_recalled_at();
|
||||||
recall_count = 0;
|
|
||||||
recall_release_count = 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -808,13 +805,22 @@ void Session::notify_recall_sent(int const new_limit)
|
|||||||
if (recalled_at.is_zero()) {
|
if (recalled_at.is_zero()) {
|
||||||
// Entering recall phase, set up counters so we can later
|
// Entering recall phase, set up counters so we can later
|
||||||
// judge whether the client has respected the recall request
|
// judge whether the client has respected the recall request
|
||||||
recalled_at = ceph_clock_now(g_ceph_context);
|
recalled_at = last_recall_sent = ceph_clock_now(g_ceph_context);
|
||||||
assert (new_limit < caps.size()); // Behaviour of Server::recall_client_state
|
assert (new_limit < caps.size()); // Behaviour of Server::recall_client_state
|
||||||
recall_count = caps.size() - new_limit;
|
recall_count = caps.size() - new_limit;
|
||||||
recall_release_count = 0;
|
recall_release_count = 0;
|
||||||
|
} else {
|
||||||
|
last_recall_sent = ceph_clock_now(g_ceph_context);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Session::clear_recalled_at()
|
||||||
|
{
|
||||||
|
recalled_at = last_recall_sent = utime_t();
|
||||||
|
recall_count = 0;
|
||||||
|
recall_release_count = 0;
|
||||||
|
}
|
||||||
|
|
||||||
void Session::set_client_metadata(map<string, string> const &meta)
|
void Session::set_client_metadata(map<string, string> const &meta)
|
||||||
{
|
{
|
||||||
info.client_metadata = meta;
|
info.client_metadata = meta;
|
||||||
|
@ -129,6 +129,7 @@ public:
|
|||||||
|
|
||||||
// Ephemeral state for tracking progress of capability recalls
|
// Ephemeral state for tracking progress of capability recalls
|
||||||
utime_t recalled_at; // When was I asked to SESSION_RECALL?
|
utime_t recalled_at; // When was I asked to SESSION_RECALL?
|
||||||
|
utime_t last_recall_sent;
|
||||||
uint32_t recall_count; // How many caps was I asked to SESSION_RECALL?
|
uint32_t recall_count; // How many caps was I asked to SESSION_RECALL?
|
||||||
uint32_t recall_release_count; // How many caps have I actually revoked?
|
uint32_t recall_release_count; // How many caps have I actually revoked?
|
||||||
|
|
||||||
@ -148,6 +149,7 @@ public:
|
|||||||
|
|
||||||
void notify_cap_release(size_t n_caps);
|
void notify_cap_release(size_t n_caps);
|
||||||
void notify_recall_sent(int const new_limit);
|
void notify_recall_sent(int const new_limit);
|
||||||
|
void clear_recalled_at();
|
||||||
|
|
||||||
inodeno_t next_ino() {
|
inodeno_t next_ino() {
|
||||||
if (info.prealloc_inos.empty())
|
if (info.prealloc_inos.empty())
|
||||||
@ -315,7 +317,7 @@ public:
|
|||||||
|
|
||||||
Session() :
|
Session() :
|
||||||
state(STATE_CLOSED), state_seq(0), importing_count(0),
|
state(STATE_CLOSED), state_seq(0), importing_count(0),
|
||||||
recalled_at(), recall_count(0), recall_release_count(0),
|
recall_count(0), recall_release_count(0),
|
||||||
auth_caps(g_ceph_context),
|
auth_caps(g_ceph_context),
|
||||||
connection(NULL), item_session_list(this),
|
connection(NULL), item_session_list(this),
|
||||||
requests(0), // member_offset passed to front() manually
|
requests(0), // member_offset passed to front() manually
|
||||||
|
Loading…
Reference in New Issue
Block a user