Merge pull request #11373 from ukernel/wip-mds-recall-warning

mds: fix false "failing to respond to cache pressure" warning

Reviewed-by: Greg Farnum <gfarnum@redhat.com>
Reviewed-by: John Spray <john.spray@redhat.com>
This commit is contained in:
John Spray 2016-10-19 12:19:48 +02:00 committed by GitHub
commit c2d0235ce5
5 changed files with 26 additions and 10 deletions

View File

@@ -385,8 +385,10 @@ void Beacon::notify_health(MDSRank const *mds)
{
set<Session*> sessions;
mds->sessionmap.get_client_session_set(sessions);
utime_t cutoff = ceph_clock_now(g_ceph_context);
cutoff -= g_conf->mds_recall_state_timeout;
utime_t last_recall = mds->mdcache->last_recall_state;
std::list<MDSHealthMetric> late_recall_metrics;
std::list<MDSHealthMetric> large_completed_requests_metrics;
@@ -396,7 +398,10 @@ void Beacon::notify_health(MDSRank const *mds)
dout(20) << "Session servicing RECALL " << session->info.inst
<< ": " << session->recalled_at << " " << session->recall_release_count
<< "/" << session->recall_count << dendl;
if (session->recalled_at < cutoff) {
if (last_recall < cutoff || session->last_recall_sent < last_recall) {
dout(20) << " no longer recall" << dendl;
session->clear_recalled_at();
} else if (session->recalled_at < cutoff) {
dout(20) << " exceeded timeout " << session->recalled_at << " vs. " << cutoff << dendl;
std::ostringstream oss;
oss << "Client " << session->get_human_name() << " failing to respond to cache pressure";

View File

@@ -7334,10 +7334,11 @@ void MDCache::check_memory_usage()
if (num_inodes_with_caps > g_conf->mds_cache_size) {
float ratio = (float)g_conf->mds_cache_size * .9 / (float)num_inodes_with_caps;
if (ratio < 1.0)
if (ratio < 1.0) {
last_recall_state = ceph_clock_now(g_ceph_context);
mds->server->recall_client_state(ratio);
}
}
}

View File

@@ -693,6 +693,8 @@ public:
void trim_client_leases();
void check_memory_usage();
utime_t last_recall_state;
// shutdown
void shutdown_start();
void shutdown_check();

View File

@@ -789,11 +789,8 @@ void Session::notify_cap_release(size_t n_caps)
{
if (!recalled_at.is_zero()) {
recall_release_count += n_caps;
if (recall_release_count >= recall_count) {
recalled_at = utime_t();
recall_count = 0;
recall_release_count = 0;
}
if (recall_release_count >= recall_count)
clear_recalled_at();
}
}
@@ -808,13 +805,22 @@ void Session::notify_recall_sent(int const new_limit)
if (recalled_at.is_zero()) {
// Entering recall phase, set up counters so we can later
// judge whether the client has respected the recall request
recalled_at = ceph_clock_now(g_ceph_context);
recalled_at = last_recall_sent = ceph_clock_now(g_ceph_context);
assert (new_limit < caps.size()); // Behaviour of Server::recall_client_state
recall_count = caps.size() - new_limit;
recall_release_count = 0;
} else {
last_recall_sent = ceph_clock_now(g_ceph_context);
}
}
void Session::clear_recalled_at()
{
recalled_at = last_recall_sent = utime_t();
recall_count = 0;
recall_release_count = 0;
}
void Session::set_client_metadata(map<string, string> const &meta)
{
info.client_metadata = meta;

View File

@@ -129,6 +129,7 @@ public:
// Ephemeral state for tracking progress of capability recalls
utime_t recalled_at; // When was I asked to SESSION_RECALL?
utime_t last_recall_sent;
uint32_t recall_count; // How many caps was I asked to SESSION_RECALL?
uint32_t recall_release_count; // How many caps have I actually revoked?
@@ -148,6 +149,7 @@ public:
void notify_cap_release(size_t n_caps);
void notify_recall_sent(int const new_limit);
void clear_recalled_at();
inodeno_t next_ino() {
if (info.prealloc_inos.empty())
@@ -315,7 +317,7 @@ public:
Session() :
state(STATE_CLOSED), state_seq(0), importing_count(0),
recalled_at(), recall_count(0), recall_release_count(0),
recall_count(0), recall_release_count(0),
auth_caps(g_ceph_context),
connection(NULL), item_session_list(this),
requests(0), // member_offset passed to front() manually