Merge pull request #40835 from gregsfortytwo/wip-stretch-mon-state

Fix issues with in-memory monitor stretch state

Reviewed-by: Sam Just <sjust@redhat.com>
This commit is contained in:
Gregory Farnum 2021-04-13 15:38:01 -07:00 committed by GitHub
commit 05861cabb2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 61 additions and 10 deletions

View File

@ -6527,7 +6527,7 @@ void Monitor::notify_new_monmap(bool can_change_external_state)
}
if (monmap->stretch_mode_enabled) {
maybe_engage_stretch_mode();
try_engage_stretch_mode();
}
if (is_stretch_mode()) {
@ -6561,10 +6561,10 @@ struct CMonEnableStretchMode : public Context {
Monitor *m;
CMonEnableStretchMode(Monitor *mon) : m(mon) {}
void finish(int r) {
m->maybe_engage_stretch_mode();
m->try_engage_stretch_mode();
}
};
void Monitor::maybe_engage_stretch_mode()
void Monitor::try_engage_stretch_mode()
{
dout(20) << __func__ << dendl;
if (stretch_mode_engaged) return;
@ -6655,8 +6655,14 @@ void Monitor::go_recovery_stretch_mode()
if (!osdmon()->is_writeable()) {
osdmon()->wait_for_writeable_ctx(new CMonGoRecovery(this));
}
osdmon()->trigger_recovery_stretch_mode();
}
/**
 * Record in memory that the cluster is in recovery stretch mode.
 *
 * This only flips the Monitor's in-memory flags and forwards to
 * OSDMonitor::set_recovery_stretch_mode(). It deliberately does NOT call
 * OSDMonitor::trigger_recovery_stretch_mode(): that call proposes a map
 * change and belongs to go_recovery_stretch_mode(). Because this setter is
 * invoked from OSDMonitor::update_from_paxos(), triggering here would
 * propose again each time a recovering map is processed.
 */
void Monitor::set_recovery_stretch_mode()
{
  degraded_stretch_mode = true;
  recovering_stretch_mode = true;
  osdmon()->set_recovery_stretch_mode();
}
void Monitor::maybe_go_degraded_stretch_mode()
@ -6717,6 +6723,7 @@ void Monitor::set_degraded_stretch_mode()
{
degraded_stretch_mode = true;
recovering_stretch_mode = false;
osdmon()->set_degraded_stretch_mode();
}
struct CMonGoHealthy : public Context {
@ -6741,7 +6748,6 @@ void Monitor::trigger_healthy_stretch_mode()
}
ceph_assert(osdmon()->osdmap.recovering_stretch_mode);
set_healthy_stretch_mode();
osdmon()->trigger_healthy_stretch_mode();
monmon()->trigger_healthy_stretch_mode();
}
@ -6750,6 +6756,7 @@ void Monitor::set_healthy_stretch_mode()
{
degraded_stretch_mode = false;
recovering_stretch_mode = false;
osdmon()->set_healthy_stretch_mode();
}
bool Monitor::session_stretch_allowed(MonSession *s, MonOpRequestRef& op)

View File

@ -266,12 +266,26 @@ public:
/** Accessors for the in-memory stretch-mode flags. */
/** Returns the stretch_mode_engaged flag. */
bool is_stretch_mode() { return stretch_mode_engaged; }
/** Returns the degraded_stretch_mode flag. */
bool is_degraded_stretch_mode() { return degraded_stretch_mode; }
/** Returns the recovering_stretch_mode flag. */
bool is_recovering_stretch_mode() { return recovering_stretch_mode; }
void maybe_engage_stretch_mode();
/**
* This set of functions maintains the in-memory stretch state
* and sets up transitions of the map states by calling in to
* MonmapMonitor and OSDMonitor.
*
* The [maybe_]go_* functions are called on the leader to
* decide if transitions should happen; the trigger_* functions
* set up the map transitions; and the set_* functions actually
* change the memory state -- but these are only called
* via OSDMonitor::update_from_paxos, to guarantee consistent
* updates across the entire cluster.
*/
void try_engage_stretch_mode();
void maybe_go_degraded_stretch_mode();
void trigger_degraded_stretch_mode(const set<string>& dead_mons,
const set<int>& dead_buckets);
void set_degraded_stretch_mode();
void go_recovery_stretch_mode();
void set_recovery_stretch_mode();
void trigger_healthy_stretch_mode();
void set_healthy_stretch_mode();
void enable_stretch_mode();

View File

@ -942,7 +942,7 @@ void OSDMonitor::update_from_paxos(bool *need_bootstrap)
}
if (osdmap.stretch_mode_enabled) {
dout(20) << "Stretch mode enabled in this map" << dendl;
mon.maybe_engage_stretch_mode();
mon.try_engage_stretch_mode();
if (osdmap.degraded_stretch_mode) {
dout(20) << "Degraded stretch mode set in this map" << dendl;
if (!osdmap.recovering_stretch_mode) {
@ -956,16 +956,17 @@ void OSDMonitor::update_from_paxos(bool *need_bootstrap)
dout(10) << "Enabling recovery stretch mode in this map" << dendl;
mon.go_recovery_stretch_mode();
}
} else {
mon.set_recovery_stretch_mode();
}
} else {
mon.set_healthy_stretch_mode();
}
if (marked_osd_down &&
(!osdmap.degraded_stretch_mode || osdmap.recovering_stretch_mode)) {
dout(20) << "Checking degraded stretch mode due to osd changes" << dendl;
mon.maybe_go_degraded_stretch_mode();
}
if (osdmap.recovering_stretch_mode && stretch_recovery_triggered.is_zero()) {
stretch_recovery_triggered = ceph_clock_now();
}
}
}
@ -14478,6 +14479,23 @@ void OSDMonitor::trigger_recovery_stretch_mode()
propose_pending();
}
/**
 * Bookkeeping for the degraded stretch state: zero out
 * stretch_recovery_triggered. Nothing else is touched here.
 */
void OSDMonitor::set_degraded_stretch_mode()
{
  stretch_recovery_triggered.set_from_double(0);
}
/**
 * Bookkeeping for the recovery stretch state: stamp
 * stretch_recovery_triggered with the current time, but only if it has
 * not been set yet, so an existing timestamp is never overwritten.
 */
void OSDMonitor::set_recovery_stretch_mode()
{
  // Already stamped: keep the existing value.
  if (!stretch_recovery_triggered.is_zero()) {
    return;
  }
  stretch_recovery_triggered = ceph_clock_now();
}
/**
 * Bookkeeping for the healthy stretch state: zero out
 * stretch_recovery_triggered. Nothing else is touched here.
 */
void OSDMonitor::set_healthy_stretch_mode()
{
  stretch_recovery_triggered.set_from_double(0);
}
void OSDMonitor::notify_new_pg_digest()
{
dout(20) << __func__ << dendl;

View File

@ -827,10 +827,22 @@ public:
*/
void trigger_degraded_stretch_mode(const set<int>& dead_buckets,
const set<string>& live_zones);
/**
* This only maintains stretch_recovery_triggered (see below).
*/
void set_degraded_stretch_mode();
/**
* Set recovery stretch mode in the OSDMap, resetting pool size back to normal
*/
void trigger_recovery_stretch_mode();
/**
* This only maintains stretch_recovery_triggered (see below).
*/
void set_recovery_stretch_mode();
/**
* This only maintains stretch_recovery_triggered (see below).
*/
void set_healthy_stretch_mode();
/**
* Tells the OSD there's a new pg digest, in case it's interested.
* (It's interested when in recovering stretch mode.)