osd: eliminate CRASHED state

This was an intermediate state that indicated that replay would be needed.
It was poorly named, and not very useful.  Instead, just set the REPLAY
bit if we need replay, and then do it.  No need for a separate CRASHED.

Signed-off-by: Sage Weil <sage@newdream.net>
Sage Weil, 2011-10-21 14:44:56 -07:00
commit cf6a9404f0 (parent d6661f9393)
4 changed files with 6 additions and 11 deletions
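
For orientation, the flow that remains after this change can be sketched as follows. This is an illustrative, self-contained C++ sketch only; ReplaySketch, replay_window, and the double timestamps are stand-ins for the real PG/OSD types that appear in the hunks below.

// Minimal sketch of the post-commit flow (stand-ins only, not the real PG/OSD code).
#include <cstdio>

constexpr int PG_STATE_REPLAY = 1 << 5;    // value matches osd_types.h below

struct ReplaySketch {
  int state = 0;
  double replay_until = 0.0;               // stand-in for utime_t
  int replay_window = 45;                  // stand-in for g_conf->osd_replay_window

  void state_set(int m)        { state |= m; }
  void state_clear(int m)      { state &= ~m; }
  bool is_replay() const       { return (state & PG_STATE_REPLAY) != 0; }

  // build_prior(): a crashed prior interval now sets REPLAY directly.
  void build_prior(bool prior_crashed) {
    if (prior_crashed)
      state_set(PG_STATE_REPLAY);          // previously: state_set(PG_STATE_CRASHED)
  }

  // activate(): consume REPLAY -- open a replay window, or drop the bit if
  // the window is disabled.
  void activate(double now) {
    if (is_replay()) {
      if (replay_window > 0)
        replay_until = now + replay_window;
      else
        state_clear(PG_STATE_REPLAY);
    }
  }

  // replay_queued_ops(): replay finished, clear REPLAY.
  void replay_queued_ops() { state_clear(PG_STATE_REPLAY); }
};

int main() {
  ReplaySketch pg;
  pg.build_prior(true);                    // prior interval crashed
  pg.activate(100.0);
  std::printf("replay until %.0f, is_replay=%d\n", pg.replay_until, (int)pg.is_replay());
  pg.replay_queued_ops();
  std::printf("after replay, is_replay=%d\n", (int)pg.is_replay());
  return 0;
}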


@@ -1045,7 +1045,7 @@ void PG::build_prior(std::auto_ptr<PriorSet> &prior_set)
   PriorSet &prior(*prior_set.get());
   if (prior.crashed) {
-    state_set(PG_STATE_CRASHED);
+    state_set(PG_STATE_REPLAY);
   }
   if (prior.pg_down) {
     state_set(PG_STATE_DOWN);
@@ -1327,19 +1327,20 @@ void PG::activate(ObjectStore::Transaction& t, list<Context*>& tfin,
                   map<int, MOSDPGInfo*> *activator_map)
 {
   assert(!is_active());
   // -- crash recovery?
-  if (is_crashed()) {
+  if (is_replay()) {
     if (g_conf->osd_replay_window > 0) {
       replay_until = ceph_clock_now(g_ceph_context);
       replay_until += g_conf->osd_replay_window;
       dout(10) << "crashed, allowing op replay for " << g_conf->osd_replay_window
                << " until " << replay_until << dendl;
-      state_set(PG_STATE_REPLAY);
       osd->replay_queue_lock.Lock();
       osd->replay_queue.push_back(pair<pg_t,utime_t>(info.pgid, replay_until));
       osd->replay_queue_lock.Unlock();
     } else {
       dout(10) << "crashed, but osd_replay_window=0. skipping replay." << dendl;
+      state_clear(PG_STATE_REPLAY);
     }
   }
@@ -1348,7 +1349,6 @@ void PG::activate(ObjectStore::Transaction& t, list<Context*>& tfin,
   state_clear(PG_STATE_STRAY);
   state_clear(PG_STATE_DOWN);
   state_clear(PG_STATE_PEERING);
-  state_clear(PG_STATE_CRASHED);
   if (is_primary() &&
       osd->osdmap->get_pg_size(info.pgid) != acting.size())
     state_set(PG_STATE_DEGRADED);
@@ -1534,7 +1534,7 @@ void PG::activate(ObjectStore::Transaction& t, list<Context*>& tfin,
 void PG::replay_queued_ops()
 {
-  assert(is_replay() && is_active() && !is_crashed());
+  assert(is_replay() && is_active());
   eversion_t c = info.last_update;
   list<Message*> replay;
   dout(10) << "replay_queued_ops" << dendl;
@@ -1556,7 +1556,7 @@ void PG::replay_queued_ops()
   replay_queue.clear();
   osd->requeue_ops(this, replay);
   osd->requeue_ops(this, waiting_for_active);
   state_clear(PG_STATE_REPLAY);
   update_stats();
 }
@@ -3430,7 +3430,6 @@ void PG::start_peering_interval(const OSDMap *lastmap,
   state_clear(PG_STATE_PEERING);  // we'll need to restart peering
   state_clear(PG_STATE_DEGRADED);
   state_clear(PG_STATE_REPLAY);
-  state_clear(PG_STATE_CRASHED);
   osd->cancel_generate_backlog(this);


@@ -1592,7 +1592,6 @@ public:
   int get_state() const { return state; }
   bool is_active() const { return state_test(PG_STATE_ACTIVE); }
   bool is_peering() const { return state_test(PG_STATE_PEERING); }
-  bool is_crashed() const { return state_test(PG_STATE_CRASHED); }
   bool is_down() const { return state_test(PG_STATE_DOWN); }
   bool is_replay() const { return state_test(PG_STATE_REPLAY); }
   bool is_clean() const { return state_test(PG_STATE_CLEAN); }


@@ -170,8 +170,6 @@ std::string pg_state_string(int state)
     oss << "active+";
   if (state & PG_STATE_CLEAN)
     oss << "clean+";
-  if (state & PG_STATE_CRASHED)
-    oss << "crashed+";
   if (state & PG_STATE_DOWN)
     oss << "down+";
   if (state & PG_STATE_REPLAY)


@@ -492,7 +492,6 @@ inline ostream& operator<<(ostream& out, const osd_stat_t& s) {
 #define PG_STATE_CREATING   (1<<0)  // creating
 #define PG_STATE_ACTIVE     (1<<1)  // i am active. (primary: replicas too)
 #define PG_STATE_CLEAN      (1<<2)  // peers are complete, clean of stray replicas.
-#define PG_STATE_CRASHED    (1<<3)  // all replicas went down, clients needs to replay
 #define PG_STATE_DOWN       (1<<4)  // a needed replica is down, PG offline
 #define PG_STATE_REPLAY     (1<<5)  // crashed, waiting for replay
 #define PG_STATE_STRAY      (1<<6)  // i must notify the primary i exist.