From 69940e27173e9897006ea103528c49cfda11e767 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Mon, 20 Dec 2010 13:22:49 -0800
Subject: [PATCH] osd: compensate for replicas with tail > last_complete

Normally we shouldn't ever have last_complete < log.tail on a replica that
has no backlog.  But it can happen (e.g. because of old bugs; see #590).
In that case, the primary can compensate by sending more log info to the
replica.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 src/osd/PG.cc | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 0a61d128cb3..7175e13b7f1 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -1855,13 +1855,24 @@ void PG::activate(ObjectStore::Transaction& t, list<Context*>& tfin,
       } 
       else {
 	m = new MOSDPGLog(osd->osdmap->get_epoch(), info);
-	if (pi.last_update < log.tail) {
-	  // summary/backlog
+	if (pi.log_tail > pi.last_complete && !pi.log_backlog) {
+	  // the replica's tail is after its last_complete and it has no backlog.
+	  // ick, this shouldn't normally happen.  but we can compensate!
+	  dout(10) << "activate peer osd" << peer << " has last_complete < log tail and no backlog, compensating" << dendl;
+	  if (log.tail >= pi.last_complete) {
+	    // _our_ log should be sufficient, phew!  NOTE(review): that needs log.tail <= pi.last_complete, but the test above is >= -- confirm.
+	    m->log.copy_after(log, pi.last_complete);
+	  } else {
+	    assert(log.backlog);
+	    m->log = log;
+	  }
+	} else if (log.tail > pi.last_update) {
+	  // our tail is too new; send the full backlog.
 	  assert(log.backlog);
 	  m->log = log;
 	} else {
-	  // incremental log
-	  assert(pi.last_update < info.last_update);
+	  // send new stuff to append to the replica's log
+	  assert(info.last_update > pi.last_update);
 	  m->log.copy_after(log, pi.last_update);
 	}
       }