From d6a4f6adfaa75c3140d07d6df7be03586cc16183 Mon Sep 17 00:00:00 2001
From: Yehuda Sadeh <yehuda@inktank.com>
Date: Wed, 18 Dec 2013 13:10:21 -0800
Subject: [PATCH 1/4] rgw: don't return data within the librados cb

Fixes: #7030
The callback is running within a single Finisher thread, thus we
shouldn't block there. Append read data to a list and flush it within
the iterate context.

Reviewed-by: Sage Weil <sage@inktank.com>
Signed-off-by: Yehuda Sadeh <yehuda@inktank.com>
---
 src/rgw/rgw_rados.cc | 51 +++++++++++++++++++++++++++++++++++++-------
 src/rgw/rgw_rados.h  |  2 ++
 2 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc
index 37eacc0533a..a5247ffca29 100644
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -4151,6 +4151,7 @@ struct get_obj_data : public RefCountedObject {
   atomic_t cancelled;
   atomic_t err_code;
   Throttle throttle;
+  list<bufferlist> read_list;
 
   get_obj_data(CephContext *_cct)
     : cct(_cct),
@@ -4338,14 +4339,7 @@ void RGWRados::get_obj_aio_completion_cb(completion_t c, void *arg)
     goto done_unlock;
   }
 
-  for (iter = bl_list.begin(); iter != bl_list.end(); ++iter) {
-    bufferlist& bl = *iter;
-    int r = d->client_cb->handle_data(bl, 0, bl.length());
-    if (r < 0) {
-      d->set_cancelled(r);
-      break;
-    }
-  }
+  d->read_list.splice(d->read_list.end(), bl_list);
 
 done_unlock:
   d->data_lock.Unlock();
@@ -4354,6 +4348,37 @@ done:
   return;
 }
 
+int RGWRados::flush_read_list(struct get_obj_data *d)
+{
+  d->data_lock.Lock();
+  list<bufferlist> l;
+  l.swap(d->read_list);
+  d->get();
+  d->read_list.clear();
+
+  d->data_lock.Unlock();
+
+  int r = 0;
+
+  list<bufferlist>::iterator iter;
+  for (iter = l.begin(); iter != l.end(); ++iter) {
+    bufferlist& bl = *iter;
+    r = d->client_cb->handle_data(bl, 0, bl.length());
+    if (r < 0) {
+      dout(0) << "ERROR: flush_read_list(): d->client_c->handle_data() returned " << r << dendl;
+      break;
+    }
+  }
+
+  d->data_lock.Lock();
+  d->put();
+  if (r < 0) {
+    d->set_cancelled(r);
+  }
+  d->data_lock.Unlock();
+  return r;
+}
+
 int RGWRados::get_obj_iterate_cb(void *ctx, RGWObjState *astate,
 		         rgw_obj& obj,
 			 off_t obj_ofs,
@@ -4398,6 +4423,10 @@ int RGWRados::get_obj_iterate_cb(void *ctx, RGWObjState *astate,
     }
   }
 
+  r = flush_read_list(d);
+  if (r < 0)
+    return r;
+
   get_obj_bucket_and_oid_key(obj, bucket, oid, key);
 
   d->throttle.get(len);
@@ -4458,6 +4487,12 @@ int RGWRados::get_obj_iterate(void *ctx, void **handle, rgw_obj& obj,
       data->cancel_all_io();
       break;
     }
+    r = flush_read_list(data);
+    if (r < 0) {
+      dout(10) << "get_obj_iterate() r=" << r << ", canceling all io" << dendl;
+      data->cancel_all_io();
+      break;
+    }
   }
 
 done:
diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h
index 476572ce3f6..b0b39ecc2fd 100644
--- a/src/rgw/rgw_rados.h
+++ b/src/rgw/rgw_rados.h
@@ -1263,6 +1263,8 @@ public:
                       off_t ofs, off_t end,
 	              RGWGetDataCB *cb);
 
+  int flush_read_list(struct get_obj_data *d);
+
   int get_obj_iterate_cb(void *ctx, RGWObjState *astate,
                          rgw_obj& obj,
                          off_t obj_ofs, off_t read_ofs, off_t len,

From c8890ab2d46fe8e12200a0d2f9eab31c461fb871 Mon Sep 17 00:00:00 2001
From: Yehuda Sadeh <yehuda@inktank.com>
Date: Wed, 18 Dec 2013 13:11:01 -0800
Subject: [PATCH 2/4] rgw: fix use-after-free when releasing completion handle

Backport: emperor, dumpling
Signed-off-by: Yehuda Sadeh <yehuda@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
---
 src/rgw/rgw_rados.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc
index a5247ffca29..ac9503a306f 100644
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -4187,7 +4187,6 @@ struct get_obj_data : public RefCountedObject {
 
     c->wait_for_complete_and_cb();
     int r = c->get_return_value();
-    c->release();
 
     lock.Lock();
     completion_map.erase(cur_ofs);
@@ -4196,6 +4195,8 @@ struct get_obj_data : public RefCountedObject {
       *done = true;
     }
     lock.Unlock();
+
+    c->release();
     
     return r;
   }

From 0bd5cb65ef1357110b791b83787dd7e05384b47e Mon Sep 17 00:00:00 2001
From: David Zafman <david.zafman@inktank.com>
Date: Mon, 16 Dec 2013 22:08:07 -0800
Subject: [PATCH 3/4] Add backward comptible acting set until all OSDs updated

Add configuration variable to override compatible acting set handling.
Later we'll check the osdmap that all OSDs are updated to use new acting sets.

Fixes: #6990

Signed-off-by: David Zafman <david.zafman@inktank.com>
Reviewed-by: Samuel Just <sam.just@inktank.com>
(cherry picked from commit 19cff890eb6083eefdb7b709773313b2c8acbcea)
---
 src/common/config_opts.h |  3 +++
 src/osd/PG.cc            | 15 ++++++++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index c51f7e3cd15..2eb31b95617 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -524,6 +524,9 @@ OPTION(osd_max_object_size, OPT_U64, 100*1024L*1024L*1024L) // OSD's maximum obj
 OPTION(osd_max_attr_size, OPT_U64, 0)
 
 OPTION(osd_objectstore, OPT_STR, "filestore")  // ObjectStore backend type
+// Override maintaining compatibility with older OSDs
+// Set to true for testing.  Users should NOT set this.
+OPTION(osd_debug_override_acting_compat, OPT_BOOL, false)
 
 /// filestore wb throttle limits
 OPTION(filestore_wbthrottle_enable, OPT_BOOL, true)
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 17337803a23..78e77064259 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -1018,6 +1018,18 @@ bool PG::choose_acting(int& newest_update_osd)
     return false;
   }
 
+  // TODO: Add check of osdmap for all OSDs to be able to handle new acting
+  // Determine if compatibility needed
+  bool compat_mode = !cct->_conf->osd_debug_override_acting_compat;
+
+  if (compat_mode) {
+    // May not be necessary, but the old mechanism only did one at a time
+    if (!backfill.empty())
+      backfill.resize(1);
+
+    want.insert(want.end(), backfill.begin(), backfill.end());
+  }
+
   if (want != acting) {
     dout(10) << "choose_acting want " << want << " != acting " << acting
 	     << ", requesting pg_temp change" << dendl;
@@ -1038,7 +1050,8 @@ bool PG::choose_acting(int& newest_update_osd)
   // we've accepted the acting set.  Now we can create
   // actingbackfill and backfill_targets vectors.
   actingbackfill = acting;
-  actingbackfill.insert(actingbackfill.end(), backfill.begin(), backfill.end());
+  if (!compat_mode)
+    actingbackfill.insert(actingbackfill.end(), backfill.begin(), backfill.end());
   assert(backfill_targets.empty() || backfill_targets == backfill);
   if (backfill_targets.empty()) {
     backfill_targets = backfill;

From 8879e439213006fa1fab2f62cbe11c8e56a8bcbc Mon Sep 17 00:00:00 2001
From: David Zafman <david.zafman@inktank.com>
Date: Thu, 19 Dec 2013 14:37:28 -0800
Subject: [PATCH 4/4] osd: Fix assert which doesn't apply when compat_mode on

Signed-off-by: David Zafman <david.zafman@inktank.com>
Reviewed-by: Samuel Just <sam.just@inktank.com>
(cherry picked from commit edaec9a8361396bd4c12814c16610669694b5b6c)
---
 src/osd/PG.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 78e77064259..6a26dd4e976 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -1038,7 +1038,7 @@ bool PG::choose_acting(int& newest_update_osd)
     if (want == up) {
       // There can't be any pending backfill if
       // want is the same as crush map up OSDs.
-      assert(backfill.empty());
+      assert(compat_mode || backfill.empty());
       vector<int> empty;
       osd->queue_want_pg_temp(info.pgid, empty);
     } else