From b540b4299d6cc62f7aa83db81cc19658596b45df Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@redhat.com>
Date: Tue, 16 May 2017 15:10:09 -0400
Subject: [PATCH 1/3] mon/OSDMonitor: introduce debug option to allow filestore
 for ec overwrites

Signed-off-by: Sage Weil <sage@redhat.com>
---
 src/common/config_opts.h | 1 +
 src/mon/OSDMonitor.cc    | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index 405aeacf6d8..79a2e19c255 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -383,6 +383,7 @@ OPTION(mon_debug_dump_transactions, OPT_BOOL, false)
 OPTION(mon_debug_dump_json, OPT_BOOL, false)
 OPTION(mon_debug_dump_location, OPT_STR, "/var/log/ceph/$cluster-$name.tdump")
 OPTION(mon_debug_no_require_luminous, OPT_BOOL, false)
+OPTION(mon_debug_no_require_bluestore_for_ec_overwrites, OPT_BOOL, false)
 OPTION(mon_inject_transaction_delay_max, OPT_DOUBLE, 10.0)      // seconds
 OPTION(mon_inject_transaction_delay_probability, OPT_DOUBLE, 0) // range [0, 1]
 
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 9b82b4a9f99..ea045056530 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -5994,7 +5994,8 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
       return -EINVAL;
     }
     stringstream err;
-    if (!is_pool_currently_all_bluestore(pool, p, &err)) {
+    if (!g_conf->mon_debug_no_require_bluestore_for_ec_overwrites &&
+	!is_pool_currently_all_bluestore(pool, p, &err)) {
       ss << "pool must only be stored on bluestore for scrubbing to work: " << err.str();
       return -EINVAL;
     }

From 234a989de4ed754ddea644df504eabdbb8f6c0c5 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@redhat.com>
Date: Tue, 16 May 2017 17:33:48 -0400
Subject: [PATCH 2/3] osd: encode snaps more efficiently

1. Encode into a pre-sized buffer.
2. Do not needlessly copy the set<> to a vector<> before encoding.
set<> and vector<> encode identically, and because the vector<> was filled
by iterating the sorted set<>, the element order does not change either.

Signed-off-by: Sage Weil <sage@redhat.com>
---
 src/osd/ECTransaction.cc     | 7 +++----
 src/osd/ReplicatedBackend.cc | 7 +++----
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/osd/ECTransaction.cc b/src/osd/ECTransaction.cc
index 31197786642..91fbf5a58d9 100644
--- a/src/osd/ECTransaction.cc
+++ b/src/osd/ECTransaction.cc
@@ -166,10 +166,9 @@ void ECTransaction::generate_transactions(
       if (entry &&
 	  entry->is_modify() &&
 	  op.updated_snaps) {
-	vector<snapid_t> snaps(
-	  op.updated_snaps->second.begin(),
-	  op.updated_snaps->second.end());
-	::encode(snaps, entry->snaps);
+	bufferlist bl(op.updated_snaps->second.size() * 8 + 8);
+	::encode(op.updated_snaps->second, bl);
+	entry->snaps.swap(bl);
       }
 
       ldpp_dout(dpp, 20) << "generate_transactions: "
diff --git a/src/osd/ReplicatedBackend.cc b/src/osd/ReplicatedBackend.cc
index def68de4227..d51506df2ef 100644
--- a/src/osd/ReplicatedBackend.cc
+++ b/src/osd/ReplicatedBackend.cc
@@ -358,10 +358,9 @@ void generate_transaction(
     le.mark_unrollbackable();
     auto oiter = pgt->op_map.find(le.soid);
     if (oiter != pgt->op_map.end() && oiter->second.updated_snaps) {
-      vector<snapid_t> snaps(
-	oiter->second.updated_snaps->second.begin(),
-	oiter->second.updated_snaps->second.end());
-      ::encode(snaps, le.snaps);
+      bufferlist bl(oiter->second.updated_snaps->second.size() * 8 + 8);
+      ::encode(oiter->second.updated_snaps->second, bl);
+      le.snaps.swap(bl);
     }
   }
 

From 9da684316630ac1c087e03ca6ec039bd4222c0bd Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@redhat.com>
Date: Tue, 16 May 2017 17:31:07 -0400
Subject: [PATCH 3/3] osd/PGLog: avoid pinning large buffers with ObjectModDesc

The trim_bl() call was accidentally removed by
5e0ec06376f832d32a6b1af390f925a59b03798f.

Signed-off-by: Sage Weil <sage@redhat.com>
---
 src/osd/PGLog.h     | 3 +++
 src/osd/osd_types.h | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/osd/PGLog.h b/src/osd/PGLog.h
index e4d0fa304ab..d4dfc450a42 100644
--- a/src/osd/PGLog.h
+++ b/src/osd/PGLog.h
@@ -440,6 +440,9 @@ public:
 	assert(get_can_rollback_to() == head);
       }
 
+      // make sure our buffers don't pin bigger buffers
+      e.mod_desc.trim_bl();
+
       // add to log
       log.push_back(e);
 
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index 68804cad151..0f1856c60cf 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -3225,7 +3225,7 @@ public:
    * in the case that bl contains ptrs which point into a much larger
    * message buffer
    */
-  void trim_bl() {
+  void trim_bl() const {
     if (bl.length() > 0)
       bl.rebuild();
   }