From b540b4299d6cc62f7aa83db81cc19658596b45df Mon Sep 17 00:00:00 2001 From: Sage Weil <sage@redhat.com> Date: Tue, 16 May 2017 15:10:09 -0400 Subject: [PATCH 1/3] mon/OSDMonitor: introduce debug option to allow filestore for ec overwrites Signed-off-by: Sage Weil <sage@redhat.com> --- src/common/config_opts.h | 1 + src/mon/OSDMonitor.cc | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 405aeacf6d8..79a2e19c255 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -383,6 +383,7 @@ OPTION(mon_debug_dump_transactions, OPT_BOOL, false) OPTION(mon_debug_dump_json, OPT_BOOL, false) OPTION(mon_debug_dump_location, OPT_STR, "/var/log/ceph/$cluster-$name.tdump") OPTION(mon_debug_no_require_luminous, OPT_BOOL, false) +OPTION(mon_debug_no_require_bluestore_for_ec_overwrites, OPT_BOOL, false) OPTION(mon_inject_transaction_delay_max, OPT_DOUBLE, 10.0) // seconds OPTION(mon_inject_transaction_delay_probability, OPT_DOUBLE, 0) // range [0, 1] diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 9b82b4a9f99..ea045056530 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -5994,7 +5994,8 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap, return -EINVAL; } stringstream err; - if (!is_pool_currently_all_bluestore(pool, p, &err)) { + if (!g_conf->mon_debug_no_require_bluestore_for_ec_overwrites && + !is_pool_currently_all_bluestore(pool, p, &err)) { ss << "pool must only be stored on bluestore for scrubbing to work: " << err.str(); return -EINVAL; } From 234a989de4ed754ddea644df504eabdbb8f6c0c5 Mon Sep 17 00:00:00 2001 From: Sage Weil <sage@redhat.com> Date: Tue, 16 May 2017 17:33:48 -0400 Subject: [PATCH 2/3] osd: encode snaps more efficiently 1- encode into a sized buffer. 2- do not needlessly copy the set<> to a vector<> before encoding. set<> and vector<> encode identically. 
Since the old code built the vector<> by copying the sorted set<> in iteration order, encoding the set<> directly doesn't change the element order either. Signed-off-by: Sage Weil <sage@redhat.com> --- src/osd/ECTransaction.cc | 7 +++---- src/osd/ReplicatedBackend.cc | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/osd/ECTransaction.cc b/src/osd/ECTransaction.cc index 31197786642..91fbf5a58d9 100644 --- a/src/osd/ECTransaction.cc +++ b/src/osd/ECTransaction.cc @@ -166,10 +166,9 @@ void ECTransaction::generate_transactions( if (entry && entry->is_modify() && op.updated_snaps) { - vector<snapid_t> snaps( - op.updated_snaps->second.begin(), - op.updated_snaps->second.end()); - ::encode(snaps, entry->snaps); + bufferlist bl(op.updated_snaps->second.size() * 8 + 8); + ::encode(op.updated_snaps->second, bl); + entry->snaps.swap(bl); } ldpp_dout(dpp, 20) << "generate_transactions: " diff --git a/src/osd/ReplicatedBackend.cc b/src/osd/ReplicatedBackend.cc index def68de4227..d51506df2ef 100644 --- a/src/osd/ReplicatedBackend.cc +++ b/src/osd/ReplicatedBackend.cc @@ -358,10 +358,9 @@ void generate_transaction( le.mark_unrollbackable(); auto oiter = pgt->op_map.find(le.soid); if (oiter != pgt->op_map.end() && oiter->second.updated_snaps) { - vector<snapid_t> snaps( - oiter->second.updated_snaps->second.begin(), - oiter->second.updated_snaps->second.end()); - ::encode(snaps, le.snaps); + bufferlist bl(oiter->second.updated_snaps->second.size() * 8 + 8); + ::encode(oiter->second.updated_snaps->second, bl); + le.snaps.swap(bl); } } From 9da684316630ac1c087e03ca6ec039bd4222c0bd Mon Sep 17 00:00:00 2001 From: Sage Weil <sage@redhat.com> Date: Tue, 16 May 2017 17:31:07 -0400 Subject: [PATCH 3/3] osd/PGLog: avoid pinning large buffers with ObjectModDesc Accidentally removed by 5e0ec06376f832d32a6b1af390f925a59b03798f.
Signed-off-by: Sage Weil <sage@redhat.com> --- src/osd/PGLog.h | 3 +++ src/osd/osd_types.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/osd/PGLog.h b/src/osd/PGLog.h index e4d0fa304ab..d4dfc450a42 100644 --- a/src/osd/PGLog.h +++ b/src/osd/PGLog.h @@ -440,6 +440,9 @@ public: assert(get_can_rollback_to() == head); } + // make sure our buffers don't pin bigger buffers + e.mod_desc.trim_bl(); + // add to log log.push_back(e); diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 68804cad151..0f1856c60cf 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -3225,7 +3225,7 @@ public: * in the case that bl contains ptrs which point into a much larger * message buffer */ - void trim_bl() { + void trim_bl() const { if (bl.length() > 0) bl.rebuild(); }