Merge pull request #38219 from rzarzynski/wip-crimson-restore-obs

crimson: conditionally reload ObjectState if execution fails

Reviewed-by: Samuel Just <sjust@redhat.com>
Reviewed-by: Kefu Chai <kchai@redhat.com>
This commit is contained in:
Kefu Chai 2020-12-01 23:58:45 +08:00 committed by GitHub
commit 02c06942f4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 95 additions and 27 deletions

View File

@ -194,6 +194,10 @@ public:
uint32_t get_pool_stripe_width() const {
return pool_info.get_stripe_width();
}
bool has_seen_write() const {
return num_write > 0;
}
};
template <class Context, class MainFunc, class EffectFunc>

View File

@ -612,6 +612,53 @@ osd_op_params_t&& PG::fill_op_params_bump_pg_version(
return std::move(osd_op_p);
}
seastar::future<Ref<MOSDOpReply>> PG::handle_failed_op(
const std::error_code& e,
ObjectContextRef obc,
const OpsExecuter& ox,
const MOSDOp& m) const
{
// Oops, an operation had failed. do_osd_ops() altogether with
// OpsExecuter already dropped the ObjectStore::Transaction if
// there was any. However, this is not enough to completely
// rollback as we gave OpsExecuter the very single copy of `obc`
// we maintain and we did it for both reading and writing.
// Now all modifications must be reverted.
//
// Let's just reload from the store. Evicting from the shared
// LRU would be tricky as next MOSDOp (the one at `get_obc`
// phase) could actually already finished the lookup. Fortunately,
// this is supposed to live on cold paths, so performance is not
// a concern -- simplicity wins.
//
// The conditional's purpose is to efficiently handle hot errors
// which may appear as a result of e.g. CEPH_OSD_OP_CMPXATTR or
// CEPH_OSD_OP_OMAP_CMP. These are read-like ops and clients
// typically append them before any write. If OpsExecuter hasn't
// seen any modifying operation, `obc` is supposed to be kept
// unchanged.
assert(e.value() > 0);
const bool need_reload_obc = ox.has_seen_write();
logger().debug(
"{}: {} - object {} got error code {}, {}; need_reload_obc {}",
__func__,
m,
obc->obs.oi.soid,
e.value(),
e.message(),
need_reload_obc);
return (need_reload_obc ? reload_obc(*obc)
: load_obc_ertr::now()
).safe_then([&e, &m, obc = std::move(obc), this] {
auto reply = make_message<MOSDOpReply>(
&m, -e.value(), get_osdmap_epoch(), 0, false);
reply->set_enoent_reply_versions(
peering_state.get_info().last_update,
peering_state.get_info().last_user_version);
return seastar::make_ready_future<Ref<MOSDOpReply>>(std::move(reply));
}, load_obc_ertr::assert_all{ "can't live with object state messed up" });
}
seastar::future<Ref<MOSDOpReply>> PG::do_osd_ops(
Ref<MOSDOp> m,
ObjectContextRef obc,
@ -669,7 +716,6 @@ seastar::future<Ref<MOSDOpReply>> PG::do_osd_ops(
}).safe_then([this,
m,
obc,
ox_deleter = std::move(ox),
rvec = op_info.allows_returnvec()] {
// TODO: should stop at the first op which returns a negative retval,
// cmpext uses it for returning the index of first unmatched byte
@ -688,32 +734,18 @@ seastar::future<Ref<MOSDOpReply>> PG::do_osd_ops(
*m,
obc->obs.oi.soid);
return seastar::make_ready_future<Ref<MOSDOpReply>>(std::move(reply));
}, OpsExecuter::osd_op_errorator::all_same_way([=] (const std::error_code& e) {
assert(e.value() > 0);
logger().debug(
"do_osd_ops: {} - object {} got error code {}, {}",
*m,
obc->obs.oi.soid,
e.value(),
e.message());
auto reply = make_message<MOSDOpReply>(
m.get(), -e.value(), get_osdmap_epoch(), 0, false);
reply->set_enoent_reply_versions(peering_state.get_info().last_update,
peering_state.get_info().last_user_version);
return seastar::make_ready_future<Ref<MOSDOpReply>>(std::move(reply));
})).handle_exception_type([=](const crimson::osd::error& e) {
}, osd_op_errorator::all_same_way([ox = ox.get(),
m,
obc,
this] (const std::error_code& e) {
return handle_failed_op(e, std::move(obc), *ox, *m);
})).handle_exception_type([ox_deleter = std::move(ox),
m,
obc,
this] (const crimson::osd::error& e) {
// we need this handler because throwing path which aren't errorated yet.
logger().debug(
"do_osd_ops: {} - object {} got unhandled exception {} ({})",
*m,
obc->obs.oi.soid,
e.code(),
e.what());
auto reply = make_message<MOSDOpReply>(
m.get(), -e.code().value(), get_osdmap_epoch(), 0, false);
reply->set_enoent_reply_versions(peering_state.get_info().last_update,
peering_state.get_info().last_user_version);
return seastar::make_ready_future<Ref<MOSDOpReply>>(std::move(reply));
logger().debug("encountered the legacy error handling path!");
return handle_failed_op(e.code(), std::move(obc), *ox_deleter, *m);
});
}
@ -884,6 +916,29 @@ PG::load_head_obc(ObjectContextRef obc)
});
}
PG::load_obc_ertr::future<>
PG::reload_obc(crimson::osd::ObjectContext& obc) const
{
assert(obc.is_head());
return backend->load_metadata(obc.get_oid()).safe_then([&obc](auto md)
-> load_obc_ertr::future<> {
logger().debug(
"{}: reloaded obs {} for {}",
__func__,
md->os.oi,
obc.get_oid());
if (!md->ss) {
logger().error(
"{}: oid {} missing snapset",
__func__,
obc.get_oid());
return crimson::ct_error::object_corrupted::make();
}
obc.set_head_state(std::move(md->os), std::move(*(md->ss)));
return load_obc_ertr::now();
});
}
PG::load_obc_ertr::future<>
PG::with_locked_obc(Ref<MOSDOp> &m, const OpInfo &op_info,
Operation *op, PG::with_obc_func_t &&f)

View File

@ -34,8 +34,8 @@
#include "crimson/osd/pg_recovery_listener.h"
#include "crimson/osd/recovery_backend.h"
class OSDMap;
class MQuery;
class OSDMap;
class PGBackend;
class PGPeeringEvent;
class osd_op_params_t;
@ -54,6 +54,7 @@ namespace crimson::os {
namespace crimson::osd {
class ClientRequest;
class OpsExecuter;
class PG : public boost::intrusive_ref_counter<
PG,
@ -500,6 +501,9 @@ public:
load_obc_ertr::future<crimson::osd::ObjectContextRef>
load_head_obc(ObjectContextRef obc);
load_obc_ertr::future<>
reload_obc(crimson::osd::ObjectContext& obc) const;
public:
using with_obc_func_t =
std::function<load_obc_ertr::future<> (ObjectContextRef)>;
@ -536,6 +540,11 @@ private:
osd_op_params_t&& osd_op_p,
Ref<MOSDOp> m,
const bool user_modify);
seastar::future<Ref<MOSDOpReply>> handle_failed_op(
const std::error_code& e,
ObjectContextRef obc,
const OpsExecuter& ox,
const MOSDOp& m) const;
seastar::future<Ref<MOSDOpReply>> do_osd_ops(
Ref<MOSDOp> m,
ObjectContextRef obc,