diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 3cd06d5d2da..0aa36d41c0a 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -5900,7 +5900,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector& ops) result = 0; { tracepoint(osd, do_osd_op_pre_try_flush, soid.oid.name.c_str(), soid.snap.val); - if (ctx->lock_type != ObjectContext::RWState::RWNONE) { + if (ctx->lock_type != RWState::RWNONE) { dout(10) << "cache-try-flush without SKIPRWLOCKS flag set" << dendl; result = -EINVAL; break; @@ -5933,7 +5933,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector& ops) result = 0; { tracepoint(osd, do_osd_op_pre_cache_flush, soid.oid.name.c_str(), soid.snap.val); - if (ctx->lock_type == ObjectContext::RWState::RWNONE) { + if (ctx->lock_type == RWState::RWNONE) { dout(10) << "cache-flush with SKIPRWLOCKS flag set" << dendl; result = -EINVAL; break; @@ -10276,7 +10276,7 @@ int PrimaryLogPG::try_flush_mark_clean(FlushOpRef fop) // try to take the lock manually, since we don't // have a ctx yet. if (ctx->lock_manager.get_lock_type( - ObjectContext::RWState::RWWRITE, + RWState::RWWRITE, oid, obc, fop->op)) { @@ -10287,7 +10287,7 @@ int PrimaryLogPG::try_flush_mark_clean(FlushOpRef fop) // fop->op is now waiting on the lock; get fop->dup_ops to wait too. for (auto op : fop->dup_ops) { bool locked = ctx->lock_manager.get_lock_type( - ObjectContext::RWState::RWWRITE, + RWState::RWWRITE, oid, obc, op); @@ -14233,7 +14233,7 @@ bool PrimaryLogPG::agent_maybe_evict(ObjectContextRef& obc, bool after_flush) auto null_op_req = OpRequestRef(); if (!ctx->lock_manager.get_lock_type( - ObjectContext::RWState::RWWRITE, + RWState::RWWRITE, obc->obs.oi.soid, obc, null_op_req)) { diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h index a350714bbfc..bce20c6b19a 100644 --- a/src/osd/PrimaryLogPG.h +++ b/src/osd/PrimaryLogPG.h @@ -683,7 +683,7 @@ public: return inflightreads == 0; } - ObjectContext::RWState::State lock_type; + RWState::State lock_type; ObcLockManager lock_manager; std::map> op_finishers; @@ -708,7 +708,7 @@ public: num_write(0), sent_reply(false), inflightreads(0), - lock_type(ObjectContext::RWState::RWNONE) { + lock_type(RWState::RWNONE) { if (obc->ssc) { new_snapset = obc->ssc->snapset; snapset = &obc->ssc->snapset; @@ -725,7 +725,7 @@ public: num_read(0), num_write(0), inflightreads(0), - lock_type(ObjectContext::RWState::RWNONE) {} + lock_type(RWState::RWNONE) {} void reset_obs(ObjectContextRef obc) { new_obs = ObjectState(obc->obs.oi, obc->obs.exists); if (obc->ssc) { @@ -849,12 +849,12 @@ protected: * to get the second. */ if (write_ordered && ctx->op->may_read()) { - ctx->lock_type = ObjectContext::RWState::RWEXCL; + ctx->lock_type = RWState::RWEXCL; } else if (write_ordered) { - ctx->lock_type = ObjectContext::RWState::RWWRITE; + ctx->lock_type = RWState::RWWRITE; } else { ceph_assert(ctx->op->may_read()); - ctx->lock_type = ObjectContext::RWState::RWREAD; + ctx->lock_type = RWState::RWREAD; } if (ctx->head_obc) { @@ -864,7 +864,7 @@ protected: ctx->head_obc->obs.oi.soid, ctx->head_obc, ctx->op)) { - ctx->lock_type = ObjectContext::RWState::RWNONE; + ctx->lock_type = RWState::RWNONE; return false; } } @@ -876,7 +876,7 @@ protected: return true; } else { ceph_assert(!ctx->head_obc); - ctx->lock_type = ObjectContext::RWState::RWNONE; + ctx->lock_type = RWState::RWNONE; return false; } } diff --git a/src/osd/object_state.h b/src/osd/object_state.h new file mode 100644 index 00000000000..9ef708a664c --- /dev/null +++ b/src/osd/object_state.h @@ -0,0 +1,190 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "osd_types.h" + +struct ObjectState { + object_info_t oi; + bool exists; ///< the stored object exists (i.e., we will remember the object_info_t) + + ObjectState() : exists(false) {} + + ObjectState(const object_info_t &oi_, bool exists_) + : oi(oi_), exists(exists_) {} + ObjectState(object_info_t &&oi_, bool exists_) + : oi(std::move(oi_)), exists(exists_) {} + ObjectState(const hobject_t &obj) : oi(obj), exists(false) {} +}; + +struct RWState { + enum State { + RWNONE, + RWREAD, + RWWRITE, + RWEXCL, + }; + static const char *get_state_name(State s) { + switch (s) { + case RWNONE: return "none"; + case RWREAD: return "read"; + case RWWRITE: return "write"; + case RWEXCL: return "excl"; + default: return "???"; + } + } + const char *get_state_name() const { + return get_state_name(state); + } + + int count; ///< number of readers or writers + int waiters = 0; ///< number waiting + + State state:4; ///< rw state + /// if set, restart backfill when we can get a read lock + bool recovery_read_marker:1; + /// if set, requeue snaptrim on lock release + bool snaptrimmer_write_marker:1; + + RWState() + : count(0), + state(RWNONE), + recovery_read_marker(false), + snaptrimmer_write_marker(false) + {} + + /// this function adjusts the counts if necessary + bool get_read_lock() { + // don't starve anybody! + if (waiters > 0) { + return false; + } + switch (state) { + case RWNONE: + ceph_assert(count == 0); + state = RWREAD; + // fall through + case RWREAD: + count++; + return true; + case RWWRITE: + return false; + case RWEXCL: + return false; + default: + ceph_abort_msg("unhandled case"); + return false; + } + } + + bool get_write_lock(bool greedy=false) { + if (!greedy) { + // don't starve anybody! + if (waiters > 0 || + recovery_read_marker) { + return false; + } + } + switch (state) { + case RWNONE: + ceph_assert(count == 0); + state = RWWRITE; + // fall through + case RWWRITE: + count++; + return true; + case RWREAD: + return false; + case RWEXCL: + return false; + default: + ceph_abort_msg("unhandled case"); + return false; + } + } + bool get_excl_lock() { + switch (state) { + case RWNONE: + ceph_assert(count == 0); + state = RWEXCL; + count = 1; + return true; + case RWWRITE: + return false; + case RWREAD: + return false; + case RWEXCL: + return false; + default: + ceph_abort_msg("unhandled case"); + return false; + } + } + /// same as get_write_lock, but ignore starvation + bool take_write_lock() { + if (state == RWWRITE) { + count++; + return true; + } + return get_write_lock(); + } + bool dec() { + ceph_assert(count > 0); + count--; + if (count == 0) { + state = RWNONE; + return true; + } else { + return false; + } + } + bool put_read() { + ceph_assert(state == RWREAD); + return dec(); + } + bool put_write() { + ceph_assert(state == RWWRITE); + return dec(); + } + bool put_excl() { + ceph_assert(state == RWEXCL); + return dec(); + } + void inc_waiters() { + ++waiters; + } + void release_waiters() { + waiters = 0; + } + void dec_waiters(int count) { + ceph_assert(waiters >= count); + waiters -= count; + } + bool empty() const { return state == RWNONE; } + + bool get_snaptrimmer_write(bool mark_if_unsuccessful) { + if (get_write_lock()) { + return true; + } else { + if (mark_if_unsuccessful) + snaptrimmer_write_marker = true; + return false; + } + } + bool get_recovery_read() { + recovery_read_marker = true; + if (get_read_lock()) { + return true; + } + return false; + } +}; + +inline ostream& operator<<(ostream& out, const RWState& rw) +{ + return out << "rwstate(" << rw.get_state_name() + << " n=" << rw.count + << " w=" << rw.waiters + << ")"; +} diff --git a/src/osd/osd_internal_types.h b/src/osd/osd_internal_types.h index d539d04658b..1373470d06f 100644 --- a/src/osd/osd_internal_types.h +++ b/src/osd/osd_internal_types.h @@ -6,6 +6,7 @@ #include "osd_types.h" #include "OpRequest.h" +#include "object_state.h" /* * keep tabs on object modifications that are in flight. @@ -24,21 +25,7 @@ struct SnapSetContext { explicit SnapSetContext(const hobject_t& o) : oid(o), ref(0), registered(false), exists(true) { } }; - -struct ObjectContext; - -struct ObjectState { - object_info_t oi; - bool exists; ///< the stored object exists (i.e., we will remember the object_info_t) - - ObjectState() : exists(false) {} - - ObjectState(const object_info_t &oi_, bool exists_) - : oi(oi_), exists(exists_) {} - ObjectState(object_info_t &&oi_, bool exists_) - : oi(std::move(oi_)), exists(exists_) {} -}; - +class ObjectContext; typedef std::shared_ptr ObjectContextRef; struct ObjectContext { @@ -56,173 +43,58 @@ public: // attr cache map attr_cache; - struct RWState { - enum State { - RWNONE, - RWREAD, - RWWRITE, - RWEXCL, - }; - static const char *get_state_name(State s) { - switch (s) { - case RWNONE: return "none"; - case RWREAD: return "read"; - case RWWRITE: return "write"; - case RWEXCL: return "excl"; - default: return "???"; - } - } - const char *get_state_name() const { - return get_state_name(state); - } - - std::list waiters; ///< ops waiting on state change - int count; ///< number of readers or writers - - State state:4; ///< rw state - /// if set, restart backfill when we can get a read lock - bool recovery_read_marker:1; - /// if set, requeue snaptrim on lock release - bool snaptrimmer_write_marker:1; - - RWState() - : count(0), - state(RWNONE), - recovery_read_marker(false), - snaptrimmer_write_marker(false) - {} - bool get_read(OpRequestRef& op) { - if (get_read_lock()) { - return true; - } // else - // Now we really need to bump up the ref-counter. - waiters.emplace_back(op); - return false; - } - /// this function adjusts the counts if necessary - bool get_read_lock() { - // don't starve anybody! - if (!waiters.empty()) { - return false; - } - switch (state) { - case RWNONE: - ceph_assert(count == 0); - state = RWREAD; - // fall through - case RWREAD: - count++; - return true; - case RWWRITE: - return false; - case RWEXCL: - return false; - default: - ceph_abort_msg("unhandled case"); - return false; - } - } - - bool get_write(OpRequestRef& op, bool greedy=false) { - if (get_write_lock(greedy)) { - return true; - } // else - if (op) - waiters.emplace_back(op); - return false; - } - bool get_write_lock(bool greedy=false) { - if (!greedy) { - // don't starve anybody! - if (!waiters.empty() || - recovery_read_marker) { - return false; - } - } - switch (state) { - case RWNONE: - ceph_assert(count == 0); - state = RWWRITE; - // fall through - case RWWRITE: - count++; - return true; - case RWREAD: - return false; - case RWEXCL: - return false; - default: - ceph_abort_msg("unhandled case"); - return false; - } - } - bool get_excl_lock() { - switch (state) { - case RWNONE: - ceph_assert(count == 0); - state = RWEXCL; - count = 1; - return true; - case RWWRITE: - return false; - case RWREAD: - return false; - case RWEXCL: - return false; - default: - ceph_abort_msg("unhandled case"); - return false; - } - } - bool get_excl(OpRequestRef& op) { - if (get_excl_lock()) { - return true; - } // else - if (op) - waiters.emplace_back(op); - return false; - } - /// same as get_write_lock, but ignore starvation - bool take_write_lock() { - if (state == RWWRITE) { - count++; - return true; - } - return get_write_lock(); - } - void dec(list *requeue) { - ceph_assert(count > 0); - ceph_assert(requeue); - count--; - if (count == 0) { - state = RWNONE; - requeue->splice(requeue->end(), waiters); - } - } - void put_read(list *requeue) { - ceph_assert(state == RWREAD); - dec(requeue); - } - void put_write(list *requeue) { - ceph_assert(state == RWWRITE); - dec(requeue); - } - void put_excl(list *requeue) { - ceph_assert(state == RWEXCL); - dec(requeue); - } - bool empty() const { return state == RWNONE; } - } rwstate; - + RWState rwstate; + std::list waiters; ///< ops waiting on state change bool get_read(OpRequestRef& op) { - return rwstate.get_read(op); + if (rwstate.get_read_lock()) { + return true; + } // else + // Now we really need to bump up the ref-counter. + waiters.emplace_back(op); + rwstate.inc_waiters(); + return false; } - bool get_write(OpRequestRef& op) { - return rwstate.get_write(op, false); + bool get_write(OpRequestRef& op, bool greedy=false) { + if (rwstate.get_write_lock(greedy)) { + return true; + } // else + if (op) { + waiters.emplace_back(op); + rwstate.inc_waiters(); + } + return false; } - bool get_excl(OpRequestRef op) { - return rwstate.get_excl(op); + bool get_excl(OpRequestRef& op) { + if (rwstate.get_excl_lock()) { + return true; + } // else + if (op) { + waiters.emplace_back(op); + rwstate.inc_waiters(); + } + return false; } + void wake(list *requeue) { + rwstate.release_waiters(); + requeue->splice(requeue->end(), waiters); + } + void put_read(list *requeue) { + if (rwstate.put_read()) { + wake(requeue); + } + } + void put_write(list *requeue) { + if (rwstate.put_write()) { + wake(requeue); + } + } + void put_excl(list *requeue) { + if (rwstate.put_excl()) { + wake(requeue); + } + } + bool empty() const { return rwstate.empty(); } + bool get_lock_type(OpRequestRef& op, RWState::State type) { switch (type) { case RWState::RWWRITE: @@ -237,46 +109,36 @@ public: } } bool get_write_greedy(OpRequestRef& op) { - return rwstate.get_write(op, true); + return get_write(op, true); } bool get_snaptrimmer_write(bool mark_if_unsuccessful) { - if (rwstate.get_write_lock()) { - return true; - } else { - if (mark_if_unsuccessful) - rwstate.snaptrimmer_write_marker = true; - return false; - } + return rwstate.get_snaptrimmer_write(mark_if_unsuccessful); } bool get_recovery_read() { - rwstate.recovery_read_marker = true; - if (rwstate.get_read_lock()) { - return true; - } - return false; + return rwstate.get_recovery_read(); } bool try_get_read_lock() { return rwstate.get_read_lock(); } void drop_recovery_read(list *ls) { ceph_assert(rwstate.recovery_read_marker); - rwstate.put_read(ls); + put_read(ls); rwstate.recovery_read_marker = false; } void put_lock_type( - ObjectContext::RWState::State type, + RWState::State type, list *to_wake, bool *requeue_recovery, bool *requeue_snaptrimmer) { switch (type) { - case ObjectContext::RWState::RWWRITE: - rwstate.put_write(to_wake); + case RWState::RWWRITE: + put_write(to_wake); break; - case ObjectContext::RWState::RWREAD: - rwstate.put_read(to_wake); + case RWState::RWREAD: + put_read(to_wake); break; - case ObjectContext::RWState::RWEXCL: - rwstate.put_excl(to_wake); + case RWState::RWEXCL: + put_excl(to_wake); break; default: ceph_abort_msg("invalid lock type"); @@ -291,7 +153,7 @@ public: } } bool is_request_pending() { - return (rwstate.count > 0); + return !rwstate.empty(); } ObjectContext() @@ -331,14 +193,6 @@ inline ostream& operator<<(ostream& out, const ObjectState& obs) return out; } -inline ostream& operator<<(ostream& out, const ObjectContext::RWState& rw) -{ - return out << "rwstate(" << rw.get_state_name() - << " n=" << rw.count - << " w=" << rw.waiters.size() - << ")"; -} - inline ostream& operator<<(ostream& out, const ObjectContext& obc) { return out << "obc(" << obc.obs << " " << obc.rwstate << ")"; @@ -347,10 +201,10 @@ inline ostream& operator<<(ostream& out, const ObjectContext& obc) class ObcLockManager { struct ObjectLockState { ObjectContextRef obc; - ObjectContext::RWState::State type; + RWState::State type; ObjectLockState( ObjectContextRef obc, - ObjectContext::RWState::State type) + RWState::State type) : obc(std::move(obc)), type(type) {} }; map locks; @@ -363,7 +217,7 @@ public: return locks.empty(); } bool get_lock_type( - ObjectContext::RWState::State type, + RWState::State type, const hobject_t &hoid, ObjectContextRef& obc, OpRequestRef& op) { @@ -383,7 +237,7 @@ public: if (obc->rwstate.take_write_lock()) { locks.insert( make_pair( - hoid, ObjectLockState(obc, ObjectContext::RWState::RWWRITE))); + hoid, ObjectLockState(obc, RWState::RWWRITE))); return true; } else { return false; @@ -398,7 +252,7 @@ public: if (obc->get_snaptrimmer_write(mark_if_unsuccessful)) { locks.insert( make_pair( - hoid, ObjectLockState(obc, ObjectContext::RWState::RWWRITE))); + hoid, ObjectLockState(obc, RWState::RWWRITE))); return true; } else { return false; @@ -413,7 +267,7 @@ public: if (obc->get_write_greedy(op)) { locks.insert( make_pair( - hoid, ObjectLockState(obc, ObjectContext::RWState::RWWRITE))); + hoid, ObjectLockState(obc, RWState::RWWRITE))); return true; } else { return false; @@ -429,7 +283,7 @@ public: locks.insert( make_pair( hoid, - ObjectLockState(obc, ObjectContext::RWState::RWREAD))); + ObjectLockState(obc, RWState::RWREAD))); return true; } else { return false;