src/osd: factor out RWState, move SnapSetContext, ObjectState, RWState

We'll want to reuse this logic in crimson, but the actual operation
wake/wait mechanics will be different.

Signed-off-by: Samuel Just <sjust@redhat.com>
This commit is contained in:
Samuel Just 2019-10-07 11:36:43 -07:00
parent fc8f8855e4
commit 2472b37679
4 changed files with 272 additions and 228 deletions

View File

@ -5900,7 +5900,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
result = 0;
{
tracepoint(osd, do_osd_op_pre_try_flush, soid.oid.name.c_str(), soid.snap.val);
if (ctx->lock_type != ObjectContext::RWState::RWNONE) {
if (ctx->lock_type != RWState::RWNONE) {
dout(10) << "cache-try-flush without SKIPRWLOCKS flag set" << dendl;
result = -EINVAL;
break;
@ -5933,7 +5933,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
result = 0;
{
tracepoint(osd, do_osd_op_pre_cache_flush, soid.oid.name.c_str(), soid.snap.val);
if (ctx->lock_type == ObjectContext::RWState::RWNONE) {
if (ctx->lock_type == RWState::RWNONE) {
dout(10) << "cache-flush with SKIPRWLOCKS flag set" << dendl;
result = -EINVAL;
break;
@ -10276,7 +10276,7 @@ int PrimaryLogPG::try_flush_mark_clean(FlushOpRef fop)
// try to take the lock manually, since we don't
// have a ctx yet.
if (ctx->lock_manager.get_lock_type(
ObjectContext::RWState::RWWRITE,
RWState::RWWRITE,
oid,
obc,
fop->op)) {
@ -10287,7 +10287,7 @@ int PrimaryLogPG::try_flush_mark_clean(FlushOpRef fop)
// fop->op is now waiting on the lock; get fop->dup_ops to wait too.
for (auto op : fop->dup_ops) {
bool locked = ctx->lock_manager.get_lock_type(
ObjectContext::RWState::RWWRITE,
RWState::RWWRITE,
oid,
obc,
op);
@ -14233,7 +14233,7 @@ bool PrimaryLogPG::agent_maybe_evict(ObjectContextRef& obc, bool after_flush)
auto null_op_req = OpRequestRef();
if (!ctx->lock_manager.get_lock_type(
ObjectContext::RWState::RWWRITE,
RWState::RWWRITE,
obc->obs.oi.soid,
obc,
null_op_req)) {

View File

@ -683,7 +683,7 @@ public:
return inflightreads == 0;
}
ObjectContext::RWState::State lock_type;
RWState::State lock_type;
ObcLockManager lock_manager;
std::map<int, std::unique_ptr<OpFinisher>> op_finishers;
@ -708,7 +708,7 @@ public:
num_write(0),
sent_reply(false),
inflightreads(0),
lock_type(ObjectContext::RWState::RWNONE) {
lock_type(RWState::RWNONE) {
if (obc->ssc) {
new_snapset = obc->ssc->snapset;
snapset = &obc->ssc->snapset;
@ -725,7 +725,7 @@ public:
num_read(0),
num_write(0),
inflightreads(0),
lock_type(ObjectContext::RWState::RWNONE) {}
lock_type(RWState::RWNONE) {}
void reset_obs(ObjectContextRef obc) {
new_obs = ObjectState(obc->obs.oi, obc->obs.exists);
if (obc->ssc) {
@ -849,12 +849,12 @@ protected:
* to get the second.
*/
if (write_ordered && ctx->op->may_read()) {
ctx->lock_type = ObjectContext::RWState::RWEXCL;
ctx->lock_type = RWState::RWEXCL;
} else if (write_ordered) {
ctx->lock_type = ObjectContext::RWState::RWWRITE;
ctx->lock_type = RWState::RWWRITE;
} else {
ceph_assert(ctx->op->may_read());
ctx->lock_type = ObjectContext::RWState::RWREAD;
ctx->lock_type = RWState::RWREAD;
}
if (ctx->head_obc) {
@ -864,7 +864,7 @@ protected:
ctx->head_obc->obs.oi.soid,
ctx->head_obc,
ctx->op)) {
ctx->lock_type = ObjectContext::RWState::RWNONE;
ctx->lock_type = RWState::RWNONE;
return false;
}
}
@ -876,7 +876,7 @@ protected:
return true;
} else {
ceph_assert(!ctx->head_obc);
ctx->lock_type = ObjectContext::RWState::RWNONE;
ctx->lock_type = RWState::RWNONE;
return false;
}
}

190
src/osd/object_state.h Normal file
View File

@ -0,0 +1,190 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#pragma once
#include "osd_types.h"
struct ObjectState {
object_info_t oi;
bool exists; ///< the stored object exists (i.e., we will remember the object_info_t)
ObjectState() : exists(false) {}
ObjectState(const object_info_t &oi_, bool exists_)
: oi(oi_), exists(exists_) {}
ObjectState(object_info_t &&oi_, bool exists_)
: oi(std::move(oi_)), exists(exists_) {}
ObjectState(const hobject_t &obj) : oi(obj), exists(false) {}
};
struct RWState {
enum State {
RWNONE,
RWREAD,
RWWRITE,
RWEXCL,
};
static const char *get_state_name(State s) {
switch (s) {
case RWNONE: return "none";
case RWREAD: return "read";
case RWWRITE: return "write";
case RWEXCL: return "excl";
default: return "???";
}
}
const char *get_state_name() const {
return get_state_name(state);
}
int count; ///< number of readers or writers
int waiters = 0; ///< number waiting
State state:4; ///< rw state
/// if set, restart backfill when we can get a read lock
bool recovery_read_marker:1;
/// if set, requeue snaptrim on lock release
bool snaptrimmer_write_marker:1;
RWState()
: count(0),
state(RWNONE),
recovery_read_marker(false),
snaptrimmer_write_marker(false)
{}
/// this function adjusts the counts if necessary
bool get_read_lock() {
// don't starve anybody!
if (waiters > 0) {
return false;
}
switch (state) {
case RWNONE:
ceph_assert(count == 0);
state = RWREAD;
// fall through
case RWREAD:
count++;
return true;
case RWWRITE:
return false;
case RWEXCL:
return false;
default:
ceph_abort_msg("unhandled case");
return false;
}
}
bool get_write_lock(bool greedy=false) {
if (!greedy) {
// don't starve anybody!
if (waiters > 0 ||
recovery_read_marker) {
return false;
}
}
switch (state) {
case RWNONE:
ceph_assert(count == 0);
state = RWWRITE;
// fall through
case RWWRITE:
count++;
return true;
case RWREAD:
return false;
case RWEXCL:
return false;
default:
ceph_abort_msg("unhandled case");
return false;
}
}
bool get_excl_lock() {
switch (state) {
case RWNONE:
ceph_assert(count == 0);
state = RWEXCL;
count = 1;
return true;
case RWWRITE:
return false;
case RWREAD:
return false;
case RWEXCL:
return false;
default:
ceph_abort_msg("unhandled case");
return false;
}
}
/// same as get_write_lock, but ignore starvation
bool take_write_lock() {
if (state == RWWRITE) {
count++;
return true;
}
return get_write_lock();
}
bool dec() {
ceph_assert(count > 0);
count--;
if (count == 0) {
state = RWNONE;
return true;
} else {
return false;
}
}
bool put_read() {
ceph_assert(state == RWREAD);
return dec();
}
bool put_write() {
ceph_assert(state == RWWRITE);
return dec();
}
bool put_excl() {
ceph_assert(state == RWEXCL);
return dec();
}
void inc_waiters() {
++waiters;
}
void release_waiters() {
waiters = 0;
}
void dec_waiters(int count) {
ceph_assert(waiters >= count);
waiters -= count;
}
bool empty() const { return state == RWNONE; }
bool get_snaptrimmer_write(bool mark_if_unsuccessful) {
if (get_write_lock()) {
return true;
} else {
if (mark_if_unsuccessful)
snaptrimmer_write_marker = true;
return false;
}
}
bool get_recovery_read() {
recovery_read_marker = true;
if (get_read_lock()) {
return true;
}
return false;
}
};
inline ostream& operator<<(ostream& out, const RWState& rw)
{
return out << "rwstate(" << rw.get_state_name()
<< " n=" << rw.count
<< " w=" << rw.waiters
<< ")";
}

View File

@ -6,6 +6,7 @@
#include "osd_types.h"
#include "OpRequest.h"
#include "object_state.h"
/*
* keep tabs on object modifications that are in flight.
@ -24,21 +25,7 @@ struct SnapSetContext {
explicit SnapSetContext(const hobject_t& o) :
oid(o), ref(0), registered(false), exists(true) { }
};
struct ObjectContext;
struct ObjectState {
object_info_t oi;
bool exists; ///< the stored object exists (i.e., we will remember the object_info_t)
ObjectState() : exists(false) {}
ObjectState(const object_info_t &oi_, bool exists_)
: oi(oi_), exists(exists_) {}
ObjectState(object_info_t &&oi_, bool exists_)
: oi(std::move(oi_)), exists(exists_) {}
};
class ObjectContext;
typedef std::shared_ptr<ObjectContext> ObjectContextRef;
struct ObjectContext {
@ -56,173 +43,58 @@ public:
// attr cache
map<string, bufferlist> attr_cache;
struct RWState {
enum State {
RWNONE,
RWREAD,
RWWRITE,
RWEXCL,
};
static const char *get_state_name(State s) {
switch (s) {
case RWNONE: return "none";
case RWREAD: return "read";
case RWWRITE: return "write";
case RWEXCL: return "excl";
default: return "???";
}
}
const char *get_state_name() const {
return get_state_name(state);
}
std::list<OpRequestRef> waiters; ///< ops waiting on state change
int count; ///< number of readers or writers
State state:4; ///< rw state
/// if set, restart backfill when we can get a read lock
bool recovery_read_marker:1;
/// if set, requeue snaptrim on lock release
bool snaptrimmer_write_marker:1;
RWState()
: count(0),
state(RWNONE),
recovery_read_marker(false),
snaptrimmer_write_marker(false)
{}
bool get_read(OpRequestRef& op) {
if (get_read_lock()) {
return true;
} // else
// Now we really need to bump up the ref-counter.
waiters.emplace_back(op);
return false;
}
/// this function adjusts the counts if necessary
bool get_read_lock() {
// don't starve anybody!
if (!waiters.empty()) {
return false;
}
switch (state) {
case RWNONE:
ceph_assert(count == 0);
state = RWREAD;
// fall through
case RWREAD:
count++;
return true;
case RWWRITE:
return false;
case RWEXCL:
return false;
default:
ceph_abort_msg("unhandled case");
return false;
}
}
bool get_write(OpRequestRef& op, bool greedy=false) {
if (get_write_lock(greedy)) {
return true;
} // else
if (op)
waiters.emplace_back(op);
return false;
}
bool get_write_lock(bool greedy=false) {
if (!greedy) {
// don't starve anybody!
if (!waiters.empty() ||
recovery_read_marker) {
return false;
}
}
switch (state) {
case RWNONE:
ceph_assert(count == 0);
state = RWWRITE;
// fall through
case RWWRITE:
count++;
return true;
case RWREAD:
return false;
case RWEXCL:
return false;
default:
ceph_abort_msg("unhandled case");
return false;
}
}
bool get_excl_lock() {
switch (state) {
case RWNONE:
ceph_assert(count == 0);
state = RWEXCL;
count = 1;
return true;
case RWWRITE:
return false;
case RWREAD:
return false;
case RWEXCL:
return false;
default:
ceph_abort_msg("unhandled case");
return false;
}
}
bool get_excl(OpRequestRef& op) {
if (get_excl_lock()) {
return true;
} // else
if (op)
waiters.emplace_back(op);
return false;
}
/// same as get_write_lock, but ignore starvation
bool take_write_lock() {
if (state == RWWRITE) {
count++;
return true;
}
return get_write_lock();
}
void dec(list<OpRequestRef> *requeue) {
ceph_assert(count > 0);
ceph_assert(requeue);
count--;
if (count == 0) {
state = RWNONE;
requeue->splice(requeue->end(), waiters);
}
}
void put_read(list<OpRequestRef> *requeue) {
ceph_assert(state == RWREAD);
dec(requeue);
}
void put_write(list<OpRequestRef> *requeue) {
ceph_assert(state == RWWRITE);
dec(requeue);
}
void put_excl(list<OpRequestRef> *requeue) {
ceph_assert(state == RWEXCL);
dec(requeue);
}
bool empty() const { return state == RWNONE; }
} rwstate;
RWState rwstate;
std::list<OpRequestRef> waiters; ///< ops waiting on state change
bool get_read(OpRequestRef& op) {
return rwstate.get_read(op);
if (rwstate.get_read_lock()) {
return true;
} // else
// Now we really need to bump up the ref-counter.
waiters.emplace_back(op);
rwstate.inc_waiters();
return false;
}
bool get_write(OpRequestRef& op) {
return rwstate.get_write(op, false);
bool get_write(OpRequestRef& op, bool greedy=false) {
if (rwstate.get_write_lock(greedy)) {
return true;
} // else
if (op) {
waiters.emplace_back(op);
rwstate.inc_waiters();
}
return false;
}
bool get_excl(OpRequestRef op) {
return rwstate.get_excl(op);
bool get_excl(OpRequestRef& op) {
if (rwstate.get_excl_lock()) {
return true;
} // else
if (op) {
waiters.emplace_back(op);
rwstate.inc_waiters();
}
return false;
}
void wake(list<OpRequestRef> *requeue) {
rwstate.release_waiters();
requeue->splice(requeue->end(), waiters);
}
void put_read(list<OpRequestRef> *requeue) {
if (rwstate.put_read()) {
wake(requeue);
}
}
void put_write(list<OpRequestRef> *requeue) {
if (rwstate.put_write()) {
wake(requeue);
}
}
void put_excl(list<OpRequestRef> *requeue) {
if (rwstate.put_excl()) {
wake(requeue);
}
}
bool empty() const { return rwstate.empty(); }
bool get_lock_type(OpRequestRef& op, RWState::State type) {
switch (type) {
case RWState::RWWRITE:
@ -237,46 +109,36 @@ public:
}
}
bool get_write_greedy(OpRequestRef& op) {
return rwstate.get_write(op, true);
return get_write(op, true);
}
bool get_snaptrimmer_write(bool mark_if_unsuccessful) {
if (rwstate.get_write_lock()) {
return true;
} else {
if (mark_if_unsuccessful)
rwstate.snaptrimmer_write_marker = true;
return false;
}
return rwstate.get_snaptrimmer_write(mark_if_unsuccessful);
}
bool get_recovery_read() {
rwstate.recovery_read_marker = true;
if (rwstate.get_read_lock()) {
return true;
}
return false;
return rwstate.get_recovery_read();
}
bool try_get_read_lock() {
return rwstate.get_read_lock();
}
void drop_recovery_read(list<OpRequestRef> *ls) {
ceph_assert(rwstate.recovery_read_marker);
rwstate.put_read(ls);
put_read(ls);
rwstate.recovery_read_marker = false;
}
void put_lock_type(
ObjectContext::RWState::State type,
RWState::State type,
list<OpRequestRef> *to_wake,
bool *requeue_recovery,
bool *requeue_snaptrimmer) {
switch (type) {
case ObjectContext::RWState::RWWRITE:
rwstate.put_write(to_wake);
case RWState::RWWRITE:
put_write(to_wake);
break;
case ObjectContext::RWState::RWREAD:
rwstate.put_read(to_wake);
case RWState::RWREAD:
put_read(to_wake);
break;
case ObjectContext::RWState::RWEXCL:
rwstate.put_excl(to_wake);
case RWState::RWEXCL:
put_excl(to_wake);
break;
default:
ceph_abort_msg("invalid lock type");
@ -291,7 +153,7 @@ public:
}
}
bool is_request_pending() {
return (rwstate.count > 0);
return !rwstate.empty();
}
ObjectContext()
@ -331,14 +193,6 @@ inline ostream& operator<<(ostream& out, const ObjectState& obs)
return out;
}
inline ostream& operator<<(ostream& out, const ObjectContext::RWState& rw)
{
return out << "rwstate(" << rw.get_state_name()
<< " n=" << rw.count
<< " w=" << rw.waiters.size()
<< ")";
}
inline ostream& operator<<(ostream& out, const ObjectContext& obc)
{
return out << "obc(" << obc.obs << " " << obc.rwstate << ")";
@ -347,10 +201,10 @@ inline ostream& operator<<(ostream& out, const ObjectContext& obc)
class ObcLockManager {
struct ObjectLockState {
ObjectContextRef obc;
ObjectContext::RWState::State type;
RWState::State type;
ObjectLockState(
ObjectContextRef obc,
ObjectContext::RWState::State type)
RWState::State type)
: obc(std::move(obc)), type(type) {}
};
map<hobject_t, ObjectLockState> locks;
@ -363,7 +217,7 @@ public:
return locks.empty();
}
bool get_lock_type(
ObjectContext::RWState::State type,
RWState::State type,
const hobject_t &hoid,
ObjectContextRef& obc,
OpRequestRef& op) {
@ -383,7 +237,7 @@ public:
if (obc->rwstate.take_write_lock()) {
locks.insert(
make_pair(
hoid, ObjectLockState(obc, ObjectContext::RWState::RWWRITE)));
hoid, ObjectLockState(obc, RWState::RWWRITE)));
return true;
} else {
return false;
@ -398,7 +252,7 @@ public:
if (obc->get_snaptrimmer_write(mark_if_unsuccessful)) {
locks.insert(
make_pair(
hoid, ObjectLockState(obc, ObjectContext::RWState::RWWRITE)));
hoid, ObjectLockState(obc, RWState::RWWRITE)));
return true;
} else {
return false;
@ -413,7 +267,7 @@ public:
if (obc->get_write_greedy(op)) {
locks.insert(
make_pair(
hoid, ObjectLockState(obc, ObjectContext::RWState::RWWRITE)));
hoid, ObjectLockState(obc, RWState::RWWRITE)));
return true;
} else {
return false;
@ -429,7 +283,7 @@ public:
locks.insert(
make_pair(
hoid,
ObjectLockState(obc, ObjectContext::RWState::RWREAD)));
ObjectLockState(obc, RWState::RWREAD)));
return true;
} else {
return false;