mirror of
https://github.com/ceph/ceph
synced 2025-01-03 01:22:53 +00:00
Merge pull request #14627 from badone/wip-peering-state-timing
osd: Implement peering state timing Reviewed-by: Josh Durgin <jdurgin@redhat.com>
This commit is contained in:
commit
7e79a4eadc
@ -2111,6 +2111,20 @@ bool OSD::asok_command(string admin_command, cmdmap_t& cmdmap, string format,
|
||||
store->generate_db_histogram(f);
|
||||
} else if (admin_command == "flush_store_cache") {
|
||||
store->flush_cache();
|
||||
} else if (admin_command == "dump_pgstate_history") {
|
||||
f->open_object_section("pgstate_history");
|
||||
RWLock::RLocker l2(pg_map_lock);
|
||||
for (ceph::unordered_map<spg_t,PG*>::iterator it = pg_map.begin();
|
||||
it != pg_map.end();
|
||||
++it) {
|
||||
|
||||
PG *pg = it->second;
|
||||
f->dump_stream("pg") << pg->get_pgid();
|
||||
pg->lock();
|
||||
pg->pgstate_history.dump(f);
|
||||
pg->unlock();
|
||||
}
|
||||
f->close_section();
|
||||
} else {
|
||||
assert(0 == "broken asok registration");
|
||||
}
|
||||
@ -2621,6 +2635,10 @@ void OSD::final_init()
|
||||
asok_hook,
|
||||
"Flush bluestore internal cache");
|
||||
assert(r == 0);
|
||||
r = admin_socket->register_command("dump_pgstate_history", "dump_pgstate_history",
|
||||
asok_hook,
|
||||
"show recent state history");
|
||||
assert(r == 0);
|
||||
|
||||
test_ops_hook = new TestOpsSocketHook(&(this->service), this->store);
|
||||
// Note: pools are CephString instead of CephPoolname because
|
||||
@ -2998,6 +3016,7 @@ int OSD::shutdown()
|
||||
cct->get_admin_socket()->unregister_command("dump_objectstore_kv_stats");
|
||||
cct->get_admin_socket()->unregister_command("calc_objectstore_db_histogram");
|
||||
cct->get_admin_socket()->unregister_command("flush_store_cache");
|
||||
cct->get_admin_socket()->unregister_command("dump_pgstate_history");
|
||||
delete asok_hook;
|
||||
asok_hook = NULL;
|
||||
|
||||
|
125
src/osd/PG.cc
125
src/osd/PG.cc
@ -92,6 +92,66 @@ static ostream& _prefix(std::ostream *_dout, T *t)
|
||||
|
||||
MEMPOOL_DEFINE_OBJECT_FACTORY(PG::CephPeeringEvt, pg_peering_evt, osd);
|
||||
|
||||
// Record that the PG state machine entered `state` at time `entime`.
//
// pg     - the PG owning this history; remembered in `thispg` when no
//          instance is live yet so that exit() can reach it later.
// entime - timestamp of the state entry.
// state  - state name; names containing "Trimming" are skipped.
void PGStateHistory::enter(PG* pg, const utime_t entime, const char* state)
{
  // Ignore trimming state machine for now
  if (::strstr(state, "Trimming") != NULL) {
    return;
  }

  // An instance is already live: the new state nests inside it.
  if (pi != nullptr) {
    pi->enter_state(entime, state);
    return;
  }

  // No live instance. Store the current state in a temporary instance
  // since we can't reliably take the PG lock here; exit() hands the
  // temporary over to the buffer.
  if (!tmppi) {
    tmppi.reset(new PGStateInstance);
  }

  thispg = pg;
  tmppi->enter_state(entime, state);
}
|
||||
|
||||
void PGStateHistory::exit(const char* state) {
|
||||
// Ignore trimming state machine for now
|
||||
// Do nothing if PG is being destroyed!
|
||||
if (::strstr(state, "Trimming") != NULL || pg_in_destructor) {
|
||||
return;
|
||||
} else {
|
||||
bool ilocked = false;
|
||||
if(!thispg->is_locked()) {
|
||||
thispg->lock();
|
||||
ilocked = true;
|
||||
}
|
||||
if (pi == nullptr) {
|
||||
buffer.push_back(std::unique_ptr<PGStateInstance>(tmppi.release()));
|
||||
pi = buffer.back().get();
|
||||
pi->setepoch(thispg->get_osdmap()->get_epoch());
|
||||
}
|
||||
|
||||
pi->exit_state(ceph_clock_now());
|
||||
if (::strcmp(state, "Reset") == 0) {
|
||||
this->reset();
|
||||
}
|
||||
if(ilocked) {
|
||||
thispg->unlock();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write the buffered state history to `f`.
//
// Emits an array section "history" holding one object section "states"
// per buffered instance. Each object carries the instance's "epoch"
// followed by a "state"/"enter"/"exit" triple for every recorded entry.
//
// f - formatter receiving the output.
void PGStateHistory::dump(Formatter* f) const {
  f->open_array_section("history");
  // Iterate by const reference: the loop variable no longer shadows the
  // `pi` member, and history tuples are not copied per iteration.
  for (const auto& instance : buffer) {
    f->open_object_section("states");
    f->dump_stream("epoch") << instance->this_epoch;
    for (const auto& entry : instance->state_history) {
      // Tuple layout: <0> enter time, <1> exit time, <2> state name.
      f->dump_string("state", std::get<2>(entry));
      f->dump_stream("enter") << std::get<0>(entry);
      f->dump_stream("exit") << std::get<1>(entry);
    }
    f->close_section();
  }
  f->close_section();
}
|
||||
|
||||
void PG::get(const char* tag)
|
||||
{
|
||||
ref++;
|
||||
@ -274,6 +334,7 @@ PG::PG(OSDService *o, OSDMapRef curmap,
|
||||
|
||||
PG::~PG()
|
||||
{
|
||||
pgstate_history.set_pg_in_destructor();
|
||||
#ifdef PG_DEBUG_REFS
|
||||
osd->remove_pgid(info.pgid, this);
|
||||
#endif
|
||||
@ -5864,7 +5925,7 @@ void PG::update_store_on_load()
|
||||
/*------Crashed-------*/
|
||||
PG::RecoveryState::Crashed::Crashed(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Crashed")
|
||||
NamedState(context< RecoveryMachine >().pg, "Crashed")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
assert(0 == "we got a bad state machine event");
|
||||
@ -5874,7 +5935,7 @@ PG::RecoveryState::Crashed::Crashed(my_context ctx)
|
||||
/*------Initial-------*/
|
||||
PG::RecoveryState::Initial::Initial(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Initial")
|
||||
NamedState(context< RecoveryMachine >().pg, "Initial")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
}
|
||||
@ -5928,7 +5989,7 @@ void PG::RecoveryState::Initial::exit()
|
||||
/*------Started-------*/
|
||||
PG::RecoveryState::Started::Started(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
}
|
||||
@ -5993,7 +6054,7 @@ void PG::RecoveryState::Started::exit()
|
||||
/*--------Reset---------*/
|
||||
PG::RecoveryState::Reset::Reset(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Reset")
|
||||
NamedState(context< RecoveryMachine >().pg, "Reset")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
PG *pg = context< RecoveryMachine >().pg;
|
||||
@ -6086,7 +6147,7 @@ void PG::RecoveryState::Reset::exit()
|
||||
/*-------Start---------*/
|
||||
PG::RecoveryState::Start::Start(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Start")
|
||||
NamedState(context< RecoveryMachine >().pg, "Start")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
|
||||
@ -6111,7 +6172,7 @@ void PG::RecoveryState::Start::exit()
|
||||
/*---------Primary--------*/
|
||||
PG::RecoveryState::Primary::Primary(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/Primary")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
PG *pg = context< RecoveryMachine >().pg;
|
||||
@ -6169,7 +6230,7 @@ void PG::RecoveryState::Primary::exit()
|
||||
/*---------Peering--------*/
|
||||
PG::RecoveryState::Peering::Peering(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Peering"),
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/Primary/Peering"),
|
||||
history_les_bound(false)
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
@ -6265,7 +6326,7 @@ void PG::RecoveryState::Peering::exit()
|
||||
/*------Backfilling-------*/
|
||||
PG::RecoveryState::Backfilling::Backfilling(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Active/Backfilling")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/Primary/Active/Backfilling")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
PG *pg = context< RecoveryMachine >().pg;
|
||||
@ -6322,7 +6383,7 @@ void PG::RecoveryState::Backfilling::exit()
|
||||
|
||||
PG::RecoveryState::WaitRemoteBackfillReserved::WaitRemoteBackfillReserved(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Active/WaitRemoteBackfillReserved"),
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/Primary/Active/WaitRemoteBackfillReserved"),
|
||||
backfill_osd_it(context< Active >().remote_shards_to_reserve_backfill.begin())
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
@ -6404,7 +6465,7 @@ PG::RecoveryState::WaitRemoteBackfillReserved::react(const RemoteReservationReje
|
||||
/*--WaitLocalBackfillReserved--*/
|
||||
PG::RecoveryState::WaitLocalBackfillReserved::WaitLocalBackfillReserved(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Active/WaitLocalBackfillReserved")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/Primary/Active/WaitLocalBackfillReserved")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
PG *pg = context< RecoveryMachine >().pg;
|
||||
@ -6429,7 +6490,7 @@ void PG::RecoveryState::WaitLocalBackfillReserved::exit()
|
||||
/*----NotBackfilling------*/
|
||||
PG::RecoveryState::NotBackfilling::NotBackfilling(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Active/NotBackfilling")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/Primary/Active/NotBackfilling")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
PG *pg = context< RecoveryMachine >().pg;
|
||||
@ -6459,7 +6520,7 @@ void PG::RecoveryState::NotBackfilling::exit()
|
||||
/*----NotRecovering------*/
|
||||
PG::RecoveryState::NotRecovering::NotRecovering(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Active/NotRecovering")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/Primary/Active/NotRecovering")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
PG *pg = context< RecoveryMachine >().pg;
|
||||
@ -6477,7 +6538,7 @@ void PG::RecoveryState::NotRecovering::exit()
|
||||
/*---RepNotRecovering----*/
|
||||
PG::RecoveryState::RepNotRecovering::RepNotRecovering(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/ReplicaActive/RepNotRecovering")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/ReplicaActive/RepNotRecovering")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
}
|
||||
@ -6493,7 +6554,7 @@ void PG::RecoveryState::RepNotRecovering::exit()
|
||||
/*---RepWaitRecoveryReserved--*/
|
||||
PG::RecoveryState::RepWaitRecoveryReserved::RepWaitRecoveryReserved(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/ReplicaActive/RepWaitRecoveryReserved")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/ReplicaActive/RepWaitRecoveryReserved")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
PG *pg = context< RecoveryMachine >().pg;
|
||||
@ -6531,7 +6592,7 @@ void PG::RecoveryState::RepWaitRecoveryReserved::exit()
|
||||
/*-RepWaitBackfillReserved*/
|
||||
PG::RecoveryState::RepWaitBackfillReserved::RepWaitBackfillReserved(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/ReplicaActive/RepWaitBackfillReserved")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/ReplicaActive/RepWaitBackfillReserved")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
}
|
||||
@ -6613,7 +6674,7 @@ PG::RecoveryState::RepWaitBackfillReserved::react(const RemoteReservationRejecte
|
||||
/*---RepRecovering-------*/
|
||||
PG::RecoveryState::RepRecovering::RepRecovering(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/ReplicaActive/RepRecovering")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/ReplicaActive/RepRecovering")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
}
|
||||
@ -6638,7 +6699,7 @@ void PG::RecoveryState::RepRecovering::exit()
|
||||
/*------Activating--------*/
|
||||
PG::RecoveryState::Activating::Activating(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Active/Activating")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/Primary/Active/Activating")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
}
|
||||
@ -6653,7 +6714,7 @@ void PG::RecoveryState::Activating::exit()
|
||||
|
||||
PG::RecoveryState::WaitLocalRecoveryReserved::WaitLocalRecoveryReserved(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Active/WaitLocalRecoveryReserved")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/Primary/Active/WaitLocalRecoveryReserved")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
PG *pg = context< RecoveryMachine >().pg;
|
||||
@ -6695,7 +6756,7 @@ void PG::RecoveryState::WaitLocalRecoveryReserved::exit()
|
||||
|
||||
PG::RecoveryState::WaitRemoteRecoveryReserved::WaitRemoteRecoveryReserved(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Active/WaitRemoteRecoveryReserved"),
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/Primary/Active/WaitRemoteRecoveryReserved"),
|
||||
remote_recovery_reservation_it(context< Active >().remote_shards_to_reserve_recovery.begin())
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
@ -6735,7 +6796,7 @@ void PG::RecoveryState::WaitRemoteRecoveryReserved::exit()
|
||||
|
||||
PG::RecoveryState::Recovering::Recovering(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Active/Recovering")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/Primary/Active/Recovering")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
|
||||
@ -6800,7 +6861,7 @@ void PG::RecoveryState::Recovering::exit()
|
||||
|
||||
PG::RecoveryState::Recovered::Recovered(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Active/Recovered")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/Primary/Active/Recovered")
|
||||
{
|
||||
pg_shard_t auth_log_shard;
|
||||
|
||||
@ -6843,7 +6904,7 @@ void PG::RecoveryState::Recovered::exit()
|
||||
|
||||
PG::RecoveryState::Clean::Clean(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Active/Clean")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/Primary/Active/Clean")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
|
||||
@ -6888,7 +6949,7 @@ set<pg_shard_t> unique_osd_shard_set(const pg_shard_t & skip, const T &in)
|
||||
/*---------Active---------*/
|
||||
PG::RecoveryState::Active::Active(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Active"),
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/Primary/Active"),
|
||||
remote_shards_to_reserve_recovery(
|
||||
unique_osd_shard_set(
|
||||
context< RecoveryMachine >().pg->pg_whoami,
|
||||
@ -7203,7 +7264,7 @@ void PG::RecoveryState::Active::exit()
|
||||
/*------ReplicaActive-----*/
|
||||
PG::RecoveryState::ReplicaActive::ReplicaActive(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/ReplicaActive")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/ReplicaActive")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
|
||||
@ -7295,7 +7356,7 @@ void PG::RecoveryState::ReplicaActive::exit()
|
||||
/*-------Stray---*/
|
||||
PG::RecoveryState::Stray::Stray(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/Stray")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/Stray")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
|
||||
@ -7407,7 +7468,7 @@ void PG::RecoveryState::Stray::exit()
|
||||
/*--------GetInfo---------*/
|
||||
PG::RecoveryState::GetInfo::GetInfo(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Peering/GetInfo")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/Primary/Peering/GetInfo")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
|
||||
@ -7553,7 +7614,7 @@ void PG::RecoveryState::GetInfo::exit()
|
||||
PG::RecoveryState::GetLog::GetLog(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(
|
||||
context< RecoveryMachine >().pg->cct, "Started/Primary/Peering/GetLog"),
|
||||
context< RecoveryMachine >().pg, "Started/Primary/Peering/GetLog"),
|
||||
msg(0)
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
@ -7687,7 +7748,7 @@ void PG::RecoveryState::GetLog::exit()
|
||||
/*------WaitActingChange--------*/
|
||||
PG::RecoveryState::WaitActingChange::WaitActingChange(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Peering/WaitActingChange")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/Primary/Peering/WaitActingChange")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
}
|
||||
@ -7750,7 +7811,7 @@ void PG::RecoveryState::WaitActingChange::exit()
|
||||
/*------Down--------*/
|
||||
PG::RecoveryState::Down::Down(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Peering/Down")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/Primary/Peering/Down")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
PG *pg = context< RecoveryMachine >().pg;
|
||||
@ -7791,7 +7852,7 @@ boost::statechart::result PG::RecoveryState::Down::react(const QueryState& q)
|
||||
/*------Incomplete--------*/
|
||||
PG::RecoveryState::Incomplete::Incomplete(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Peering/Incomplete")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/Primary/Peering/Incomplete")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
PG *pg = context< RecoveryMachine >().pg;
|
||||
@ -7858,7 +7919,7 @@ void PG::RecoveryState::Incomplete::exit()
|
||||
/*------GetMissing--------*/
|
||||
PG::RecoveryState::GetMissing::GetMissing(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Peering/GetMissing")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/Primary/Peering/GetMissing")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
|
||||
@ -7998,7 +8059,7 @@ void PG::RecoveryState::GetMissing::exit()
|
||||
/*------WaitUpThru--------*/
|
||||
PG::RecoveryState::WaitUpThru::WaitUpThru(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Peering/WaitUpThru")
|
||||
NamedState(context< RecoveryMachine >().pg, "Started/Primary/Peering/WaitUpThru")
|
||||
{
|
||||
context< RecoveryMachine >().log_enter(state_name);
|
||||
}
|
||||
|
70
src/osd/PG.h
70
src/osd/PG.h
@ -23,6 +23,7 @@
|
||||
#include <boost/statechart/transition.hpp>
|
||||
#include <boost/statechart/event_base.hpp>
|
||||
#include <boost/scoped_ptr.hpp>
|
||||
#include <boost/circular_buffer.hpp>
|
||||
#include "include/memory.h"
|
||||
#include "include/mempool.h"
|
||||
|
||||
@ -45,13 +46,14 @@
|
||||
#include <atomic>
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <stack>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
using namespace std;
|
||||
|
||||
// #include "include/unordered_map.h"
|
||||
// #include "include/unordered_set.h"
|
||||
|
||||
|
||||
//#define DEBUG_RECOVERY_OIDS // track set of recovering oids explicitly, to find counting bugs
|
||||
|
||||
class OSD;
|
||||
@ -74,6 +76,59 @@ namespace Scrub {
|
||||
void intrusive_ptr_add_ref(PG *pg);
|
||||
void intrusive_ptr_release(PG *pg);
|
||||
|
||||
using state_history_entry = std::tuple<utime_t, utime_t, const char*>;
|
||||
using embedded_state = std::pair<utime_t, const char*>;
|
||||
|
||||
struct PGStateInstance {
|
||||
// Time spent in pg states
|
||||
|
||||
void setepoch(const epoch_t current_epoch) {
|
||||
this_epoch = current_epoch;
|
||||
}
|
||||
|
||||
void enter_state(const utime_t entime, const char* state) {
|
||||
embedded_states.push(std::make_pair(entime, state));
|
||||
}
|
||||
|
||||
void exit_state(const utime_t extime) {
|
||||
embedded_state this_state = embedded_states.top();
|
||||
state_history.push_back(state_history_entry{
|
||||
this_state.first, extime, this_state.second});
|
||||
embedded_states.pop();
|
||||
}
|
||||
|
||||
epoch_t this_epoch;
|
||||
utime_t enter_time;
|
||||
std::vector<state_history_entry> state_history;
|
||||
std::stack<embedded_state> embedded_states;
|
||||
};
|
||||
|
||||
class PGStateHistory {
|
||||
// Member access protected with the PG lock
|
||||
public:
|
||||
PGStateHistory() : buffer(10) {}
|
||||
|
||||
void enter(PG* pg, const utime_t entime, const char* state);
|
||||
|
||||
void exit(const char* state);
|
||||
|
||||
void reset() {
|
||||
pi = nullptr;
|
||||
}
|
||||
|
||||
void set_pg_in_destructor() { pg_in_destructor = true; }
|
||||
|
||||
void dump(Formatter* f) const;
|
||||
|
||||
private:
|
||||
bool pg_in_destructor = false;
|
||||
PG* thispg = nullptr;
|
||||
std::unique_ptr<PGStateInstance> tmppi;
|
||||
PGStateInstance* pi = nullptr;
|
||||
boost::circular_buffer<std::unique_ptr<PGStateInstance>> buffer;
|
||||
|
||||
};
|
||||
|
||||
#ifdef PG_DEBUG_REFS
|
||||
#include "common/tracked_int_ptr.hpp"
|
||||
uint64_t get_with_id(PG *pg);
|
||||
@ -629,14 +684,19 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
PGStateHistory pgstate_history;
|
||||
|
||||
struct NamedState {
|
||||
const char *state_name;
|
||||
utime_t enter_time;
|
||||
PG* pg;
|
||||
const char *get_state_name() { return state_name; }
|
||||
NamedState(CephContext *cct_, const char *state_name_)
|
||||
: state_name(state_name_),
|
||||
enter_time(ceph_clock_now()) {}
|
||||
virtual ~NamedState() {}
|
||||
NamedState(PG *pg_, const char *state_name_)
|
||||
: state_name(state_name_), enter_time(ceph_clock_now()), pg(pg_) {
|
||||
pg->pgstate_history.enter(pg, enter_time, state_name);
|
||||
}
|
||||
virtual ~NamedState() { pg->pgstate_history.exit(state_name); }
|
||||
};
|
||||
|
||||
|
||||
|
@ -13346,7 +13346,7 @@ void PrimaryLogPG::SnapTrimmer::log_exit(const char *state_name, utime_t enter_t
|
||||
/* NotTrimming */
|
||||
PrimaryLogPG::NotTrimming::NotTrimming(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< SnapTrimmer >().pg->cct, "NotTrimming")
|
||||
NamedState(context< SnapTrimmer >().pg, "NotTrimming")
|
||||
{
|
||||
context< SnapTrimmer >().log_enter(state_name);
|
||||
}
|
||||
@ -13400,7 +13400,7 @@ boost::statechart::result PrimaryLogPG::WaitReservation::react(const SnapTrimRes
|
||||
/* AwaitAsyncWork */
|
||||
PrimaryLogPG::AwaitAsyncWork::AwaitAsyncWork(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< SnapTrimmer >().pg->cct, "Trimming/AwaitAsyncWork")
|
||||
NamedState(context< SnapTrimmer >().pg, "Trimming/AwaitAsyncWork")
|
||||
{
|
||||
auto *pg = context< SnapTrimmer >().pg;
|
||||
context< SnapTrimmer >().log_enter(state_name);
|
||||
|
@ -1483,7 +1483,7 @@ private:
|
||||
|
||||
explicit Trimming(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< SnapTrimmer >().pg->cct, "Trimming") {
|
||||
NamedState(context< SnapTrimmer >().pg, "Trimming") {
|
||||
context< SnapTrimmer >().log_enter(state_name);
|
||||
assert(context< SnapTrimmer >().can_trim());
|
||||
assert(in_flight.empty());
|
||||
@ -1508,7 +1508,7 @@ private:
|
||||
Context *wakeup = nullptr;
|
||||
explicit WaitTrimTimer(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitTrimTimer") {
|
||||
NamedState(context< SnapTrimmer >().pg, "Trimming/WaitTrimTimer") {
|
||||
context< SnapTrimmer >().log_enter(state_name);
|
||||
assert(context<Trimming>().in_flight.empty());
|
||||
struct OnTimer : Context {
|
||||
@ -1558,7 +1558,7 @@ private:
|
||||
> reactions;
|
||||
explicit WaitRWLock(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitRWLock") {
|
||||
NamedState(context< SnapTrimmer >().pg, "Trimming/WaitRWLock") {
|
||||
context< SnapTrimmer >().log_enter(state_name);
|
||||
assert(context<Trimming>().in_flight.empty());
|
||||
}
|
||||
@ -1581,7 +1581,7 @@ private:
|
||||
> reactions;
|
||||
explicit WaitRepops(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitRepops") {
|
||||
NamedState(context< SnapTrimmer >().pg, "Trimming/WaitRepops") {
|
||||
context< SnapTrimmer >().log_enter(state_name);
|
||||
assert(!context<Trimming>().in_flight.empty());
|
||||
}
|
||||
@ -1635,7 +1635,7 @@ private:
|
||||
|
||||
explicit WaitReservation(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitReservation") {
|
||||
NamedState(context< SnapTrimmer >().pg, "Trimming/WaitReservation") {
|
||||
context< SnapTrimmer >().log_enter(state_name);
|
||||
assert(context<Trimming>().in_flight.empty());
|
||||
auto *pg = context< SnapTrimmer >().pg;
|
||||
@ -1667,7 +1667,7 @@ private:
|
||||
> reactions;
|
||||
explicit WaitScrub(my_context ctx)
|
||||
: my_base(ctx),
|
||||
NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitScrub") {
|
||||
NamedState(context< SnapTrimmer >().pg, "Trimming/WaitScrub") {
|
||||
context< SnapTrimmer >().log_enter(state_name);
|
||||
}
|
||||
void exit() {
|
||||
|
Loading…
Reference in New Issue
Block a user