Merge pull request #42479 from ronen-fr/wip-ronenf-scrub-prefix

osd/scrub: remove Scrubber sub-objects reliance on PG::gen_prefix()

Reviewed-by: Samuel Just <sjust@redhat.com>
Reviewed-by: Josh Durgin <jdurgin@redhat.com>
Reviewed-by: Neha Ojha <nojha@redhat.com>
Reviewed-by: Laura Flores <lflores@redhat.com>
This commit is contained in:
Ronen Friedman 2021-11-09 15:00:31 +02:00 committed by GitHub
commit 6ac7f24417
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 105 additions and 74 deletions

View File

@ -176,8 +176,6 @@ class PG : public DoutPrefixProvider, public PeeringState::PeeringListener {
friend struct NamedState;
friend class PeeringState;
friend class PgScrubber;
friend class Scrub::LocalReservation; // dout()-only friendship
friend class Scrub::ReservedByRemotePrimary; // dout()-only friendship
public:
const pg_shard_t pg_whoami;

View File

@ -10,16 +10,17 @@
#include "osd/PrimaryLogPG.h"
#include "scrub_machine.h"
#define dout_context (m_pg->get_cct())
#define dout_context (m_osds->cct)
#define dout_subsys ceph_subsys_osd
#undef dout_prefix
#define dout_prefix _prefix(_dout, this->m_pg)
#define dout_prefix _prefix(_dout, this)
using std::vector;
template <class T> static ostream& _prefix(std::ostream* _dout, T* t)
template <class T>
static ostream& _prefix(std::ostream* _dout, T* t)
{
return t->gen_prefix(*_dout) << " PrimaryLog scrubber pg(" << t->pg_id << ") ";
return t->gen_prefix(*_dout);
}
using namespace Scrub;

View File

@ -30,14 +30,15 @@ using namespace std::chrono;
using namespace std::chrono_literals;
using namespace std::literals;
#define dout_context (m_pg->get_cct())
#define dout_context (m_osds->cct)
#define dout_subsys ceph_subsys_osd
#undef dout_prefix
#define dout_prefix _prefix(_dout, this->m_pg)
#define dout_prefix _prefix(_dout, this)
template <class T> static ostream& _prefix(std::ostream* _dout, T* t)
template <class T>
static ostream& _prefix(std::ostream* _dout, T* t)
{
return t->gen_prefix(*_dout) << " scrubber pg(" << t->pg_id << ") ";
return t->gen_prefix(*_dout);
}
ostream& operator<<(ostream& out, const scrub_flags_t& sf)
@ -182,7 +183,6 @@ void PgScrubber::initiate_regular_scrub(epoch_t epoch_queued)
if (check_interval(epoch_queued)) {
dout(10) << "scrubber event -->> StartScrub epoch: " << epoch_queued << dendl;
reset_epoch(epoch_queued);
m_fsm->my_states();
m_fsm->process_event(StartScrub{});
dout(10) << "scrubber event --<< StartScrub" << dendl;
}
@ -195,7 +195,6 @@ void PgScrubber::initiate_scrub_after_repair(epoch_t epoch_queued)
if (check_interval(epoch_queued)) {
dout(10) << "scrubber event -->> AfterRepairScrub epoch: " << epoch_queued << dendl;
reset_epoch(epoch_queued);
m_fsm->my_states();
m_fsm->process_event(AfterRepairScrub{});
dout(10) << "scrubber event --<< AfterRepairScrub" << dendl;
}
@ -205,7 +204,6 @@ void PgScrubber::send_scrub_unblock(epoch_t epoch_queued)
{
dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
if (is_message_relevant(epoch_queued)) {
m_fsm->my_states();
m_fsm->process_event(Unblocked{});
}
dout(10) << "scrubber event --<< " << __func__ << dendl;
@ -215,7 +213,6 @@ void PgScrubber::send_scrub_resched(epoch_t epoch_queued)
{
dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
if (is_message_relevant(epoch_queued)) {
m_fsm->my_states();
m_fsm->process_event(InternalSchedScrub{});
}
dout(10) << "scrubber event --<< " << __func__ << dendl;
@ -232,7 +229,6 @@ void PgScrubber::send_start_replica(epoch_t epoch_queued, Scrub::act_token_t tok
}
if (check_interval(epoch_queued) && is_token_current(token)) {
m_fsm->my_states();
// save us some time by not waiting for updates if there are none
// to wait for. Affects the transition from NotActive into either
// ReplicaWaitUpdates or ActiveReplica.
@ -249,7 +245,6 @@ void PgScrubber::send_sched_replica(epoch_t epoch_queued, Scrub::act_token_t tok
dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued
<< " token: " << token << dendl;
if (check_interval(epoch_queued) && is_token_current(token)) {
m_fsm->my_states();
m_fsm->process_event(SchedReplica{}); // retest for map availability
}
dout(10) << "scrubber event --<< " << __func__ << dendl;
@ -260,7 +255,6 @@ void PgScrubber::active_pushes_notification(epoch_t epoch_queued)
// note: Primary only
dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
if (is_message_relevant(epoch_queued)) {
m_fsm->my_states();
m_fsm->process_event(ActivePushesUpd{});
}
dout(10) << "scrubber event --<< " << __func__ << dendl;
@ -271,7 +265,6 @@ void PgScrubber::update_applied_notification(epoch_t epoch_queued)
// note: Primary only
dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
if (is_message_relevant(epoch_queued)) {
m_fsm->my_states();
m_fsm->process_event(UpdatesApplied{});
}
dout(10) << "scrubber event --<< " << __func__ << dendl;
@ -282,7 +275,6 @@ void PgScrubber::digest_update_notification(epoch_t epoch_queued)
// note: Primary only
dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
if (is_message_relevant(epoch_queued)) {
m_fsm->my_states();
m_fsm->process_event(DigestUpdate{});
}
dout(10) << "scrubber event --<< " << __func__ << dendl;
@ -292,7 +284,6 @@ void PgScrubber::send_local_map_done(epoch_t epoch_queued)
{
dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
if (is_message_relevant(epoch_queued)) {
m_fsm->my_states();
m_fsm->process_event(Scrub::IntLocalMapDone{});
}
dout(10) << "scrubber event --<< " << __func__ << dendl;
@ -302,7 +293,6 @@ void PgScrubber::send_replica_maps_ready(epoch_t epoch_queued)
{
dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
if (is_message_relevant(epoch_queued)) {
m_fsm->my_states();
m_fsm->process_event(GotReplicas{});
}
dout(10) << "scrubber event --<< " << __func__ << dendl;
@ -312,7 +302,6 @@ void PgScrubber::send_replica_pushes_upd(epoch_t epoch_queued)
{
dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
if (check_interval(epoch_queued)) {
m_fsm->my_states();
m_fsm->process_event(ReplicaPushesUpd{});
}
dout(10) << "scrubber event --<< " << __func__ << dendl;
@ -323,7 +312,6 @@ void PgScrubber::send_remotes_reserved(epoch_t epoch_queued)
dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
// note: scrub is not active yet
if (check_interval(epoch_queued)) {
m_fsm->my_states();
m_fsm->process_event(RemotesReserved{});
}
dout(10) << "scrubber event --<< " << __func__ << dendl;
@ -333,7 +321,6 @@ void PgScrubber::send_reservation_failure(epoch_t epoch_queued)
{
dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
if (check_interval(epoch_queued)) { // do not check for 'active'!
m_fsm->my_states();
m_fsm->process_event(ReservationFailure{});
}
dout(10) << "scrubber event --<< " << __func__ << dendl;
@ -343,7 +330,6 @@ void PgScrubber::send_full_reset(epoch_t epoch_queued)
{
dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
m_fsm->my_states();
m_fsm->process_event(Scrub::FullReset{});
dout(10) << "scrubber event --<< " << __func__ << dendl;
@ -353,7 +339,6 @@ void PgScrubber::send_chunk_free(epoch_t epoch_queued)
{
dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
if (check_interval(epoch_queued)) {
m_fsm->my_states();
m_fsm->process_event(Scrub::SelectedChunkFree{});
}
dout(10) << "scrubber event --<< " << __func__ << dendl;
@ -363,7 +348,6 @@ void PgScrubber::send_chunk_busy(epoch_t epoch_queued)
{
dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
if (check_interval(epoch_queued)) {
m_fsm->my_states();
m_fsm->process_event(Scrub::ChunkIsBusy{});
}
dout(10) << "scrubber event --<< " << __func__ << dendl;
@ -373,7 +357,6 @@ void PgScrubber::send_get_next_chunk(epoch_t epoch_queued)
{
dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
if (is_message_relevant(epoch_queued)) {
m_fsm->my_states();
m_fsm->process_event(Scrub::NextChunk{});
}
dout(10) << "scrubber event --<< " << __func__ << dendl;
@ -385,7 +368,6 @@ void PgScrubber::send_scrub_is_finished(epoch_t epoch_queued)
// can't check for "active"
m_fsm->my_states();
m_fsm->process_event(Scrub::ScrubFinished{});
dout(10) << "scrubber event --<< " << __func__ << dendl;
@ -395,7 +377,6 @@ void PgScrubber::send_maps_compared(epoch_t epoch_queued)
{
dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
m_fsm->my_states();
m_fsm->process_event(Scrub::MapsCompared{});
dout(10) << "scrubber event --<< " << __func__ << dendl;
@ -594,13 +575,15 @@ bool PgScrubber::reserve_local()
// try to create the reservation object (which translates into asking the
// OSD for the local scrub resource). If failing - undo it immediately
m_local_osd_resource.emplace(m_pg, m_osds);
if (!m_local_osd_resource->is_reserved()) {
m_local_osd_resource.reset();
return false;
m_local_osd_resource.emplace(m_osds);
if (m_local_osd_resource->is_reserved()) {
dout(15) << __func__ << ": local resources reserved" << dendl;
return true;
}
return true;
dout(10) << __func__ << ": failed to reserve local scrub resources" << dendl;
m_local_osd_resource.reset();
return false;
}
// ----------------------------------------------------------------------------
@ -1549,7 +1532,7 @@ void PgScrubber::handle_scrub_reserve_request(OpRequestRef op)
if (m_remote_osd_resource.has_value() && m_remote_osd_resource->is_stale()) {
// we are holding a stale reservation from a past epoch
m_remote_osd_resource.reset();
dout(10) << __func__ << " stale reservation request" << dendl;
dout(10) << __func__ << " cleared existing stale reservation" << dendl;
}
if (request_ep < m_pg->get_same_interval_since()) {
@ -1574,7 +1557,7 @@ void PgScrubber::handle_scrub_reserve_request(OpRequestRef op)
} else if (m_pg->cct->_conf->osd_scrub_during_recovery ||
!m_osds->is_recovery_active()) {
m_remote_osd_resource.emplace(m_pg, m_osds, request_ep);
m_remote_osd_resource.emplace(this, m_pg, m_osds, request_ep);
// OSD resources allocated?
granted = m_remote_osd_resource->is_reserved();
if (!granted) {
@ -1650,7 +1633,7 @@ void PgScrubber::message_all_replicas(int32_t opcode, std::string_view op_text)
{
ceph_assert(m_pg->recovery_state.get_backfill_targets().empty());
std::vector<std::pair<int, Message*>> messages;
std::vector<pair<int, Message*>> messages;
messages.reserve(m_pg->get_actingset().size());
epoch_t epch = get_osdmap_epoch();
@ -2167,6 +2150,16 @@ ostream& operator<<(ostream& out, const PgScrubber& scrubber)
return out << scrubber.m_flags;
}
std::ostream& PgScrubber::gen_prefix(std::ostream& out) const
{
const auto fsm_state = m_fsm ? m_fsm->current_states_desc() : "- :";
if (m_pg) {
return m_pg->gen_prefix(out) << "scrubber " << fsm_state << ": ";
} else {
return out << " scrubber [~] " << fsm_state << ": ";
}
}
ostream& PgScrubber::show(ostream& out) const
{
return out << " [ " << m_pg_id << ": " << m_flags << " ] ";
@ -2229,6 +2222,13 @@ ReplicaReservations::ReplicaReservations(PG* pg, pg_shard_t whoami, ScrubQueue::
{
epoch_t epoch = m_pg->get_osdmap_epoch();
{
std::stringstream prefix;
prefix << "osd." << m_osds->whoami << " ep: " << epoch
<< " scrubber::ReplicaReservations pg[" << pg->pg_id << "]: ";
m_log_msg_prefix = prefix.str();
}
// handle the special case of no replicas
if (m_pending <= 0) {
// just signal the scrub state-machine to continue
@ -2243,7 +2243,7 @@ ReplicaReservations::ReplicaReservations(PG* pg, pg_shard_t whoami, ScrubQueue::
MOSDScrubReserve::REQUEST, m_pg->pg_whoami);
m_osds->send_message_osd_cluster(p.osd, m, epoch);
m_waited_for_peers.push_back(p);
dout(10) << __func__ << " <ReplicaReservations> reserve<-> " << p.osd << dendl;
dout(10) << __func__ << ": reserve " << p.osd << dendl;
}
}
}
@ -2261,7 +2261,7 @@ void ReplicaReservations::send_reject()
void ReplicaReservations::discard_all()
{
dout(10) << __func__ << " " << m_reserved_peers << dendl;
dout(10) << __func__ << ": " << m_reserved_peers << dendl;
m_had_rejections = true; // preventing late-coming responses from triggering events
m_reserved_peers.clear();
@ -2297,7 +2297,7 @@ ReplicaReservations::~ReplicaReservations()
*/
void ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from)
{
dout(10) << __func__ << " <ReplicaReservations> granted-> " << from << dendl;
dout(10) << __func__ << ": granted by " << from << dendl;
op->mark_started();
{
@ -2310,17 +2310,19 @@ void ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from)
// are we forced to reject the reservation?
if (m_had_rejections) {
dout(10) << " rejecting late-coming reservation from " << from << dendl;
dout(10) << __func__ << ": rejecting late-coming reservation from "
<< from << dendl;
release_replica(from, m_pg->get_osdmap_epoch());
} else if (std::find(m_reserved_peers.begin(), m_reserved_peers.end(), from) !=
m_reserved_peers.end()) {
dout(10) << " already had osd." << from << " reserved" << dendl;
dout(10) << __func__ << ": already had osd." << from << " reserved" << dendl;
} else {
dout(10) << " osd." << from << " scrub reserve = success" << dendl;
dout(10) << __func__ << ": osd." << from << " scrub reserve = success"
<< dendl;
m_reserved_peers.push_back(from);
if (--m_pending == 0) {
send_all_done();
@ -2330,8 +2332,8 @@ void ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from)
void ReplicaReservations::handle_reserve_reject(OpRequestRef op, pg_shard_t from)
{
dout(10) << __func__ << " <ReplicaReservations> rejected-> " << from << dendl;
dout(10) << __func__ << " " << *op->get_req() << dendl;
dout(10) << __func__ << ": rejected by " << from << dendl;
dout(15) << __func__ << ": " << *op->get_req() << dendl;
op->mark_started();
{
@ -2344,36 +2346,37 @@ void ReplicaReservations::handle_reserve_reject(OpRequestRef op, pg_shard_t from
if (m_had_rejections) {
// our failure was already handled when the first rejection arrived
dout(15) << " ignoring late-coming rejection from " << from << dendl;
dout(15) << __func__ << ": ignoring late-coming rejection from "
<< from << dendl;
} else if (std::find(m_reserved_peers.begin(), m_reserved_peers.end(), from) !=
m_reserved_peers.end()) {
dout(10) << " already had osd." << from << " reserved" << dendl;
dout(10) << __func__ << ": already had osd." << from << " reserved" << dendl;
} else {
dout(10) << " osd." << from << " scrub reserve = fail" << dendl;
dout(10) << __func__ << ": osd." << from << " scrub reserve = fail" << dendl;
m_had_rejections = true; // preventing any additional notifications
send_reject();
}
}
std::ostream& ReplicaReservations::gen_prefix(std::ostream& out) const
{
return out << m_log_msg_prefix;
}
// ///////////////////// LocalReservation //////////////////////////////////
LocalReservation::LocalReservation(PG* pg, OSDService* osds)
: m_pg{pg} // holding the "whole PG" for dout() sake
, m_osds{osds}
// note: no dout()s in LocalReservation functions. Client logs interactions.
LocalReservation::LocalReservation(OSDService* osds)
: m_osds{osds}
{
if (!m_osds->get_scrub_services().inc_scrubs_local()) {
dout(10) << __func__ << ": failed to reserve locally " << dendl;
if (m_osds->get_scrub_services().inc_scrubs_local()) {
// the failure is signalled by not having m_holding_local_reservation set
return;
m_holding_local_reservation = true;
}
dout(20) << __func__ << ": local OSD scrub resources reserved" << dendl;
m_holding_local_reservation = true;
}
LocalReservation::~LocalReservation()
@ -2384,11 +2387,16 @@ LocalReservation::~LocalReservation()
}
}
// ///////////////////// ReservedByRemotePrimary ///////////////////////////////
ReservedByRemotePrimary::ReservedByRemotePrimary(PG* pg, OSDService* osds, epoch_t epoch)
: m_pg{pg}, m_osds{osds}, m_reserved_at{epoch}
ReservedByRemotePrimary::ReservedByRemotePrimary(const PgScrubber* scrubber,
PG* pg,
OSDService* osds,
epoch_t epoch)
: m_scrubber{scrubber}
, m_pg{pg}
, m_osds{osds}
, m_reserved_at{epoch}
{
if (!m_osds->get_scrub_services().inc_scrubs_remote()) {
dout(10) << __func__ << ": failed to reserve at Primary request" << dendl;
@ -2413,6 +2421,11 @@ ReservedByRemotePrimary::~ReservedByRemotePrimary()
}
}
std::ostream& ReservedByRemotePrimary::gen_prefix(std::ostream& out) const
{
return m_scrubber->gen_prefix(out);
}
// ///////////////////// MapsCollectionStatus ////////////////////////////////
auto MapsCollectionStatus::mark_arriving_map(pg_shard_t from)

View File

@ -57,6 +57,8 @@ class ReplicaReservations {
void send_reject();
public:
std::string m_log_msg_prefix;
/**
* quietly discard all knowledge about existing reservations. No messages
* are sent to peers.
@ -72,18 +74,19 @@ class ReplicaReservations {
void handle_reserve_grant(OpRequestRef op, pg_shard_t from);
void handle_reserve_reject(OpRequestRef op, pg_shard_t from);
std::ostream& gen_prefix(std::ostream& out) const;
};
/**
* wraps the local OSD scrub resource reservation in an RAII wrapper
*/
class LocalReservation {
PG* m_pg;
OSDService* m_osds;
bool m_holding_local_reservation{false};
public:
LocalReservation(PG* pg, OSDService* osds);
LocalReservation(OSDService* osds);
~LocalReservation();
bool is_reserved() const { return m_holding_local_reservation; }
};
@ -92,18 +95,21 @@ class LocalReservation {
* wraps the OSD resource we are using when reserved as a replica by a scrubbing master.
*/
class ReservedByRemotePrimary {
const PgScrubber* m_scrubber; ///< we will be using its gen_prefix()
PG* m_pg;
OSDService* m_osds;
bool m_reserved_by_remote_primary{false};
const epoch_t m_reserved_at;
public:
ReservedByRemotePrimary(PG* pg, OSDService* osds, epoch_t epoch);
ReservedByRemotePrimary(const PgScrubber* scrubber, PG* pg, OSDService* osds, epoch_t epoch);
~ReservedByRemotePrimary();
[[nodiscard]] bool is_reserved() const { return m_reserved_by_remote_primary; }
/// compare the remembered reserved-at epoch to the current interval
[[nodiscard]] bool is_stale() const;
std::ostream& gen_prefix(std::ostream& out) const;
};
/**
@ -426,6 +432,7 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {
void set_scrub_duration() final;
utime_t scrub_begin_stamp;
std::ostream& gen_prefix(std::ostream& out) const final;
protected:
bool state_test(uint64_t m) const { return m_pg->state_test(m); }

View File

@ -41,12 +41,15 @@ void on_event_discard(std::string_view nm)
dout(20) << " event: --^^^^---- " << nm << dendl;
}
void ScrubMachine::my_states() const
std::string ScrubMachine::current_states_desc() const
{
std::string sts{"<"};
for (auto si = state_begin(); si != state_end(); ++si) {
const auto& siw{*si}; // prevents a warning re side-effects
dout(20) << " state: " << boost::core::demangle(typeid(siw).name()) << dendl;
const auto& siw{ *si }; // prevents a warning re side-effects
// the '7' is the size of the 'scrub::'
sts += boost::core::demangle(typeid(siw).name()).substr(7, std::string::npos) + "/";
}
return sts + ">";
}
void ScrubMachine::assert_not_active() const
@ -69,10 +72,17 @@ bool ScrubMachine::is_accepting_updates() const
// for the rest of the code in this file - we know what PG we are dealing with:
#undef dout_prefix
#define dout_prefix _prefix(_dout, this->context<ScrubMachine>().m_pg)
template <class T> static ostream& _prefix(std::ostream* _dout, T* t)
#define dout_prefix _prefix(_dout, this->context<ScrubMachine>())
template <class T>
static ostream& _prefix(std::ostream* _dout, T& t)
{
return t->gen_prefix(*_dout) << " scrubberFSM pg(" << t->pg_id << ") ";
return t.gen_prefix(*_dout);
}
std::ostream& ScrubMachine::gen_prefix(std::ostream& out) const
{
return m_scrbr->gen_prefix(out) << "FSM: ";
}
// ////////////// the actual actions
@ -457,9 +467,8 @@ sc::result WaitDigestUpdate::react(const ScrubFinished&)
}
ScrubMachine::ScrubMachine(PG* pg, ScrubMachineListener* pg_scrub)
: m_pg{pg}, m_pg_id{pg->pg_id}, m_scrbr{pg_scrub}
: m_pg_id{pg->pg_id}, m_scrbr{pg_scrub}
{
dout(15) << "ScrubMachine created " << m_pg_id << dendl;
}
ScrubMachine::~ScrubMachine() = default;

View File

@ -124,11 +124,11 @@ class ScrubMachine : public sc::state_machine<ScrubMachine, NotActive> {
explicit ScrubMachine(PG* pg, ScrubMachineListener* pg_scrub);
~ScrubMachine();
PG* m_pg; // only used for dout messages
spg_t m_pg_id;
ScrubMachineListener* m_scrbr;
std::ostream& gen_prefix(std::ostream& out) const;
void my_states() const;
std::string current_states_desc() const;
void assert_not_active() const;
[[nodiscard]] bool is_reserving() const;
[[nodiscard]] bool is_accepting_updates() const;

View File

@ -174,4 +174,7 @@ struct ScrubMachineListener {
/// a log/debug interface
virtual std::string dump_awaited_maps() const = 0;
/// exposed to be used by the scrub_machine logger
virtual std::ostream& gen_prefix(std::ostream& out) const = 0;
};