osd/scrub: reformat scrub files to 80 cols

Reformatting the OSD scrub code files to match the style guide.
Specifically:
- force 80-column lines; and
- (sadly) force 'use tabs' (replacing each run of 8 indentation
  blanks with a tab).

clang-format version used: 13
Configuration file used is detailed in PR comment.

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
Ronen Friedman 2022-04-30 13:43:58 +00:00
parent dbca95185e
commit 7e2ba75f07
11 changed files with 944 additions and 674 deletions
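
As a point of reference, a .clang-format file along the following lines would
produce the style seen in the diffs below. This is an illustrative sketch
reconstructed from the commit message only (80 columns, tabs for indentation,
clang-format 13); the authoritative configuration is the one posted in the PR
comment, and every option value here is an assumption:

# Illustrative approximation only -- the real file is in the PR comment.
# Applied as, e.g.: clang-format -i <file>   (clang-format version 13)
BasedOnStyle: Mozilla    # assumed base; fits the one-argument-per-line wrapping
ColumnLimit: 80          # force 80-column lines
UseTab: ForIndentation   # each run of 8 leading indentation blanks becomes a tab
TabWidth: 8
IndentWidth: 2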


@@ -6,10 +6,10 @@
#include <sstream>
#include "common/scrub_types.h"
#include "osd/osd_types_fmt.h"
#include "osd/PeeringState.h"
#include "osd/PrimaryLogPG.h"
#include "osd/osd_types_fmt.h"
#include "scrub_machine.h"
#define dout_context (m_osds->cct)
@@ -33,10 +33,12 @@ bool PrimaryLogScrub::get_store_errors(const scrub_ls_arg_t& arg,
}
if (arg.get_snapsets) {
res_inout.vals =
m_store->get_snap_errors(m_pg->get_pgid().pool(), arg.start_after, arg.max_return);
res_inout.vals = m_store->get_snap_errors(m_pg->get_pgid().pool(),
arg.start_after,
arg.max_return);
} else {
res_inout.vals = m_store->get_object_errors(m_pg->get_pgid().pool(), arg.start_after,
res_inout.vals = m_store->get_object_errors(m_pg->get_pgid().pool(),
arg.start_after,
arg.max_return);
}
return true;
@@ -49,23 +51,23 @@ void PrimaryLogScrub::submit_digest_fixes(const digests_fixes_t& fixes)
// encounter previous-chunk digest updates after starting a new chunk
num_digest_updates_pending = fixes.size();
dout(10) << __func__
<< ": num_digest_updates_pending: " << num_digest_updates_pending
<< dendl;
<< ": num_digest_updates_pending: " << num_digest_updates_pending
<< dendl;
for (auto& [obj, dgs] : fixes) {
ObjectContextRef obc = m_pl_pg->get_object_context(obj, false);
if (!obc) {
m_osds->clog->error() << m_pg_id << " " << m_mode_desc
<< " cannot get object context for object " << obj;
<< " cannot get object context for object " << obj;
num_digest_updates_pending--;
continue;
}
if (obc->obs.oi.soid != obj) {
m_osds->clog->error()
<< m_pg_id << " " << m_mode_desc << " " << obj
<< " : object has a valid oi attr with a mismatched name, "
<< " obc->obs.oi.soid: " << obc->obs.oi.soid;
<< m_pg_id << " " << m_mode_desc << " " << obj
<< " : object has a valid oi attr with a mismatched name, "
<< " obc->obs.oi.soid: " << obc->obs.oi.soid;
num_digest_updates_pending--;
continue;
}
@@ -88,9 +90,9 @@ void PrimaryLogScrub::submit_digest_fixes(const digests_fixes_t& fixes)
ctx->register_on_success([this]() {
if ((num_digest_updates_pending >= 1) &&
(--num_digest_updates_pending == 0)) {
m_osds->queue_scrub_digest_update(m_pl_pg,
m_pl_pg->is_scrub_blocking_ops());
(--num_digest_updates_pending == 0)) {
m_osds->queue_scrub_digest_update(m_pl_pg,
m_pl_pg->is_scrub_blocking_ops());
}
});
@@ -110,10 +112,9 @@ void PrimaryLogScrub::_scrub_finish()
{
auto& info = m_pg->get_pg_info(ScrubberPasskey{}); ///< a temporary alias
dout(10) << __func__
<< " info stats: " << (info.stats.stats_invalid ? "invalid" : "valid")
<< " m_is_repair: " << m_is_repair
<< dendl;
dout(10) << __func__ << " info stats: "
<< (info.stats.stats_invalid ? "invalid" : "valid")
<< " m_is_repair: " << m_is_repair << dendl;
if (info.stats.stats_invalid) {
m_pl_pg->recovery_state.update_stats([=](auto& history, auto& stats) {
@@ -138,21 +139,26 @@ void PrimaryLogScrub::_scrub_finish()
<< m_scrub_cstat.sum.num_objects_pinned << "/"
<< info.stats.stats.sum.num_objects_pinned << " pinned, "
<< m_scrub_cstat.sum.num_objects_hit_set_archive << "/"
<< info.stats.stats.sum.num_objects_hit_set_archive << " hit_set_archive, "
<< m_scrub_cstat.sum.num_bytes << "/" << info.stats.stats.sum.num_bytes
<< " bytes, " << m_scrub_cstat.sum.num_objects_manifest << "/"
<< info.stats.stats.sum.num_objects_hit_set_archive
<< " hit_set_archive, " << m_scrub_cstat.sum.num_bytes << "/"
<< info.stats.stats.sum.num_bytes << " bytes, "
<< m_scrub_cstat.sum.num_objects_manifest << "/"
<< info.stats.stats.sum.num_objects_manifest << " manifest objects, "
<< m_scrub_cstat.sum.num_bytes_hit_set_archive << "/"
<< info.stats.stats.sum.num_bytes_hit_set_archive << " hit_set_archive bytes."
<< dendl;
<< info.stats.stats.sum.num_bytes_hit_set_archive
<< " hit_set_archive bytes." << dendl;
if (m_scrub_cstat.sum.num_objects != info.stats.stats.sum.num_objects ||
m_scrub_cstat.sum.num_object_clones != info.stats.stats.sum.num_object_clones ||
(m_scrub_cstat.sum.num_objects_dirty != info.stats.stats.sum.num_objects_dirty &&
m_scrub_cstat.sum.num_object_clones !=
info.stats.stats.sum.num_object_clones ||
(m_scrub_cstat.sum.num_objects_dirty !=
info.stats.stats.sum.num_objects_dirty &&
!info.stats.dirty_stats_invalid) ||
(m_scrub_cstat.sum.num_objects_omap != info.stats.stats.sum.num_objects_omap &&
(m_scrub_cstat.sum.num_objects_omap !=
info.stats.stats.sum.num_objects_omap &&
!info.stats.omap_stats_invalid) ||
(m_scrub_cstat.sum.num_objects_pinned != info.stats.stats.sum.num_objects_pinned &&
(m_scrub_cstat.sum.num_objects_pinned !=
info.stats.stats.sum.num_objects_pinned &&
!info.stats.pin_stats_invalid) ||
(m_scrub_cstat.sum.num_objects_hit_set_archive !=
info.stats.stats.sum.num_objects_hit_set_archive &&
@@ -166,23 +172,27 @@ void PrimaryLogScrub::_scrub_finish()
m_scrub_cstat.sum.num_whiteouts != info.stats.stats.sum.num_whiteouts ||
m_scrub_cstat.sum.num_bytes != info.stats.stats.sum.num_bytes) {
m_osds->clog->error() << info.pgid << " " << m_mode_desc << " : stat mismatch, got "
m_osds->clog->error() << info.pgid << " " << m_mode_desc
<< " : stat mismatch, got "
<< m_scrub_cstat.sum.num_objects << "/"
<< info.stats.stats.sum.num_objects << " objects, "
<< m_scrub_cstat.sum.num_object_clones << "/"
<< info.stats.stats.sum.num_object_clones << " clones, "
<< m_scrub_cstat.sum.num_objects_dirty << "/"
<< info.stats.stats.sum.num_objects_dirty << " dirty, "
<< m_scrub_cstat.sum.num_objects_omap << "/"
<< info.stats.stats.sum.num_objects_omap << " omap, "
<< m_scrub_cstat.sum.num_objects_pinned << "/"
<< info.stats.stats.sum.num_objects_pinned << " pinned, "
<< m_scrub_cstat.sum.num_objects_hit_set_archive << "/"
<< info.stats.stats.sum.num_object_clones
<< " clones, " << m_scrub_cstat.sum.num_objects_dirty
<< "/" << info.stats.stats.sum.num_objects_dirty
<< " dirty, " << m_scrub_cstat.sum.num_objects_omap
<< "/" << info.stats.stats.sum.num_objects_omap
<< " omap, " << m_scrub_cstat.sum.num_objects_pinned
<< "/" << info.stats.stats.sum.num_objects_pinned
<< " pinned, "
<< m_scrub_cstat.sum.num_objects_hit_set_archive
<< "/"
<< info.stats.stats.sum.num_objects_hit_set_archive
<< " hit_set_archive, " << m_scrub_cstat.sum.num_whiteouts
<< "/" << info.stats.stats.sum.num_whiteouts << " whiteouts, "
<< m_scrub_cstat.sum.num_bytes << "/"
<< info.stats.stats.sum.num_bytes << " bytes, "
<< " hit_set_archive, "
<< m_scrub_cstat.sum.num_whiteouts << "/"
<< info.stats.stats.sum.num_whiteouts
<< " whiteouts, " << m_scrub_cstat.sum.num_bytes
<< "/" << info.stats.stats.sum.num_bytes << " bytes, "
<< m_scrub_cstat.sum.num_objects_manifest << "/"
<< info.stats.stats.sum.num_objects_manifest
<< " manifest objects, "
@@ -212,7 +222,8 @@ void PrimaryLogScrub::_scrub_finish()
m_pl_pg->object_contexts.clear();
}
PrimaryLogScrub::PrimaryLogScrub(PrimaryLogPG* pg) : PgScrubber{pg}, m_pl_pg{pg} {}
PrimaryLogScrub::PrimaryLogScrub(PrimaryLogPG* pg) : PgScrubber{pg}, m_pl_pg{pg}
{}
void PrimaryLogScrub::_scrub_clear_state()
{
@@ -220,22 +231,27 @@ void PrimaryLogScrub::_scrub_clear_state()
m_scrub_cstat = object_stat_collection_t();
}
void PrimaryLogScrub::stats_of_handled_objects(const object_stat_sum_t& delta_stats,
const hobject_t& soid)
void PrimaryLogScrub::stats_of_handled_objects(
const object_stat_sum_t& delta_stats,
const hobject_t& soid)
{
// We scrub objects in hobject_t order, so objects before m_start have already been
// scrubbed and their stats have already been added to the scrubber. Objects after that
// point haven't been included in the scrubber's stats accounting yet, so they will be
// included when the scrubber gets to that object.
// We scrub objects in hobject_t order, so objects before m_start have already
// been scrubbed and their stats have already been added to the scrubber.
// Objects after that point haven't been included in the scrubber's stats
// accounting yet, so they will be included when the scrubber gets to that
// object.
if (is_primary() && is_scrub_active()) {
if (soid < m_start) {
dout(20) << fmt::format("{} {} < [{},{})", __func__, soid, m_start, m_end) << dendl;
dout(20) << fmt::format("{} {} < [{},{})", __func__, soid, m_start, m_end)
<< dendl;
m_scrub_cstat.add(delta_stats);
} else {
dout(25) << fmt::format("{} {} >= [{},{})", __func__, soid, m_start, m_end) << dendl;
dout(25)
<< fmt::format("{} {} >= [{},{})", __func__, soid, m_start, m_end)
<< dendl;
}
}
}


@@ -14,8 +14,8 @@
#include "messages/MOSDRepScrubMap.h"
#include "messages/MOSDScrub.h"
#include "messages/MOSDScrubReserve.h"
#include "osd/OSD.h"
#include "scrub_machine.h"
class PrimaryLogPG;
@@ -42,7 +42,8 @@ class PrimaryLogScrub : public PgScrubber {
void submit_digest_fixes(const digests_fixes_t& fixes) final;
private:
// we know our PG is actually a PrimaryLogPG. Let's alias the pointer to that object:
// we know our PG is actually a PrimaryLogPG. Let's alias the pointer to that
// object:
PrimaryLogPG* const m_pl_pg;
// handle our part in stats collection


@@ -4,11 +4,11 @@
#ifndef CEPH_SCRUB_RESULT_H
#define CEPH_SCRUB_RESULT_H
#include "osd/SnapMapper.h" // for OSDriver
#include "common/map_cacher.hpp"
#include "osd/SnapMapper.h" // for OSDriver
namespace librados {
struct object_id_t;
struct object_id_t;
}
struct inconsistent_obj_wrapper;
@@ -17,7 +17,7 @@ struct inconsistent_snapset_wrapper;
namespace Scrub {
class Store {
public:
public:
~Store();
static Store* create(ObjectStore* store,
ObjectStore::Transaction* t,
@@ -31,19 +31,25 @@
void add_error(int64_t pool, const inconsistent_snapset_wrapper& e);
bool empty() const;
void flush(ObjectStore::Transaction *);
void cleanup(ObjectStore::Transaction *);
std::vector<ceph::buffer::list> get_snap_errors(int64_t pool,
const librados::object_id_t& start,
uint64_t max_return) const;
std::vector<ceph::buffer::list> get_object_errors(int64_t pool,
const librados::object_id_t& start,
uint64_t max_return) const;
private:
void flush(ObjectStore::Transaction*);
void cleanup(ObjectStore::Transaction*);
std::vector<ceph::buffer::list> get_snap_errors(
int64_t pool,
const librados::object_id_t& start,
uint64_t max_return) const;
std::vector<ceph::buffer::list> get_object_errors(
int64_t pool,
const librados::object_id_t& start,
uint64_t max_return) const;
private:
Store(const coll_t& coll, const ghobject_t& oid, ObjectStore* store);
std::vector<ceph::buffer::list> get_errors(const std::string& start, const std::string& end,
uint64_t max_return) const;
private:
std::vector<ceph::buffer::list> get_errors(const std::string& start,
const std::string& end,
uint64_t max_return) const;
private:
const coll_t coll;
const ghobject_t hoid;
// a temp object holding mappings from seq-id to inconsistencies found in
@@ -52,6 +58,6 @@ private:
mutable MapCacher::MapCacher<std::string, ceph::buffer::list> backend;
std::map<std::string, ceph::buffer::list> results;
};
}
} // namespace Scrub
#endif // CEPH_SCRUB_RESULT_H
#endif // CEPH_SCRUB_RESULT_H


@@ -19,15 +19,18 @@ using namespace ::std::literals;
#define dout_prefix *_dout << "osd." << whoami << " "
ScrubQueue::ScrubJob::ScrubJob(CephContext* cct, const spg_t& pg, int node_id)
: RefCountedObject{cct}, pgid{pg}, whoami{node_id}, cct{cct}
: RefCountedObject{cct}
, pgid{pg}
, whoami{node_id}
, cct{cct}
{}
// debug usage only
ostream& operator<<(ostream& out, const ScrubQueue::ScrubJob& sjob)
{
out << sjob.pgid << ", " << sjob.schedule.scheduled_at
<< " dead: " << sjob.schedule.deadline << " - " << sjob.registration_state()
<< " / failure: " << sjob.resources_failure
<< " dead: " << sjob.schedule.deadline << " - "
<< sjob.registration_state() << " / failure: " << sjob.resources_failure
<< " / pen. t.o.: " << sjob.penalty_timeout
<< " / queue state: " << ScrubQueue::qu_state_text(sjob.state);
@@ -64,7 +67,8 @@ std::string ScrubQueue::ScrubJob::scheduling_state(utime_t now_is,
return fmt::format("queued for {}scrub", (is_deep_expected ? "deep " : ""));
}
return fmt::format("{}scrub scheduled @ {}", (is_deep_expected ? "deep " : ""),
return fmt::format("{}scrub scheduled @ {}",
(is_deep_expected ? "deep " : ""),
schedule.scheduled_at);
}
@@ -80,7 +84,8 @@
ScrubQueue::ScrubQueue(CephContext* cct, OSDService& osds)
: cct{cct}, osd_service{osds}
: cct{cct}
, osd_service{osds}
{
// initialize the daily loadavg with current 15min loadavg
if (double loadavgs[3]; getloadavg(loadavgs, 3) == 3) {
@@ -128,8 +133,9 @@ void ScrubQueue::remove_from_osd_queue(ScrubJobRef scrub_job)
<< dendl;
qu_state_t expected_state{qu_state_t::registered};
auto ret = scrub_job->state.compare_exchange_strong(expected_state,
qu_state_t::unregistering);
auto ret =
scrub_job->state.compare_exchange_strong(expected_state,
qu_state_t::unregistering);
if (ret) {
@@ -141,7 +147,8 @@ void ScrubQueue::remove_from_osd_queue(ScrubJobRef scrub_job)
// job wasn't in state 'registered' coming in
dout(5) << "removing pg[" << scrub_job->pgid
<< "] failed. State was: " << qu_state_text(expected_state) << dendl;
<< "] failed. State was: " << qu_state_text(expected_state)
<< dendl;
}
}
@@ -299,8 +306,8 @@ std::string_view ScrubQueue::qu_state_text(qu_state_t st)
Scrub::schedule_result_t ScrubQueue::select_pg_and_scrub(
Scrub::ScrubPreconds& preconds)
{
dout(10) << " reg./pen. sizes: " << to_scrub.size() << " / " << penalized.size()
<< dendl;
dout(10) << " reg./pen. sizes: " << to_scrub.size() << " / "
<< penalized.size() << dendl;
utime_t now_is = ceph_clock_now();
@@ -322,7 +329,8 @@ Scrub::schedule_result_t ScrubQueue::select_pg_and_scrub(
restore_penalized = false;
// remove the 'updated' flag from all entries
std::for_each(to_scrub.begin(), to_scrub.end(),
std::for_each(to_scrub.begin(),
to_scrub.end(),
[](const auto& jobref) -> void { jobref->updated = false; });
// add failed scrub attempts to the penalized list
@@ -343,8 +351,8 @@ Scrub::schedule_result_t ScrubQueue::select_pg_and_scrub(
// - we will try the penalized
if (res == Scrub::schedule_result_t::none_ready && !penalized_copy.empty()) {
res = select_from_group(penalized_copy, preconds, now_is);
dout(10) << "tried the penalized. Res: " << ScrubQueue::attempt_res_text(res)
<< dendl;
dout(10) << "tried the penalized. Res: "
<< ScrubQueue::attempt_res_text(res) << dendl;
restore_penalized = true;
}
@@ -379,8 +387,9 @@ struct cmp_sched_time_t {
} // namespace
// called under lock
ScrubQueue::ScrubQContainer ScrubQueue::collect_ripe_jobs(ScrubQContainer& group,
utime_t time_now)
ScrubQueue::ScrubQContainer ScrubQueue::collect_ripe_jobs(
ScrubQContainer& group,
utime_t time_now)
{
rm_unregistered_jobs(group);
@@ -388,7 +397,9 @@ ScrubQueue::ScrubQContainer ScrubQueue::collect_ripe_jobs(ScrubQContainer& group
ScrubQueue::ScrubQContainer ripes;
ripes.reserve(group.size());
std::copy_if(group.begin(), group.end(), std::back_inserter(ripes),
std::copy_if(group.begin(),
group.end(),
std::back_inserter(ripes),
[time_now](const auto& jobref) -> bool {
return jobref->schedule.scheduled_at <= time_now;
});
@@ -408,7 +419,9 @@ ScrubQueue::ScrubQContainer ScrubQueue::collect_ripe_jobs(ScrubQContainer& group
// not holding jobs_lock. 'group' is a copy of the actual list.
Scrub::schedule_result_t ScrubQueue::select_from_group(
ScrubQContainer& group, const Scrub::ScrubPreconds& preconds, utime_t now_is)
ScrubQContainer& group,
const Scrub::ScrubPreconds& preconds,
utime_t now_is)
{
dout(15) << "jobs #: " << group.size() << dendl;
@@ -429,8 +442,9 @@ Scrub::schedule_result_t ScrubQueue::select_from_group(
// we have a candidate to scrub. We turn to the OSD to verify that the PG
// configuration allows the specified type of scrub, and to initiate the
// scrub.
switch (osd_service.initiate_a_scrub(candidate->pgid,
preconds.allow_requested_repair_only)) {
switch (
osd_service.initiate_a_scrub(candidate->pgid,
preconds.allow_requested_repair_only)) {
case Scrub::schedule_result_t::scrub_initiated:
// the happy path. We are done
@@ -544,8 +558,9 @@ bool ScrubQueue::scrub_load_below_threshold() const
// allow scrub if below daily avg and currently decreasing
if (loadavgs[0] < daily_loadavg && loadavgs[0] < loadavgs[2]) {
dout(20) << "loadavg " << loadavgs[0] << " < daily_loadavg " << daily_loadavg
<< " and < 15m avg " << loadavgs[2] << " = yes" << dendl;
dout(20) << "loadavg " << loadavgs[0] << " < daily_loadavg "
<< daily_loadavg << " and < 15m avg " << loadavgs[2] << " = yes"
<< dendl;
return true;
}
@@ -575,7 +590,9 @@ void ScrubQueue::scan_penalized(bool forgive_all, utime_t time_now)
} else {
auto forgiven_last = std::partition(
penalized.begin(), penalized.end(), [time_now](const auto& e) {
penalized.begin(),
penalized.end(),
[time_now](const auto& e) {
return (*e).updated || ((*e).penalty_timeout <= time_now);
});
@@ -599,9 +616,9 @@ bool ScrubQueue::scrub_time_permit(utime_t now) const
time_t tt = now.sec();
localtime_r(&tt, &bdt);
bool day_permit =
isbetween_modulo(cct->_conf->osd_scrub_begin_week_day,
cct->_conf->osd_scrub_end_week_day, bdt.tm_wday);
bool day_permit = isbetween_modulo(cct->_conf->osd_scrub_begin_week_day,
cct->_conf->osd_scrub_end_week_day,
bdt.tm_wday);
if (!day_permit) {
dout(20) << "should run between week day "
<< cct->_conf->osd_scrub_begin_week_day << " - "
@@ -610,9 +627,9 @@ bool ScrubQueue::scrub_time_permit(utime_t now) const
return false;
}
bool time_permit =
isbetween_modulo(cct->_conf->osd_scrub_begin_hour,
cct->_conf->osd_scrub_end_hour, bdt.tm_hour);
bool time_permit = isbetween_modulo(cct->_conf->osd_scrub_begin_hour,
cct->_conf->osd_scrub_end_hour,
bdt.tm_hour);
dout(20) << "should run between " << cct->_conf->osd_scrub_begin_hour << " - "
<< cct->_conf->osd_scrub_end_hour << " now (" << bdt.tm_hour
<< ") = " << (time_permit ? "yes" : "no") << dendl;
@@ -625,7 +642,8 @@ void ScrubQueue::ScrubJob::dump(ceph::Formatter* f) const
f->dump_stream("pgid") << pgid;
f->dump_stream("sched_time") << schedule.scheduled_at;
f->dump_stream("deadline") << schedule.deadline;
f->dump_bool("forced", schedule.scheduled_at == PgScrubber::scrub_must_stamp());
f->dump_bool("forced",
schedule.scheduled_at == PgScrubber::scrub_must_stamp());
f->close_section();
}
@@ -636,10 +654,12 @@ void ScrubQueue::dump_scrubs(ceph::Formatter* f) const
f->open_array_section("scrubs");
std::for_each(to_scrub.cbegin(), to_scrub.cend(),
[&f](const ScrubJobRef& j) { j->dump(f); });
std::for_each(to_scrub.cbegin(), to_scrub.cend(), [&f](const ScrubJobRef& j) {
j->dump(f);
});
std::for_each(penalized.cbegin(), penalized.cend(),
std::for_each(penalized.cbegin(),
penalized.cend(),
[&f](const ScrubJobRef& j) { j->dump(f); });
f->close_section();
@@ -653,9 +673,13 @@ ScrubQueue::ScrubQContainer ScrubQueue::list_registered_jobs() const
std::lock_guard lck{jobs_lock};
std::copy_if(to_scrub.begin(), to_scrub.end(), std::back_inserter(all_jobs),
std::copy_if(to_scrub.begin(),
to_scrub.end(),
std::back_inserter(all_jobs),
registered_job);
std::copy_if(penalized.begin(), penalized.end(), std::back_inserter(all_jobs),
std::copy_if(penalized.begin(),
penalized.end(),
std::back_inserter(all_jobs),
registered_job);
return all_jobs;
@@ -709,9 +733,9 @@ bool ScrubQueue::inc_scrubs_remote()
std::lock_guard lck{resource_lock};
if (scrubs_local + scrubs_remote < cct->_conf->osd_max_scrubs) {
dout(20) << ": " << scrubs_remote << " -> " << (scrubs_remote + 1) << " (max "
<< cct->_conf->osd_max_scrubs << ", local " << scrubs_local << ")"
<< dendl;
dout(20) << ": " << scrubs_remote << " -> " << (scrubs_remote + 1)
<< " (max " << cct->_conf->osd_max_scrubs << ", local "
<< scrubs_local << ")" << dendl;
++scrubs_remote;
return true;
}


@@ -178,9 +178,9 @@ class ScrubQueue {
struct ScrubJob final : public RefCountedObject {
/**
* a time scheduled for scrub, and a deadline: The scrub could be delayed if
* system load is too high (but not if after the deadline), or if trying to
* scrub out of scrub hours.
* a time scheduled for scrub, and a deadline: The scrub could be delayed
* if system load is too high (but not if after the deadline), or if trying
* to scrub out of scrub hours.
*/
scrub_schedule_t schedule;
@@ -354,8 +354,8 @@ class ScrubQueue {
* (read - with higher value) configuration element
* (osd_scrub_extended_sleep).
*/
double scrub_sleep_time(
bool must_scrub) const; /// \todo (future) return milliseconds
double scrub_sleep_time(bool must_scrub) const; /// \todo (future) return
/// milliseconds
/**
* called every heartbeat to update the "daily" load average
@@ -450,7 +450,8 @@ class ScrubQueue {
*/
void move_failed_pgs(utime_t now_is);
Scrub::schedule_result_t select_from_group(ScrubQContainer& group,
const Scrub::ScrubPreconds& preconds,
utime_t now_is);
Scrub::schedule_result_t select_from_group(
ScrubQContainer& group,
const Scrub::ScrubPreconds& preconds,
utime_t now_is);
};

File diff suppressed because it is too large.


@@ -90,14 +90,16 @@ struct BuildMap;
/**
* Reserving/freeing scrub resources at the replicas.
*
* When constructed - sends reservation requests to the acting_set.
* A rejection triggers a "couldn't acquire the replicas' scrub resources" event.
* All previous requests, whether already granted or not, are explicitly released.
* When constructed - sends reservation requests to the acting_set.
* A rejection triggers a "couldn't acquire the replicas' scrub resources"
* event. All previous requests, whether already granted or not, are explicitly
* released.
*
* A note re performance: I've measured a few container alternatives for
* m_reserved_peers, with its specific usage pattern. Std::set is extremely slow, as
* expected. flat_set is only slightly better. Surprisingly - std::vector (with no
* sorting) is better than boost::small_vec. And for std::vector: no need to pre-reserve.
* A note re performance: I've measured a few container alternatives for
* m_reserved_peers, with its specific usage pattern. Std::set is extremely
* slow, as expected. flat_set is only slightly better. Surprisingly -
* std::vector (with no sorting) is better than boost::small_vec. And for
* std::vector: no need to pre-reserve.
*/
class ReplicaReservations {
using OrigSet = decltype(std::declval<PG>().get_actingset());
@@ -110,7 +112,7 @@ class ReplicaReservations {
bool m_had_rejections{false};
int m_pending{-1};
const pg_info_t& m_pg_info;
ScrubQueue::ScrubJobRef m_scrub_job; ///< a ref to this PG's scrub job
ScrubQueue::ScrubJobRef m_scrub_job; ///< a ref to this PG's scrub job
void release_replica(pg_shard_t peer, epoch_t epoch);
@@ -125,12 +127,15 @@ class ReplicaReservations {
/**
* quietly discard all knowledge about existing reservations. No messages
* are sent to peers.
* To be used upon interval change, as we know the running scrub is no longer
* relevant, and that the replicas had reset the reservations on their side.
* To be used upon interval change, as we know the running scrub is no
* longer relevant, and that the replicas had reset the reservations on
* their side.
*/
void discard_all();
ReplicaReservations(PG* pg, pg_shard_t whoami, ScrubQueue::ScrubJobRef scrubjob);
ReplicaReservations(PG* pg,
pg_shard_t whoami,
ScrubQueue::ScrubJobRef scrubjob);
~ReplicaReservations();
@@ -155,19 +160,26 @@ class LocalReservation {
};
/**
* wraps the OSD resource we are using when reserved as a replica by a scrubbing primary.
* wraps the OSD resource we are using when reserved as a replica by a
* scrubbing primary.
*/
class ReservedByRemotePrimary {
const PgScrubber* m_scrubber; ///< we will be using its gen_prefix()
const PgScrubber* m_scrubber; ///< we will be using its gen_prefix()
PG* m_pg;
OSDService* m_osds;
bool m_reserved_by_remote_primary{false};
const epoch_t m_reserved_at;
public:
ReservedByRemotePrimary(const PgScrubber* scrubber, PG* pg, OSDService* osds, epoch_t epoch);
ReservedByRemotePrimary(const PgScrubber* scrubber,
PG* pg,
OSDService* osds,
epoch_t epoch);
~ReservedByRemotePrimary();
[[nodiscard]] bool is_reserved() const { return m_reserved_by_remote_primary; }
[[nodiscard]] bool is_reserved() const
{
return m_reserved_by_remote_primary;
}
/// compare the remembered reserved-at epoch to the current interval
[[nodiscard]] bool is_stale() const;
@@ -176,10 +188,10 @@ class ReservedByRemotePrimary {
};
/**
* Once all replicas' scrub maps are received, we go on to compare the maps. That is -
* unless we have not yet completed building our own scrub map. MapsCollectionStatus
* combines the status of waiting for both the local map and the replicas, without
* resorting to adding dummy entries into a list.
* Once all replicas' scrub maps are received, we go on to compare the maps.
* That is - unless we have not yet completed building our own scrub map.
* MapsCollectionStatus combines the status of waiting for both the local map
* and the replicas, without resorting to adding dummy entries into a list.
*/
class MapsCollectionStatus {
@@ -202,7 +214,10 @@ class MapsCollectionStatus {
/// @returns true if indeed waiting for this one. Otherwise: an error string
auto mark_arriving_map(pg_shard_t from) -> std::tuple<bool, std::string_view>;
[[nodiscard]] std::vector<pg_shard_t> get_awaited() const { return m_maps_awaited_for; }
[[nodiscard]] std::vector<pg_shard_t> get_awaited() const
{
return m_maps_awaited_for;
}
void reset();
@@ -231,7 +246,8 @@ struct scrub_flags_t {
*/
bool auto_repair{false};
/// this flag indicates that we are scrubbing post repair to verify everything is fixed
/// this flag indicates that we are scrubbing post repair to verify everything
/// is fixed
bool check_repair{false};
/// checked at the end of the scrub, to possibly initiate a deep-scrub
@@ -239,8 +255,8 @@
/**
* scrub must not be aborted.
* Set for explicitly requested scrubs, and for scrubs originated by the pairing
* process with the 'repair' flag set (in the RequestScrub event).
* Set for explicitly requested scrubs, and for scrubs originated by the
* pairing process with the 'repair' flag set (in the RequestScrub event).
*/
bool required{false};
};
@@ -256,12 +272,12 @@ ostream& operator<<(ostream& out, const scrub_flags_t& sf);
* the actual scrubbing code.
*/
class PgScrubber : public ScrubPgIF,
public ScrubMachineListener,
public SnapMapperAccessor {
public ScrubMachineListener,
public SnapMapperAccessor {
public:
explicit PgScrubber(PG* pg);
friend class ScrubBackend; // will be replaced by a limited interface
friend class ScrubBackend; // will be replaced by a limited interface
// ------------------ the I/F exposed to the PG (ScrubPgIF) -------------
@@ -290,10 +306,11 @@ class PgScrubber : public ScrubPgIF,
void send_replica_pushes_upd(epoch_t epoch_queued) final;
/**
* The PG has updated its 'applied version'. It might be that we are waiting for this
* information: after selecting a range of objects to scrub, we've marked the latest
* version of these objects in m_subset_last_update. We will not start the map building
* before we know that the PG has reached this version.
* The PG has updated its 'applied version'. It might be that we are waiting
* for this information: after selecting a range of objects to scrub, we've
* marked the latest version of these objects in m_subset_last_update. We will
* not start the map building before we know that the PG has reached this
* version.
*/
void on_applied_when_primary(const eversion_t& applied_version) final;
@@ -319,7 +336,8 @@ class PgScrubber : public ScrubPgIF,
bool write_blocked_by_scrub(const hobject_t& soid) final;
/// true if the given range intersects the scrub interval in any way
bool range_intersects_scrub(const hobject_t& start, const hobject_t& end) final;
bool range_intersects_scrub(const hobject_t& start,
const hobject_t& end) final;
/**
* we are a replica being asked by the Primary to reserve OSD resources for
@@ -342,7 +360,8 @@ class PgScrubber : public ScrubPgIF,
void on_primary_change(const requested_scrub_t& request_flags) final;
void on_maybe_registration_change(const requested_scrub_t& request_flags) final;
void on_maybe_registration_change(
const requested_scrub_t& request_flags) final;
void scrub_requested(scrub_level_t scrub_level,
scrub_type_t scrub_type,
@@ -373,14 +392,17 @@ class PgScrubber : public ScrubPgIF,
return m_replica_request_priority;
};
unsigned int scrub_requeue_priority(Scrub::scrub_prio_t with_priority,
unsigned int suggested_priority) const final;
unsigned int scrub_requeue_priority(
Scrub::scrub_prio_t with_priority,
unsigned int suggested_priority) const final;
/// the version that refers to m_flags.priority
unsigned int scrub_requeue_priority(Scrub::scrub_prio_t with_priority) const final;
unsigned int scrub_requeue_priority(
Scrub::scrub_prio_t with_priority) const final;
void add_callback(Context* context) final { m_callbacks.push_back(context); }
[[nodiscard]] bool are_callbacks_pending() const final // used for an assert in PG.cc
[[nodiscard]] bool are_callbacks_pending() const final // used for an assert
// in PG.cc
{
return !m_callbacks.empty();
}
@@ -396,7 +418,7 @@ class PgScrubber : public ScrubPgIF,
* add to scrub statistics, but only if the soid is below the scrub start
*/
void stats_of_handled_objects(const object_stat_sum_t& delta_stats,
const hobject_t& soid) override
const hobject_t& soid) override
{
ceph_assert(false);
}
@@ -404,8 +426,9 @@ class PgScrubber : public ScrubPgIF,
/**
* finalize the parameters of the initiated scrubbing session:
*
* The "current scrub" flags (m_flags) are set from the 'planned_scrub' flag-set;
* PG_STATE_SCRUBBING, and possibly PG_STATE_DEEP_SCRUB & PG_STATE_REPAIR are set.
* The "current scrub" flags (m_flags) are set from the 'planned_scrub'
* flag-set; PG_STATE_SCRUBBING, and possibly PG_STATE_DEEP_SCRUB &
* PG_STATE_REPAIR are set.
*/
void set_op_parameters(requested_scrub_t& request) final;
@@ -423,10 +446,14 @@ class PgScrubber : public ScrubPgIF,
std::stringstream& ss) override;
int m_debug_blockrange{0};
// -------------------------------------------------------------------------------------------
// the I/F used by the state-machine (i.e. the implementation of ScrubMachineListener)
// --------------------------------------------------------------------------
// the I/F used by the state-machine (i.e. the implementation of
// ScrubMachineListener)
[[nodiscard]] bool is_primary() const final { return m_pg->recovery_state.is_primary(); }
[[nodiscard]] bool is_primary() const final
{
return m_pg->recovery_state.is_primary();
}
void select_range_n_notify() final;
@@ -446,13 +473,13 @@ class PgScrubber : public ScrubPgIF,
void on_replica_init() final;
void replica_handling_done() final;
/// the version of 'scrub_clear_state()' that does not try to invoke FSM services
/// (thus can be called from FSM reactions)
/// the version of 'scrub_clear_state()' that does not try to invoke FSM
/// services (thus can be called from FSM reactions)
void clear_pgscrub_state() final;
/*
* Send an 'InternalSchedScrub' FSM event either immediately, or - if 'm_need_sleep'
* is asserted - after a configuration-dependent timeout.
* Send an 'InternalSchedScrub' FSM event either immediately, or - if
* 'm_need_sleep' is asserted - after a configuration-dependent timeout.
*/
void add_delayed_scheduling() final;
@@ -462,10 +489,11 @@ class PgScrubber : public ScrubPgIF,
void scrub_finish() final;
ScrubMachineListener::MsgAndEpoch
prep_replica_map_msg(Scrub::PreemptionNoted was_preempted) final;
ScrubMachineListener::MsgAndEpoch prep_replica_map_msg(
Scrub::PreemptionNoted was_preempted) final;
void send_replica_map(const ScrubMachineListener::MsgAndEpoch& preprepared) final;
void send_replica_map(
const ScrubMachineListener::MsgAndEpoch& preprepared) final;
void send_preempted_replica() final;
@@ -511,7 +539,8 @@ class PgScrubber : public ScrubPgIF,
std::ostream& gen_prefix(std::ostream& out) const final;
// fetching the snap-set for a given object (used by the scrub-backend)
int get_snaps(const hobject_t& hoid, std::set<snapid_t>* snaps_set) const final
int get_snaps(const hobject_t& hoid,
std::set<snapid_t>* snaps_set) const final
{
return m_pg->snap_mapper.get_snaps(hoid, snaps_set);
}
@@ -525,18 +554,20 @@ class PgScrubber : public ScrubPgIF,
[[nodiscard]] bool is_scrub_registered() const;
/// the 'is-in-scheduling-queue' status, using relaxed-semantics access to the status
/// the 'is-in-scheduling-queue' status, using relaxed-semantics access to the
/// status
std::string_view registration_state() const;
virtual void _scrub_clear_state() {}
utime_t m_scrub_reg_stamp; ///< stamp we registered for
ScrubQueue::ScrubJobRef m_scrub_job; ///< the scrub-job used by the OSD to schedule us
utime_t m_scrub_reg_stamp; ///< stamp we registered for
ScrubQueue::ScrubJobRef m_scrub_job; ///< the scrub-job used by the OSD to
///< schedule us
ostream& show(ostream& out) const override;
public:
// ------------------ the I/F used by the ScrubBackend (not named yet) -------------
// ------------------ the I/F used by the ScrubBackend (not named yet)
// note: the reason we must have these forwarders, is because of the
// artificial PG vs. PrimaryLogPG distinction. Some of the services used
@@ -594,21 +625,22 @@
*
* It isn't if:
* - (1) we are no longer 'actively scrubbing'; or
* - (2) the message is from an epoch prior to when we started the current scrub
* session; or
* - (2) the message is from an epoch prior to when we started the current
* scrub session; or
* - (3) the message epoch is from a previous interval; or
* - (4) the 'abort' configuration flags were set.
*
* For (1) & (2) - the incoming message is discarded, w/o further action.
*
* For (3): (see check_interval() for a full description) if we have not reacted yet
* to this specific new interval, we do now:
* - replica reservations are silently discarded (we count on the replicas to notice
* the interval change and un-reserve themselves);
* For (3): (see check_interval() for a full description) if we have not
* reacted yet to this specific new interval, we do now:
* - replica reservations are silently discarded (we count on the replicas to
* notice the interval change and un-reserve themselves);
* - the scrubbing is halted.
*
* For (4): the message will be discarded, but also:
* if this is the first time we've noticed the 'abort' request, we perform the abort.
* if this is the first time we've noticed the 'abort' request, we perform
* the abort.
*
* \returns should the incoming event be processed?
*/
@@ -631,18 +663,20 @@ class PgScrubber : public ScrubPgIF,
epoch_t m_last_aborted{}; // last time we've noticed a request to abort
bool m_needs_sleep{true}; ///< should we sleep before being rescheduled? always
///< 'true', unless we just got out of a sleep period
bool m_needs_sleep{true}; ///< should we sleep before being rescheduled?
///< always 'true', unless we just got out of a
///< sleep period
utime_t m_sleep_started_at;
// 'optional', as 'ReplicaReservations' & 'LocalReservation' are 'RAII-designed'
// to guarantee un-reserving when deleted.
// 'optional', as 'ReplicaReservations' & 'LocalReservation' are
// 'RAII-designed' to guarantee un-reserving when deleted.
std::optional<Scrub::ReplicaReservations> m_reservations;
std::optional<Scrub::LocalReservation> m_local_osd_resource;
/// the 'remote' resource we, as a replica, grant our Primary when it is scrubbing
/// the 'remote' resource we, as a replica, grant our Primary when it is
/// scrubbing
std::optional<Scrub::ReservedByRemotePrimary> m_remote_osd_resource;
void cleanup_on_finish(); // scrub_clear_state() as called for a Primary when
@@ -656,7 +690,8 @@ class PgScrubber : public ScrubPgIF,
*/
virtual void _scrub_finish() {}
// common code used by build_primary_map_chunk() and build_replica_map_chunk():
// common code used by build_primary_map_chunk() and
// build_replica_map_chunk():
int build_scrub_map_chunk(ScrubMap& map, // primary or replica?
ScrubMapBuilder& pos,
hobject_t start,
@@ -668,45 +703,49 @@
OSDService* const m_osds;
const pg_shard_t m_pg_whoami; ///< a local copy of m_pg->pg_whoami;
epoch_t m_interval_start{0}; ///< interval's 'from' of when scrubbing was first scheduled
epoch_t m_interval_start{0}; ///< interval's 'from' of when scrubbing was
///< first scheduled
/*
* the exact epoch when the scrubbing actually started (started here - cleared checks
* for no-scrub conf). Incoming events are verified against this, with stale events
* discarded.
* the exact epoch when the scrubbing actually started (started here - cleared
* checks for no-scrub conf). Incoming events are verified against this, with
* stale events discarded.
*/
epoch_t m_epoch_start{0}; ///< the actual epoch when scrubbing started
/**
* (replica) a tag identifying a specific scrub "session". Incremented whenever the
* Primary releases the replica scrub resources.
* When the scrub session is terminated (even if the interval remains unchanged, as
* might happen following an asok no-scrub command), stale scrub-resched messages
* (replica) a tag identifying a specific scrub "session". Incremented
* whenever the Primary releases the replica scrub resources. When the scrub
* session is terminated (even if the interval remains unchanged, as might
* happen following an asok no-scrub command), stale scrub-resched messages
* triggered by the backend will be discarded.
*/
Scrub::act_token_t m_current_token{1};
/**
* (primary/replica) a test aid. A counter that is incremented whenever a scrub starts,
* and again when it terminates. Exposed as part of the 'pg query' command, to be used
* by test scripts.
* (primary/replica) a test aid. A counter that is incremented whenever a
* scrub starts, and again when it terminates. Exposed as part of the 'pg
* query' command, to be used by test scripts.
*
* @ATTN: not guaranteed to be accurate. To be only used for tests. This is why it
* is initialized to a meaningless number;
* @ATTN: not guaranteed to be accurate. To be only used for tests. This is
* why it is initialized to a meaningless number;
*/
int32_t m_sessions_counter{(int32_t)((int64_t)(this) & 0x0000'0000'00ff'fff0)};
bool m_publish_sessions{false}; //< will the counter be part of 'query' output?
int32_t m_sessions_counter{
(int32_t)((int64_t)(this) & 0x0000'0000'00ff'fff0)};
bool m_publish_sessions{false}; //< will the counter be part of 'query'
//output?
scrub_flags_t m_flags;
/// a reference to the details of the next scrub (as requested and managed by the PG)
/// a reference to the details of the next scrub (as requested and managed by
/// the PG)
requested_scrub_t& m_planned_scrub;
bool m_active{false};
/**
* a flag designed to prevent the initiation of a second scrub on a PG for which scrubbing
* has been initiated.
* a flag designed to prevent the initiation of a second scrub on a PG for
* which scrubbing has been initiated.
*
* set once scrubbing was initiated (i.e. - even before the FSM event that
* will trigger a state-change out of Inactive was handled), and only reset
@@ -717,7 +756,8 @@ class PgScrubber : public ScrubPgIF,
* - all the time from scrub_finish() calling update_stats() till the
* FSM handles the 'finished' event
*
* Compared with 'm_active', this flag is asserted earlier and remains ON for longer.
* Compared with 'm_active', this flag is asserted earlier and remains ON for
* longer.
*/
bool m_queued_or_active{false};
@@ -746,9 +786,9 @@
* 'm_is_deep' - is the running scrub a deep one?
*
* Note that most of the code directly checks PG_STATE_DEEP_SCRUB, which is
* primary-only (and is set earlier - when scheduling the scrub). 'm_is_deep' is
* meaningful both for the primary and the replicas, and is used as a parameter when
* building the scrub maps.
* primary-only (and is set earlier - when scheduling the scrub). 'm_is_deep'
* is meaningful both for the primary and the replicas, and is used as a
* parameter when building the scrub maps.
*/
bool m_is_deep{false};
@@ -770,15 +810,14 @@
* "scrub
*
* Note: based on PG_STATE_REPAIR, and not on m_is_repair. I.e. for
* auto_repair will show as "deep-scrub" and not as "repair" (until the first error
* is detected).
* auto_repair will show as "deep-scrub" and not as "repair" (until the first
* error is detected).
*/
std::string_view m_mode_desc;
void update_op_mode_text();
private:
private:
/**
* initiate a deep-scrub after the current scrub ended with errors.
*/
@@ -838,19 +877,21 @@ private:
std::unique_ptr<ScrubBackend> m_be;
/**
* we mark the request priority as it arrived. It influences the queuing priority
* when we wait for local updates
* we mark the request priority as it arrived. It influences the queuing
* priority when we wait for local updates
*/
Scrub::scrub_prio_t m_replica_request_priority;
/**
* the 'preemption' "state-machine".
* Note: I was considering an orthogonal sub-machine implementation, but as
* the state diagram is extremely simple, the added complexity wasn't justified.
* the state diagram is extremely simple, the added complexity wasn't
* justified.
*/
class preemption_data_t : public Scrub::preemption_t {
public:
explicit preemption_data_t(PG* pg); // the PG access is used for conf access (and logs)
explicit preemption_data_t(PG* pg); // the PG access is used for conf
// access (and logs)
[[nodiscard]] bool is_preemptable() const final { return m_preemptable; }
@@ -882,7 +923,8 @@ private:
}
}
/// used by a replica to set preemptability state according to the Primary's request
/// used by a replica to set preemptability state according to the Primary's
/// request
void force_preemptability(bool is_allowed)
{
// note: no need to lock for a replica


@@ -1,8 +1,6 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#include "scrub_machine.h"
#include <chrono>
#include <typeinfo>
@@ -10,7 +8,9 @@
#include "osd/OSD.h"
#include "osd/OpRequest.h"
#include "ScrubStore.h"
#include "scrub_machine.h"
#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_osd
@@ -44,9 +44,11 @@ std::string ScrubMachine::current_states_desc() const
{
std::string sts{"<"};
for (auto si = state_begin(); si != state_end(); ++si) {
const auto& siw{ *si }; // prevents a warning re side-effects
const auto& siw{*si}; // prevents a warning re side-effects
// the '7' is the size of the 'scrub::'
sts += boost::core::demangle(typeid(siw).name()).substr(7, std::string::npos) + "/";
sts +=
boost::core::demangle(typeid(siw).name()).substr(7, std::string::npos) +
"/";
}
return sts + ">";
}
@@ -268,8 +270,9 @@ WaitPushes::WaitPushes(my_context ctx) : my_base(ctx)
sc::result WaitPushes::react(const ActivePushesUpd&)
{
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
dout(10) << "WaitPushes::react(const ActivePushesUpd&) pending_active_pushes: "
<< scrbr->pending_active_pushes() << dendl;
dout(10)
<< "WaitPushes::react(const ActivePushesUpd&) pending_active_pushes: "
<< scrbr->pending_active_pushes() << dendl;
if (!scrbr->pending_active_pushes()) {
// done waiting
@@ -328,8 +331,8 @@ BuildMap::BuildMap(my_context ctx) : my_base(ctx)
dout(10) << " -- state -->> Act/BuildMap" << dendl;
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
// no need to check for an epoch change, as all possible flows that brought us here have
// a check_interval() verification of their final event.
// no need to check for an epoch change, as all possible flows that brought
// us here have a check_interval() verification of their final event.
if (scrbr->get_preemptor().was_preempted()) {
@@ -374,7 +377,7 @@ sc::result BuildMap::react(const IntLocalMapDone&)
DrainReplMaps::DrainReplMaps(my_context ctx) : my_base(ctx)
{
dout(10) << "-- state -->> Act/DrainReplMaps" << dendl;
// we may have received all maps already. Send the event that will make us check.
// we may have got all maps already. Send the event that will make us check.
post_event(GotReplicas{});
}
@@ -388,7 +391,8 @@ sc::result DrainReplMaps::react(const GotReplicas&)
return transit<PendingTimer>();
}
dout(15) << "DrainReplMaps::react(const GotReplicas&): still draining incoming maps: "
dout(15) << "DrainReplMaps::react(const GotReplicas&): still draining "
"incoming maps: "
<< scrbr->dump_awaited_maps() << dendl;
return discard_event();
}
@@ -402,17 +406,18 @@ WaitReplicas::WaitReplicas(my_context ctx) : my_base(ctx)
}
/**
* note: now that maps_compare_n_cleanup() is "futurized"(*), and we remain in this state
* for a while even after we got all our maps, we must prevent are_all_maps_available()
* (actually - the code after the if()) from being called more than once.
* This is basically a separate state, but it's too transitory and artificial to justify
* the cost of a separate state.
* note: now that maps_compare_n_cleanup() is "futurized"(*), and we remain in
* this state for a while even after we got all our maps, we must prevent
* are_all_maps_available() (actually - the code after the if()) from being
* called more than once.
* This is basically a separate state, but it's too transitory and artificial
* to justify the cost of a separate state.
* (*) "futurized" - in Crimson, the call to maps_compare_n_cleanup() returns immediately
* after initiating the process. The actual termination of the maps comparing etc' is
* signalled via an event. As we share the code with "classic" OSD, here too
* maps_compare_n_cleanup() is responsible for signalling the completion of the
* processing.
* (*) "futurized" - in Crimson, the call to maps_compare_n_cleanup() returns
* immediately after initiating the process. The actual termination of the
* maps comparing etc' is signalled via an event. As we share the code with
* "classic" OSD, here too maps_compare_n_cleanup() is responsible for
* signalling the completion of the processing.
*/
sc::result WaitReplicas::react(const GotReplicas&)
{
@@ -433,7 +438,8 @@ sc::result WaitReplicas::react(const GotReplicas&)
} else {
// maps_compare_n_cleanup() will arrange for MapsCompared event to be sent:
// maps_compare_n_cleanup() will arrange for MapsCompared event to be
// sent:
scrbr->maps_compare_n_cleanup();
return discard_event();
}
@@ -445,7 +451,8 @@ sc::result WaitReplicas::react(const GotReplicas&)
sc::result WaitReplicas::react(const DigestUpdate&)
{
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
auto warn_msg = "WaitReplicas::react(const DigestUpdate&): Unexpected DigestUpdate event"s;
auto warn_msg =
"WaitReplicas::react(const DigestUpdate&): Unexpected DigestUpdate event"s;
dout(10) << warn_msg << dendl;
scrbr->log_cluster_warning(warn_msg);
return discard_event();
@@ -488,9 +495,9 @@ sc::result WaitDigestUpdate::react(const ScrubFinished&)
}
ScrubMachine::ScrubMachine(PG* pg, ScrubMachineListener* pg_scrub)
: m_pg_id{pg->pg_id}, m_scrbr{pg_scrub}
{
}
: m_pg_id{pg->pg_id}
, m_scrbr{pg_scrub}
{}
ScrubMachine::~ScrubMachine() = default;
@@ -538,7 +545,8 @@ ActiveReplica::ActiveReplica(my_context ctx) : my_base(ctx)
{
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
dout(10) << "-- state -->> ActiveReplica" << dendl;
scrbr->on_replica_init(); // as we might have skipped ReplicaWaitUpdates
// and as we might have skipped ReplicaWaitUpdates:
scrbr->on_replica_init();
post_event(SchedReplica{});
}


@@ -16,9 +16,9 @@
#include "common/version.h"
#include "include/Context.h"
#include "osd/scrubber_common.h"
#include "scrub_machine_lstnr.h"
#include "osd/scrubber_common.h"
class PG; // holding a pointer to that one - just for testing
class PgScrubber;
@@ -51,17 +51,21 @@ void on_event_discard(std::string_view nm);
std::string_view print() const { return #E; } \
};
MEV(RemotesReserved) ///< all replicas have granted our reserve request
/// all replicas have granted our reserve request
MEV(RemotesReserved)
MEV(ReservationFailure) ///< a reservation request has failed
/// a reservation request has failed
MEV(ReservationFailure)
MEV(StartScrub) ///< initiate a new scrubbing session (relevant if we are a Primary)
/// initiate a new scrubbing session (relevant if we are a Primary)
MEV(StartScrub)
MEV(AfterRepairScrub) ///< initiate a new scrubbing session. Only triggered at Recovery
///< completion.
/// initiate a new scrubbing session. Only triggered at Recovery completion
MEV(AfterRepairScrub)
MEV(Unblocked) ///< triggered when the PG unblocked an object that was marked for
///< scrubbing. Via the PGScrubUnblocked op
/// triggered when the PG unblocked an object that was marked for scrubbing.
/// Via the PGScrubUnblocked op
MEV(Unblocked)
MEV(InternalSchedScrub)
@@ -69,48 +73,63 @@ MEV(SelectedChunkFree)
MEV(ChunkIsBusy)
MEV(ActivePushesUpd) ///< Update to active_pushes. 'active_pushes' represents recovery
///< that is in-flight to the local ObjectStore
/// Update to active_pushes. 'active_pushes' represents recovery that
/// is in-flight to the local ObjectStore
MEV(ActivePushesUpd)
MEV(UpdatesApplied) ///< (Primary only) all updates are committed
/// (Primary only) all updates are committed
MEV(UpdatesApplied)
MEV(InternalAllUpdates) ///< the internal counterpart of UpdatesApplied
/// the internal counterpart of UpdatesApplied
MEV(InternalAllUpdates)
MEV(GotReplicas) ///< got a map from a replica
/// got a map from a replica
MEV(GotReplicas)
MEV(IntBmPreempted) ///< internal - BuildMap preempted. Required, as detected within the
///< ctor
/// internal - BuildMap preempted. Required, as detected within the ctor
MEV(IntBmPreempted)
MEV(InternalError)
MEV(IntLocalMapDone)
MEV(DigestUpdate) ///< external. called upon success of a MODIFY op. See
///< scrub_snapshot_metadata()
/// external. called upon success of a MODIFY op. See
/// scrub_snapshot_metadata()
MEV(DigestUpdate)
MEV(MapsCompared) ///< maps_compare_n_cleanup() transactions are done
/// maps_compare_n_cleanup() transactions are done
MEV(MapsCompared)
MEV(StartReplica) ///< initiating replica scrub.
/// initiating replica scrub
MEV(StartReplica)
MEV(StartReplicaNoWait) ///< 'start replica' when there are no pending updates
/// 'start replica' when there are no pending updates
MEV(StartReplicaNoWait)
MEV(SchedReplica)
MEV(ReplicaPushesUpd) ///< Update to active_pushes. 'active_pushes' represents recovery
///< that is in-flight to the local ObjectStore
/// Update to active_pushes. 'active_pushes' represents recovery
/// that is in-flight to the local ObjectStore
MEV(ReplicaPushesUpd)
MEV(FullReset) ///< guarantee that the FSM is in the quiescent state (i.e. NotActive)
/// guarantee that the FSM is in the quiescent state (i.e. NotActive)
MEV(FullReset)
MEV(NextChunk) ///< finished handling this chunk. Go get the next one
/// finished handling this chunk. Go get the next one
MEV(NextChunk)
MEV(ScrubFinished) ///< all chunks handled
/// all chunks handled
MEV(ScrubFinished)
//
// STATES
//
struct NotActive; ///< the quiescent state. No active scrubbing.
struct ReservingReplicas; ///< securing scrub resources from replicas' OSDs
struct ActiveScrubbing; ///< the active state for a Primary. A sub-machine.
struct ReplicaWaitUpdates; ///< an active state for a replica. Waiting for all active
///< operations to finish.
struct ReplicaWaitUpdates; ///< an active state for a replica. Waiting for all
///< active operations to finish.
struct ActiveReplica; ///< an active state for a replica.
@@ -135,27 +154,30 @@ class ScrubMachine : public sc::state_machine<ScrubMachine, NotActive> {
/**
* The Scrubber's base (quiescent) state.
* Scrubbing is triggered by one of the following events:
* - (standard scenario for a Primary): 'StartScrub'. Initiates the OSDs resources
* reservation process. Will be issued by PG::scrub(), following a
* queued "PGScrub" op.
* - a special end-of-recovery Primary scrub event ('AfterRepairScrub') that is
* not required to reserve resources.
* - (for a replica) 'StartReplica' or 'StartReplicaNoWait', triggered by an incoming
* MOSDRepScrub message.
*
* note (20.8.21): originally, AfterRepairScrub was triggering a scrub without waiting
* for replica resources to be acquired. But once replicas started using the
* resource-request to identify and tag the scrub session, this bypass cannot be
* supported anymore.
* - (standard scenario for a Primary): 'StartScrub'. Initiates the OSDs
* resources reservation process. Will be issued by PG::scrub(), following a
* queued "PGScrub" op.
*
* - a special end-of-recovery Primary scrub event ('AfterRepairScrub').
*
* - (for a replica) 'StartReplica' or 'StartReplicaNoWait', triggered by
* an incoming MOSDRepScrub message.
*
* note (20.8.21): originally, AfterRepairScrub was triggering a scrub without
* waiting for replica resources to be acquired. But once replicas started
* using the resource-request to identify and tag the scrub session, this
* bypass cannot be supported anymore.
*/
struct NotActive : sc::state<NotActive, ScrubMachine> {
explicit NotActive(my_context ctx);
using reactions = mpl::list<sc::custom_reaction<StartScrub>,
// a scrubbing that was initiated at recovery completion
sc::custom_reaction<AfterRepairScrub>,
sc::transition<StartReplica, ReplicaWaitUpdates>,
sc::transition<StartReplicaNoWait, ActiveReplica>>;
using reactions =
mpl::list<sc::custom_reaction<StartScrub>,
// a scrubbing that was initiated at recovery completion:
sc::custom_reaction<AfterRepairScrub>,
sc::transition<StartReplica, ReplicaWaitUpdates>,
sc::transition<StartReplicaNoWait, ActiveReplica>>;
sc::result react(const StartScrub&);
sc::result react(const AfterRepairScrub&);
};
@@ -178,26 +200,35 @@ struct ReservingReplicas : sc::state<ReservingReplicas, ScrubMachine> {
// the "active" sub-states
struct RangeBlocked; ///< the objects range is blocked
struct PendingTimer; ///< either delaying the scrub by some time and requeuing, or just
///< requeue
struct NewChunk; ///< select a chunk to scrub, and verify its availability
/// the objects range is blocked
struct RangeBlocked;
/// either delaying the scrub by some time and requeuing, or just requeue
struct PendingTimer;
/// select a chunk to scrub, and verify its availability
struct NewChunk;
struct WaitPushes;
struct WaitLastUpdate;
struct BuildMap;
struct DrainReplMaps; ///< a problem during BuildMap. Wait for all replicas to report,
///< then restart.
struct WaitReplicas; ///< wait for all replicas to report
/// a problem during BuildMap. Wait for all replicas to report, then restart.
struct DrainReplMaps;
/// wait for all replicas to report
struct WaitReplicas;
struct WaitDigestUpdate;
struct ActiveScrubbing : sc::state<ActiveScrubbing, ScrubMachine, PendingTimer> {
struct ActiveScrubbing
: sc::state<ActiveScrubbing, ScrubMachine, PendingTimer> {
explicit ActiveScrubbing(my_context ctx);
~ActiveScrubbing();
using reactions = mpl::list<
sc::custom_reaction<InternalError>,
sc::custom_reaction<FullReset>>;
using reactions = mpl::list<sc::custom_reaction<InternalError>,
sc::custom_reaction<FullReset>>;
sc::result react(const FullReset&);
sc::result react(const InternalError&);
@@ -231,9 +262,10 @@ struct NewChunk : sc::state<NewChunk, ActiveScrubbing> {
* initiate the update process for this chunk
*
* Wait for 'active_pushes' to clear.
* 'active_pushes' represents recovery that is in-flight to the local Objectstore, hence
* scrub waits until the correct data is readable (in-flight data to the Objectstore is
* not readable until written to disk, termed 'applied' here)
* 'active_pushes' represents recovery that is in-flight to the local
* Objectstore, hence scrub waits until the correct data is readable
* (in-flight data to the Objectstore is not readable until written to
* disk, termed 'applied' here)
*/
struct WaitPushes : sc::state<WaitPushes, ActiveScrubbing> {
@@ -250,10 +282,11 @@ struct WaitLastUpdate : sc::state<WaitLastUpdate, ActiveScrubbing> {
void on_new_updates(const UpdatesApplied&);
using reactions = mpl::list<sc::custom_reaction<InternalAllUpdates>,
sc::in_state_reaction<UpdatesApplied,
WaitLastUpdate,
&WaitLastUpdate::on_new_updates>>;
using reactions =
mpl::list<sc::custom_reaction<InternalAllUpdates>,
sc::in_state_reaction<UpdatesApplied,
WaitLastUpdate,
&WaitLastUpdate::on_new_updates>>;
sc::result react(const InternalAllUpdates&);
};
@@ -266,14 +299,12 @@ struct BuildMap : sc::state<BuildMap, ActiveScrubbing> {
// handled by our parent state;
// - if preempted, we switch to DrainReplMaps, where we will wait for all
// replicas to send their maps before acknowledging the preemption;
// - an interval change will be handled by the relevant 'send-event' functions,
// and will be translated into a 'FullReset' event.
using reactions =
mpl::list<sc::transition<IntBmPreempted, DrainReplMaps>,
sc::transition<InternalSchedScrub, BuildMap>, // looping, waiting
// for the backend to
// finish
sc::custom_reaction<IntLocalMapDone>>;
// - an interval change will be handled by the relevant 'send-event'
  // functions, and will be translated into a 'FullReset' event.
using reactions = mpl::list<sc::transition<IntBmPreempted, DrainReplMaps>,
// looping, waiting for the backend to finish:
sc::transition<InternalSchedScrub, BuildMap>,
sc::custom_reaction<IntLocalMapDone>>;
sc::result react(const IntLocalMapDone&);
};
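
Note that sc::transition<InternalSchedScrub, BuildMap> is a self-transition:
it exits and re-enters BuildMap, so the constructor's 'poll the backend'
logic runs again on each tick. A hedged sketch of the completion handler
(body illustrative, not this commit's code):

sc::result BuildMap::react(const IntLocalMapDone&)
{
  // the local scrub-map is ready; WaitReplicas collects the replicas'
  // maps before the comparison starts
  return transit<WaitReplicas>();
}
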
@ -285,8 +316,8 @@ struct DrainReplMaps : sc::state<DrainReplMaps, ActiveScrubbing> {
explicit DrainReplMaps(my_context ctx);
using reactions =
mpl::list<sc::custom_reaction<GotReplicas> // all replicas are accounted for
>;
// all replicas are accounted for:
mpl::list<sc::custom_reaction<GotReplicas>>;
sc::result react(const GotReplicas&);
};
@ -294,11 +325,11 @@ struct DrainReplMaps : sc::state<DrainReplMaps, ActiveScrubbing> {
struct WaitReplicas : sc::state<WaitReplicas, ActiveScrubbing> {
explicit WaitReplicas(my_context ctx);
using reactions =
mpl::list<sc::custom_reaction<GotReplicas>, // all replicas are accounted for
sc::transition<MapsCompared, WaitDigestUpdate>,
sc::custom_reaction<DigestUpdate>
>;
using reactions = mpl::list<
// all replicas are accounted for:
sc::custom_reaction<GotReplicas>,
sc::transition<MapsCompared, WaitDigestUpdate>,
sc::custom_reaction<DigestUpdate>>;
sc::result react(const GotReplicas&);
sc::result react(const DigestUpdate&);
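
A hedged sketch of the 'all replicas accounted for' reaction;
all_maps_received() and compare_maps() are hypothetical stand-ins for the
actual bookkeeping:

sc::result WaitReplicas::react(const GotReplicas&)
{
  if (!all_maps_received())
    return discard_event();  // some shards are still pending
  // comparing may post MapsCompared, moving us to WaitDigestUpdate via
  // the transition declared above
  compare_maps();
  return discard_event();
}
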
@ -309,13 +340,13 @@ struct WaitDigestUpdate : sc::state<WaitDigestUpdate, ActiveScrubbing> {
explicit WaitDigestUpdate(my_context ctx);
using reactions = mpl::list<sc::custom_reaction<DigestUpdate>,
sc::custom_reaction<ScrubFinished>,
sc::transition<NextChunk, PendingTimer>>;
sc::custom_reaction<ScrubFinished>,
sc::transition<NextChunk, PendingTimer>>;
sc::result react(const DigestUpdate&);
sc::result react(const ScrubFinished&);
};
// ----------------------------- the "replica active" states -----------------------
// ----------------------------- the "replica active" states
/*
* Waiting for 'active_pushes' to complete
@ -326,8 +357,8 @@ struct WaitDigestUpdate : sc::state<WaitDigestUpdate, ActiveScrubbing> {
*/
struct ReplicaWaitUpdates : sc::state<ReplicaWaitUpdates, ScrubMachine> {
explicit ReplicaWaitUpdates(my_context ctx);
using reactions =
mpl::list<sc::custom_reaction<ReplicaPushesUpd>, sc::custom_reaction<FullReset>>;
using reactions = mpl::list<sc::custom_reaction<ReplicaPushesUpd>,
sc::custom_reaction<FullReset>>;
sc::result react(const ReplicaPushesUpd&);
sc::result react(const FullReset&);

View File

@ -7,7 +7,6 @@
*/
#include "common/version.h"
#include "include/Context.h"
#include "osd/osd_types.h"
namespace Scrub {
@ -90,13 +89,13 @@ struct ScrubMachineListener {
virtual void replica_handling_done() = 0;
/// the version of 'scrub_clear_state()' that does not try to invoke FSM services
/// (thus can be called from FSM reactions)
/// the version of 'scrub_clear_state()' that does not try to invoke FSM
/// services (thus can be called from FSM reactions)
virtual void clear_pgscrub_state() = 0;
/*
* Send an 'InternalSchedScrub' FSM event either immediately, or - if 'm_need_sleep'
* is asserted - after a configuration-dependent timeout.
* Send an 'InternalSchedScrub' FSM event either immediately, or - if
* 'm_need_sleep' is asserted - after a configuration-dependent timeout.
*/
virtual void add_delayed_scheduling() = 0;
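
A hedged sketch of that contract, using hypothetical queue_scrub_event() and
schedule_after() helpers ('m_need_sleep' is the flag named in the comment):

void add_delayed_scheduling()
{
  if (!m_need_sleep) {
    queue_scrub_event(InternalSchedScrub{});  // fire immediately
  } else {
    // requeue after a configuration-dependent pause
    schedule_after(scrub_sleep_time(),
                   [this] { queue_scrub_event(InternalSchedScrub{}); });
  }
}
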
@ -113,8 +112,8 @@ struct ScrubMachineListener {
/**
* Prepare a MOSDRepScrubMap message carrying the requested scrub map
* @param was_preempted - were we preempted?
* @return the message, and the current value of 'm_replica_min_epoch' (which is
* used when sending the message, but will be overwritten before that).
* @return the message, and the current value of 'm_replica_min_epoch' (which
* is used when sending the message, but will be overwritten before that).
*/
[[nodiscard]] virtual MsgAndEpoch prep_replica_map_msg(
Scrub::PreemptionNoted was_preempted) = 0;
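
A hedged usage sketch clarifying why the epoch travels with the message: the
pair is prepared while handling the chunk, but the epoch may be refreshed
just before the reply actually goes out (send_replica_map() is assumed here):

// prepared early, while the scrub results are at hand:
MsgAndEpoch reply =
    prep_replica_map_msg(Scrub::PreemptionNoted::no_preemption);
// ... later, possibly after 'm_replica_min_epoch' was updated:
send_replica_map(reply);  // stamps the final epoch into the message
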

View File

@ -52,8 +52,8 @@ struct requested_scrub_t {
/**
* scrub must not be aborted.
* Set for explicitly requested scrubs, and for scrubs originated by the pairing
* process with the 'repair' flag set (in the RequestScrub event).
* Set for explicitly requested scrubs, and for scrubs originated by the
* pairing process with the 'repair' flag set (in the RequestScrub event).
*
* Will be copied into the 'required' scrub flag upon scrub start.
*/
@ -64,14 +64,15 @@ struct requested_scrub_t {
* - scrub_requested() with need_auto param set, which only happens in
* - scrub_finish() - if deep_scrub_on_error is set, and we have errors
*
* If set, will prevent the OSD from casually postponing our scrub. When scrubbing
* starts, will cause must_scrub, must_deep_scrub and auto_repair to be set.
* If set, will prevent the OSD from casually postponing our scrub. When
* scrubbing starts, will cause must_scrub, must_deep_scrub and auto_repair to
* be set.
*/
bool need_auto{false};
/**
* Set for scrub-after-recovery just before we initiate the recovery deep scrub,
 * or if scrub_requested() was called with either need_auto or repair.
* Set for scrub-after-recovery just before we initiate the recovery deep
 * scrub, or if scrub_requested() was called with either need_auto or repair.
* Affects PG_STATE_DEEP_SCRUB.
*/
bool must_deep_scrub{false};
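
To make the flag interplay concrete, a hedged sketch of what 'will cause
must_scrub, must_deep_scrub and auto_repair to be set' amounts to at scrub
start (illustrative only, not the actual code path):

void apply_need_auto(requested_scrub_t& planned)
{
  if (planned.need_auto) {
    planned.must_scrub = true;       // no casual postponing anymore
    planned.must_deep_scrub = true;  // affects PG_STATE_DEEP_SCRUB
    planned.auto_repair = true;      // repair errors found along the way
  }
}
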
@ -98,8 +99,8 @@ struct requested_scrub_t {
bool must_repair{false};
/*
 * the value of auto_repair is determined in sched_scrub() (once per scrub;
 * the previous value is not remembered). Set if
 * the value of auto_repair is determined in sched_scrub() (once per scrub;
 * the previous value is not remembered). Set if
* - allowed by configuration and backend, and
* - must_scrub is not set (i.e. - this is a periodic scrub),
* - time_for_deep was just set
@ -122,7 +123,10 @@ struct ScrubPgIF {
virtual ~ScrubPgIF() = default;
friend std::ostream& operator<<(std::ostream& out, const ScrubPgIF& s) { return s.show(out); }
friend std::ostream& operator<<(std::ostream& out, const ScrubPgIF& s)
{
return s.show(out);
}
virtual std::ostream& show(std::ostream& out) const = 0;
@ -146,9 +150,11 @@ struct ScrubPgIF {
virtual void send_replica_pushes_upd(epoch_t epoch_queued) = 0;
virtual void send_start_replica(epoch_t epoch_queued, Scrub::act_token_t token) = 0;
virtual void send_start_replica(epoch_t epoch_queued,
Scrub::act_token_t token) = 0;
virtual void send_sched_replica(epoch_t epoch_queued, Scrub::act_token_t token) = 0;
virtual void send_sched_replica(epoch_t epoch_queued,
Scrub::act_token_t token) = 0;
virtual void send_full_reset(epoch_t epoch_queued) = 0;
@ -164,12 +170,14 @@ struct ScrubPgIF {
virtual void send_maps_compared(epoch_t epoch_queued) = 0;
virtual void on_applied_when_primary(const eversion_t &applied_version) = 0;
virtual void on_applied_when_primary(const eversion_t& applied_version) = 0;
// --------------------------------------------------
[[nodiscard]] virtual bool are_callbacks_pending()
const = 0; // currently only used for an assert
[[nodiscard]] virtual bool are_callbacks_pending() const = 0; // currently
// only used
// for an
// assert
/**
* the scrubber is marked 'active':
@ -215,17 +223,19 @@ struct ScrubPgIF {
const requested_scrub_t& request_flags) const = 0;
/**
* Return true if soid is currently being scrubbed and pending IOs should block.
* May have a side effect of preempting an in-progress scrub -- will return false
* in that case.
* Return true if soid is currently being scrubbed and pending IOs should
* block. May have a side effect of preempting an in-progress scrub -- will
* return false in that case.
*
* @param soid object to check for ongoing scrub
* @return boolean whether a request on soid should block until scrub completion
* @return boolean whether a request on soid should block until scrub
* completion
*/
virtual bool write_blocked_by_scrub(const hobject_t& soid) = 0;
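
A hedged sketch of a typical call site in the op path (the container and
surrounding names are illustrative), showing why the preemption side effect
matters:

if (m_scrubber->write_blocked_by_scrub(oid)) {
  // the object sits inside the chunk being scrubbed: park the op until
  // the scrubber unblocks the range
  waiting_for_scrub.push_back(op);
  return;
}
// on 'false' we proceed; the call may have preempted the scrub for us
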
/// Returns whether any objects in the range [begin, end] are being scrubbed
virtual bool range_intersects_scrub(const hobject_t& start, const hobject_t& end) = 0;
virtual bool range_intersects_scrub(const hobject_t& start,
const hobject_t& end) = 0;
/// the op priority, taken from the primary's request message
virtual Scrub::scrub_prio_t replica_op_priority() const = 0;
@ -233,8 +243,9 @@ struct ScrubPgIF {
/// the priority of the on-going scrub (used when requeuing events)
virtual unsigned int scrub_requeue_priority(
Scrub::scrub_prio_t with_priority) const = 0;
virtual unsigned int scrub_requeue_priority(Scrub::scrub_prio_t with_priority,
unsigned int suggested_priority) const = 0;
virtual unsigned int scrub_requeue_priority(
Scrub::scrub_prio_t with_priority,
unsigned int suggested_priority) const = 0;
virtual void add_callback(Context* context) = 0;
@ -243,8 +254,8 @@ struct ScrubPgIF {
const hobject_t& soid) = 0;
/**
* the version of 'scrub_clear_state()' that does not try to invoke FSM services
* (thus can be called from FSM reactions)
* the version of 'scrub_clear_state()' that does not try to invoke FSM
* services (thus can be called from FSM reactions)
*/
virtual void clear_pgscrub_state() = 0;
@ -255,8 +266,8 @@ struct ScrubPgIF {
virtual void send_remotes_reserved(epoch_t epoch_queued) = 0;
/**
* triggers the 'ReservationFailure' (at least one replica denied us the requested
* resources) state-machine event
* triggers the 'ReservationFailure' (at least one replica denied us the
* requested resources) state-machine event
*/
virtual void send_reservation_failure(epoch_t epoch_queued) = 0;
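
A hedged sketch of how a replica's denial might surface as this event on the
primary (names illustrative; see handle_scrub_reserve_reject() below):

void on_reserve_reject(OpRequestRef op, pg_shard_t from)
{
  // a single denial fails the whole reservation attempt
  send_reservation_failure(get_osdmap_epoch());
}
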
@ -309,7 +320,8 @@ struct ScrubPgIF {
*/
virtual void update_scrub_job(const requested_scrub_t& request_flags) = 0;
virtual void on_maybe_registration_change(const requested_scrub_t& request_flags) = 0;
virtual void on_maybe_registration_change(
const requested_scrub_t& request_flags) = 0;
// on the replica:
virtual void handle_scrub_reserve_request(OpRequestRef op) = 0;
@ -317,7 +329,8 @@ struct ScrubPgIF {
// and on the primary:
virtual void handle_scrub_reserve_grant(OpRequestRef op, pg_shard_t from) = 0;
virtual void handle_scrub_reserve_reject(OpRequestRef op, pg_shard_t from) = 0;
virtual void handle_scrub_reserve_reject(OpRequestRef op,
pg_shard_t from) = 0;
virtual void rm_from_osd_scrubbing() = 0;