mirror of
https://github.com/ceph/ceph
synced 2025-01-01 08:32:24 +00:00
osd/scrub: reformat scrub files to 80 cols
Reformatting the OSD scrub code files to match the style guide. Specifically: - force 80-column lines; and - (sadly) force 'use tabs' (replacing each 8 indentation blanks with a tab). clang-format version used: 13. The configuration file used is detailed in the PR comment. Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
This commit is contained in:
parent
dbca95185e
commit
7e2ba75f07
@ -6,10 +6,10 @@
|
||||
#include <sstream>
|
||||
|
||||
#include "common/scrub_types.h"
|
||||
#include "osd/osd_types_fmt.h"
|
||||
|
||||
#include "osd/PeeringState.h"
|
||||
#include "osd/PrimaryLogPG.h"
|
||||
#include "osd/osd_types_fmt.h"
|
||||
|
||||
#include "scrub_machine.h"
|
||||
|
||||
#define dout_context (m_osds->cct)
|
||||
@ -33,10 +33,12 @@ bool PrimaryLogScrub::get_store_errors(const scrub_ls_arg_t& arg,
|
||||
}
|
||||
|
||||
if (arg.get_snapsets) {
|
||||
res_inout.vals =
|
||||
m_store->get_snap_errors(m_pg->get_pgid().pool(), arg.start_after, arg.max_return);
|
||||
res_inout.vals = m_store->get_snap_errors(m_pg->get_pgid().pool(),
|
||||
arg.start_after,
|
||||
arg.max_return);
|
||||
} else {
|
||||
res_inout.vals = m_store->get_object_errors(m_pg->get_pgid().pool(), arg.start_after,
|
||||
res_inout.vals = m_store->get_object_errors(m_pg->get_pgid().pool(),
|
||||
arg.start_after,
|
||||
arg.max_return);
|
||||
}
|
||||
return true;
|
||||
@ -49,23 +51,23 @@ void PrimaryLogScrub::submit_digest_fixes(const digests_fixes_t& fixes)
|
||||
// encounter previous-chunk digest updates after starting a new chunk
|
||||
num_digest_updates_pending = fixes.size();
|
||||
dout(10) << __func__
|
||||
<< ": num_digest_updates_pending: " << num_digest_updates_pending
|
||||
<< dendl;
|
||||
<< ": num_digest_updates_pending: " << num_digest_updates_pending
|
||||
<< dendl;
|
||||
|
||||
for (auto& [obj, dgs] : fixes) {
|
||||
|
||||
ObjectContextRef obc = m_pl_pg->get_object_context(obj, false);
|
||||
if (!obc) {
|
||||
m_osds->clog->error() << m_pg_id << " " << m_mode_desc
|
||||
<< " cannot get object context for object " << obj;
|
||||
<< " cannot get object context for object " << obj;
|
||||
num_digest_updates_pending--;
|
||||
continue;
|
||||
}
|
||||
if (obc->obs.oi.soid != obj) {
|
||||
m_osds->clog->error()
|
||||
<< m_pg_id << " " << m_mode_desc << " " << obj
|
||||
<< " : object has a valid oi attr with a mismatched name, "
|
||||
<< " obc->obs.oi.soid: " << obc->obs.oi.soid;
|
||||
<< m_pg_id << " " << m_mode_desc << " " << obj
|
||||
<< " : object has a valid oi attr with a mismatched name, "
|
||||
<< " obc->obs.oi.soid: " << obc->obs.oi.soid;
|
||||
num_digest_updates_pending--;
|
||||
continue;
|
||||
}
|
||||
@ -88,9 +90,9 @@ void PrimaryLogScrub::submit_digest_fixes(const digests_fixes_t& fixes)
|
||||
|
||||
ctx->register_on_success([this]() {
|
||||
if ((num_digest_updates_pending >= 1) &&
|
||||
(--num_digest_updates_pending == 0)) {
|
||||
m_osds->queue_scrub_digest_update(m_pl_pg,
|
||||
m_pl_pg->is_scrub_blocking_ops());
|
||||
(--num_digest_updates_pending == 0)) {
|
||||
m_osds->queue_scrub_digest_update(m_pl_pg,
|
||||
m_pl_pg->is_scrub_blocking_ops());
|
||||
}
|
||||
});
|
||||
|
||||
@ -110,10 +112,9 @@ void PrimaryLogScrub::_scrub_finish()
|
||||
{
|
||||
auto& info = m_pg->get_pg_info(ScrubberPasskey{}); ///< a temporary alias
|
||||
|
||||
dout(10) << __func__
|
||||
<< " info stats: " << (info.stats.stats_invalid ? "invalid" : "valid")
|
||||
<< " m_is_repair: " << m_is_repair
|
||||
<< dendl;
|
||||
dout(10) << __func__ << " info stats: "
|
||||
<< (info.stats.stats_invalid ? "invalid" : "valid")
|
||||
<< " m_is_repair: " << m_is_repair << dendl;
|
||||
|
||||
if (info.stats.stats_invalid) {
|
||||
m_pl_pg->recovery_state.update_stats([=](auto& history, auto& stats) {
|
||||
@ -138,21 +139,26 @@ void PrimaryLogScrub::_scrub_finish()
|
||||
<< m_scrub_cstat.sum.num_objects_pinned << "/"
|
||||
<< info.stats.stats.sum.num_objects_pinned << " pinned, "
|
||||
<< m_scrub_cstat.sum.num_objects_hit_set_archive << "/"
|
||||
<< info.stats.stats.sum.num_objects_hit_set_archive << " hit_set_archive, "
|
||||
<< m_scrub_cstat.sum.num_bytes << "/" << info.stats.stats.sum.num_bytes
|
||||
<< " bytes, " << m_scrub_cstat.sum.num_objects_manifest << "/"
|
||||
<< info.stats.stats.sum.num_objects_hit_set_archive
|
||||
<< " hit_set_archive, " << m_scrub_cstat.sum.num_bytes << "/"
|
||||
<< info.stats.stats.sum.num_bytes << " bytes, "
|
||||
<< m_scrub_cstat.sum.num_objects_manifest << "/"
|
||||
<< info.stats.stats.sum.num_objects_manifest << " manifest objects, "
|
||||
<< m_scrub_cstat.sum.num_bytes_hit_set_archive << "/"
|
||||
<< info.stats.stats.sum.num_bytes_hit_set_archive << " hit_set_archive bytes."
|
||||
<< dendl;
|
||||
<< info.stats.stats.sum.num_bytes_hit_set_archive
|
||||
<< " hit_set_archive bytes." << dendl;
|
||||
|
||||
if (m_scrub_cstat.sum.num_objects != info.stats.stats.sum.num_objects ||
|
||||
m_scrub_cstat.sum.num_object_clones != info.stats.stats.sum.num_object_clones ||
|
||||
(m_scrub_cstat.sum.num_objects_dirty != info.stats.stats.sum.num_objects_dirty &&
|
||||
m_scrub_cstat.sum.num_object_clones !=
|
||||
info.stats.stats.sum.num_object_clones ||
|
||||
(m_scrub_cstat.sum.num_objects_dirty !=
|
||||
info.stats.stats.sum.num_objects_dirty &&
|
||||
!info.stats.dirty_stats_invalid) ||
|
||||
(m_scrub_cstat.sum.num_objects_omap != info.stats.stats.sum.num_objects_omap &&
|
||||
(m_scrub_cstat.sum.num_objects_omap !=
|
||||
info.stats.stats.sum.num_objects_omap &&
|
||||
!info.stats.omap_stats_invalid) ||
|
||||
(m_scrub_cstat.sum.num_objects_pinned != info.stats.stats.sum.num_objects_pinned &&
|
||||
(m_scrub_cstat.sum.num_objects_pinned !=
|
||||
info.stats.stats.sum.num_objects_pinned &&
|
||||
!info.stats.pin_stats_invalid) ||
|
||||
(m_scrub_cstat.sum.num_objects_hit_set_archive !=
|
||||
info.stats.stats.sum.num_objects_hit_set_archive &&
|
||||
@ -166,23 +172,27 @@ void PrimaryLogScrub::_scrub_finish()
|
||||
m_scrub_cstat.sum.num_whiteouts != info.stats.stats.sum.num_whiteouts ||
|
||||
m_scrub_cstat.sum.num_bytes != info.stats.stats.sum.num_bytes) {
|
||||
|
||||
m_osds->clog->error() << info.pgid << " " << m_mode_desc << " : stat mismatch, got "
|
||||
m_osds->clog->error() << info.pgid << " " << m_mode_desc
|
||||
<< " : stat mismatch, got "
|
||||
<< m_scrub_cstat.sum.num_objects << "/"
|
||||
<< info.stats.stats.sum.num_objects << " objects, "
|
||||
<< m_scrub_cstat.sum.num_object_clones << "/"
|
||||
<< info.stats.stats.sum.num_object_clones << " clones, "
|
||||
<< m_scrub_cstat.sum.num_objects_dirty << "/"
|
||||
<< info.stats.stats.sum.num_objects_dirty << " dirty, "
|
||||
<< m_scrub_cstat.sum.num_objects_omap << "/"
|
||||
<< info.stats.stats.sum.num_objects_omap << " omap, "
|
||||
<< m_scrub_cstat.sum.num_objects_pinned << "/"
|
||||
<< info.stats.stats.sum.num_objects_pinned << " pinned, "
|
||||
<< m_scrub_cstat.sum.num_objects_hit_set_archive << "/"
|
||||
<< info.stats.stats.sum.num_object_clones
|
||||
<< " clones, " << m_scrub_cstat.sum.num_objects_dirty
|
||||
<< "/" << info.stats.stats.sum.num_objects_dirty
|
||||
<< " dirty, " << m_scrub_cstat.sum.num_objects_omap
|
||||
<< "/" << info.stats.stats.sum.num_objects_omap
|
||||
<< " omap, " << m_scrub_cstat.sum.num_objects_pinned
|
||||
<< "/" << info.stats.stats.sum.num_objects_pinned
|
||||
<< " pinned, "
|
||||
<< m_scrub_cstat.sum.num_objects_hit_set_archive
|
||||
<< "/"
|
||||
<< info.stats.stats.sum.num_objects_hit_set_archive
|
||||
<< " hit_set_archive, " << m_scrub_cstat.sum.num_whiteouts
|
||||
<< "/" << info.stats.stats.sum.num_whiteouts << " whiteouts, "
|
||||
<< m_scrub_cstat.sum.num_bytes << "/"
|
||||
<< info.stats.stats.sum.num_bytes << " bytes, "
|
||||
<< " hit_set_archive, "
|
||||
<< m_scrub_cstat.sum.num_whiteouts << "/"
|
||||
<< info.stats.stats.sum.num_whiteouts
|
||||
<< " whiteouts, " << m_scrub_cstat.sum.num_bytes
|
||||
<< "/" << info.stats.stats.sum.num_bytes << " bytes, "
|
||||
<< m_scrub_cstat.sum.num_objects_manifest << "/"
|
||||
<< info.stats.stats.sum.num_objects_manifest
|
||||
<< " manifest objects, "
|
||||
@ -212,7 +222,8 @@ void PrimaryLogScrub::_scrub_finish()
|
||||
m_pl_pg->object_contexts.clear();
|
||||
}
|
||||
|
||||
PrimaryLogScrub::PrimaryLogScrub(PrimaryLogPG* pg) : PgScrubber{pg}, m_pl_pg{pg} {}
|
||||
PrimaryLogScrub::PrimaryLogScrub(PrimaryLogPG* pg) : PgScrubber{pg}, m_pl_pg{pg}
|
||||
{}
|
||||
|
||||
void PrimaryLogScrub::_scrub_clear_state()
|
||||
{
|
||||
@ -220,22 +231,27 @@ void PrimaryLogScrub::_scrub_clear_state()
|
||||
m_scrub_cstat = object_stat_collection_t();
|
||||
}
|
||||
|
||||
void PrimaryLogScrub::stats_of_handled_objects(const object_stat_sum_t& delta_stats,
|
||||
const hobject_t& soid)
|
||||
void PrimaryLogScrub::stats_of_handled_objects(
|
||||
const object_stat_sum_t& delta_stats,
|
||||
const hobject_t& soid)
|
||||
{
|
||||
// We scrub objects in hobject_t order, so objects before m_start have already been
|
||||
// scrubbed and their stats have already been added to the scrubber. Objects after that
|
||||
// point haven't been included in the scrubber's stats accounting yet, so they will be
|
||||
// included when the scrubber gets to that object.
|
||||
// We scrub objects in hobject_t order, so objects before m_start have already
|
||||
// been scrubbed and their stats have already been added to the scrubber.
|
||||
// Objects after that point haven't been included in the scrubber's stats
|
||||
// accounting yet, so they will be included when the scrubber gets to that
|
||||
// object.
|
||||
if (is_primary() && is_scrub_active()) {
|
||||
if (soid < m_start) {
|
||||
|
||||
dout(20) << fmt::format("{} {} < [{},{})", __func__, soid, m_start, m_end) << dendl;
|
||||
dout(20) << fmt::format("{} {} < [{},{})", __func__, soid, m_start, m_end)
|
||||
<< dendl;
|
||||
m_scrub_cstat.add(delta_stats);
|
||||
|
||||
} else {
|
||||
|
||||
dout(25) << fmt::format("{} {} >= [{},{})", __func__, soid, m_start, m_end) << dendl;
|
||||
dout(25)
|
||||
<< fmt::format("{} {} >= [{},{})", __func__, soid, m_start, m_end)
|
||||
<< dendl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -14,8 +14,8 @@
|
||||
#include "messages/MOSDRepScrubMap.h"
|
||||
#include "messages/MOSDScrub.h"
|
||||
#include "messages/MOSDScrubReserve.h"
|
||||
|
||||
#include "osd/OSD.h"
|
||||
|
||||
#include "scrub_machine.h"
|
||||
|
||||
class PrimaryLogPG;
|
||||
@ -42,7 +42,8 @@ class PrimaryLogScrub : public PgScrubber {
|
||||
void submit_digest_fixes(const digests_fixes_t& fixes) final;
|
||||
|
||||
private:
|
||||
// we know our PG is actually a PrimaryLogPG. Let's alias the pointer to that object:
|
||||
// we know our PG is actually a PrimaryLogPG. Let's alias the pointer to that
|
||||
// object:
|
||||
PrimaryLogPG* const m_pl_pg;
|
||||
|
||||
// handle our part in stats collection
|
||||
|
@ -4,11 +4,11 @@
|
||||
#ifndef CEPH_SCRUB_RESULT_H
|
||||
#define CEPH_SCRUB_RESULT_H
|
||||
|
||||
#include "osd/SnapMapper.h" // for OSDriver
|
||||
#include "common/map_cacher.hpp"
|
||||
#include "osd/SnapMapper.h" // for OSDriver
|
||||
|
||||
namespace librados {
|
||||
struct object_id_t;
|
||||
struct object_id_t;
|
||||
}
|
||||
|
||||
struct inconsistent_obj_wrapper;
|
||||
@ -17,7 +17,7 @@ struct inconsistent_snapset_wrapper;
|
||||
namespace Scrub {
|
||||
|
||||
class Store {
|
||||
public:
|
||||
public:
|
||||
~Store();
|
||||
static Store* create(ObjectStore* store,
|
||||
ObjectStore::Transaction* t,
|
||||
@ -31,19 +31,25 @@ public:
|
||||
void add_error(int64_t pool, const inconsistent_snapset_wrapper& e);
|
||||
|
||||
bool empty() const;
|
||||
void flush(ObjectStore::Transaction *);
|
||||
void cleanup(ObjectStore::Transaction *);
|
||||
std::vector<ceph::buffer::list> get_snap_errors(int64_t pool,
|
||||
const librados::object_id_t& start,
|
||||
uint64_t max_return) const;
|
||||
std::vector<ceph::buffer::list> get_object_errors(int64_t pool,
|
||||
const librados::object_id_t& start,
|
||||
uint64_t max_return) const;
|
||||
private:
|
||||
void flush(ObjectStore::Transaction*);
|
||||
void cleanup(ObjectStore::Transaction*);
|
||||
|
||||
std::vector<ceph::buffer::list> get_snap_errors(
|
||||
int64_t pool,
|
||||
const librados::object_id_t& start,
|
||||
uint64_t max_return) const;
|
||||
|
||||
std::vector<ceph::buffer::list> get_object_errors(
|
||||
int64_t pool,
|
||||
const librados::object_id_t& start,
|
||||
uint64_t max_return) const;
|
||||
|
||||
private:
|
||||
Store(const coll_t& coll, const ghobject_t& oid, ObjectStore* store);
|
||||
std::vector<ceph::buffer::list> get_errors(const std::string& start, const std::string& end,
|
||||
uint64_t max_return) const;
|
||||
private:
|
||||
std::vector<ceph::buffer::list> get_errors(const std::string& start,
|
||||
const std::string& end,
|
||||
uint64_t max_return) const;
|
||||
private:
|
||||
const coll_t coll;
|
||||
const ghobject_t hoid;
|
||||
// a temp object holding mappings from seq-id to inconsistencies found in
|
||||
@ -52,6 +58,6 @@ private:
|
||||
mutable MapCacher::MapCacher<std::string, ceph::buffer::list> backend;
|
||||
std::map<std::string, ceph::buffer::list> results;
|
||||
};
|
||||
}
|
||||
} // namespace Scrub
|
||||
|
||||
#endif // CEPH_SCRUB_RESULT_H
|
||||
#endif // CEPH_SCRUB_RESULT_H
|
||||
|
@ -19,15 +19,18 @@ using namespace ::std::literals;
|
||||
#define dout_prefix *_dout << "osd." << whoami << " "
|
||||
|
||||
ScrubQueue::ScrubJob::ScrubJob(CephContext* cct, const spg_t& pg, int node_id)
|
||||
: RefCountedObject{cct}, pgid{pg}, whoami{node_id}, cct{cct}
|
||||
: RefCountedObject{cct}
|
||||
, pgid{pg}
|
||||
, whoami{node_id}
|
||||
, cct{cct}
|
||||
{}
|
||||
|
||||
// debug usage only
|
||||
ostream& operator<<(ostream& out, const ScrubQueue::ScrubJob& sjob)
|
||||
{
|
||||
out << sjob.pgid << ", " << sjob.schedule.scheduled_at
|
||||
<< " dead: " << sjob.schedule.deadline << " - " << sjob.registration_state()
|
||||
<< " / failure: " << sjob.resources_failure
|
||||
<< " dead: " << sjob.schedule.deadline << " - "
|
||||
<< sjob.registration_state() << " / failure: " << sjob.resources_failure
|
||||
<< " / pen. t.o.: " << sjob.penalty_timeout
|
||||
<< " / queue state: " << ScrubQueue::qu_state_text(sjob.state);
|
||||
|
||||
@ -64,7 +67,8 @@ std::string ScrubQueue::ScrubJob::scheduling_state(utime_t now_is,
|
||||
return fmt::format("queued for {}scrub", (is_deep_expected ? "deep " : ""));
|
||||
}
|
||||
|
||||
return fmt::format("{}scrub scheduled @ {}", (is_deep_expected ? "deep " : ""),
|
||||
return fmt::format("{}scrub scheduled @ {}",
|
||||
(is_deep_expected ? "deep " : ""),
|
||||
schedule.scheduled_at);
|
||||
}
|
||||
|
||||
@ -80,7 +84,8 @@ std::string ScrubQueue::ScrubJob::scheduling_state(utime_t now_is,
|
||||
|
||||
|
||||
ScrubQueue::ScrubQueue(CephContext* cct, OSDService& osds)
|
||||
: cct{cct}, osd_service{osds}
|
||||
: cct{cct}
|
||||
, osd_service{osds}
|
||||
{
|
||||
// initialize the daily loadavg with current 15min loadavg
|
||||
if (double loadavgs[3]; getloadavg(loadavgs, 3) == 3) {
|
||||
@ -128,8 +133,9 @@ void ScrubQueue::remove_from_osd_queue(ScrubJobRef scrub_job)
|
||||
<< dendl;
|
||||
|
||||
qu_state_t expected_state{qu_state_t::registered};
|
||||
auto ret = scrub_job->state.compare_exchange_strong(expected_state,
|
||||
qu_state_t::unregistering);
|
||||
auto ret =
|
||||
scrub_job->state.compare_exchange_strong(expected_state,
|
||||
qu_state_t::unregistering);
|
||||
|
||||
if (ret) {
|
||||
|
||||
@ -141,7 +147,8 @@ void ScrubQueue::remove_from_osd_queue(ScrubJobRef scrub_job)
|
||||
|
||||
// job wasn't in state 'registered' coming in
|
||||
dout(5) << "removing pg[" << scrub_job->pgid
|
||||
<< "] failed. State was: " << qu_state_text(expected_state) << dendl;
|
||||
<< "] failed. State was: " << qu_state_text(expected_state)
|
||||
<< dendl;
|
||||
}
|
||||
}
|
||||
|
||||
@ -299,8 +306,8 @@ std::string_view ScrubQueue::qu_state_text(qu_state_t st)
|
||||
Scrub::schedule_result_t ScrubQueue::select_pg_and_scrub(
|
||||
Scrub::ScrubPreconds& preconds)
|
||||
{
|
||||
dout(10) << " reg./pen. sizes: " << to_scrub.size() << " / " << penalized.size()
|
||||
<< dendl;
|
||||
dout(10) << " reg./pen. sizes: " << to_scrub.size() << " / "
|
||||
<< penalized.size() << dendl;
|
||||
|
||||
utime_t now_is = ceph_clock_now();
|
||||
|
||||
@ -322,7 +329,8 @@ Scrub::schedule_result_t ScrubQueue::select_pg_and_scrub(
|
||||
restore_penalized = false;
|
||||
|
||||
// remove the 'updated' flag from all entries
|
||||
std::for_each(to_scrub.begin(), to_scrub.end(),
|
||||
std::for_each(to_scrub.begin(),
|
||||
to_scrub.end(),
|
||||
[](const auto& jobref) -> void { jobref->updated = false; });
|
||||
|
||||
// add failed scrub attempts to the penalized list
|
||||
@ -343,8 +351,8 @@ Scrub::schedule_result_t ScrubQueue::select_pg_and_scrub(
|
||||
// - we will try the penalized
|
||||
if (res == Scrub::schedule_result_t::none_ready && !penalized_copy.empty()) {
|
||||
res = select_from_group(penalized_copy, preconds, now_is);
|
||||
dout(10) << "tried the penalized. Res: " << ScrubQueue::attempt_res_text(res)
|
||||
<< dendl;
|
||||
dout(10) << "tried the penalized. Res: "
|
||||
<< ScrubQueue::attempt_res_text(res) << dendl;
|
||||
restore_penalized = true;
|
||||
}
|
||||
|
||||
@ -379,8 +387,9 @@ struct cmp_sched_time_t {
|
||||
} // namespace
|
||||
|
||||
// called under lock
|
||||
ScrubQueue::ScrubQContainer ScrubQueue::collect_ripe_jobs(ScrubQContainer& group,
|
||||
utime_t time_now)
|
||||
ScrubQueue::ScrubQContainer ScrubQueue::collect_ripe_jobs(
|
||||
ScrubQContainer& group,
|
||||
utime_t time_now)
|
||||
{
|
||||
rm_unregistered_jobs(group);
|
||||
|
||||
@ -388,7 +397,9 @@ ScrubQueue::ScrubQContainer ScrubQueue::collect_ripe_jobs(ScrubQContainer& group
|
||||
ScrubQueue::ScrubQContainer ripes;
|
||||
ripes.reserve(group.size());
|
||||
|
||||
std::copy_if(group.begin(), group.end(), std::back_inserter(ripes),
|
||||
std::copy_if(group.begin(),
|
||||
group.end(),
|
||||
std::back_inserter(ripes),
|
||||
[time_now](const auto& jobref) -> bool {
|
||||
return jobref->schedule.scheduled_at <= time_now;
|
||||
});
|
||||
@ -408,7 +419,9 @@ ScrubQueue::ScrubQContainer ScrubQueue::collect_ripe_jobs(ScrubQContainer& group
|
||||
|
||||
// not holding jobs_lock. 'group' is a copy of the actual list.
|
||||
Scrub::schedule_result_t ScrubQueue::select_from_group(
|
||||
ScrubQContainer& group, const Scrub::ScrubPreconds& preconds, utime_t now_is)
|
||||
ScrubQContainer& group,
|
||||
const Scrub::ScrubPreconds& preconds,
|
||||
utime_t now_is)
|
||||
{
|
||||
dout(15) << "jobs #: " << group.size() << dendl;
|
||||
|
||||
@ -429,8 +442,9 @@ Scrub::schedule_result_t ScrubQueue::select_from_group(
|
||||
// we have a candidate to scrub. We turn to the OSD to verify that the PG
|
||||
// configuration allows the specified type of scrub, and to initiate the
|
||||
// scrub.
|
||||
switch (osd_service.initiate_a_scrub(candidate->pgid,
|
||||
preconds.allow_requested_repair_only)) {
|
||||
switch (
|
||||
osd_service.initiate_a_scrub(candidate->pgid,
|
||||
preconds.allow_requested_repair_only)) {
|
||||
|
||||
case Scrub::schedule_result_t::scrub_initiated:
|
||||
// the happy path. We are done
|
||||
@ -544,8 +558,9 @@ bool ScrubQueue::scrub_load_below_threshold() const
|
||||
|
||||
// allow scrub if below daily avg and currently decreasing
|
||||
if (loadavgs[0] < daily_loadavg && loadavgs[0] < loadavgs[2]) {
|
||||
dout(20) << "loadavg " << loadavgs[0] << " < daily_loadavg " << daily_loadavg
|
||||
<< " and < 15m avg " << loadavgs[2] << " = yes" << dendl;
|
||||
dout(20) << "loadavg " << loadavgs[0] << " < daily_loadavg "
|
||||
<< daily_loadavg << " and < 15m avg " << loadavgs[2] << " = yes"
|
||||
<< dendl;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -575,7 +590,9 @@ void ScrubQueue::scan_penalized(bool forgive_all, utime_t time_now)
|
||||
} else {
|
||||
|
||||
auto forgiven_last = std::partition(
|
||||
penalized.begin(), penalized.end(), [time_now](const auto& e) {
|
||||
penalized.begin(),
|
||||
penalized.end(),
|
||||
[time_now](const auto& e) {
|
||||
return (*e).updated || ((*e).penalty_timeout <= time_now);
|
||||
});
|
||||
|
||||
@ -599,9 +616,9 @@ bool ScrubQueue::scrub_time_permit(utime_t now) const
|
||||
time_t tt = now.sec();
|
||||
localtime_r(&tt, &bdt);
|
||||
|
||||
bool day_permit =
|
||||
isbetween_modulo(cct->_conf->osd_scrub_begin_week_day,
|
||||
cct->_conf->osd_scrub_end_week_day, bdt.tm_wday);
|
||||
bool day_permit = isbetween_modulo(cct->_conf->osd_scrub_begin_week_day,
|
||||
cct->_conf->osd_scrub_end_week_day,
|
||||
bdt.tm_wday);
|
||||
if (!day_permit) {
|
||||
dout(20) << "should run between week day "
|
||||
<< cct->_conf->osd_scrub_begin_week_day << " - "
|
||||
@ -610,9 +627,9 @@ bool ScrubQueue::scrub_time_permit(utime_t now) const
|
||||
return false;
|
||||
}
|
||||
|
||||
bool time_permit =
|
||||
isbetween_modulo(cct->_conf->osd_scrub_begin_hour,
|
||||
cct->_conf->osd_scrub_end_hour, bdt.tm_hour);
|
||||
bool time_permit = isbetween_modulo(cct->_conf->osd_scrub_begin_hour,
|
||||
cct->_conf->osd_scrub_end_hour,
|
||||
bdt.tm_hour);
|
||||
dout(20) << "should run between " << cct->_conf->osd_scrub_begin_hour << " - "
|
||||
<< cct->_conf->osd_scrub_end_hour << " now (" << bdt.tm_hour
|
||||
<< ") = " << (time_permit ? "yes" : "no") << dendl;
|
||||
@ -625,7 +642,8 @@ void ScrubQueue::ScrubJob::dump(ceph::Formatter* f) const
|
||||
f->dump_stream("pgid") << pgid;
|
||||
f->dump_stream("sched_time") << schedule.scheduled_at;
|
||||
f->dump_stream("deadline") << schedule.deadline;
|
||||
f->dump_bool("forced", schedule.scheduled_at == PgScrubber::scrub_must_stamp());
|
||||
f->dump_bool("forced",
|
||||
schedule.scheduled_at == PgScrubber::scrub_must_stamp());
|
||||
f->close_section();
|
||||
}
|
||||
|
||||
@ -636,10 +654,12 @@ void ScrubQueue::dump_scrubs(ceph::Formatter* f) const
|
||||
|
||||
f->open_array_section("scrubs");
|
||||
|
||||
std::for_each(to_scrub.cbegin(), to_scrub.cend(),
|
||||
[&f](const ScrubJobRef& j) { j->dump(f); });
|
||||
std::for_each(to_scrub.cbegin(), to_scrub.cend(), [&f](const ScrubJobRef& j) {
|
||||
j->dump(f);
|
||||
});
|
||||
|
||||
std::for_each(penalized.cbegin(), penalized.cend(),
|
||||
std::for_each(penalized.cbegin(),
|
||||
penalized.cend(),
|
||||
[&f](const ScrubJobRef& j) { j->dump(f); });
|
||||
|
||||
f->close_section();
|
||||
@ -653,9 +673,13 @@ ScrubQueue::ScrubQContainer ScrubQueue::list_registered_jobs() const
|
||||
|
||||
std::lock_guard lck{jobs_lock};
|
||||
|
||||
std::copy_if(to_scrub.begin(), to_scrub.end(), std::back_inserter(all_jobs),
|
||||
std::copy_if(to_scrub.begin(),
|
||||
to_scrub.end(),
|
||||
std::back_inserter(all_jobs),
|
||||
registered_job);
|
||||
std::copy_if(penalized.begin(), penalized.end(), std::back_inserter(all_jobs),
|
||||
std::copy_if(penalized.begin(),
|
||||
penalized.end(),
|
||||
std::back_inserter(all_jobs),
|
||||
registered_job);
|
||||
|
||||
return all_jobs;
|
||||
@ -709,9 +733,9 @@ bool ScrubQueue::inc_scrubs_remote()
|
||||
std::lock_guard lck{resource_lock};
|
||||
|
||||
if (scrubs_local + scrubs_remote < cct->_conf->osd_max_scrubs) {
|
||||
dout(20) << ": " << scrubs_remote << " -> " << (scrubs_remote + 1) << " (max "
|
||||
<< cct->_conf->osd_max_scrubs << ", local " << scrubs_local << ")"
|
||||
<< dendl;
|
||||
dout(20) << ": " << scrubs_remote << " -> " << (scrubs_remote + 1)
|
||||
<< " (max " << cct->_conf->osd_max_scrubs << ", local "
|
||||
<< scrubs_local << ")" << dendl;
|
||||
++scrubs_remote;
|
||||
return true;
|
||||
}
|
||||
|
@ -178,9 +178,9 @@ class ScrubQueue {
|
||||
struct ScrubJob final : public RefCountedObject {
|
||||
|
||||
/**
|
||||
* a time scheduled for scrub, and a deadline: The scrub could be delayed if
|
||||
* system load is too high (but not if after the deadline), or if trying to
|
||||
* scrub out of scrub hours.
|
||||
* a time scheduled for scrub, and a deadline: The scrub could be delayed
|
||||
* if system load is too high (but not if after the deadline), or if trying
|
||||
* to scrub out of scrub hours.
|
||||
*/
|
||||
scrub_schedule_t schedule;
|
||||
|
||||
@ -354,8 +354,8 @@ class ScrubQueue {
|
||||
* (read - with higher value) configuration element
|
||||
* (osd_scrub_extended_sleep).
|
||||
*/
|
||||
double scrub_sleep_time(
|
||||
bool must_scrub) const; /// \todo (future) return milliseconds
|
||||
double scrub_sleep_time(bool must_scrub) const; /// \todo (future) return
|
||||
/// milliseconds
|
||||
|
||||
/**
|
||||
* called every heartbeat to update the "daily" load average
|
||||
@ -450,7 +450,8 @@ class ScrubQueue {
|
||||
*/
|
||||
void move_failed_pgs(utime_t now_is);
|
||||
|
||||
Scrub::schedule_result_t select_from_group(ScrubQContainer& group,
|
||||
const Scrub::ScrubPreconds& preconds,
|
||||
utime_t now_is);
|
||||
Scrub::schedule_result_t select_from_group(
|
||||
ScrubQContainer& group,
|
||||
const Scrub::ScrubPreconds& preconds,
|
||||
utime_t now_is);
|
||||
};
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -90,14 +90,16 @@ struct BuildMap;
|
||||
/**
|
||||
* Reserving/freeing scrub resources at the replicas.
|
||||
*
|
||||
* When constructed - sends reservation requests to the acting_set.
|
||||
* A rejection triggers a "couldn't acquire the replicas' scrub resources" event.
|
||||
* All previous requests, whether already granted or not, are explicitly released.
|
||||
* When constructed - sends reservation requests to the acting_set.
|
||||
* A rejection triggers a "couldn't acquire the replicas' scrub resources"
|
||||
* event. All previous requests, whether already granted or not, are explicitly
|
||||
* released.
|
||||
*
|
||||
* A note re performance: I've measured a few container alternatives for
|
||||
* m_reserved_peers, with its specific usage pattern. Std::set is extremely slow, as
|
||||
* expected. flat_set is only slightly better. Surprisingly - std::vector (with no
|
||||
* sorting) is better than boost::small_vec. And for std::vector: no need to pre-reserve.
|
||||
* A note re performance: I've measured a few container alternatives for
|
||||
* m_reserved_peers, with its specific usage pattern. Std::set is extremely
|
||||
* slow, as expected. flat_set is only slightly better. Surprisingly -
|
||||
* std::vector (with no sorting) is better than boost::small_vec. And for
|
||||
* std::vector: no need to pre-reserve.
|
||||
*/
|
||||
class ReplicaReservations {
|
||||
using OrigSet = decltype(std::declval<PG>().get_actingset());
|
||||
@ -110,7 +112,7 @@ class ReplicaReservations {
|
||||
bool m_had_rejections{false};
|
||||
int m_pending{-1};
|
||||
const pg_info_t& m_pg_info;
|
||||
ScrubQueue::ScrubJobRef m_scrub_job; ///< a ref to this PG's scrub job
|
||||
ScrubQueue::ScrubJobRef m_scrub_job; ///< a ref to this PG's scrub job
|
||||
|
||||
void release_replica(pg_shard_t peer, epoch_t epoch);
|
||||
|
||||
@ -125,12 +127,15 @@ class ReplicaReservations {
|
||||
/**
|
||||
* quietly discard all knowledge about existing reservations. No messages
|
||||
* are sent to peers.
|
||||
* To be used upon interval change, as we know the running scrub is no longer
|
||||
* relevant, and that the replicas had reset the reservations on their side.
|
||||
* To be used upon interval change, as we know the running scrub is no
|
||||
* longer relevant, and that the replicas had reset the reservations on
|
||||
* their side.
|
||||
*/
|
||||
void discard_all();
|
||||
|
||||
ReplicaReservations(PG* pg, pg_shard_t whoami, ScrubQueue::ScrubJobRef scrubjob);
|
||||
ReplicaReservations(PG* pg,
|
||||
pg_shard_t whoami,
|
||||
ScrubQueue::ScrubJobRef scrubjob);
|
||||
|
||||
~ReplicaReservations();
|
||||
|
||||
@ -155,19 +160,26 @@ class LocalReservation {
|
||||
};
|
||||
|
||||
/**
|
||||
* wraps the OSD resource we are using when reserved as a replica by a scrubbing primary.
|
||||
* wraps the OSD resource we are using when reserved as a replica by a
|
||||
* scrubbing primary.
|
||||
*/
|
||||
class ReservedByRemotePrimary {
|
||||
const PgScrubber* m_scrubber; ///< we will be using its gen_prefix()
|
||||
const PgScrubber* m_scrubber; ///< we will be using its gen_prefix()
|
||||
PG* m_pg;
|
||||
OSDService* m_osds;
|
||||
bool m_reserved_by_remote_primary{false};
|
||||
const epoch_t m_reserved_at;
|
||||
|
||||
public:
|
||||
ReservedByRemotePrimary(const PgScrubber* scrubber, PG* pg, OSDService* osds, epoch_t epoch);
|
||||
ReservedByRemotePrimary(const PgScrubber* scrubber,
|
||||
PG* pg,
|
||||
OSDService* osds,
|
||||
epoch_t epoch);
|
||||
~ReservedByRemotePrimary();
|
||||
[[nodiscard]] bool is_reserved() const { return m_reserved_by_remote_primary; }
|
||||
[[nodiscard]] bool is_reserved() const
|
||||
{
|
||||
return m_reserved_by_remote_primary;
|
||||
}
|
||||
|
||||
/// compare the remembered reserved-at epoch to the current interval
|
||||
[[nodiscard]] bool is_stale() const;
|
||||
@ -176,10 +188,10 @@ class ReservedByRemotePrimary {
|
||||
};
|
||||
|
||||
/**
|
||||
* Once all replicas' scrub maps are received, we go on to compare the maps. That is -
|
||||
* unless we have not yet completed building our own scrub map. MapsCollectionStatus
|
||||
* combines the status of waiting for both the local map and the replicas, without
|
||||
* resorting to adding dummy entries into a list.
|
||||
* Once all replicas' scrub maps are received, we go on to compare the maps.
|
||||
* That is - unless we have not yet completed building our own scrub map.
|
||||
* MapsCollectionStatus combines the status of waiting for both the local map
|
||||
* and the replicas, without resorting to adding dummy entries into a list.
|
||||
*/
|
||||
class MapsCollectionStatus {
|
||||
|
||||
@ -202,7 +214,10 @@ class MapsCollectionStatus {
|
||||
/// @returns true if indeed waiting for this one. Otherwise: an error string
|
||||
auto mark_arriving_map(pg_shard_t from) -> std::tuple<bool, std::string_view>;
|
||||
|
||||
[[nodiscard]] std::vector<pg_shard_t> get_awaited() const { return m_maps_awaited_for; }
|
||||
[[nodiscard]] std::vector<pg_shard_t> get_awaited() const
|
||||
{
|
||||
return m_maps_awaited_for;
|
||||
}
|
||||
|
||||
void reset();
|
||||
|
||||
@ -231,7 +246,8 @@ struct scrub_flags_t {
|
||||
*/
|
||||
bool auto_repair{false};
|
||||
|
||||
/// this flag indicates that we are scrubbing post repair to verify everything is fixed
|
||||
/// this flag indicates that we are scrubbing post repair to verify everything
|
||||
/// is fixed
|
||||
bool check_repair{false};
|
||||
|
||||
/// checked at the end of the scrub, to possibly initiate a deep-scrub
|
||||
@ -239,8 +255,8 @@ struct scrub_flags_t {
|
||||
|
||||
/**
|
||||
* scrub must not be aborted.
|
||||
* Set for explicitly requested scrubs, and for scrubs originated by the pairing
|
||||
* process with the 'repair' flag set (in the RequestScrub event).
|
||||
* Set for explicitly requested scrubs, and for scrubs originated by the
|
||||
* pairing process with the 'repair' flag set (in the RequestScrub event).
|
||||
*/
|
||||
bool required{false};
|
||||
};
|
||||
@ -256,12 +272,12 @@ ostream& operator<<(ostream& out, const scrub_flags_t& sf);
|
||||
* the actual scrubbing code.
|
||||
*/
|
||||
class PgScrubber : public ScrubPgIF,
|
||||
public ScrubMachineListener,
|
||||
public SnapMapperAccessor {
|
||||
public ScrubMachineListener,
|
||||
public SnapMapperAccessor {
|
||||
public:
|
||||
explicit PgScrubber(PG* pg);
|
||||
|
||||
friend class ScrubBackend; // will be replaced by a limited interface
|
||||
friend class ScrubBackend; // will be replaced by a limited interface
|
||||
|
||||
// ------------------ the I/F exposed to the PG (ScrubPgIF) -------------
|
||||
|
||||
@ -290,10 +306,11 @@ class PgScrubber : public ScrubPgIF,
|
||||
|
||||
void send_replica_pushes_upd(epoch_t epoch_queued) final;
|
||||
/**
|
||||
* The PG has updated its 'applied version'. It might be that we are waiting for this
|
||||
* information: after selecting a range of objects to scrub, we've marked the latest
|
||||
* version of these objects in m_subset_last_update. We will not start the map building
|
||||
* before we know that the PG has reached this version.
|
||||
* The PG has updated its 'applied version'. It might be that we are waiting
|
||||
* for this information: after selecting a range of objects to scrub, we've
|
||||
* marked the latest version of these objects in m_subset_last_update. We will
|
||||
* not start the map building before we know that the PG has reached this
|
||||
* version.
|
||||
*/
|
||||
void on_applied_when_primary(const eversion_t& applied_version) final;
|
||||
|
||||
@ -319,7 +336,8 @@ class PgScrubber : public ScrubPgIF,
|
||||
bool write_blocked_by_scrub(const hobject_t& soid) final;
|
||||
|
||||
/// true if the given range intersects the scrub interval in any way
|
||||
bool range_intersects_scrub(const hobject_t& start, const hobject_t& end) final;
|
||||
bool range_intersects_scrub(const hobject_t& start,
|
||||
const hobject_t& end) final;
|
||||
|
||||
/**
|
||||
* we are a replica being asked by the Primary to reserve OSD resources for
|
||||
@ -342,7 +360,8 @@ class PgScrubber : public ScrubPgIF,
|
||||
|
||||
void on_primary_change(const requested_scrub_t& request_flags) final;
|
||||
|
||||
void on_maybe_registration_change(const requested_scrub_t& request_flags) final;
|
||||
void on_maybe_registration_change(
|
||||
const requested_scrub_t& request_flags) final;
|
||||
|
||||
void scrub_requested(scrub_level_t scrub_level,
|
||||
scrub_type_t scrub_type,
|
||||
@ -373,14 +392,17 @@ class PgScrubber : public ScrubPgIF,
|
||||
return m_replica_request_priority;
|
||||
};
|
||||
|
||||
unsigned int scrub_requeue_priority(Scrub::scrub_prio_t with_priority,
|
||||
unsigned int suggested_priority) const final;
|
||||
unsigned int scrub_requeue_priority(
|
||||
Scrub::scrub_prio_t with_priority,
|
||||
unsigned int suggested_priority) const final;
|
||||
/// the version that refers to m_flags.priority
|
||||
unsigned int scrub_requeue_priority(Scrub::scrub_prio_t with_priority) const final;
|
||||
unsigned int scrub_requeue_priority(
|
||||
Scrub::scrub_prio_t with_priority) const final;
|
||||
|
||||
void add_callback(Context* context) final { m_callbacks.push_back(context); }
|
||||
|
||||
[[nodiscard]] bool are_callbacks_pending() const final // used for an assert in PG.cc
|
||||
[[nodiscard]] bool are_callbacks_pending() const final // used for an assert
|
||||
// in PG.cc
|
||||
{
|
||||
return !m_callbacks.empty();
|
||||
}
|
||||
@ -396,7 +418,7 @@ class PgScrubber : public ScrubPgIF,
|
||||
* add to scrub statistics, but only if the soid is below the scrub start
|
||||
*/
|
||||
void stats_of_handled_objects(const object_stat_sum_t& delta_stats,
|
||||
const hobject_t& soid) override
|
||||
const hobject_t& soid) override
|
||||
{
|
||||
ceph_assert(false);
|
||||
}
|
||||
@ -404,8 +426,9 @@ class PgScrubber : public ScrubPgIF,
|
||||
/**
|
||||
* finalize the parameters of the initiated scrubbing session:
|
||||
*
|
||||
* The "current scrub" flags (m_flags) are set from the 'planned_scrub' flag-set;
|
||||
* PG_STATE_SCRUBBING, and possibly PG_STATE_DEEP_SCRUB & PG_STATE_REPAIR are set.
|
||||
* The "current scrub" flags (m_flags) are set from the 'planned_scrub'
|
||||
* flag-set; PG_STATE_SCRUBBING, and possibly PG_STATE_DEEP_SCRUB &
|
||||
* PG_STATE_REPAIR are set.
|
||||
*/
|
||||
void set_op_parameters(requested_scrub_t& request) final;
|
||||
|
||||
@ -423,10 +446,14 @@ class PgScrubber : public ScrubPgIF,
|
||||
std::stringstream& ss) override;
|
||||
int m_debug_blockrange{0};
|
||||
|
||||
// -------------------------------------------------------------------------------------------
|
||||
// the I/F used by the state-machine (i.e. the implementation of ScrubMachineListener)
|
||||
// --------------------------------------------------------------------------
|
||||
// the I/F used by the state-machine (i.e. the implementation of
|
||||
// ScrubMachineListener)
|
||||
|
||||
[[nodiscard]] bool is_primary() const final { return m_pg->recovery_state.is_primary(); }
|
||||
[[nodiscard]] bool is_primary() const final
|
||||
{
|
||||
return m_pg->recovery_state.is_primary();
|
||||
}
|
||||
|
||||
void select_range_n_notify() final;
|
||||
|
||||
@ -446,13 +473,13 @@ class PgScrubber : public ScrubPgIF,
|
||||
void on_replica_init() final;
|
||||
void replica_handling_done() final;
|
||||
|
||||
/// the version of 'scrub_clear_state()' that does not try to invoke FSM services
|
||||
/// (thus can be called from FSM reactions)
|
||||
/// the version of 'scrub_clear_state()' that does not try to invoke FSM
|
||||
/// services (thus can be called from FSM reactions)
|
||||
void clear_pgscrub_state() final;
|
||||
|
||||
/*
|
||||
* Send an 'InternalSchedScrub' FSM event either immediately, or - if 'm_need_sleep'
|
||||
* is asserted - after a configuration-dependent timeout.
|
||||
* Send an 'InternalSchedScrub' FSM event either immediately, or - if
|
||||
* 'm_need_sleep' is asserted - after a configuration-dependent timeout.
|
||||
*/
|
||||
void add_delayed_scheduling() final;
|
||||
|
||||
@ -462,10 +489,11 @@ class PgScrubber : public ScrubPgIF,
|
||||
|
||||
void scrub_finish() final;
|
||||
|
||||
ScrubMachineListener::MsgAndEpoch
|
||||
prep_replica_map_msg(Scrub::PreemptionNoted was_preempted) final;
|
||||
ScrubMachineListener::MsgAndEpoch prep_replica_map_msg(
|
||||
Scrub::PreemptionNoted was_preempted) final;
|
||||
|
||||
void send_replica_map(const ScrubMachineListener::MsgAndEpoch& preprepared) final;
|
||||
void send_replica_map(
|
||||
const ScrubMachineListener::MsgAndEpoch& preprepared) final;
|
||||
|
||||
void send_preempted_replica() final;
|
||||
|
||||
@ -511,7 +539,8 @@ class PgScrubber : public ScrubPgIF,
|
||||
std::ostream& gen_prefix(std::ostream& out) const final;
|
||||
|
||||
// fetching the snap-set for a given object (used by the scrub-backend)
|
||||
int get_snaps(const hobject_t& hoid, std::set<snapid_t>* snaps_set) const final
|
||||
int get_snaps(const hobject_t& hoid,
|
||||
std::set<snapid_t>* snaps_set) const final
|
||||
{
|
||||
return m_pg->snap_mapper.get_snaps(hoid, snaps_set);
|
||||
}
|
||||
@ -525,18 +554,20 @@ class PgScrubber : public ScrubPgIF,
|
||||
|
||||
[[nodiscard]] bool is_scrub_registered() const;
|
||||
|
||||
/// the 'is-in-scheduling-queue' status, using relaxed-semantics access to the status
|
||||
/// the 'is-in-scheduling-queue' status, using relaxed-semantics access to the
|
||||
/// status
|
||||
std::string_view registration_state() const;
|
||||
|
||||
virtual void _scrub_clear_state() {}
|
||||
|
||||
utime_t m_scrub_reg_stamp; ///< stamp we registered for
|
||||
ScrubQueue::ScrubJobRef m_scrub_job; ///< the scrub-job used by the OSD to schedule us
|
||||
utime_t m_scrub_reg_stamp; ///< stamp we registered for
|
||||
ScrubQueue::ScrubJobRef m_scrub_job; ///< the scrub-job used by the OSD to
|
||||
///< schedule us
|
||||
|
||||
ostream& show(ostream& out) const override;
|
||||
|
||||
public:
|
||||
// ------------------ the I/F used by the ScrubBackend (not named yet) -------------
|
||||
// ------------------ the I/F used by the ScrubBackend (not named yet)
|
||||
|
||||
// note: the reason we must have these forwarders, is because of the
|
||||
// artificial PG vs. PrimaryLogPG distinction. Some of the services used
|
||||
@ -594,21 +625,22 @@ class PgScrubber : public ScrubPgIF,
|
||||
*
|
||||
* It isn't if:
|
||||
* - (1) we are no longer 'actively scrubbing'; or
|
||||
* - (2) the message is from an epoch prior to when we started the current scrub
|
||||
* session; or
|
||||
* - (2) the message is from an epoch prior to when we started the current
|
||||
* scrub session; or
|
||||
* - (3) the message epoch is from a previous interval; or
|
||||
* - (4) the 'abort' configuration flags were set.
|
||||
*
|
||||
* For (1) & (2) - the incoming message is discarded, w/o further action.
|
||||
*
|
||||
* For (3): (see check_interval() for a full description) if we have not reacted yet
|
||||
* to this specific new interval, we do now:
|
||||
* - replica reservations are silently discarded (we count on the replicas to notice
|
||||
* the interval change and un-reserve themselves);
|
||||
* For (3): (see check_interval() for a full description) if we have not
|
||||
* reacted yet to this specific new interval, we do now:
|
||||
* - replica reservations are silently discarded (we count on the replicas to
|
||||
* notice the interval change and un-reserve themselves);
|
||||
* - the scrubbing is halted.
|
||||
*
|
||||
* For (4): the message will be discarded, but also:
|
||||
* if this is the first time we've noticed the 'abort' request, we perform the abort.
|
||||
* if this is the first time we've noticed the 'abort' request, we perform
|
||||
* the abort.
|
||||
*
|
||||
* \returns should the incoming event be processed?
|
||||
*/
|
||||
@ -631,18 +663,20 @@ class PgScrubber : public ScrubPgIF,
|
||||
|
||||
epoch_t m_last_aborted{}; // last time we've noticed a request to abort
|
||||
|
||||
bool m_needs_sleep{true}; ///< should we sleep before being rescheduled? always
|
||||
///< 'true', unless we just got out of a sleep period
|
||||
bool m_needs_sleep{true}; ///< should we sleep before being rescheduled?
|
||||
///< always 'true', unless we just got out of a
|
||||
///< sleep period
|
||||
|
||||
utime_t m_sleep_started_at;
|
||||
|
||||
|
||||
// 'optional', as 'ReplicaReservations' & 'LocalReservation' are 'RAII-designed'
|
||||
// to guarantee un-reserving when deleted.
|
||||
// 'optional', as 'ReplicaReservations' & 'LocalReservation' are
|
||||
// 'RAII-designed' to guarantee un-reserving when deleted.
|
||||
std::optional<Scrub::ReplicaReservations> m_reservations;
|
||||
std::optional<Scrub::LocalReservation> m_local_osd_resource;
|
||||
|
||||
/// the 'remote' resource we, as a replica, grant our Primary when it is scrubbing
|
||||
/// the 'remote' resource we, as a replica, grant our Primary when it is
|
||||
/// scrubbing
|
||||
std::optional<Scrub::ReservedByRemotePrimary> m_remote_osd_resource;
|
||||
|
||||
void cleanup_on_finish(); // scrub_clear_state() as called for a Primary when
|
||||
@ -656,7 +690,8 @@ class PgScrubber : public ScrubPgIF,
|
||||
*/
|
||||
virtual void _scrub_finish() {}
|
||||
|
||||
// common code used by build_primary_map_chunk() and build_replica_map_chunk():
|
||||
// common code used by build_primary_map_chunk() and
|
||||
// build_replica_map_chunk():
|
||||
int build_scrub_map_chunk(ScrubMap& map, // primary or replica?
|
||||
ScrubMapBuilder& pos,
|
||||
hobject_t start,
|
||||
@ -668,45 +703,49 @@ class PgScrubber : public ScrubPgIF,
|
||||
OSDService* const m_osds;
|
||||
const pg_shard_t m_pg_whoami; ///< a local copy of m_pg->pg_whoami;
|
||||
|
||||
epoch_t m_interval_start{0}; ///< interval's 'from' of when scrubbing was first scheduled
|
||||
epoch_t m_interval_start{0}; ///< interval's 'from' of when scrubbing was
|
||||
///< first scheduled
|
||||
|
||||
/*
|
||||
* the exact epoch when the scrubbing actually started (started here - cleared checks
|
||||
* for no-scrub conf). Incoming events are verified against this, with stale events
|
||||
* discarded.
|
||||
* the exact epoch when the scrubbing actually started (started here - cleared
|
||||
* checks for no-scrub conf). Incoming events are verified against this, with
|
||||
* stale events discarded.
|
||||
*/
|
||||
epoch_t m_epoch_start{0}; ///< the actual epoch when scrubbing started
|
||||
|
||||
/**
|
||||
* (replica) a tag identifying a specific scrub "session". Incremented whenever the
|
||||
* Primary releases the replica scrub resources.
|
||||
* When the scrub session is terminated (even if the interval remains unchanged, as
|
||||
* might happen following an asok no-scrub command), stale scrub-resched messages
|
||||
* (replica) a tag identifying a specific scrub "session". Incremented
|
||||
* whenever the Primary releases the replica scrub resources. When the scrub
|
||||
* session is terminated (even if the interval remains unchanged, as might
|
||||
* happen following an asok no-scrub command), stale scrub-resched messages
|
||||
* triggered by the backend will be discarded.
|
||||
*/
|
||||
Scrub::act_token_t m_current_token{1};
|
||||
|
||||
/**
|
||||
* (primary/replica) a test aid. A counter that is incremented whenever a scrub starts,
|
||||
* and again when it terminates. Exposed as part of the 'pg query' command, to be used
|
||||
* by test scripts.
|
||||
* (primary/replica) a test aid. A counter that is incremented whenever a
|
||||
* scrub starts, and again when it terminates. Exposed as part of the 'pg
|
||||
* query' command, to be used by test scripts.
|
||||
*
|
||||
* @ATTN: not guaranteed to be accurate. To be only used for tests. This is why it
|
||||
* is initialized to a meaningless number;
|
||||
* @ATTN: not guaranteed to be accurate. To be only used for tests. This is
|
||||
* why it is initialized to a meaningless number;
|
||||
*/
|
||||
int32_t m_sessions_counter{(int32_t)((int64_t)(this) & 0x0000'0000'00ff'fff0)};
|
||||
bool m_publish_sessions{false}; //< will the counter be part of 'query' output?
|
||||
int32_t m_sessions_counter{
|
||||
(int32_t)((int64_t)(this) & 0x0000'0000'00ff'fff0)};
|
||||
bool m_publish_sessions{false}; //< will the counter be part of 'query'
|
||||
//output?
|
||||
|
||||
scrub_flags_t m_flags;
|
||||
|
||||
/// a reference to the details of the next scrub (as requested and managed by the PG)
|
||||
/// a reference to the details of the next scrub (as requested and managed by
|
||||
/// the PG)
|
||||
requested_scrub_t& m_planned_scrub;
|
||||
|
||||
bool m_active{false};
|
||||
|
||||
/**
|
||||
* a flag designed to prevent the initiation of a second scrub on a PG for which scrubbing
|
||||
* has been initiated.
|
||||
* a flag designed to prevent the initiation of a second scrub on a PG for
|
||||
* which scrubbing has been initiated.
|
||||
*
|
||||
* set once scrubbing was initiated (i.e. - even before the FSM event that
|
||||
* will trigger a state-change out of Inactive was handled), and only reset
|
||||
@ -717,7 +756,8 @@ class PgScrubber : public ScrubPgIF,
|
||||
* - all the time from scrub_finish() calling update_stats() till the
|
||||
* FSM handles the 'finished' event
|
||||
*
|
||||
* Compared with 'm_active', this flag is asserted earlier and remains ON for longer.
|
||||
* Compared with 'm_active', this flag is asserted earlier and remains ON for
|
||||
* longer.
|
||||
*/
|
||||
bool m_queued_or_active{false};
|
||||
|
||||
@ -746,9 +786,9 @@ class PgScrubber : public ScrubPgIF,
|
||||
* 'm_is_deep' - is the running scrub a deep one?
|
||||
*
|
||||
* Note that most of the code directly checks PG_STATE_DEEP_SCRUB, which is
|
||||
* primary-only (and is set earlier - when scheduling the scrub). 'm_is_deep' is
|
||||
* meaningful both for the primary and the replicas, and is used as a parameter when
|
||||
* building the scrub maps.
|
||||
* primary-only (and is set earlier - when scheduling the scrub). 'm_is_deep'
|
||||
* is meaningful both for the primary and the replicas, and is used as a
|
||||
* parameter when building the scrub maps.
|
||||
*/
|
||||
bool m_is_deep{false};
|
||||
|
||||
@ -770,15 +810,14 @@ class PgScrubber : public ScrubPgIF,
|
||||
* "scrub
|
||||
*
|
||||
* Note: based on PG_STATE_REPAIR, and not on m_is_repair. I.e. for
|
||||
* auto_repair will show as "deep-scrub" and not as "repair" (until the first error
|
||||
* is detected).
|
||||
* auto_repair will show as "deep-scrub" and not as "repair" (until the first
|
||||
* error is detected).
|
||||
*/
|
||||
std::string_view m_mode_desc;
|
||||
|
||||
void update_op_mode_text();
|
||||
|
||||
private:
|
||||
|
||||
private:
|
||||
/**
|
||||
* initiate a deep-scrub after the current scrub ended with errors.
|
||||
*/
|
||||
@ -838,19 +877,21 @@ private:
|
||||
std::unique_ptr<ScrubBackend> m_be;
|
||||
|
||||
/**
|
||||
* we mark the request priority as it arrived. It influences the queuing priority
|
||||
* when we wait for local updates
|
||||
* we mark the request priority as it arrived. It influences the queuing
|
||||
* priority when we wait for local updates
|
||||
*/
|
||||
Scrub::scrub_prio_t m_replica_request_priority;
|
||||
|
||||
/**
|
||||
* the 'preemption' "state-machine".
|
||||
* Note: I was considering an orthogonal sub-machine implementation, but as
|
||||
* the state diagram is extremely simple, the added complexity wasn't justified.
|
||||
* the state diagram is extremely simple, the added complexity wasn't
|
||||
* justified.
|
||||
*/
|
||||
class preemption_data_t : public Scrub::preemption_t {
|
||||
public:
|
||||
explicit preemption_data_t(PG* pg); // the PG access is used for conf access (and logs)
|
||||
explicit preemption_data_t(PG* pg); // the PG access is used for conf
|
||||
// access (and logs)
|
||||
|
||||
[[nodiscard]] bool is_preemptable() const final { return m_preemptable; }
|
||||
|
||||
@ -882,7 +923,8 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
/// used by a replica to set preemptability state according to the Primary's request
|
||||
/// used by a replica to set preemptability state according to the Primary's
|
||||
/// request
|
||||
void force_preemptability(bool is_allowed)
|
||||
{
|
||||
// note: no need to lock for a replica
|
||||
|
@ -1,8 +1,6 @@
|
||||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||||
// vim: ts=8 sw=2 smarttab
|
||||
|
||||
#include "scrub_machine.h"
|
||||
|
||||
#include <chrono>
|
||||
#include <typeinfo>
|
||||
|
||||
@ -10,7 +8,9 @@
|
||||
|
||||
#include "osd/OSD.h"
|
||||
#include "osd/OpRequest.h"
|
||||
|
||||
#include "ScrubStore.h"
|
||||
#include "scrub_machine.h"
|
||||
|
||||
#define dout_context g_ceph_context
|
||||
#define dout_subsys ceph_subsys_osd
|
||||
@ -44,9 +44,11 @@ std::string ScrubMachine::current_states_desc() const
|
||||
{
|
||||
std::string sts{"<"};
|
||||
for (auto si = state_begin(); si != state_end(); ++si) {
|
||||
const auto& siw{ *si }; // prevents a warning re side-effects
|
||||
const auto& siw{*si}; // prevents a warning re side-effects
|
||||
// the '7' is the size of the 'scrub::'
|
||||
sts += boost::core::demangle(typeid(siw).name()).substr(7, std::string::npos) + "/";
|
||||
sts +=
|
||||
boost::core::demangle(typeid(siw).name()).substr(7, std::string::npos) +
|
||||
"/";
|
||||
}
|
||||
return sts + ">";
|
||||
}
|
||||
@ -268,8 +270,9 @@ WaitPushes::WaitPushes(my_context ctx) : my_base(ctx)
|
||||
sc::result WaitPushes::react(const ActivePushesUpd&)
|
||||
{
|
||||
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
|
||||
dout(10) << "WaitPushes::react(const ActivePushesUpd&) pending_active_pushes: "
|
||||
<< scrbr->pending_active_pushes() << dendl;
|
||||
dout(10)
|
||||
<< "WaitPushes::react(const ActivePushesUpd&) pending_active_pushes: "
|
||||
<< scrbr->pending_active_pushes() << dendl;
|
||||
|
||||
if (!scrbr->pending_active_pushes()) {
|
||||
// done waiting
|
||||
@ -328,8 +331,8 @@ BuildMap::BuildMap(my_context ctx) : my_base(ctx)
|
||||
dout(10) << " -- state -->> Act/BuildMap" << dendl;
|
||||
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
|
||||
|
||||
// no need to check for an epoch change, as all possible flows that brought us here have
|
||||
// a check_interval() verification of their final event.
|
||||
// no need to check for an epoch change, as all possible flows that brought
|
||||
// us here have a check_interval() verification of their final event.
|
||||
|
||||
if (scrbr->get_preemptor().was_preempted()) {
|
||||
|
||||
@ -374,7 +377,7 @@ sc::result BuildMap::react(const IntLocalMapDone&)
|
||||
DrainReplMaps::DrainReplMaps(my_context ctx) : my_base(ctx)
|
||||
{
|
||||
dout(10) << "-- state -->> Act/DrainReplMaps" << dendl;
|
||||
// we may have received all maps already. Send the event that will make us check.
|
||||
// we may have got all maps already. Send the event that will make us check.
|
||||
post_event(GotReplicas{});
|
||||
}
|
||||
|
||||
@ -388,7 +391,8 @@ sc::result DrainReplMaps::react(const GotReplicas&)
|
||||
return transit<PendingTimer>();
|
||||
}
|
||||
|
||||
dout(15) << "DrainReplMaps::react(const GotReplicas&): still draining incoming maps: "
|
||||
dout(15) << "DrainReplMaps::react(const GotReplicas&): still draining "
|
||||
"incoming maps: "
|
||||
<< scrbr->dump_awaited_maps() << dendl;
|
||||
return discard_event();
|
||||
}
|
||||
@ -402,17 +406,18 @@ WaitReplicas::WaitReplicas(my_context ctx) : my_base(ctx)
|
||||
}
|
||||
|
||||
/**
|
||||
* note: now that maps_compare_n_cleanup() is "futurized"(*), and we remain in this state
|
||||
* for a while even after we got all our maps, we must prevent are_all_maps_available()
|
||||
* (actually - the code after the if()) from being called more than once.
|
||||
* This is basically a separate state, but it's too transitory and artificial to justify
|
||||
* the cost of a separate state.
|
||||
* note: now that maps_compare_n_cleanup() is "futurized"(*), and we remain in
|
||||
* this state for a while even after we got all our maps, we must prevent
|
||||
* are_all_maps_available() (actually - the code after the if()) from being
|
||||
* called more than once.
|
||||
* This is basically a separate state, but it's too transitory and artificial
|
||||
* to justify the cost of a separate state.
|
||||
|
||||
* (*) "futurized" - in Crimson, the call to maps_compare_n_cleanup() returns immediately
|
||||
* after initiating the process. The actual termination of the maps comparing etc' is
|
||||
* signalled via an event. As we share the code with "classic" OSD, here too
|
||||
* maps_compare_n_cleanup() is responsible for signalling the completion of the
|
||||
* processing.
|
||||
* (*) "futurized" - in Crimson, the call to maps_compare_n_cleanup() returns
|
||||
* immediately after initiating the process. The actual termination of the
|
||||
* maps comparing etc' is signalled via an event. As we share the code with
|
||||
* "classic" OSD, here too maps_compare_n_cleanup() is responsible for
|
||||
* signalling the completion of the processing.
|
||||
*/
|
||||
sc::result WaitReplicas::react(const GotReplicas&)
|
||||
{
|
||||
@ -433,7 +438,8 @@ sc::result WaitReplicas::react(const GotReplicas&)
|
||||
|
||||
} else {
|
||||
|
||||
// maps_compare_n_cleanup() will arrange for MapsCompared event to be sent:
|
||||
// maps_compare_n_cleanup() will arrange for MapsCompared event to be
|
||||
// sent:
|
||||
scrbr->maps_compare_n_cleanup();
|
||||
return discard_event();
|
||||
}
|
||||
@ -445,7 +451,8 @@ sc::result WaitReplicas::react(const GotReplicas&)
|
||||
sc::result WaitReplicas::react(const DigestUpdate&)
|
||||
{
|
||||
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
|
||||
auto warn_msg = "WaitReplicas::react(const DigestUpdate&): Unexpected DigestUpdate event"s;
|
||||
auto warn_msg =
|
||||
"WaitReplicas::react(const DigestUpdate&): Unexpected DigestUpdate event"s;
|
||||
dout(10) << warn_msg << dendl;
|
||||
scrbr->log_cluster_warning(warn_msg);
|
||||
return discard_event();
|
||||
@ -488,9 +495,9 @@ sc::result WaitDigestUpdate::react(const ScrubFinished&)
|
||||
}
|
||||
|
||||
ScrubMachine::ScrubMachine(PG* pg, ScrubMachineListener* pg_scrub)
|
||||
: m_pg_id{pg->pg_id}, m_scrbr{pg_scrub}
|
||||
{
|
||||
}
|
||||
: m_pg_id{pg->pg_id}
|
||||
, m_scrbr{pg_scrub}
|
||||
{}
|
||||
|
||||
ScrubMachine::~ScrubMachine() = default;
|
||||
|
||||
@ -538,7 +545,8 @@ ActiveReplica::ActiveReplica(my_context ctx) : my_base(ctx)
|
||||
{
|
||||
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
|
||||
dout(10) << "-- state -->> ActiveReplica" << dendl;
|
||||
scrbr->on_replica_init(); // as we might have skipped ReplicaWaitUpdates
|
||||
// and as we might have skipped ReplicaWaitUpdates:
|
||||
scrbr->on_replica_init();
|
||||
post_event(SchedReplica{});
|
||||
}
|
||||
|
||||
|
@ -16,9 +16,9 @@
|
||||
|
||||
#include "common/version.h"
|
||||
#include "include/Context.h"
|
||||
#include "osd/scrubber_common.h"
|
||||
|
||||
#include "scrub_machine_lstnr.h"
|
||||
#include "osd/scrubber_common.h"
|
||||
|
||||
class PG; // holding a pointer to that one - just for testing
|
||||
class PgScrubber;
|
||||
@ -51,17 +51,21 @@ void on_event_discard(std::string_view nm);
|
||||
std::string_view print() const { return #E; } \
|
||||
};
|
||||
|
||||
MEV(RemotesReserved) ///< all replicas have granted our reserve request
|
||||
/// all replicas have granted our reserve request
|
||||
MEV(RemotesReserved)
|
||||
|
||||
MEV(ReservationFailure) ///< a reservation request has failed
|
||||
/// a reservation request has failed
|
||||
MEV(ReservationFailure)
|
||||
|
||||
MEV(StartScrub) ///< initiate a new scrubbing session (relevant if we are a Primary)
|
||||
/// initiate a new scrubbing session (relevant if we are a Primary)
|
||||
MEV(StartScrub)
|
||||
|
||||
MEV(AfterRepairScrub) ///< initiate a new scrubbing session. Only triggered at Recovery
|
||||
///< completion.
|
||||
/// initiate a new scrubbing session. Only triggered at Recovery completion
|
||||
MEV(AfterRepairScrub)
|
||||
|
||||
MEV(Unblocked) ///< triggered when the PG unblocked an object that was marked for
|
||||
///< scrubbing. Via the PGScrubUnblocked op
|
||||
/// triggered when the PG unblocked an object that was marked for scrubbing.
|
||||
/// Via the PGScrubUnblocked op
|
||||
MEV(Unblocked)
|
||||
|
||||
MEV(InternalSchedScrub)
|
||||
|
||||
@ -69,48 +73,63 @@ MEV(SelectedChunkFree)
|
||||
|
||||
MEV(ChunkIsBusy)
|
||||
|
||||
MEV(ActivePushesUpd) ///< Update to active_pushes. 'active_pushes' represents recovery
|
||||
///< that is in-flight to the local ObjectStore
|
||||
/// Update to active_pushes. 'active_pushes' represents recovery that
|
||||
/// is in-flight to the local ObjectStore
|
||||
MEV(ActivePushesUpd)
|
||||
|
||||
MEV(UpdatesApplied) ///< (Primary only) all updates are committed
|
||||
/// (Primary only) all updates are committed
|
||||
MEV(UpdatesApplied)
|
||||
|
||||
MEV(InternalAllUpdates) ///< the internal counterpart of UpdatesApplied
|
||||
/// the internal counterpart of UpdatesApplied
|
||||
MEV(InternalAllUpdates)
|
||||
|
||||
MEV(GotReplicas) ///< got a map from a replica
|
||||
/// got a map from a replica
|
||||
MEV(GotReplicas)
|
||||
|
||||
MEV(IntBmPreempted) ///< internal - BuildMap preempted. Required, as detected within the
|
||||
///< ctor
|
||||
/// internal - BuildMap preempted. Required, as detected within the ctor
|
||||
MEV(IntBmPreempted)
|
||||
|
||||
MEV(InternalError)
|
||||
|
||||
MEV(IntLocalMapDone)
|
||||
|
||||
MEV(DigestUpdate) ///< external. called upon success of a MODIFY op. See
|
||||
///< scrub_snapshot_metadata()
|
||||
/// external. called upon success of a MODIFY op. See
|
||||
/// scrub_snapshot_metadata()
|
||||
MEV(DigestUpdate)
|
||||
|
||||
MEV(MapsCompared) ///< maps_compare_n_cleanup() transactions are done
|
||||
/// maps_compare_n_cleanup() transactions are done
|
||||
MEV(MapsCompared)
|
||||
|
||||
MEV(StartReplica) ///< initiating replica scrub.
|
||||
/// initiating replica scrub
|
||||
MEV(StartReplica)
|
||||
|
||||
MEV(StartReplicaNoWait) ///< 'start replica' when there are no pending updates
|
||||
/// 'start replica' when there are no pending updates
|
||||
MEV(StartReplicaNoWait)
|
||||
|
||||
MEV(SchedReplica)
|
||||
|
||||
MEV(ReplicaPushesUpd) ///< Update to active_pushes. 'active_pushes' represents recovery
|
||||
///< that is in-flight to the local ObjectStore
|
||||
/// Update to active_pushes. 'active_pushes' represents recovery
|
||||
/// that is in-flight to the local ObjectStore
|
||||
MEV(ReplicaPushesUpd)
|
||||
|
||||
MEV(FullReset) ///< guarantee that the FSM is in the quiescent state (i.e. NotActive)
|
||||
/// guarantee that the FSM is in the quiescent state (i.e. NotActive)
|
||||
MEV(FullReset)
|
||||
|
||||
MEV(NextChunk) ///< finished handling this chunk. Go get the next one
|
||||
/// finished handling this chunk. Go get the next one
|
||||
MEV(NextChunk)
|
||||
|
||||
MEV(ScrubFinished) ///< all chunks handled
|
||||
/// all chunks handled
|
||||
MEV(ScrubFinished)
|
||||
|
||||
//
|
||||
// STATES
|
||||
//
|
||||
|
||||
struct NotActive; ///< the quiescent state. No active scrubbing.
|
||||
struct ReservingReplicas; ///< securing scrub resources from replicas' OSDs
|
||||
struct ActiveScrubbing; ///< the active state for a Primary. A sub-machine.
|
||||
struct ReplicaWaitUpdates; ///< an active state for a replica. Waiting for all active
|
||||
///< operations to finish.
|
||||
struct ReplicaWaitUpdates; ///< an active state for a replica. Waiting for all
|
||||
///< active operations to finish.
|
||||
struct ActiveReplica; ///< an active state for a replica.
|
||||
|
||||
|
||||
@ -135,27 +154,30 @@ class ScrubMachine : public sc::state_machine<ScrubMachine, NotActive> {
|
||||
/**
|
||||
* The Scrubber's base (quiescent) state.
|
||||
* Scrubbing is triggered by one of the following events:
|
||||
* - (standard scenario for a Primary): 'StartScrub'. Initiates the OSDs resources
|
||||
* reservation process. Will be issued by PG::scrub(), following a
|
||||
* queued "PGScrub" op.
|
||||
* - a special end-of-recovery Primary scrub event ('AfterRepairScrub') that is
|
||||
* not required to reserve resources.
|
||||
* - (for a replica) 'StartReplica' or 'StartReplicaNoWait', triggered by an incoming
|
||||
* MOSDRepScrub message.
|
||||
*
|
||||
* note (20.8.21): originally, AfterRepairScrub was triggering a scrub without waiting
|
||||
* for replica resources to be acquired. But once replicas started using the
|
||||
* resource-request to identify and tag the scrub session, this bypass cannot be
|
||||
* supported anymore.
|
||||
* - (standard scenario for a Primary): 'StartScrub'. Initiates the OSDs
|
||||
* resources reservation process. Will be issued by PG::scrub(), following a
|
||||
* queued "PGScrub" op.
|
||||
*
|
||||
* - a special end-of-recovery Primary scrub event ('AfterRepairScrub').
|
||||
*
|
||||
* - (for a replica) 'StartReplica' or 'StartReplicaNoWait', triggered by
|
||||
* an incoming MOSDRepScrub message.
|
||||
*
|
||||
* note (20.8.21): originally, AfterRepairScrub was triggering a scrub without
|
||||
* waiting for replica resources to be acquired. But once replicas started
|
||||
* using the resource-request to identify and tag the scrub session, this
|
||||
* bypass cannot be supported anymore.
|
||||
*/
|
||||
struct NotActive : sc::state<NotActive, ScrubMachine> {
|
||||
explicit NotActive(my_context ctx);
|
||||
|
||||
using reactions = mpl::list<sc::custom_reaction<StartScrub>,
|
||||
// a scrubbing that was initiated at recovery completion
|
||||
sc::custom_reaction<AfterRepairScrub>,
|
||||
sc::transition<StartReplica, ReplicaWaitUpdates>,
|
||||
sc::transition<StartReplicaNoWait, ActiveReplica>>;
|
||||
using reactions =
|
||||
mpl::list<sc::custom_reaction<StartScrub>,
|
||||
// a scrubbing that was initiated at recovery completion:
|
||||
sc::custom_reaction<AfterRepairScrub>,
|
||||
sc::transition<StartReplica, ReplicaWaitUpdates>,
|
||||
sc::transition<StartReplicaNoWait, ActiveReplica>>;
|
||||
sc::result react(const StartScrub&);
|
||||
sc::result react(const AfterRepairScrub&);
|
||||
};
|
||||
@ -178,26 +200,35 @@ struct ReservingReplicas : sc::state<ReservingReplicas, ScrubMachine> {
|
||||
|
||||
// the "active" sub-states
|
||||
|
||||
struct RangeBlocked; ///< the objects range is blocked
|
||||
struct PendingTimer; ///< either delaying the scrub by some time and requeuing, or just
|
||||
///< requeue
|
||||
struct NewChunk; ///< select a chunk to scrub, and verify its availability
|
||||
/// the objects range is blocked
|
||||
struct RangeBlocked;
|
||||
|
||||
/// either delaying the scrub by some time and requeuing, or just requeue
|
||||
struct PendingTimer;
|
||||
|
||||
/// select a chunk to scrub, and verify its availability
|
||||
struct NewChunk;
|
||||
|
||||
struct WaitPushes;
|
||||
struct WaitLastUpdate;
|
||||
struct BuildMap;
|
||||
struct DrainReplMaps; ///< a problem during BuildMap. Wait for all replicas to report,
|
||||
///< then restart.
|
||||
struct WaitReplicas; ///< wait for all replicas to report
|
||||
|
||||
/// a problem during BuildMap. Wait for all replicas to report, then restart.
|
||||
struct DrainReplMaps;
|
||||
|
||||
/// wait for all replicas to report
|
||||
struct WaitReplicas;
|
||||
|
||||
struct WaitDigestUpdate;
|
||||
|
||||
struct ActiveScrubbing : sc::state<ActiveScrubbing, ScrubMachine, PendingTimer> {
|
||||
struct ActiveScrubbing
|
||||
: sc::state<ActiveScrubbing, ScrubMachine, PendingTimer> {
|
||||
|
||||
explicit ActiveScrubbing(my_context ctx);
|
||||
~ActiveScrubbing();
|
||||
|
||||
using reactions = mpl::list<
|
||||
sc::custom_reaction<InternalError>,
|
||||
sc::custom_reaction<FullReset>>;
|
||||
using reactions = mpl::list<sc::custom_reaction<InternalError>,
|
||||
sc::custom_reaction<FullReset>>;
|
||||
|
||||
sc::result react(const FullReset&);
|
||||
sc::result react(const InternalError&);
|
||||
@ -231,9 +262,10 @@ struct NewChunk : sc::state<NewChunk, ActiveScrubbing> {
|
||||
* initiate the update process for this chunk
|
||||
*
|
||||
* Wait for 'active_pushes' to clear.
|
||||
* 'active_pushes' represents recovery that is in-flight to the local Objectstore, hence
|
||||
* scrub waits until the correct data is readable (in-flight data to the Objectstore is
|
||||
* not readable until written to disk, termed 'applied' here)
|
||||
* 'active_pushes' represents recovery that is in-flight to the local
|
||||
* Objectstore, hence scrub waits until the correct data is readable
|
||||
* (in-flight data to the Objectstore is not readable until written to
|
||||
* disk, termed 'applied' here)
|
||||
*/
|
||||
struct WaitPushes : sc::state<WaitPushes, ActiveScrubbing> {
|
||||
|
||||
@ -250,10 +282,11 @@ struct WaitLastUpdate : sc::state<WaitLastUpdate, ActiveScrubbing> {
|
||||
|
||||
void on_new_updates(const UpdatesApplied&);
|
||||
|
||||
using reactions = mpl::list<sc::custom_reaction<InternalAllUpdates>,
|
||||
sc::in_state_reaction<UpdatesApplied,
|
||||
WaitLastUpdate,
|
||||
&WaitLastUpdate::on_new_updates>>;
|
||||
using reactions =
|
||||
mpl::list<sc::custom_reaction<InternalAllUpdates>,
|
||||
sc::in_state_reaction<UpdatesApplied,
|
||||
WaitLastUpdate,
|
||||
&WaitLastUpdate::on_new_updates>>;
|
||||
|
||||
sc::result react(const InternalAllUpdates&);
|
||||
};
|
||||
@ -266,14 +299,12 @@ struct BuildMap : sc::state<BuildMap, ActiveScrubbing> {
|
||||
// handled by our parent state;
|
||||
// - if preempted, we switch to DrainReplMaps, where we will wait for all
|
||||
// replicas to send their maps before acknowledging the preemption;
|
||||
// - an interval change will be handled by the relevant 'send-event' functions,
|
||||
// and will be translated into a 'FullReset' event.
|
||||
using reactions =
|
||||
mpl::list<sc::transition<IntBmPreempted, DrainReplMaps>,
|
||||
sc::transition<InternalSchedScrub, BuildMap>, // looping, waiting
|
||||
// for the backend to
|
||||
// finish
|
||||
sc::custom_reaction<IntLocalMapDone>>;
|
||||
// - an interval change will be handled by the relevant 'send-event'
|
||||
// functions, and will be translated into a 'FullReset' event.
|
||||
using reactions = mpl::list<sc::transition<IntBmPreempted, DrainReplMaps>,
|
||||
// looping, waiting for the backend to finish:
|
||||
sc::transition<InternalSchedScrub, BuildMap>,
|
||||
sc::custom_reaction<IntLocalMapDone>>;
|
||||
|
||||
sc::result react(const IntLocalMapDone&);
|
||||
};
|
||||
@ -285,8 +316,8 @@ struct DrainReplMaps : sc::state<DrainReplMaps, ActiveScrubbing> {
|
||||
explicit DrainReplMaps(my_context ctx);
|
||||
|
||||
using reactions =
|
||||
mpl::list<sc::custom_reaction<GotReplicas> // all replicas are accounted for
|
||||
>;
|
||||
// all replicas are accounted for:
|
||||
mpl::list<sc::custom_reaction<GotReplicas>>;
|
||||
|
||||
sc::result react(const GotReplicas&);
|
||||
};
|
||||
@ -294,11 +325,11 @@ struct DrainReplMaps : sc::state<DrainReplMaps, ActiveScrubbing> {
|
||||
struct WaitReplicas : sc::state<WaitReplicas, ActiveScrubbing> {
|
||||
explicit WaitReplicas(my_context ctx);
|
||||
|
||||
using reactions =
|
||||
mpl::list<sc::custom_reaction<GotReplicas>, // all replicas are accounted for
|
||||
sc::transition<MapsCompared, WaitDigestUpdate>,
|
||||
sc::custom_reaction<DigestUpdate>
|
||||
>;
|
||||
using reactions = mpl::list<
|
||||
// all replicas are accounted for:
|
||||
sc::custom_reaction<GotReplicas>,
|
||||
sc::transition<MapsCompared, WaitDigestUpdate>,
|
||||
sc::custom_reaction<DigestUpdate>>;
|
||||
|
||||
sc::result react(const GotReplicas&);
|
||||
sc::result react(const DigestUpdate&);
|
||||
@ -309,13 +340,13 @@ struct WaitDigestUpdate : sc::state<WaitDigestUpdate, ActiveScrubbing> {
|
||||
explicit WaitDigestUpdate(my_context ctx);
|
||||
|
||||
using reactions = mpl::list<sc::custom_reaction<DigestUpdate>,
|
||||
sc::custom_reaction<ScrubFinished>,
|
||||
sc::transition<NextChunk, PendingTimer>>;
|
||||
sc::custom_reaction<ScrubFinished>,
|
||||
sc::transition<NextChunk, PendingTimer>>;
|
||||
sc::result react(const DigestUpdate&);
|
||||
sc::result react(const ScrubFinished&);
|
||||
};
|
||||
|
||||
// ----------------------------- the "replica active" states -----------------------
|
||||
// ----------------------------- the "replica active" states
|
||||
|
||||
/*
|
||||
* Waiting for 'active_pushes' to complete
|
||||
@ -326,8 +357,8 @@ struct WaitDigestUpdate : sc::state<WaitDigestUpdate, ActiveScrubbing> {
|
||||
*/
|
||||
struct ReplicaWaitUpdates : sc::state<ReplicaWaitUpdates, ScrubMachine> {
|
||||
explicit ReplicaWaitUpdates(my_context ctx);
|
||||
using reactions =
|
||||
mpl::list<sc::custom_reaction<ReplicaPushesUpd>, sc::custom_reaction<FullReset>>;
|
||||
using reactions = mpl::list<sc::custom_reaction<ReplicaPushesUpd>,
|
||||
sc::custom_reaction<FullReset>>;
|
||||
|
||||
sc::result react(const ReplicaPushesUpd&);
|
||||
sc::result react(const FullReset&);
|
||||
|
@ -7,7 +7,6 @@
|
||||
*/
|
||||
#include "common/version.h"
|
||||
#include "include/Context.h"
|
||||
|
||||
#include "osd/osd_types.h"
|
||||
|
||||
namespace Scrub {
|
||||
@ -90,13 +89,13 @@ struct ScrubMachineListener {
|
||||
|
||||
virtual void replica_handling_done() = 0;
|
||||
|
||||
/// the version of 'scrub_clear_state()' that does not try to invoke FSM services
|
||||
/// (thus can be called from FSM reactions)
|
||||
/// the version of 'scrub_clear_state()' that does not try to invoke FSM
|
||||
/// services (thus can be called from FSM reactions)
|
||||
virtual void clear_pgscrub_state() = 0;
|
||||
|
||||
/*
|
||||
* Send an 'InternalSchedScrub' FSM event either immediately, or - if 'm_need_sleep'
|
||||
* is asserted - after a configuration-dependent timeout.
|
||||
* Send an 'InternalSchedScrub' FSM event either immediately, or - if
|
||||
* 'm_need_sleep' is asserted - after a configuration-dependent timeout.
|
||||
*/
|
||||
virtual void add_delayed_scheduling() = 0;
|
||||
|
||||
@ -113,8 +112,8 @@ struct ScrubMachineListener {
|
||||
/**
|
||||
* Prepare a MOSDRepScrubMap message carrying the requested scrub map
|
||||
* @param was_preempted - were we preempted?
|
||||
* @return the message, and the current value of 'm_replica_min_epoch' (which is
|
||||
* used when sending the message, but will be overwritten before that).
|
||||
* @return the message, and the current value of 'm_replica_min_epoch' (which
|
||||
* is used when sending the message, but will be overwritten before that).
|
||||
*/
|
||||
[[nodiscard]] virtual MsgAndEpoch prep_replica_map_msg(
|
||||
Scrub::PreemptionNoted was_preempted) = 0;
|
||||
|
@ -52,8 +52,8 @@ struct requested_scrub_t {
|
||||
|
||||
/**
|
||||
* scrub must not be aborted.
|
||||
* Set for explicitly requested scrubs, and for scrubs originated by the peering
|
||||
* process with the 'repair' flag set (in the RequestScrub event).
|
||||
* Set for explicitly requested scrubs, and for scrubs originated by the
|
||||
* peering process with the 'repair' flag set (in the RequestScrub event).
|
||||
*
|
||||
* Will be copied into the 'required' scrub flag upon scrub start.
|
||||
*/
|
||||
@ -64,14 +64,15 @@ struct requested_scrub_t {
|
||||
* - scrub_requested() with need_auto param set, which only happens in
|
||||
* - scrub_finish() - if deep_scrub_on_error is set, and we have errors
|
||||
*
|
||||
* If set, will prevent the OSD from casually postponing our scrub. When scrubbing
|
||||
* starts, will cause must_scrub, must_deep_scrub and auto_repair to be set.
|
||||
* If set, will prevent the OSD from casually postponing our scrub. When
|
||||
* scrubbing starts, will cause must_scrub, must_deep_scrub and auto_repair to
|
||||
* be set.
|
||||
*/
|
||||
bool need_auto{false};
|
||||
|
||||
/**
|
||||
* Set for scrub-after-recovery just before we initiate the recovery deep scrub,
|
||||
* or if scrub_requested() was called with either need_auto or repair.
|
||||
* Set for scrub-after-recovery just before we initiate the recovery deep
|
||||
* scrub, or if scrub_requested() was called with either need_auto or repair.
|
||||
* Affects PG_STATE_DEEP_SCRUB.
|
||||
*/
|
||||
bool must_deep_scrub{false};
|
||||
@ -98,8 +99,8 @@ struct requested_scrub_t {
|
||||
bool must_repair{false};
|
||||
|
||||
/*
|
||||
* the value of auto_repair is determined in sched_scrub() (once per scrub. previous
|
||||
* value is not remembered). Set if
|
||||
* the value of auto_repair is determined in sched_scrub() (once per scrub.
|
||||
* previous value is not remembered). Set if
|
||||
* - allowed by configuration and backend, and
|
||||
* - must_scrub is not set (i.e. - this is a periodic scrub),
|
||||
* - time_for_deep was just set
|
||||
@ -122,7 +123,10 @@ struct ScrubPgIF {
|
||||
|
||||
virtual ~ScrubPgIF() = default;
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& out, const ScrubPgIF& s) { return s.show(out); }
|
||||
friend std::ostream& operator<<(std::ostream& out, const ScrubPgIF& s)
|
||||
{
|
||||
return s.show(out);
|
||||
}
|
||||
|
||||
virtual std::ostream& show(std::ostream& out) const = 0;
|
||||
|
||||
@ -146,9 +150,11 @@ struct ScrubPgIF {
|
||||
|
||||
virtual void send_replica_pushes_upd(epoch_t epoch_queued) = 0;
|
||||
|
||||
virtual void send_start_replica(epoch_t epoch_queued, Scrub::act_token_t token) = 0;
|
||||
virtual void send_start_replica(epoch_t epoch_queued,
|
||||
Scrub::act_token_t token) = 0;
|
||||
|
||||
virtual void send_sched_replica(epoch_t epoch_queued, Scrub::act_token_t token) = 0;
|
||||
virtual void send_sched_replica(epoch_t epoch_queued,
|
||||
Scrub::act_token_t token) = 0;
|
||||
|
||||
virtual void send_full_reset(epoch_t epoch_queued) = 0;
|
||||
|
||||
@ -164,12 +170,14 @@ struct ScrubPgIF {
|
||||
|
||||
virtual void send_maps_compared(epoch_t epoch_queued) = 0;
|
||||
|
||||
virtual void on_applied_when_primary(const eversion_t &applied_version) = 0;
|
||||
virtual void on_applied_when_primary(const eversion_t& applied_version) = 0;
|
||||
|
||||
// --------------------------------------------------
|
||||
|
||||
[[nodiscard]] virtual bool are_callbacks_pending()
|
||||
const = 0; // currently only used for an assert
|
||||
[[nodiscard]] virtual bool are_callbacks_pending() const = 0; // currently
|
||||
// only used
|
||||
// for an
|
||||
// assert
|
||||
|
||||
/**
|
||||
* the scrubber is marked 'active':
|
||||
@ -215,17 +223,19 @@ struct ScrubPgIF {
|
||||
const requested_scrub_t& request_flags) const = 0;
|
||||
|
||||
/**
|
||||
* Return true if soid is currently being scrubbed and pending IOs should block.
|
||||
* May have a side effect of preempting an in-progress scrub -- will return false
|
||||
* in that case.
|
||||
* Return true if soid is currently being scrubbed and pending IOs should
|
||||
* block. May have a side effect of preempting an in-progress scrub -- will
|
||||
* return false in that case.
|
||||
*
|
||||
* @param soid object to check for ongoing scrub
|
||||
* @return boolean whether a request on soid should block until scrub completion
|
||||
* @return boolean whether a request on soid should block until scrub
|
||||
* completion
|
||||
*/
|
||||
virtual bool write_blocked_by_scrub(const hobject_t& soid) = 0;
|
||||
|
||||
/// Returns whether any objects in the range [start, end] are being scrubbed
|
||||
virtual bool range_intersects_scrub(const hobject_t& start, const hobject_t& end) = 0;
|
||||
virtual bool range_intersects_scrub(const hobject_t& start,
|
||||
const hobject_t& end) = 0;
|
||||
|
||||
/// the op priority, taken from the primary's request message
|
||||
virtual Scrub::scrub_prio_t replica_op_priority() const = 0;
|
||||
@ -233,8 +243,9 @@ struct ScrubPgIF {
|
||||
/// the priority of the on-going scrub (used when requeuing events)
|
||||
virtual unsigned int scrub_requeue_priority(
|
||||
Scrub::scrub_prio_t with_priority) const = 0;
|
||||
virtual unsigned int scrub_requeue_priority(Scrub::scrub_prio_t with_priority,
|
||||
unsigned int suggested_priority) const = 0;
|
||||
virtual unsigned int scrub_requeue_priority(
|
||||
Scrub::scrub_prio_t with_priority,
|
||||
unsigned int suggested_priority) const = 0;
|
||||
|
||||
virtual void add_callback(Context* context) = 0;
|
||||
|
||||
@ -243,8 +254,8 @@ struct ScrubPgIF {
|
||||
const hobject_t& soid) = 0;
|
||||
|
||||
/**
|
||||
* the version of 'scrub_clear_state()' that does not try to invoke FSM services
|
||||
* (thus can be called from FSM reactions)
|
||||
* the version of 'scrub_clear_state()' that does not try to invoke FSM
|
||||
* services (thus can be called from FSM reactions)
|
||||
*/
|
||||
virtual void clear_pgscrub_state() = 0;
|
||||
|
||||
@ -255,8 +266,8 @@ struct ScrubPgIF {
|
||||
virtual void send_remotes_reserved(epoch_t epoch_queued) = 0;
|
||||
|
||||
/**
|
||||
* triggers the 'ReservationFailure' (at least one replica denied us the requested
|
||||
* resources) state-machine event
|
||||
* triggers the 'ReservationFailure' (at least one replica denied us the
|
||||
* requested resources) state-machine event
|
||||
*/
|
||||
virtual void send_reservation_failure(epoch_t epoch_queued) = 0;
|
||||
|
||||
@ -309,7 +320,8 @@ struct ScrubPgIF {
|
||||
*/
|
||||
virtual void update_scrub_job(const requested_scrub_t& request_flags) = 0;
|
||||
|
||||
virtual void on_maybe_registration_change(const requested_scrub_t& request_flags) = 0;
|
||||
virtual void on_maybe_registration_change(
|
||||
const requested_scrub_t& request_flags) = 0;
|
||||
|
||||
// on the replica:
|
||||
virtual void handle_scrub_reserve_request(OpRequestRef op) = 0;
|
||||
@ -317,7 +329,8 @@ struct ScrubPgIF {
|
||||
|
||||
// and on the primary:
|
||||
virtual void handle_scrub_reserve_grant(OpRequestRef op, pg_shard_t from) = 0;
|
||||
virtual void handle_scrub_reserve_reject(OpRequestRef op, pg_shard_t from) = 0;
|
||||
virtual void handle_scrub_reserve_reject(OpRequestRef op,
|
||||
pg_shard_t from) = 0;
|
||||
|
||||
virtual void rm_from_osd_scrubbing() = 0;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user