osd/PrimaryLogPG: limit the number of concurrently trimming pgs

This patch introduces an AsyncReserver for snap trimming to limit the
number of PGs on any single OSD that can be trimming at once, as is
already done for backfill. Unlike backfill, we don't take remote
reservations: we assume that the set of PGs with trimming work to do is
already well distributed, so it doesn't seem worth the implementation
overhead to get reservations from the peers as well.

Signed-off-by: Samuel Just <sjust@redhat.com>
This commit is contained in:
Samuel Just 2017-02-01 12:07:09 -08:00
parent df2e1fd84e
commit 21cc515adf
5 changed files with 153 additions and 71 deletions

View File

@ -774,6 +774,8 @@ OPTION(osd_heartbeat_use_min_delay_socket, OPT_BOOL, false) // prio the heartbea
// max number of parallel snap trims/pg
OPTION(osd_pg_max_concurrent_snap_trims, OPT_U64, 2)
// max number of pgs on a single OSD that may be snap trimming at once
// (passed as the max to OSDService::snap_reserver)
OPTION(osd_max_trimming_pgs, OPT_U64, 2)
// minimum number of peers that must be reachable to mark ourselves
// back up after being wrongly marked down.

View File

@ -261,6 +261,8 @@ OSDService::OSDService(OSD *osd) :
remote_reserver(&reserver_finisher, cct->_conf->osd_max_backfills,
cct->_conf->osd_min_recovery_priority),
pg_temp_lock("OSDService::pg_temp_lock"),
snap_reserver(&reserver_finisher,
cct->_conf->osd_max_trimming_pgs),
recovery_lock("OSDService::recovery_lock"),
recovery_ops_active(0),
recovery_ops_reserved(0),
@ -9296,6 +9298,9 @@ void OSD::handle_conf_change(const struct md_config_t *conf,
service.local_reserver.set_min_priority(cct->_conf->osd_min_recovery_priority);
service.remote_reserver.set_min_priority(cct->_conf->osd_min_recovery_priority);
}
if (changed.count("osd_max_trimming_pgs")) {
service.snap_reserver.set_max(cct->_conf->osd_max_trimming_pgs);
}
if (changed.count("osd_op_complaint_time") ||
changed.count("osd_op_log_threshold")) {
op_tracker.set_complaint_and_threshold(cct->_conf->osd_op_complaint_time,

View File

@ -911,7 +911,10 @@ public:
void send_pg_temp();
void queue_for_peering(PG *pg);
AsyncReserver<spg_t> snap_reserver;
void queue_for_snap_trim(PG *pg);
void queue_for_scrub(PG *pg) {
op_wq.queue(
make_pair(

View File

@ -13062,11 +13062,6 @@ void PrimaryLogPG::_scrub_finish()
#undef dout_prefix
#define dout_prefix *_dout << pg->gen_prefix()
// Destructor: empties the in_flight set.
PrimaryLogPG::SnapTrimmer::~SnapTrimmer()
{
in_flight.clear();
}
void PrimaryLogPG::SnapTrimmer::log_enter(const char *state_name)
{
ldout(pg->cct, 20) << "enter " << state_name << dendl;
@ -13100,26 +13095,42 @@ boost::statechart::result PrimaryLogPG::NotTrimming::react(const KickTrim&)
PrimaryLogPG *pg = context< SnapTrimmer >().pg;
ldout(pg->cct, 10) << "NotTrimming react KickTrim" << dendl;
assert(pg->is_primary() && pg->is_active());
if (!(pg->is_primary() && pg->is_active())) {
ldout(pg->cct, 10) << "NotTrimming not primary or active" << dendl;
return discard_event();
}
if (!pg->is_clean() ||
pg->snap_trimq.empty()) {
ldout(pg->cct, 10) << "NotTrimming not clean or nothing to trim" << dendl;
return discard_event();
}
if (pg->scrubber.active) {
ldout(pg->cct, 10) << " scrubbing, will requeue snap_trimmer after" << dendl;
pg->scrubber.queue_snap_trim = true;
return transit< WaitScrub >();
} else {
context<SnapTrimmer>().snap_to_trim = pg->snap_trimq.range_start();
ldout(pg->cct, 10) << "NotTrimming: trimming "
<< pg->snap_trimq.range_start()
<< dendl;
return transit< AwaitAsyncWork >();
return transit< Trimming >();
}
}
/**
 * React to SnapTrimReserved, the event that ReservationCB::finish() feeds
 * to the state machine.
 *
 * `pending` is cleared first, so that exit() will not call cancel() on the
 * callback that has just delivered this event.  If the PG can no longer
 * trim (SnapTrimmer::can_trim() is false) we drop back to NotTrimming and
 * post KickTrim so the conditions get re-evaluated there.  Otherwise
 * snap_trimq.range_start() is recorded on the Trimming state as the snap
 * to trim and we move on to AwaitAsyncWork.
 */
boost::statechart::result PrimaryLogPG::WaitReservation::react(const SnapTrimReserved&)
{
  // NB: the local must be called `pg`; dout_prefix expands to pg->gen_prefix().
  PrimaryLogPG *pg = context< SnapTrimmer >().pg;
  ldout(pg->cct, 10) << "WaitReservation react SnapTrimReserved" << dendl;

  pending = nullptr;
  if (!context< SnapTrimmer >().can_trim()) {
    post_event(KickTrim());
    return transit< NotTrimming >();
  }

  auto &snap_to_trim = context<Trimming>().snap_to_trim;
  snap_to_trim = pg->snap_trimq.range_start();
  // Log under this state's own name; the message used to claim to come
  // from NotTrimming, which is misleading when reading debug logs.
  ldout(pg->cct, 10) << "WaitReservation: trimming "
                     << snap_to_trim
                     << dendl;
  return transit< AwaitAsyncWork >();
}
/* AwaitAsyncWork */
PrimaryLogPG::AwaitAsyncWork::AwaitAsyncWork(my_context ctx)
: my_base(ctx),
@ -13130,22 +13141,15 @@ PrimaryLogPG::AwaitAsyncWork::AwaitAsyncWork(my_context ctx)
context< SnapTrimmer >().pg);
}
// Reports leaving this state through SnapTrimmer::log_exit, passing the
// state's name and its enter_time.
void PrimaryLogPG::AwaitAsyncWork::exit()
{
context< SnapTrimmer >().log_exit(state_name, enter_time);
}
boost::statechart::result PrimaryLogPG::AwaitAsyncWork::react(const DoSnapWork&)
{
PrimaryLogPGRef pg = context< SnapTrimmer >().pg;
ldout(pg->cct, 10) << "AwaitAsyncWork react" << dendl;
snapid_t snap_to_trim = context<SnapTrimmer>().snap_to_trim;
auto &in_flight = context<SnapTrimmer>().in_flight;
snapid_t snap_to_trim = context<Trimming>().snap_to_trim;
auto &in_flight = context<Trimming>().in_flight;
assert(in_flight.empty());
assert(pg->is_primary() && pg->is_active());
if (!pg->is_clean() ||
pg->scrubber.active) {
if (!context< SnapTrimmer >().can_trim()) {
ldout(pg->cct, 10) << "something changed, reverting to NotTrimming" << dendl;
post_event(KickTrim());
return transit< NotTrimming >();
@ -13202,8 +13206,7 @@ boost::statechart::result PrimaryLogPG::AwaitAsyncWork::react(const DoSnapWork&)
return transit< WaitRWLock >();
} else {
ldout(pg->cct, 10) << "letting the ones we already started finish"
<< dendl;
ldout(pg->cct, 10) << "letting the ones we already started finish" << dendl;
return transit< WaitRepops >();
}
}

View File

@ -1429,7 +1429,7 @@ public:
PG::_init(*t, child, pool);
}
private:
struct NotTrimming;
// Snap trimmer event; AwaitAsyncWork declares a custom reaction for it
// (AwaitAsyncWork::react(const DoSnapWork&)).
struct DoSnapWork : boost::statechart::event< DoSnapWork > {
DoSnapWork() : boost::statechart::event < DoSnapWork >() {}
};
@ -1448,49 +1448,148 @@ private:
// Snap trimmer event; the Trimming state declares a transition to
// NotTrimming on it.
struct Reset : boost::statechart::event< Reset > {
Reset() : boost::statechart::event< Reset >() {}
};
// Snap trimmer event fed to snap_trimmer_machine by
// WaitReservation::ReservationCB::finish() when that callback runs without
// having been canceled; WaitReservation declares a custom reaction for it.
struct SnapTrimReserved : boost::statechart::event< SnapTrimReserved > {
SnapTrimReserved() : boost::statechart::event< SnapTrimReserved >() {}
};
struct NotTrimming;
// Snap trimming state machine for one PrimaryLogPG; NotTrimming is its
// initial state.  The single instance is the snap_trimmer_machine member.
struct SnapTrimmer : public boost::statechart::state_machine< SnapTrimmer, NotTrimming > {
// The PG this machine trims for (raw, non-owning pointer).
PrimaryLogPG *pg;
// NOTE(review): in_flight and snap_to_trim are declared again on the
// Trimming state in this listing, and other code here reads them via
// context<Trimming>() -- this looks like pre- and post-change lines
// interleaved; confirm which copy is meant to remain.
set<hobject_t> in_flight;
snapid_t snap_to_trim;
explicit SnapTrimmer(PrimaryLogPG *pg) : pg(pg) {}
~SnapTrimmer();
void log_enter(const char *state_name);
void log_exit(const char *state_name, utime_t duration);
// True when the PG is clean, its scrubber is not active, and snap_trimq is
// non-empty.
bool can_trim() {
return pg->is_clean() && !pg->scrubber.active && !pg->snap_trimq.empty();
}
} snap_trimmer_machine;
/* SnapTrimmerStates */
struct AwaitAsyncWork : boost::statechart::state< AwaitAsyncWork, SnapTrimmer >, NamedState {
struct WaitReservation;
// Outer state of the snap trimmer; WaitReservation is its initial inner
// state.  It carries the per-trim bookkeeping (in_flight, snap_to_trim) and
// cancels the PG's snap trim reservation whenever it is exited.
// NOTE(review): this listing also contains lines that appear to belong to a
// prior AwaitAsyncWork definition (the DoSnapWork reaction, the
// AwaitAsyncWork constructor declaration, and the bodiless exit()/react()
// declarations) -- confirm against the real header.
struct Trimming : boost::statechart::state< Trimming, SnapTrimmer, WaitReservation >, NamedState {
typedef boost::mpl::list <
boost::statechart::custom_reaction< DoSnapWork >,
boost::statechart::custom_reaction< KickTrim >,
boost::statechart::transition< Reset, NotTrimming >
> reactions;
explicit AwaitAsyncWork(my_context ctx);
void exit();
boost::statechart::result react(const DoSnapWork&);
// Objects with trim work currently outstanding; the inner states assert on
// whether this is empty when they are entered.
set<hobject_t> in_flight;
// Snap being trimmed; assigned from snap_trimq.range_start().
snapid_t snap_to_trim;
// Entering Trimming requires that can_trim() holds and nothing is in flight.
explicit Trimming(my_context ctx)
: my_base(ctx),
NamedState(context< SnapTrimmer >().pg->cct, "Trimming") {
context< SnapTrimmer >().log_enter(state_name);
assert(context< SnapTrimmer >().can_trim());
assert(in_flight.empty());
}
// Logs the exit, then cancels this PG's reservation with the OSD's
// snap_reserver.
void exit() {
context< SnapTrimmer >().log_exit(state_name, enter_time);
auto *pg = context< SnapTrimmer >().pg;
pg->osd->snap_reserver.cancel_reservation(pg->get_pgid());
}
// A KickTrim received while in Trimming is dropped.
boost::statechart::result react(const KickTrim&) {
return discard_event();
}
};
struct WaitRWLock : boost::statechart::state< WaitRWLock, SnapTrimmer >, NamedState {
/* SnapTrimmerStates */
// Inner state of Trimming that reacts to TrimWriteUnblocked.
// NOTE(review): two versions of the reaction list, of the in_flight assert
// and of react() appear interleaved below; as written, everything after the
// first `return discard_event();` in react() is unreachable -- confirm which
// lines are live.
struct WaitRWLock : boost::statechart::state< WaitRWLock, Trimming >, NamedState {
typedef boost::mpl::list <
boost::statechart::custom_reaction< TrimWriteUnblocked >,
boost::statechart::custom_reaction< KickTrim >,
boost::statechart::transition< Reset, NotTrimming >
boost::statechart::custom_reaction< TrimWriteUnblocked >
> reactions;
// Entering this state requires that nothing is in flight.
explicit WaitRWLock(my_context ctx)
: my_base(ctx),
NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitRWLock") {
context< SnapTrimmer >().log_enter(state_name);
assert(context<SnapTrimmer>().in_flight.empty());
assert(context<Trimming>().in_flight.empty());
}
void exit() {
context< SnapTrimmer >().log_exit(state_name, enter_time);
}
// The if/else form re-checks can_trim(): when it is false, post KickTrim and
// go to NotTrimming; otherwise go to AwaitAsyncWork.
boost::statechart::result react(const TrimWriteUnblocked&) {
post_event(KickTrim());
return discard_event();
if (!context< SnapTrimmer >().can_trim()) {
post_event(KickTrim());
return transit< NotTrimming >();
} else {
return transit< AwaitAsyncWork >();
}
}
};
/* Inner state of Trimming, entered only while Trimming::in_flight is
 * non-empty.  On RepopsComplete it re-checks SnapTrimmer::can_trim() and
 * either continues with AwaitAsyncWork or falls back to NotTrimming. */
struct WaitRepops : boost::statechart::state< WaitRepops, Trimming >, NamedState {
  using reactions = boost::mpl::list<
    boost::statechart::custom_reaction< RepopsComplete >
    >;

  explicit WaitRepops(my_context ctx)
    : my_base(ctx),
      NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitRepops") {
    auto &machine = context< SnapTrimmer >();
    machine.log_enter(state_name);
    assert(!context<Trimming>().in_flight.empty());
  }

  void exit() {
    auto &machine = context< SnapTrimmer >();
    machine.log_exit(state_name, enter_time);
  }

  boost::statechart::result react(const RepopsComplete&) {
    // Still able to trim: carry on with the next batch of work.
    if (context< SnapTrimmer >().can_trim()) {
      return transit< AwaitAsyncWork >();
    }
    // Otherwise leave Trimming and let NotTrimming re-evaluate via KickTrim.
    post_event(KickTrim());
    return transit< NotTrimming >();
  }
};
/* Inner state of Trimming whose only reaction is to DoSnapWork.  The
 * constructor and the reaction are defined out of line. */
struct AwaitAsyncWork : boost::statechart::state< AwaitAsyncWork, Trimming >, NamedState {
  using reactions = boost::mpl::list<
    boost::statechart::custom_reaction< DoSnapWork >
    >;

  explicit AwaitAsyncWork(my_context ctx);
  boost::statechart::result react(const DoSnapWork&);

  void exit() {
    auto &machine = context< SnapTrimmer >();
    machine.log_exit(state_name, enter_time);
  }
};
struct WaitReservation : boost::statechart::state< WaitReservation, Trimming >, NamedState {
/* WaitReservation is a sub-state of trimming simply so that exiting Trimming
* always cancels the reservation */
typedef boost::mpl::list <
boost::statechart::custom_reaction< SnapTrimReserved >
> reactions;
/* Completion callback passed to snap_reserver.request_reservation() by
 * WaitReservation.
 *
 * finish() takes the PG lock and, unless cancel() was called first, feeds
 * SnapTrimReserved to the PG's snap_trimmer_machine.  cancel() must be
 * called with the PG locked and at most once (both are asserted).
 * The PG is held through a PrimaryLogPGRef -- presumably so that it stays
 * alive until the callback has run; confirm against PrimaryLogPGRef. */
struct ReservationCB : public Context {
  PrimaryLogPGRef pg;
  bool canceled = false;

  // explicit, like the other single-argument constructors in this header.
  explicit ReservationCB(PrimaryLogPG *pg) : pg(pg) {}

  void finish(int) override {
    pg->lock();
    if (!canceled)
      pg->snap_trimmer_machine.process_event(SnapTrimReserved());
    pg->unlock();
  }

  void cancel() {
    assert(pg->is_locked());
    assert(!canceled);
    canceled = true;
  }
};
ReservationCB *pending = nullptr;
// Logs entry, checks that nothing is in flight, then allocates a
// ReservationCB for this PG, remembers it in `pending`, and passes it to the
// OSD's snap_reserver as the callback for this PG's reservation request
// (third argument 0).
explicit WaitReservation(my_context ctx)
  : my_base(ctx),
    NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitReservation") {
  auto &machine = context< SnapTrimmer >();
  machine.log_enter(state_name);
  assert(context<Trimming>().in_flight.empty());

  auto *trim_pg = machine.pg;
  pending = new ReservationCB(trim_pg);
  trim_pg->osd->snap_reserver.request_reservation(trim_pg->get_pgid(), pending, 0);
}
boost::statechart::result react(const SnapTrimReserved&);
// Logs the exit; if a reservation callback is still recorded in `pending`,
// marks it canceled and forgets it.
void exit() {
  context< SnapTrimmer >().log_exit(state_name, enter_time);
  if (pending != nullptr) {
    pending->cancel();
    pending = nullptr;
  }
}
boost::statechart::result react(const KickTrim&) {
return discard_event();
@ -1507,7 +1606,6 @@ private:
: my_base(ctx),
NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitScrub") {
context< SnapTrimmer >().log_enter(state_name);
assert(context<SnapTrimmer>().in_flight.empty());
}
void exit() {
context< SnapTrimmer >().log_exit(state_name, enter_time);
@ -1521,35 +1619,6 @@ private:
}
};
// State nested directly under SnapTrimmer that waits for RepopsComplete.
// NOTE(review): another WaitRepops, nested under Trimming instead, is also
// defined in this listing; this one looks like the pre-change version --
// confirm which definition is live.
struct WaitRepops : boost::statechart::state< WaitRepops, SnapTrimmer >, NamedState {
typedef boost::mpl::list <
boost::statechart::custom_reaction< RepopsComplete >,
boost::statechart::custom_reaction< KickTrim >,
boost::statechart::custom_reaction< Reset >
> reactions;
// Entered only while SnapTrimmer::in_flight is non-empty.
explicit WaitRepops(my_context ctx)
: my_base(ctx),
NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitRepops") {
context< SnapTrimmer >().log_enter(state_name);
assert(!context<SnapTrimmer>().in_flight.empty());
}
// By the time this state is left, in_flight must have been emptied.
void exit() {
context< SnapTrimmer >().log_exit(state_name, enter_time);
assert(context<SnapTrimmer>().in_flight.empty());
}
// Posts KickTrim and returns to NotTrimming.
boost::statechart::result react(const RepopsComplete&) {
post_event(KickTrim());
return transit< NotTrimming >();
}
// A KickTrim received while waiting is dropped.
boost::statechart::result react(const KickTrim&) {
return discard_event();
}
// Clears in_flight before transiting, so the assert in exit() holds.
boost::statechart::result react(const Reset&) {
context<SnapTrimmer>().in_flight.clear();
return transit< NotTrimming>();
}
};
struct NotTrimming : boost::statechart::state< NotTrimming, SnapTrimmer >, NamedState {
typedef boost::mpl::list <
boost::statechart::custom_reaction< KickTrim >,