pg: reduce scrub write lock window

Wait for all replicas to construct the base scrub map before finalizing
the scrub and locking out writes.

Signed-off-by: Mike Ryan <mike.ryan@inktank.com>
This commit is contained in:
Mike Ryan 2012-06-26 16:25:27 -07:00 committed by Samuel Just
parent 27409aa161
commit e1d4855fa1
3 changed files with 54 additions and 15 deletions

View File

@ -2630,9 +2630,19 @@ void PG::sub_op_scrub_map(OpRequestRef op)
scrub_received_maps[from].decode(p, info.pgid.pool());
}
if (--scrub_waiting_on == 0) {
assert(last_update_applied == info.last_update);
osd->scrub_finalize_wq.queue(this);
--scrub_waiting_on;
if (scrub_waiting_on == 0) {
if (finalizing_scrub) { // incremental lists received
osd->scrub_finalize_wq.queue(this);
} else { // initial lists received
scrub_block_writes = true;
if (last_update_applied == info.last_update) {
finalizing_scrub = true;
scrub_gather_replica_maps();
++scrub_waiting_on;
osd->scrub_wq.queue(this);
}
}
}
}
@ -2965,8 +2975,9 @@ void PG::replica_scrub(MOSDRepScrub *msg)
* PG_STATE_SCRUBBING is set when the scrub is queued
*
* Once the initial scrub has completed and the requests have gone out to
* replicas for maps, finalizing_scrub is set. scrub_waiting_on is set to
* the number of maps outstanding (active.size()).
* replicas for maps, we wait for the replicas to complete their maps
* (scrub_active has been set since the scrub started). Once the maps are
* received, scrub_block_writes is set and writes are locked out.
* scrub_waiting_on is set to the number of maps outstanding (active.size()).
*
* If last_update_applied is behind the head of the log, scrub returns to be
* requeued by op_applied.
@ -2998,8 +3009,10 @@ void PG::scrub()
return;
}
if (!finalizing_scrub) {
if (!scrub_active) {
dout(10) << "scrub start" << dendl;
scrub_active = true;
update_stats();
scrub_received_maps.clear();
scrub_epoch_start = info.history.same_interval_since;
@ -3037,18 +3050,35 @@ void PG::scrub()
return;
}
finalizing_scrub = true;
--scrub_waiting_on;
if (scrub_waiting_on == 0) {
// the replicas have completed their scrub map, so lock out writes
scrub_block_writes = true;
} else {
dout(10) << "wait for replicas to build initial scrub map" << dendl;
unlock();
return;
}
if (last_update_applied != info.last_update) {
dout(10) << "wait for cleanup" << dendl;
unlock();
return;
}
// fall through if last_update_applied == info.last_update and scrub_waiting_on == 0
// request incrementals from replicas
scrub_gather_replica_maps();
++scrub_waiting_on;
}
dout(10) << "clean up scrub" << dendl;
assert(last_update_applied == info.last_update);
finalizing_scrub = true;
if (scrub_epoch_start != info.history.same_interval_since) {
dout(10) << "scrub pg changed, aborting" << dendl;
scrub_clear_state();
@ -3085,6 +3115,8 @@ void PG::scrub_clear_state()
osd->requeue_ops(this, waiting_for_active);
finalizing_scrub = false;
scrub_block_writes = false;
scrub_active = false;
if (active_rep_scrub) {
active_rep_scrub->put();
active_rep_scrub = NULL;

View File

@ -736,7 +736,9 @@ public:
// -- scrub --
set<int> scrub_reserved_peers;
map<int,ScrubMap> scrub_received_maps;
bool finalizing_scrub;
bool finalizing_scrub;
bool scrub_block_writes;
bool scrub_active;
bool scrub_reserved, scrub_reserve_failed;
int scrub_waiting_on;
epoch_t scrub_epoch_start;
@ -1243,6 +1245,8 @@ public:
osr(stringify(p)),
finish_sync_event(NULL),
finalizing_scrub(false),
scrub_block_writes(false),
scrub_active(false),
scrub_reserved(false), scrub_reserve_failed(false),
scrub_waiting_on(0),
active_rep_scrub(0),

View File

@ -614,7 +614,7 @@ void ReplicatedPG::do_op(OpRequestRef op)
dout(10) << "do_op " << *m << (m->may_write() ? " may_write" : "") << dendl;
if (finalizing_scrub && m->may_write()) {
if (scrub_block_writes && m->may_write()) {
dout(20) << __func__ << ": waiting for scrub" << dendl;
waiting_for_active.push_back(op);
op->mark_delayed();
@ -1410,7 +1410,7 @@ bool ReplicatedPG::snap_trimmer()
put();
return true;
}
if (!finalizing_scrub) {
if (!scrub_block_writes) {
dout(10) << "snap_trimmer posting" << dendl;
snap_trimmer_machine.process_event(SnapTrim());
}
@ -3408,8 +3408,11 @@ void ReplicatedPG::op_applied(RepGather *repop)
assert(info.last_update >= repop->v);
assert(last_update_applied < repop->v);
last_update_applied = repop->v;
if (last_update_applied == info.last_update && finalizing_scrub) {
if (last_update_applied == info.last_update && scrub_block_writes) {
dout(10) << "requeueing scrub for cleanup" << dendl;
finalizing_scrub = true;
scrub_gather_replica_maps();
++scrub_waiting_on;
osd->scrub_wq.queue(this);
}
@ -5680,7 +5683,7 @@ void ReplicatedPG::on_change()
clear_scrub_reserved();
// clear scrub state
if (finalizing_scrub) {
if (scrub_block_writes) {
scrub_clear_state();
} else if (is_scrubbing()) {
state_clear(PG_STATE_SCRUBBING);
@ -6630,7 +6633,7 @@ boost::statechart::result ReplicatedPG::NotTrimming::react(const SnapTrim&)
} else if (!pg->is_primary() || !pg->is_active() || !pg->is_clean()) {
dout(10) << "NotTrimming not primary, active, clean" << dendl;
return discard_event();
} else if (pg->finalizing_scrub) {
} else if (pg->scrub_block_writes) {
dout(10) << "NotTrimming finalizing scrub" << dendl;
pg->queue_snap_trim();
return discard_event();