Merge branch 'wip_4955' into next

Reviewed-by: Sage Weil <sage@inktank.com>
This commit is contained in:
Samuel Just 2013-05-09 22:37:37 -07:00
commit fd90105683
4 changed files with 71 additions and 47 deletions

View File

@ -1564,10 +1564,7 @@ PG *OSD::_open_lock_pg(
pg_map[pgid] = pg;
if (hold_map_lock)
pg->lock_with_map_lock_held(no_lockdep_check);
else
pg->lock(no_lockdep_check);
pg->lock(no_lockdep_check);
pg->get("PGMap"); // because it's in pg_map
return pg;
}
@ -1674,7 +1671,7 @@ PG *OSD::_lookup_lock_pg_with_map_lock_held(pg_t pgid)
assert(osd_lock.is_locked());
assert(pg_map.count(pgid));
PG *pg = pg_map[pgid];
pg->lock_with_map_lock_held();
pg->lock();
return pg;
}
@ -4989,7 +4986,7 @@ void OSD::do_split(PG *parent, set<pg_t>& childpgids, ObjectStore::Transaction&
{
dout(10) << "do_split to " << childpgids << " on " << *parent << dendl;
parent->lock_with_map_lock_held();
parent->lock();
// create and lock children
map<pg_t,PG*> children;
@ -6419,7 +6416,7 @@ void OSD::enqueue_op(PG *pg, OpRequestRef op)
<< " cost " << op->request->get_cost()
<< " latency " << latency
<< " " << *(op->request) << dendl;
op_wq.queue(make_pair(PGRef(pg), op));
pg->queue_op(op);
}
void OSD::OpWQ::_enqueue(pair<PGRef, OpRequestRef> item)

View File

@ -139,6 +139,7 @@ PG::PG(OSDService *o, OSDMapRef curmap,
p.m_seed,
p.get_split_bits(curmap->get_pg_num(_pool.id)),
_pool.id),
map_lock("PG::map_lock"),
osdmap_ref(curmap), pool(_pool),
_lock("PG::_lock"),
ref(0),
@ -194,25 +195,6 @@ void PG::lock(bool no_lockdep)
dout(30) << "lock" << dendl;
}
// Acquire the PG lock when the caller already holds the OSD map lock.
// NOTE(review): unlike PG::lock() this does not refresh osdmap_ref --
// presumably safe because the caller's map lock pins the OSD's current
// map; confirm against PG::lock() (its body is not visible in this hunk).
void PG::lock_with_map_lock_held(bool no_lockdep)
{
_lock.Lock(no_lockdep);
// if we have unrecorded dirty state with the lock dropped, there is a bug
assert(!dirty_info);
assert(!dirty_big_info);
assert(!dirty_log);
dout(30) << "lock_with_map_lock_held" << dendl;
}
// Caller believes it still holds the PG lock; verify that, then refresh
// our cached osdmap_ref from the OSD's current map (caller holds the OSD
// map lock, so osd->osdmap is stable while we copy it).
void PG::reassert_lock_with_map_lock_held()
{
assert(_lock.is_locked());
osdmap_ref = osd->osdmap;
dout(30) << "reassert_lock_with_map_lock_held" << dendl;
}
std::string PG::gen_prefix() const
{
stringstream out;
@ -1767,6 +1749,36 @@ bool PG::op_has_sufficient_caps(OpRequestRef op)
return cap;
}
// Requeue ops parked in waiting_for_map whose required map epoch is now
// satisfied by our cached map.  Takes map_lock internally; drains strictly
// from the front and stops at the first op that still needs a newer map,
// so the original arrival order of ops is preserved.
void PG::take_op_map_waiters()
{
Mutex::Locker l(map_lock);
for (list<OpRequestRef>::iterator i = waiting_for_map.begin();
i != waiting_for_map.end();
) {
if (op_must_wait_for_map(get_osdmap_with_maplock(), *i)) {
// this op still needs a newer map; everything behind it must wait too
break;
} else {
// ready: hand it back to the OSD op workqueue
osd->op_wq.queue(make_pair(PGRef(this), *i));
// post-increment erase keeps the iterator valid for a std::list
waiting_for_map.erase(i++);
}
}
}
// Accept an op dispatched to this PG (called from OSD::enqueue_op).
// Holds only map_lock, not the PG lock: if ops are already parked waiting
// for a newer map, or this op itself requires one, append it to
// waiting_for_map; otherwise queue it directly on the OSD op workqueue.
void PG::queue_op(OpRequestRef op)
{
Mutex::Locker l(map_lock);
if (!waiting_for_map.empty()) {
// preserve ordering
waiting_for_map.push_back(op);
return;
}
if (op_must_wait_for_map(get_osdmap_with_maplock(), op)) {
// op references a map epoch newer than our cached one; park it until
// take_op_map_waiters() runs after a map update
waiting_for_map.push_back(op);
return;
}
osd->op_wq.queue(make_pair(PGRef(this), op));
}
void PG::do_request(OpRequestRef op)
{
// do any pending flush
@ -1776,11 +1788,7 @@ void PG::do_request(OpRequestRef op)
osd->reply_op_error(op, -EPERM);
return;
}
if (must_delay_request(op)) {
dout(20) << " waiting for map on " << op << dendl;
waiting_for_map.push_back(op);
return;
}
assert(!op_must_wait_for_map(get_osdmap(), op));
if (can_discard_request(op)) {
return;
}
@ -2118,7 +2126,6 @@ static void split_replay_queue(
void PG::split_ops(PG *child, unsigned split_bits) {
unsigned match = child->info.pgid.m_seed;
assert(waiting_for_map.empty());
assert(waiting_for_all_missing.empty());
assert(waiting_for_missing_object.empty());
assert(waiting_for_degraded_object.empty());
@ -2128,12 +2135,16 @@ void PG::split_ops(PG *child, unsigned split_bits) {
osd->dequeue_pg(this, &waiting_for_active);
split_list(&waiting_for_active, &(child->waiting_for_active), match, split_bits);
{
Mutex::Locker l(map_lock); // to avoid a race with the osd dispatch
split_list(&waiting_for_map, &(child->waiting_for_map), match, split_bits);
}
}
void PG::split_into(pg_t child_pgid, PG *child, unsigned split_bits)
{
child->update_snap_mapper_bits(split_bits);
child->osdmap_ref = osdmap_ref;
child->update_osdmap_ref(get_osdmap());
child->pool = pool;
@ -5380,27 +5391,32 @@ bool PG::split_request(OpRequestRef op, unsigned match, unsigned bits)
return false;
}
bool PG::must_delay_request(OpRequestRef op)
bool PG::op_must_wait_for_map(OSDMapRef curmap, OpRequestRef op)
{
switch (op->request->get_type()) {
case CEPH_MSG_OSD_OP:
return !have_same_or_newer_map(
curmap,
static_cast<MOSDOp*>(op->request)->get_map_epoch());
case MSG_OSD_SUBOP:
return !have_same_or_newer_map(
curmap,
static_cast<MOSDSubOp*>(op->request)->map_epoch);
case MSG_OSD_SUBOPREPLY:
return !have_same_or_newer_map(
curmap,
static_cast<MOSDSubOpReply*>(op->request)->map_epoch);
case MSG_OSD_PG_SCAN:
return !have_same_or_newer_map(
curmap,
static_cast<MOSDPGScan*>(op->request)->map_epoch);
case MSG_OSD_PG_BACKFILL:
return !have_same_or_newer_map(
curmap,
static_cast<MOSDPGBackfill*>(op->request)->map_epoch);
}
assert(0);
@ -5410,7 +5426,7 @@ bool PG::must_delay_request(OpRequestRef op)
void PG::take_waiters()
{
dout(10) << "take_waiters" << dendl;
requeue_ops(waiting_for_map);
take_op_map_waiters();
for (list<CephPeeringEvtRef>::iterator i = peering_waiters.begin();
i != peering_waiters.end();
++i) osd->queue_for_peering(this);
@ -5505,7 +5521,7 @@ void PG::handle_advance_map(OSDMapRef osdmap, OSDMapRef lastmap,
assert(lastmap->get_epoch() == osdmap_ref->get_epoch());
assert(lastmap == osdmap_ref);
dout(10) << "handle_advance_map " << newup << "/" << newacting << dendl;
osdmap_ref = osdmap;
update_osdmap_ref(osdmap);
pool.update(osdmap);
AdvMap evt(osdmap, lastmap, newup, newacting);
recovery_state.handle_event(evt, rctx);

View File

@ -381,9 +381,27 @@ public:
snap_mapper.update_bits(bits);
}
protected:
// Ops waiting for map, should be queued at back
Mutex map_lock;
list<OpRequestRef> waiting_for_map;
OSDMapRef osdmap_ref;
PGPool pool;
void queue_op(OpRequestRef op);
void take_op_map_waiters();
// Swap in a new cached osdmap.  Writer must hold the PG lock; map_lock is
// taken so readers using get_osdmap_with_maplock() see a consistent ref.
void update_osdmap_ref(OSDMapRef newmap) {
assert(_lock.is_locked_by_me());
Mutex::Locker l(map_lock);
osdmap_ref = newmap;
}
// Read the cached osdmap under map_lock alone (the PG lock is not
// required, unlike get_osdmap()).  The ref must already be populated.
OSDMapRef get_osdmap_with_maplock() const {
assert(map_lock.is_locked());
assert(osdmap_ref);
return osdmap_ref;
}
OSDMapRef get_osdmap() const {
assert(is_locked());
assert(osdmap_ref);
@ -420,13 +438,6 @@ public:
_lock.Unlock();
}
/* During handle_osd_map, the osd holds a write lock to the osdmap.
* *_with_map_lock_held assume that the map_lock is already held */
void lock_with_map_lock_held(bool no_lockdep = false);
// assert we still have lock held, and update our map ref
void reassert_lock_with_map_lock_held();
void assert_locked() {
assert(_lock.is_locked());
}
@ -699,8 +710,6 @@ protected:
// Ops waiting on backfill_pos to change
list<OpRequestRef> waiting_for_backfill_pos;
list<OpRequestRef> waiting_for_map;
list<OpRequestRef> waiting_for_active;
list<OpRequestRef> waiting_for_all_missing;
map<hobject_t, list<OpRequestRef> > waiting_for_missing_object,
@ -1939,7 +1948,7 @@ public:
bool can_discard_backfill(OpRequestRef op);
bool can_discard_request(OpRequestRef op);
bool must_delay_request(OpRequestRef op);
static bool op_must_wait_for_map(OSDMapRef curmap, OpRequestRef op);
static bool split_request(OpRequestRef op, unsigned match, unsigned bits);
@ -1947,6 +1956,9 @@ public:
bool old_peering_evt(CephPeeringEvtRef evt) {
return old_peering_msg(evt->get_epoch_sent(), evt->get_epoch_requested());
}
// Does the supplied map already cover epoch e (same epoch or newer)?
static bool have_same_or_newer_map(OSDMapRef osdmap, epoch_t e) {
return osdmap->get_epoch() >= e;
}
// Does our current cached map cover epoch e?  Requires the PG lock to be
// held, since get_osdmap() asserts is_locked().
bool have_same_or_newer_map(epoch_t e) {
return get_osdmap()->get_epoch() >= e;
}

View File

@ -6328,7 +6328,6 @@ void ReplicatedPG::on_change()
// requeue everything in the reverse order they should be
// reexamined.
requeue_ops(waiting_for_map);
clear_scrub_reserved();
scrub_clear_state();