osd: use new and improved calc_pg_role()

An acting set might be [0,1,2,1], and we want pgs 1.0s2 and 1.0s4 on
osd.1 to get roles 1 and 3, respectively (instead of 1 and -1).  This
allows the second EC PG on the OSD to have role >= 0.

This probably was unnoticed before, but now it leads to hangs in the
rados/thrash-erasure-code collection because proc_lease() bails out
when is_nonprimary() fails (due to role < 0).

Fixes: https://tracker.ceph.com/issues/43189
Signed-off-by: Sage Weil <sage@redhat.com>
This commit is contained in:
Sage Weil 2019-12-09 09:44:11 -06:00
parent b760533591
commit 1cf2b90640
5 changed files with 14 additions and 29 deletions

View File

@ -713,11 +713,8 @@ seastar::future<Ref<PG>> OSD::handle_pg_create_info(
startmap->pg_to_up_acting_osds(
info->pgid.pgid, &up, &up_primary, &acting, &acting_primary);
int role = startmap->calc_pg_role(whoami, acting, acting.size());
if (!pp->is_replicated() && role != info->pgid.shard) {
role = -1;
}
int role = startmap->calc_pg_role(pg_shard_t(whoami, info->pgid.shard),
acting);
create_pg_collection(
rctx.transaction,

View File

@ -316,11 +316,8 @@ seastar::future<> PG::read_state(crimson::os::FuturizedStore* store)
acting,
up_primary,
primary);
int rr = OSDMap::calc_pg_role(pg_whoami.osd, acting);
if (peering_state.get_pool().info.is_replicated() || rr == pg_whoami.shard)
peering_state.set_role(rr);
else
peering_state.set_role(-1);
int rr = OSDMap::calc_pg_role(pg_whoami, acting);
peering_state.set_role(rr);
epoch_t epoch = get_osdmap_epoch();
shard_services.start_operation<LocalPeeringEvent>(

View File

@ -4809,10 +4809,7 @@ PGRef OSD::handle_pg_create_info(const OSDMapRef& osdmap,
rctx.transaction, pgid, pgid.get_split_bits(pp->get_pg_num()));
init_pg_ondisk(rctx.transaction, pgid, pp);
int role = startmap->calc_pg_role(whoami, acting, acting.size());
if (!pp->is_replicated() && role != pgid.shard) {
role = -1;
}
int role = startmap->calc_pg_role(pg_shard_t(whoami, pgid.shard), acting);
PGRef pg = _make_pg(startmap, pgid);
pg->ch = store->create_new_collection(pg->coll);
@ -9001,12 +8998,16 @@ void OSD::handle_pg_create(OpRequestRef op)
dout(20) << "mkpg " << on << " e" << created << "@" << ci->second << dendl;
spg_t pgid;
bool mapped = osdmap->get_primary_shard(on, &pgid);
ceph_assert(mapped);
// is it still ours?
vector<int> up, acting;
int up_primary = -1;
int acting_primary = -1;
osdmap->pg_to_up_acting_osds(on, &up, &up_primary, &acting, &acting_primary);
int role = osdmap->calc_pg_role(whoami, acting, acting.size());
int role = osdmap->calc_pg_role(pg_shard_t(whoami, pgid.shard), acting);
if (acting_primary != whoami) {
dout(10) << "mkpg " << on << " not acting_primary (" << acting_primary
@ -9014,9 +9015,6 @@ void OSD::handle_pg_create(OpRequestRef op)
continue;
}
spg_t pgid;
bool mapped = osdmap->get_primary_shard(on, &pgid);
ceph_assert(mapped);
PastIntervals pi;
pg_history_t history;

View File

@ -1153,11 +1153,7 @@ void PG::read_state(ObjectStore *store)
acting,
up_primary,
primary);
int rr = OSDMap::calc_pg_role(osd->whoami, acting);
if (pool.info.is_replicated() || rr == pg_whoami.shard)
recovery_state.set_role(rr);
else
recovery_state.set_role(-1);
recovery_state.set_role(OSDMap::calc_pg_role(pg_whoami, acting));
}
// init pool options

View File

@ -580,11 +580,8 @@ void PeeringState::start_peering_interval(
else
state_clear(PG_STATE_REMAPPED);
int role = osdmap->calc_pg_role(pg_whoami.osd, acting, acting.size());
if (pool.info.is_replicated() || role == pg_whoami.shard)
set_role(role);
else
set_role(-1);
int role = osdmap->calc_pg_role(pg_whoami, acting);
set_role(role);
// did acting, up, primary|acker change?
if (!lastmap) {
@ -2931,7 +2928,7 @@ void PeeringState::split_into(
newacting,
up_primary,
primary);
child->role = OSDMap::calc_pg_role(pg_whoami.osd, child->acting);
child->role = OSDMap::calc_pg_role(pg_whoami, child->acting);
// this comparison includes primary rank via pg_shard_t
if (get_primary() != child->get_primary())