diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index b81db7cd676..81162012b0e 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -459,7 +459,7 @@ void PGMap::remove_osd(int osd) } } -void PGMap::stat_pg_add(const pg_t &pgid, const pg_stat_t &s, bool nocreating, +void PGMap::stat_pg_add(const pg_t &pgid, const pg_stat_t &s, bool sameosds) { pg_pool_sum[pgid.pool()].add(s); @@ -468,12 +468,10 @@ void PGMap::stat_pg_add(const pg_t &pgid, const pg_stat_t &s, bool nocreating, num_pg++; num_pg_by_state[s.state]++; - if (!nocreating) { - if (s.state & PG_STATE_CREATING) { - creating_pgs.insert(pgid); - if (s.acting_primary >= 0) { - creating_pgs_by_osd_epoch[s.acting_primary][s.mapping_epoch].insert(pgid); - } + if (s.state & PG_STATE_CREATING) { + creating_pgs.insert(pgid); + if (s.acting_primary >= 0) { + creating_pgs_by_osd_epoch[s.acting_primary][s.mapping_epoch].insert(pgid); } } @@ -492,7 +490,7 @@ void PGMap::stat_pg_add(const pg_t &pgid, const pg_stat_t &s, bool nocreating, pg_by_osd[*p].insert(pgid); } -void PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s, bool nocreating, +void PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s, bool sameosds) { pool_stat_t& ps = pg_pool_sum[pgid.pool()]; @@ -507,17 +505,15 @@ void PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s, bool nocreating, if (end == 0) num_pg_by_state.erase(s.state); - if (!nocreating) { - if (s.state & PG_STATE_CREATING) { - creating_pgs.erase(pgid); - if (s.acting_primary >= 0) { - map >& r = creating_pgs_by_osd_epoch[s.acting_primary]; - r[s.mapping_epoch].erase(pgid); - if (r[s.mapping_epoch].empty()) - r.erase(s.mapping_epoch); - if (r.empty()) - creating_pgs_by_osd_epoch.erase(s.acting_primary); - } + if (s.state & PG_STATE_CREATING) { + creating_pgs.erase(pgid); + if (s.acting_primary >= 0) { + map >& r = creating_pgs_by_osd_epoch[s.acting_primary]; + r[s.mapping_epoch].erase(pgid); + if (r[s.mapping_epoch].empty()) + r.erase(s.mapping_epoch); + if (r.empty()) + creating_pgs_by_osd_epoch.erase(s.acting_primary); } } @@ -559,9 +555,9 @@ void PGMap::stat_pg_update(const pg_t pgid, pg_stat_t& s, s.up == n.up && s.blocked_by == n.blocked_by; - stat_pg_sub(pgid, s, false, sameosds); + stat_pg_sub(pgid, s, sameosds); s = n; - stat_pg_add(pgid, n, false, sameosds); + stat_pg_add(pgid, n, sameosds); } void PGMap::stat_osd_add(const osd_stat_t &s) diff --git a/src/mon/PGMap.h b/src/mon/PGMap.h index 8c2b3cadea2..335c1207bc9 100644 --- a/src/mon/PGMap.h +++ b/src/mon/PGMap.h @@ -258,9 +258,9 @@ public: void redo_full_sets(); void register_nearfull_status(int osd, const osd_stat_t& s); void calc_stats(); - void stat_pg_add(const pg_t &pgid, const pg_stat_t &s, bool nocreating=false, + void stat_pg_add(const pg_t &pgid, const pg_stat_t &s, bool sameosds=false); - void stat_pg_sub(const pg_t &pgid, const pg_stat_t &s, bool nocreating=false, + void stat_pg_sub(const pg_t &pgid, const pg_stat_t &s, bool sameosds=false); void stat_pg_update(const pg_t pgid, pg_stat_t &prev, bufferlist::iterator& blp); void stat_osd_add(const osd_stat_t &s); diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index 5cd2edb58c2..c1566f9c56a 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -440,16 +440,19 @@ void PGMonitor::apply_pgmap_delta(bufferlist& bl) r = -ENOENT; } else { r = mon->store->get(pgmap_pg_prefix, stringify(pgid), pgbl); - dout(20) << " refreshing pg " << pgid << " got " << r << " len " - << pgbl.length() << dendl; - if (pg_pool_sum_old.count(pgid.pool()) == 0) pg_pool_sum_old[pgid.pool()] = pg_map.pg_pool_sum[pgid.pool()]; } if (r >= 0) { pg_map.update_pg(pgid, pgbl); + dout(20) << " refreshing pg " << pgid + << " " << pg_map.pg_stat[pgid].reported_epoch + << ":" << pg_map.pg_stat[pgid].reported_seq + << " " << pg_state_string(pg_map.pg_stat[pgid].state) + << dendl; } else { + dout(20) << " removing pg " << pgid << dendl; pg_map.remove_pg(pgid); if (pgid.ps() == 0) deleted_pools.insert(pgid.pool()); diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 5994b01c73b..69f47def18e 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -3402,11 +3402,10 @@ void OSD::build_past_intervals_parallel() */ void OSD::handle_pg_peering_evt( spg_t pgid, - const pg_info_t& info, + const pg_history_t& orig_history, pg_interval_map_t& pi, epoch_t epoch, - pg_shard_t from, - bool primary, + bool same_primary, PG::CephPeeringEvtRef evt) { if (service.splitting(pgid)) { @@ -3424,11 +3423,18 @@ void OSD::handle_pg_peering_evt( pgid.pgid, &up, &up_primary, &acting, &acting_primary); int role = osdmap->calc_pg_role(whoami, acting, acting.size()); - pg_history_t history = info.history; + pg_history_t history = orig_history; bool valid_history = project_pg_history( pgid, history, epoch, up, up_primary, acting, acting_primary); - if (!valid_history || epoch < history.same_interval_since) { + if (same_primary && epoch < history.same_primary_since) { + dout(10) << "get_or_create_pg " << pgid << " primary changed in " + << history.same_primary_since << " (msg from " << epoch << ")" + << dendl; + return; + } + if (!valid_history || + (!same_primary && epoch < history.same_interval_since)) { dout(10) << "get_or_create_pg " << pgid << " acting changed in " << history.same_interval_since << " (msg from " << epoch << ")" << dendl; @@ -3554,7 +3560,15 @@ void OSD::handle_pg_peering_evt( } else { // already had it. did the mapping change? PG *pg = _lookup_lock_pg(pgid); - if (epoch < pg->info.history.same_interval_since) { + if (same_primary && epoch < pg->info.history.same_primary_since) { + dout(10) << "get_or_create_pg " << pgid << " primary changed in " + << pg->info.history.same_primary_since + << " (msg from " << epoch << ")" + << dendl; + pg->unlock(); + return; + } + if (!same_primary && epoch < pg->info.history.same_interval_since) { dout(10) << *pg << " get_or_create_pg acting changed in " << pg->info.history.same_interval_since << " (msg from " << epoch << ")" << dendl; @@ -7414,42 +7428,32 @@ void OSD::handle_pg_create(OpRequestRef op) bool mapped = osdmap->get_primary_shard(on, &pgid); assert(mapped); - // does it already exist? - if (_have_pg(pgid)) { - dout(10) << "mkpg " << pgid << " already exists, skipping" << dendl; - continue; - } - + pg_interval_map_t pi; pg_history_t history; history.epoch_created = created; history.last_scrub_stamp = ci->second; history.last_deep_scrub_stamp = ci->second; + + // project history from created epoch (handle_pg_peering_evt does + // it from msg send epoch) bool valid_history = project_pg_history( pgid, history, created, up, up_primary, acting, acting_primary); - /* the pg creation message must have come from a mon and therefore - * cannot be on the other side of a map gap - */ + // the pg creation message must have come from a mon and therefore + // cannot be on the other side of a map gap assert(valid_history); - PG::RecoveryCtx rctx = create_context(); - const pg_pool_t* pp = osdmap->get_pg_pool(pgid.pool()); - PG::_create(*rctx.transaction, pgid, pgid.get_split_bits(pp->get_pg_num())); - PG::_init(*rctx.transaction, pgid, pp); - - pg_interval_map_t pi; - PG *pg = _create_lock_pg( - osdmap, pgid, false, false, - 0, up, up_primary, - acting, acting_primary, - history, pi, - *rctx.transaction); - pg->info.last_epoch_started = created; - pg->handle_create(&rctx); - pg->write_if_dirty(*rctx.transaction); - pg->publish_stats_to_osd(); - pg->unlock(); - wake_pg_waiters(pgid); - dispatch_context(rctx, pg, osdmap); + handle_pg_peering_evt( + pgid, + history, + pi, + m->epoch, + true, // same primary, bc this is a create + PG::CephPeeringEvtRef( + new PG::CephPeeringEvt( + m->epoch, + m->epoch, + PG::NullEvt())) + ); } last_pg_create_epoch = m->epoch; @@ -7674,8 +7678,9 @@ void OSD::handle_pg_notify(OpRequestRef op) handle_pg_peering_evt( spg_t(it->first.info.pgid.pgid, it->first.to), - it->first.info, it->second, - it->first.query_epoch, pg_shard_t(from, it->first.from), true, + it->first.info.history, it->second, + it->first.query_epoch, + false, // same interval PG::CephPeeringEvtRef( new PG::CephPeeringEvt( it->first.epoch_sent, it->first.query_epoch, @@ -7706,8 +7711,8 @@ void OSD::handle_pg_log(OpRequestRef op) op->mark_started(); handle_pg_peering_evt( spg_t(m->info.pgid.pgid, m->to), - m->info, m->past_intervals, m->get_epoch(), - pg_shard_t(from, m->from), false, + m->info.history, m->past_intervals, m->get_epoch(), + false, // same interval PG::CephPeeringEvtRef( new PG::CephPeeringEvt( m->get_epoch(), m->get_query_epoch(), @@ -7740,8 +7745,8 @@ void OSD::handle_pg_info(OpRequestRef op) handle_pg_peering_evt( spg_t(p->first.info.pgid.pgid, p->first.to), - p->first.info, p->second, p->first.epoch_sent, - pg_shard_t(from, p->first.from), false, + p->first.info.history, p->second, p->first.epoch_sent, + false, // same interval PG::CephPeeringEvtRef( new PG::CephPeeringEvt( p->first.epoch_sent, p->first.query_epoch, diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 894da757f75..6cc2f96a3ac 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -1961,11 +1961,10 @@ protected: void handle_pg_peering_evt( spg_t pgid, - const pg_info_t& info, + const pg_history_t& orig_history, pg_interval_map_t& pi, epoch_t epoch, - pg_shard_t from, - bool primary, + bool same_primary, PG::CephPeeringEvtRef evt); void load_pgs();