mirror of
https://github.com/ceph/ceph
synced 2024-12-28 14:34:13 +00:00
osd/OSD: deprecate __project_pg_history__
__project_pg_history__ does an inverse traversal of the series of osdmaps passed in to get a pg's pg_history_t filled, which can become super inefficient if the osdmap list to check is very long. E.g., in one of our clusters, we've observed that it took approximately 10s for a PG to finish its projection: ``` 2018-08-27 13:51:58.694823 7f1e1335a700 15 osd.9 823276 project_pg_history 34.6e9 from 821893 to 823276, start ec=380829/380829 lis/c 820412/820412 les/c/f 820413/820413/0 821785/821785/821785 2018-08-27 13:52:08.634230 7f1e1335a700 15 osd.9 823276 project_pg_history 34.6e9 acting|up changed in 822265 from [57]/[57] 57/57 -> [58,57]/[58,57] 58/58 2018-08-27 13:52:08.634244 7f1e1335a700 15 osd.9 823276 project_pg_history 34.6e9 up changed in 822265 from [57] 57 -> [58,57] 58 2018-08-27 13:52:08.634248 7f1e1335a700 15 osd.9 823276 project_pg_history 34.6e9 primary changed in 822265 2018-08-27 13:52:08.634250 7f1e1335a700 15 osd.9 823276 project_pg_history end ec=380829/380829 lis/c 820412/820412 les/c/f 820413/820413/0 822265/822265/822265 ``` Quote from Sage: > let's just kill this off entirely, and make the handle_pg_query_nopg reply unconditionally. Or, maybe, do a single sloppy check to see if the primary has changed since the original epoch... if the osdmap happens to be in cache... or not. The querying end will discard the reply if it is out of date from its perspective, so it doesn't matter, and I suspect the overhead of doing the check is larger than the overhead of sending a query reply that gets ignored. Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn> Signed-off-by: yanjun <yan.jun8@zte.com.cn>
This commit is contained in:
parent
d43ff1caf3
commit
8ef8cdba15
110
src/osd/OSD.cc
110
src/osd/OSD.cc
@ -4365,95 +4365,6 @@ void OSD::build_initial_pg_history(
|
||||
<< dendl;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fill in the passed history so you know same_interval_since, same_up_since,
|
||||
* and same_primary_since.
|
||||
*/
|
||||
bool OSD::project_pg_history(spg_t pgid, pg_history_t& h, epoch_t from,
|
||||
const OSDMapRef& endmap,
|
||||
const vector<int>& currentup,
|
||||
int currentupprimary,
|
||||
const vector<int>& currentacting,
|
||||
int currentactingprimary)
|
||||
{
|
||||
dout(15) << "project_pg_history " << pgid
|
||||
<< " from " << from << " to " << endmap->get_epoch()
|
||||
<< ", start " << h
|
||||
<< dendl;
|
||||
|
||||
epoch_t e;
|
||||
for (e = endmap->get_epoch();
|
||||
e > from;
|
||||
e--) {
|
||||
// verify during intermediate epoch (e-1)
|
||||
OSDMapRef oldmap = service.try_get_map(e-1);
|
||||
if (!oldmap) {
|
||||
dout(15) << __func__ << ": found map gap, returning false" << dendl;
|
||||
return false;
|
||||
}
|
||||
|
||||
int upprimary, actingprimary;
|
||||
vector<int> up, acting;
|
||||
oldmap->pg_to_up_acting_osds(
|
||||
pgid.pgid,
|
||||
&up,
|
||||
&upprimary,
|
||||
&acting,
|
||||
&actingprimary);
|
||||
|
||||
if (e > h.same_interval_since &&
|
||||
PastIntervals::is_new_interval(
|
||||
actingprimary, currentactingprimary,
|
||||
acting, currentacting,
|
||||
upprimary, currentupprimary,
|
||||
up, currentup,
|
||||
oldmap, endmap,
|
||||
pgid.pgid)) {
|
||||
dout(15) << "project_pg_history " << pgid << " interval changed in " << e << dendl;
|
||||
h.same_interval_since = e;
|
||||
}
|
||||
|
||||
// up set change?
|
||||
if ((up != currentup || upprimary != currentupprimary)
|
||||
&& e > h.same_up_since) {
|
||||
dout(15) << "project_pg_history " << pgid << " up changed in " << e
|
||||
<< " from " << up << " " << upprimary
|
||||
<< " -> " << currentup << " " << currentupprimary << dendl;
|
||||
h.same_up_since = e;
|
||||
}
|
||||
|
||||
// primary change?
|
||||
if (OSDMap::primary_changed(
|
||||
actingprimary,
|
||||
acting,
|
||||
currentactingprimary,
|
||||
currentacting) &&
|
||||
e > h.same_primary_since) {
|
||||
dout(15) << "project_pg_history " << pgid << " primary changed in " << e << dendl;
|
||||
h.same_primary_since = e;
|
||||
}
|
||||
|
||||
if (h.same_interval_since >= e && h.same_up_since >= e && h.same_primary_since >= e)
|
||||
break;
|
||||
}
|
||||
|
||||
// base case: these floors should be the pg creation epoch if we didn't
|
||||
// find any changes.
|
||||
if (e == h.epoch_created) {
|
||||
if (!h.same_interval_since)
|
||||
h.same_interval_since = e;
|
||||
if (!h.same_up_since)
|
||||
h.same_up_since = e;
|
||||
if (!h.same_primary_since)
|
||||
h.same_primary_since = e;
|
||||
}
|
||||
|
||||
dout(15) << "project_pg_history end " << h << dendl;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void OSD::_add_heartbeat_peer(int p)
|
||||
{
|
||||
if (p == whoami)
|
||||
@ -9042,27 +8953,6 @@ void OSD::handle_pg_query_nopg(const MQuery& q)
|
||||
if (!osdmap->have_pg_pool(pgid.pool()))
|
||||
return;
|
||||
|
||||
// get active crush mapping
|
||||
int up_primary, acting_primary;
|
||||
vector<int> up, acting;
|
||||
osdmap->pg_to_up_acting_osds(
|
||||
pgid.pgid, &up, &up_primary, &acting, &acting_primary);
|
||||
|
||||
// same primary?
|
||||
pg_history_t history = q.query.history;
|
||||
bool valid_history = project_pg_history(
|
||||
pgid, history, q.query.epoch_sent,
|
||||
osdmap,
|
||||
up, up_primary, acting, acting_primary);
|
||||
|
||||
if (!valid_history ||
|
||||
q.query.epoch_sent < history.same_interval_since) {
|
||||
dout(10) << " pg " << pgid << " dne, and pg has changed in "
|
||||
<< history.same_interval_since
|
||||
<< " (msg from " << q.query.epoch_sent << ")" << dendl;
|
||||
return;
|
||||
}
|
||||
|
||||
dout(10) << " pg " << pgid << " dne" << dendl;
|
||||
pg_info_t empty(spg_t(pgid.pgid, q.query.to));
|
||||
ConnectionRef con = service.get_con_osd_cluster(q.from.osd, osdmap->get_epoch());
|
||||
|
@ -1923,16 +1923,6 @@ protected:
|
||||
pg_history_t *h,
|
||||
PastIntervals *pi);
|
||||
|
||||
/// Project the pg history `h` from epoch `from` up to the epoch of
/// `osdmap` ("now"), comparing older maps against the given up/acting
/// sets and their primaries.
|
||||
bool project_pg_history(
|
||||
spg_t pgid, pg_history_t& h, epoch_t from,
|
||||
const OSDMapRef &osdmap,
|
||||
const vector<int>& lastup,
|
||||
int lastupprimary,
|
||||
const vector<int>& lastacting,
|
||||
int lastactingprimary
|
||||
); ///< @return false if there was a map gap between `from` and now
|
||||
|
||||
epoch_t last_pg_create_epoch;
|
||||
|
||||
void handle_pg_create(OpRequestRef op);
|
||||
|
Loading…
Reference in New Issue
Block a user