mirror of
https://github.com/ceph/ceph
synced 2025-02-23 19:17:37 +00:00
osd/OSDMap: don't mapping all pgs each time in calc_pg_upmaps
We have a cluster pool with 32768 pgs and 400 osds, it costs 600 seconds when doing upmap with '--upmap-max 32768 --upmap-deviation 0.01', which is pretty slow. After adding some debug code, the time mostly spent on pg_to_up_acting_osds, the average time for one pg_to_up_acting_osds is about 12us, so the whole pool's pg will cost 500ms each time, we finally have 1429 pgs need to do upmap, so it cost about 600 seconds. Withi this patch, it only spend 5 seconds to get job done. Signed-off-by: huangjun <huangjun@xsky.com>
This commit is contained in:
parent
9bef451aa7
commit
da45e4e352
@ -4247,15 +4247,14 @@ int OSDMap::calc_pg_upmaps(
|
||||
float start_deviation = 0;
|
||||
float end_deviation = 0;
|
||||
int num_changed = 0;
|
||||
while (true) {
|
||||
map<int,set<pg_t>> pgs_by_osd;
|
||||
int total_pgs = 0;
|
||||
float osd_weight_total = 0;
|
||||
map<int,float> osd_weight;
|
||||
for (auto& i : pools) {
|
||||
if (!only_pools.empty() && !only_pools.count(i.first))
|
||||
map<int,set<pg_t>> pgs_by_osd;
|
||||
int total_pgs = 0;
|
||||
float osd_weight_total = 0;
|
||||
map<int,float> osd_weight;
|
||||
for (auto& i : pools) {
|
||||
if (!only_pools.empty() && !only_pools.count(i.first))
|
||||
continue;
|
||||
for (unsigned ps = 0; ps < i.second.get_pg_num(); ++ps) {
|
||||
for (unsigned ps = 0; ps < i.second.get_pg_num(); ++ps) {
|
||||
pg_t pg(ps, i.first);
|
||||
vector<int> up;
|
||||
tmp.pg_to_up_acting_osds(pg, &up, nullptr, nullptr, nullptr);
|
||||
@ -4263,43 +4262,43 @@ int OSDMap::calc_pg_upmaps(
|
||||
if (osd != CRUSH_ITEM_NONE)
|
||||
pgs_by_osd[osd].insert(pg);
|
||||
}
|
||||
}
|
||||
total_pgs += i.second.get_size() * i.second.get_pg_num();
|
||||
}
|
||||
total_pgs += i.second.get_size() * i.second.get_pg_num();
|
||||
|
||||
map<int,float> pmap;
|
||||
int ruleno = tmp.crush->find_rule(i.second.get_crush_rule(),
|
||||
map<int,float> pmap;
|
||||
int ruleno = tmp.crush->find_rule(i.second.get_crush_rule(),
|
||||
i.second.get_type(),
|
||||
i.second.get_size());
|
||||
tmp.crush->get_rule_weight_osd_map(ruleno, &pmap);
|
||||
ldout(cct,30) << __func__ << " pool " << i.first << " ruleno " << ruleno << dendl;
|
||||
for (auto p : pmap) {
|
||||
tmp.crush->get_rule_weight_osd_map(ruleno, &pmap);
|
||||
ldout(cct,30) << __func__ << " pool " << i.first << " ruleno " << ruleno << dendl;
|
||||
for (auto p : pmap) {
|
||||
auto adjusted_weight = tmp.get_weightf(p.first) * p.second;
|
||||
if (adjusted_weight == 0) {
|
||||
continue;
|
||||
}
|
||||
if (adjusted_weight == 0) {
|
||||
continue;
|
||||
}
|
||||
osd_weight[p.first] += adjusted_weight;
|
||||
osd_weight_total += adjusted_weight;
|
||||
}
|
||||
}
|
||||
for (auto& i : osd_weight) {
|
||||
int pgs = 0;
|
||||
auto p = pgs_by_osd.find(i.first);
|
||||
if (p != pgs_by_osd.end())
|
||||
}
|
||||
for (auto& i : osd_weight) {
|
||||
int pgs = 0;
|
||||
auto p = pgs_by_osd.find(i.first);
|
||||
if (p != pgs_by_osd.end())
|
||||
pgs = p->second.size();
|
||||
else
|
||||
else
|
||||
pgs_by_osd.emplace(i.first, set<pg_t>());
|
||||
ldout(cct, 20) << " osd." << i.first << " weight " << i.second
|
||||
ldout(cct, 20) << " osd." << i.first << " weight " << i.second
|
||||
<< " pgs " << pgs << dendl;
|
||||
}
|
||||
|
||||
if (osd_weight_total == 0) {
|
||||
lderr(cct) << __func__ << " abort due to osd_weight_total == 0" << dendl;
|
||||
break;
|
||||
}
|
||||
float pgs_per_weight = total_pgs / osd_weight_total;
|
||||
ldout(cct, 10) << " osd_weight_total " << osd_weight_total << dendl;
|
||||
ldout(cct, 10) << " pgs_per_weight " << pgs_per_weight << dendl;
|
||||
}
|
||||
if (osd_weight_total == 0) {
|
||||
lderr(cct) << __func__ << " abort due to osd_weight_total == 0" << dendl;
|
||||
return 0;
|
||||
}
|
||||
float pgs_per_weight = total_pgs / osd_weight_total;
|
||||
ldout(cct, 10) << " osd_weight_total " << osd_weight_total << dendl;
|
||||
ldout(cct, 10) << " pgs_per_weight " << pgs_per_weight << dendl;
|
||||
|
||||
while (true) {
|
||||
// osd deviation
|
||||
float total_deviation = 0;
|
||||
map<int,float> osd_deviation; // osd, deviation(pgs)
|
||||
@ -4371,6 +4370,10 @@ int OSDMap::calc_pg_upmaps(
|
||||
if (q.second == osd) {
|
||||
ldout(cct, 10) << " dropping pg_upmap_items " << pg
|
||||
<< " " << p->second << dendl;
|
||||
for (auto i : p->second) {
|
||||
pgs_by_osd[i.second].erase(pg);
|
||||
pgs_by_osd[i.first].insert(pg);
|
||||
}
|
||||
tmp.pg_upmap_items.erase(p);
|
||||
pending_inc->old_pg_upmap_items.insert(pg);
|
||||
++num_changed;
|
||||
@ -4406,6 +4409,10 @@ int OSDMap::calc_pg_upmaps(
|
||||
rmi.push_back(make_pair(orig[i], out[i]));
|
||||
}
|
||||
}
|
||||
for (auto i : rmi) {
|
||||
pgs_by_osd[i.first].erase(pg);
|
||||
pgs_by_osd[i.second].insert(pg);
|
||||
}
|
||||
pending_inc->new_pg_upmap_items[pg] = rmi;
|
||||
ldout(cct, 10) << " " << pg << " pg_upmap_items " << rmi << dendl;
|
||||
restart = true;
|
||||
|
Loading…
Reference in New Issue
Block a user