osd: Fix _update_calc_stats() to use peer_missing and missing_by_count
Signed-off-by: David Zafman <dzafman@redhat.com>
This commit is contained in:
parent 1e98533984
commit 4bf4cd937c

src/osd/PG.cc (153 changed lines)
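For orientation: the statistics this commit touches distinguish degraded objects (fewer valid copies or EC shards exist than the pool requires) from misplaced objects (a copy exists, but on an OSD that is not in the up set). Below is a rough, self-contained sketch of that split, using a simplified stand-in for the per-object (up, other) copy counts that a missing-by-count style summary provides; the copy_count struct and all numbers are illustrative, not Ceph code.

#include <algorithm>
#include <iostream>

// Hypothetical per-object summary: how many copies sit on "up" OSDs and
// how many extra copies exist elsewhere (acting-but-not-up, stray, ...).
struct copy_count {
  int up;     // copies currently on OSDs in the up set
  int other;  // copies on OSDs outside the up set
};

int main() {
  const int pool_size = 3;          // replicas the pool wants
  const copy_count obj = {1, 1};    // 1 copy up, 1 copy elsewhere

  // Copies that should be on up OSDs but are not.
  int missing_shards = std::max(0, pool_size - obj.up);
  // Copies that still exist somewhere count as misplaced rather than degraded.
  int misplaced = std::min(missing_shards, obj.other);
  int degraded = missing_shards - misplaced;

  std::cout << "degraded=" << degraded
            << " misplaced=" << misplaced << std::endl;  // degraded=1 misplaced=1
  return 0;
}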
@@ -2743,6 +2743,25 @@ bool PG::check_in_progress_op(
    pg_log.get_log().get_request(r, version, user_version, return_code));
}

static bool find_shard(const set<pg_shard_t> & pgs, shard_id_t shard)
{
  for (auto&p : pgs)
    if (p.shard == shard)
      return true;
  return false;
}

static pg_shard_t get_another_shard(const set<pg_shard_t> & pgs, pg_shard_t skip, shard_id_t shard)
{
  for (auto&p : pgs) {
    if (p == skip)
      continue;
    if (p.shard == shard)
      return p;
  }
  return pg_shard_t();
}

void PG::_update_calc_stats()
{
  info.stats.version = info.last_update;
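The two static helpers added above scan a set of pg_shard_t for a particular EC shard id; they are only exercised on the erasure-coded paths later in the diff. The self-contained sketch below mirrors that lookup with simplified stand-in types (shard_ref plays the role of pg_shard_t, and a default-constructed value acts as the "no shard" sentinel); it illustrates the semantics only and is not the Ceph types themselves.

#include <iostream>
#include <set>
#include <tuple>

// Stand-in for Ceph's pg_shard_t, just for illustration.
struct shard_ref {
  int osd = -1;    // which OSD holds the shard (-1 means "none", like pg_shard_t())
  int shard = -1;  // EC shard id
  bool operator<(const shard_ref& o) const {
    return std::tie(osd, shard) < std::tie(o.osd, o.shard);
  }
  bool operator==(const shard_ref& o) const {
    return osd == o.osd && shard == o.shard;
  }
  bool operator!=(const shard_ref& o) const { return !(*this == o); }
};

// Does any member of pgs carry this shard id?
static bool find_shard(const std::set<shard_ref>& pgs, int shard) {
  for (auto& p : pgs)
    if (p.shard == shard)
      return true;
  return false;
}

// Return some member of pgs (other than skip) holding this shard id,
// or a default-constructed sentinel if none exists.
static shard_ref get_another_shard(const std::set<shard_ref>& pgs,
                                   shard_ref skip, int shard) {
  for (auto& p : pgs) {
    if (p == skip)
      continue;
    if (p.shard == shard)
      return p;
  }
  return shard_ref();
}

int main() {
  std::set<shard_ref> acting = {{0, 0}, {3, 1}, {7, 2}};
  std::cout << find_shard(acting, 1) << "\n";                // 1: osd.3 holds shard 1
  shard_ref alt = get_another_shard(acting, {3, 1}, 1);
  std::cout << (alt != shard_ref() ? alt.osd : -1) << "\n";  // -1: no other holder of shard 1
  return 0;
}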
@@ -2765,14 +2784,14 @@ void PG::_update_calc_stats()
  // In rare case that upset is too large (usually transient), use as target
  // for calculations below.
  unsigned target = std::max(num_shards, (unsigned)upset.size());
  // Not sure this could ever happen, that actingset > upset
  // which only matters if actingset > num_shards.
  // For undersized actingset may be larger with OSDs out
  unsigned nrep = std::max(actingset.size(), upset.size());
  // calc num_object_copies
  info.stats.stats.calc_copies(std::max(target, nrep));
  info.stats.stats.sum.num_objects_degraded = 0;
  info.stats.stats.sum.num_objects_unfound = 0;
  info.stats.stats.sum.num_objects_misplaced = 0;

  if ((is_remapped() || is_undersized() || !is_clean()) && (is_peered() || is_activating())) {
    dout(20) << __func__ << " actingset " << actingset << " upset "
             << upset << " actingbackfill " << actingbackfill << dendl;
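num_object_copies is derived from the larger of the computed target and nrep. Assuming calc_copies() multiplies the PG's object count by that copy count (which is how the copies statistic is normally read), the arithmetic in this hunk works out as in the sketch below; all numbers are made up.

#include <algorithm>
#include <cstdint>
#include <iostream>

int main() {
  const int64_t num_objects = 100;
  const unsigned num_shards = 3;         // pool size (replicas or EC k+m)
  const unsigned upset_size = 4;         // transiently larger up set
  const unsigned actingset_size = 3;

  unsigned target = std::max(num_shards, upset_size);    // 4
  unsigned nrep = std::max(actingset_size, upset_size);  // 4

  // Assumed semantics: num_object_copies = copies * num_objects.
  int64_t num_object_copies = (int64_t)std::max(target, nrep) * num_objects;
  std::cout << "num_object_copies=" << num_object_copies << std::endl;  // 400
  return 0;
}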
@@ -2789,49 +2808,127 @@ void PG::_update_calc_stats()
    // Objects missing from nodes not in up, sort by # objects
    boost::container::flat_set<pair<int64_t,pg_shard_t>> acting_source_objects;

    int64_t missing;
    // Fill missing_target_objects/acting_source_objects

    {
      int64_t missing;

      // Primary first
      missing = pg_log.get_missing().num_missing();
      assert(actingbackfill.count(pg_whoami));
      if (upset.count(pg_whoami)) {
        missing_target_objects.insert(make_pair(missing, pg_whoami));
      } else {
        acting_source_objects.insert(make_pair(missing, pg_whoami));
      }
      info.stats.stats.sum.num_objects_missing_on_primary = missing;
      dout(20) << __func__ << " shard " << pg_whoami
               << " primary objects " << num_objects
               << " missing " << missing
               << dendl;

    // Primary first
    missing = pg_log.get_missing().num_missing();
    assert(actingbackfill.count(pg_whoami));
    if (upset.count(pg_whoami)) {
      missing_target_objects.insert(make_pair(missing, pg_whoami));
    } else {
      acting_source_objects.insert(make_pair(missing, pg_whoami));
    }
    info.stats.stats.sum.num_objects_missing_on_primary = missing;

    // All other peers
    for (auto& peer : peer_info) {
      // Ignore other peers until we add code to look at detailed missing
      // information. (recovery)
      if (!actingbackfill.count(peer.first)) {
        continue;
      }
      // Primary should not be in the peer_info, skip if it is.
      if (peer.first == pg_whoami) continue;
      missing = 0;
      int64_t missing = 0;
      int64_t peer_num_objects = peer.second.stats.stats.sum.num_objects;
      // Backfill targets always track num_objects accurately
      // all other peers track missing accurately.
      if (is_backfill_targets(peer.first)) {
        missing = std::max((int64_t)0, num_objects - peer.second.stats.stats.sum.num_objects);
        missing = std::max((int64_t)0, num_objects - peer_num_objects);
      } else {
        if (peer_missing.count(peer.first)) {
          missing = peer_missing[peer.first].num_missing();
        } else {
          dout(20) << __func__ << " no peer_missing found for " << peer.first << dendl;
        if (peer_missing.count(peer.first)) {
          missing = peer_missing[peer.first].num_missing();
        } else {
          dout(20) << __func__ << " no peer_missing found for " << peer.first << dendl;
          missing = std::max((int64_t)0, num_objects - peer_num_objects);
        }
      }
      if (upset.count(peer.first)) {
        missing_target_objects.insert(make_pair(missing, peer.first));
      } else {
      } else if (actingset.count(peer.first)) {
        acting_source_objects.insert(make_pair(missing, peer.first));
      }
      peer.second.stats.stats.sum.num_objects_missing = missing;
      dout(20) << __func__ << " shard " << peer.first
               << " objects " << peer_num_objects
               << " missing " << missing
               << dendl;
    }

    // A misplaced object is not stored on the correct OSD
    int64_t misplaced = 0;
    // a degraded objects has fewer replicas or EC shards than the pool specifies.
    int64_t degraded = 0;

    if (is_recovering()) {
      for (auto& sml: missing_loc.get_missing_by_count()) {
        for (auto& ml: sml.second) {
          int missing_shards;
          if (sml.first == shard_id_t::NO_SHARD) {
            dout(0) << __func__ << " ml " << ml.second << " upset size " << upset.size() << " up " << ml.first.up << dendl;
            missing_shards = (int)upset.size() - ml.first.up;
          } else {
            // Handle shards not even in upset below
            if (!find_shard(upset, sml.first))
              continue;
            missing_shards = std::max(0, 1 - ml.first.up);
            dout(0) << __func__ << " shard " << sml.first << " ml " << ml.second << " missing shards " << missing_shards << dendl;
          }
          int odegraded = ml.second * missing_shards;
          // Copies on other osds but limited to the possible degraded
          int more_osds = std::min(missing_shards, ml.first.other);
          int omisplaced = ml.second * more_osds;
          assert(omisplaced <= odegraded);
          odegraded -= omisplaced;

          misplaced += omisplaced;
          degraded += odegraded;
        }
      }

      dout(20) << __func__ << " missing based degraded " << degraded << dendl;
      dout(20) << __func__ << " missing based misplaced " << misplaced << dendl;

      // Handle undersized case
      if (pool.info.is_replicated()) {
        // Add degraded for missing targets (num_objects missing)
        assert(target >= upset.size());
        unsigned needed = target - upset.size();
        degraded += num_objects * needed;
      } else {
        for (unsigned i = 0 ; i < num_shards; ++i) {
          shard_id_t shard(i);

          if (!find_shard(upset, shard)) {
            pg_shard_t pgs = get_another_shard(actingset, pg_shard_t(), shard);

            if (pgs != pg_shard_t()) {
              int64_t missing;

              if (pgs == pg_whoami)
                missing = info.stats.stats.sum.num_objects_missing_on_primary;
              else
                missing = peer_info[pgs].stats.stats.sum.num_objects_missing;

              degraded += missing;
              misplaced += std::max((int64_t)0, num_objects - missing);
            } else {
              // No shard anywhere
              degraded += num_objects;
            }
          }
        }
      }
      goto out;
    }

    // Handle undersized case
    if (pool.info.is_replicated()) {
      // Add to missing_target_objects up to target elements (num_objects missing)
      // Add to missing_target_objects
      assert(target >= missing_target_objects.size());
      unsigned needed = target - missing_target_objects.size();
      if (needed)
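The new is_recovering() branch above is the heart of the commit: rather than inferring everything from object counts, it walks missing_loc's missing-by-count summary, where each entry maps a (copies on up OSDs, copies elsewhere) signature to the number of objects with that signature, and derives degraded and misplaced from it. The following self-contained sketch reproduces that arithmetic for the replicated (NO_SHARD) case with stand-in types and hypothetical numbers; it is not Ceph's MissingLoc API.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iostream>
#include <map>
#include <tuple>

// Stand-in for the (up, other) key of the missing-by-count histogram:
// copies of an object on up OSDs vs. anywhere else.
struct loc_count {
  int up;
  int other;
  bool operator<(const loc_count& o) const {
    return std::tie(up, other) < std::tie(o.up, o.other);
  }
};

int main() {
  const int upset_size = 3;  // replicated pool, 3 OSDs in the up set

  // Hypothetical histogram: (up, other) -> number of objects with that signature.
  std::map<loc_count, int> missing_by_count = {
    {{2, 0}, 10},  // 10 objects have only 2 of 3 up copies and no spare copy
    {{1, 1},  4},  //  4 objects have 1 up copy and 1 copy on a non-up OSD
  };

  int64_t degraded = 0, misplaced = 0;
  for (auto& [loc, objects] : missing_by_count) {
    int missing_shards = upset_size - loc.up;     // copies absent from up
    int odegraded = objects * missing_shards;
    // Copies that exist on other OSDs are misplaced, not degraded,
    // but never more than the number of copies actually absent from up.
    int more_osds = std::min(missing_shards, loc.other);
    int omisplaced = objects * more_osds;
    assert(omisplaced <= odegraded);
    odegraded -= omisplaced;
    misplaced += omisplaced;
    degraded += odegraded;
  }
  std::cout << "degraded=" << degraded       // 10*1 + 4*(2-1) = 14
            << " misplaced=" << misplaced    // 4*1 = 4
            << std::endl;
  return 0;
}

The std::min() against the other count caps misplaced at the number of copies actually absent from up, matching the assert in the hunk.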
@@ -2856,11 +2953,8 @@ void PG::_update_calc_stats()
    for (const auto& item : acting_source_objects)
      dout(20) << __func__ << " acting shard " << std::get<1>(item) << " missing= " << std::get<0>(item) << dendl;

    // A misplaced object is not stored on the correct OSD
    int64_t misplaced = 0;
    // a degraded objects has fewer replicas or EC shards than the pool specifies.
    int64_t degraded = 0;

    // Handle all objects not in missing for remapped
    // or backfill
    for (auto m = missing_target_objects.rbegin();
         m != missing_target_objects.rend(); ++m) {

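The missing_target_objects entries consumed by this loop were filled in by the peer loop in the previous hunk, which is where the peer_missing half of the commit lands: a backfill target, which tracks num_objects accurately but has no authoritative missing set, gets an estimate based on its shortfall against the primary's object count, while other peers use their peer_missing entry when one exists and fall back to the same estimate otherwise. A small sketch of that selection with hypothetical peers and counts (the peer struct and the std::optional usage are illustrative only):

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <optional>

int main() {
  const int64_t num_objects = 100;        // primary's object count

  // Hypothetical peers: object count and, if known, an authoritative
  // missing count (as a peer_missing entry would provide).
  struct peer {
    const char* name;
    bool backfill_target;
    int64_t peer_num_objects;
    std::optional<int64_t> peer_missing;  // absent => no peer_missing entry
  };
  const peer peers[] = {
    {"osd.4 (backfill)", true,  37,  std::nullopt},
    {"osd.5",            false, 100, 12},
    {"osd.6",            false, 95,  std::nullopt},
  };

  for (const auto& p : peers) {
    int64_t missing;
    if (p.backfill_target) {
      // Backfill targets: estimate missing as the shortfall vs. the primary.
      missing = std::max<int64_t>(0, num_objects - p.peer_num_objects);
    } else if (p.peer_missing) {
      missing = *p.peer_missing;          // authoritative missing count
    } else {
      // Fallback when no peer_missing entry exists.
      missing = std::max<int64_t>(0, num_objects - p.peer_num_objects);
    }
    std::cout << p.name << " missing=" << missing << "\n";  // 63, 12, 5
  }
  return 0;
}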
@@ -2902,6 +2996,7 @@ void PG::_update_calc_stats()
      dout(20) << __func__ << " extra acting misplaced " << extra_misplaced << dendl;
      misplaced += extra_misplaced;
    }
 out:
    dout(20) << __func__ << " degraded " << degraded << dendl;
    dout(20) << __func__ << " misplaced " << misplaced << dendl;

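For the erasure-coded undersized handling inside the recovering branch (near the end of the large hunk above), a shard id absent from the up set is credited to whichever acting OSD still holds it: that holder's missing count is degraded, the objects it does have are misplaced, and if no OSD holds the shard at all, every object is degraded. A self-contained sketch with hypothetical shard and missing counts:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <map>
#include <set>

int main() {
  const int64_t num_objects = 100;
  const unsigned num_shards = 4;                 // EC k+m = 4
  std::set<int> up_shards = {0, 1, 3};           // shard 2 missing from the up set
  // Hypothetical: shard id -> missing count on an acting OSD holding it.
  std::map<int, int64_t> acting_shard_missing = { {0, 0}, {1, 2}, {2, 7} };

  int64_t degraded = 0, misplaced = 0;
  for (unsigned i = 0; i < num_shards; ++i) {
    if (up_shards.count(i))
      continue;                                  // shard present in up: nothing to add here
    auto it = acting_shard_missing.find(i);
    if (it != acting_shard_missing.end()) {
      // An acting (but not up) OSD still holds this shard: whatever it is
      // missing is degraded, the objects it does have are misplaced.
      degraded += it->second;
      misplaced += std::max<int64_t>(0, num_objects - it->second);
    } else {
      // No holder of this shard anywhere: every object is degraded.
      degraded += num_objects;
    }
  }
  std::cout << "degraded=" << degraded           // 7
            << " misplaced=" << misplaced        // 93
            << std::endl;
  return 0;
}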
@@ -2831,6 +2831,7 @@ protected:
  bool is_peered() const {
    return state_test(PG_STATE_ACTIVE) || state_test(PG_STATE_PEERED);
  }
  bool is_recovering() const { return state_test(PG_STATE_RECOVERING); }

  bool is_empty() const { return info.last_update == eversion_t(0,0); }

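This last hunk appears to come from the PG class declaration rather than PG.cc: it adds an is_recovering() predicate next to is_peered() so the new estimation path can be gated on PG_STATE_RECOVERING. A minimal sketch of this bit-flag style of predicate, with made-up flag values rather than Ceph's real PG_STATE_* constants:

#include <cstdint>
#include <iostream>

// Made-up flag values purely for illustration; Ceph defines its own
// PG_STATE_* constants elsewhere.
static const uint64_t PG_STATE_ACTIVE     = 1 << 0;
static const uint64_t PG_STATE_PEERED     = 1 << 1;
static const uint64_t PG_STATE_RECOVERING = 1 << 2;

struct pg_like {
  uint64_t state = 0;
  bool state_test(uint64_t m) const { return (state & m) != 0; }
  bool is_peered() const {
    return state_test(PG_STATE_ACTIVE) || state_test(PG_STATE_PEERED);
  }
  bool is_recovering() const { return state_test(PG_STATE_RECOVERING); }
};

int main() {
  pg_like pg;
  pg.state = PG_STATE_ACTIVE | PG_STATE_RECOVERING;
  std::cout << pg.is_peered() << " " << pg.is_recovering() << std::endl;  // 1 1
  return 0;
}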