Merge pull request #2293 from ceph/wip-hitset-bytes

osd: improve agent calculation by properly factoring out the bytes used by hit_set archive objects

Reviewed-by: Samuel Just <sam.just@inktank.com>
This commit is contained in:
Samuel Just 2014-09-10 12:02:56 -07:00
commit d0feeaa150
3 changed files with 49 additions and 14 deletions

View File

@ -11139,6 +11139,7 @@ void ReplicatedPG::hit_set_persist()
ctx->delta_stats.num_objects++;
ctx->delta_stats.num_objects_hit_set_archive++;
ctx->delta_stats.num_bytes += bl.length();
ctx->delta_stats.num_bytes_hit_set_archive += bl.length();
bufferlist bss;
::encode(ctx->new_snapset, bss);
@ -11211,6 +11212,7 @@ void ReplicatedPG::hit_set_trim(RepGather *repop, unsigned max)
--repop->ctx->delta_stats.num_objects;
--repop->ctx->delta_stats.num_objects_hit_set_archive;
repop->ctx->delta_stats.num_bytes -= obc->obs.oi.size;
repop->ctx->delta_stats.num_bytes_hit_set_archive -= obc->obs.oi.size;
}
}
@ -11679,8 +11681,6 @@ void ReplicatedPG::agent_choose_mode(bool restart)
uint64_t divisor = pool.info.get_pg_num_divisor(info.pgid.pgid);
assert(divisor > 0);
uint64_t num_user_objects = info.stats.stats.sum.num_objects;
// adjust (effective) user objects down based on the number
// of HitSet objects, which should not count toward our total since
// they cannot be flushed.
@ -11692,12 +11692,16 @@ void ReplicatedPG::agent_choose_mode(bool restart)
if (base_pool->is_erasure())
unflushable += info.stats.stats.sum.num_objects_omap;
uint64_t num_user_objects = info.stats.stats.sum.num_objects;
if (num_user_objects > unflushable)
num_user_objects -= unflushable;
else
num_user_objects = 0;
uint64_t num_user_bytes = info.stats.stats.sum.num_bytes;
uint64_t unflushable_bytes = info.stats.stats.sum.num_bytes_hit_set_archive;
num_user_bytes -= unflushable_bytes;
// also reduce the num_dirty by num_objects_omap
int64_t num_dirty = info.stats.stats.sum.num_objects_dirty;
if (base_pool->is_erasure()) {
@ -11718,6 +11722,7 @@ void ReplicatedPG::agent_choose_mode(bool restart)
<< " num_objects_omap: " << info.stats.stats.sum.num_objects_omap
<< " num_dirty: " << num_dirty
<< " num_user_objects: " << num_user_objects
<< " num_user_bytes: " << num_user_bytes
<< " pool.info.target_max_bytes: " << pool.info.target_max_bytes
<< " pool.info.target_max_objects: " << pool.info.target_max_objects
<< dendl;
@ -11725,9 +11730,8 @@ void ReplicatedPG::agent_choose_mode(bool restart)
// get dirty, full ratios
uint64_t dirty_micro = 0;
uint64_t full_micro = 0;
if (pool.info.target_max_bytes && info.stats.stats.sum.num_objects > 0) {
uint64_t avg_size = info.stats.stats.sum.num_bytes /
info.stats.stats.sum.num_objects;
if (pool.info.target_max_bytes && num_user_objects > 0) {
uint64_t avg_size = num_user_bytes / num_user_objects;
dirty_micro =
num_dirty * avg_size * 1000000 /
MAX(pool.info.target_max_bytes / divisor, 1);
@ -11997,6 +12001,8 @@ void ReplicatedPG::_scrub(ScrubMap& scrubmap)
} else {
stat.num_bytes += oi.size;
}
if (soid.nspace == cct->_conf->osd_hit_set_namespace)
stat.num_bytes_hit_set_archive += oi.size;
if (!soid.is_snapdir()) {
if (oi.is_dirty())
@ -12135,7 +12141,8 @@ void ReplicatedPG::_scrub_finish()
<< scrub_cstat.sum.num_objects_dirty << "/" << info.stats.stats.sum.num_objects_dirty << " dirty, "
<< scrub_cstat.sum.num_objects_omap << "/" << info.stats.stats.sum.num_objects_omap << " omap, "
<< scrub_cstat.sum.num_objects_hit_set_archive << "/" << info.stats.stats.sum.num_objects_hit_set_archive << " hit_set_archive, "
<< scrub_cstat.sum.num_bytes << "/" << info.stats.stats.sum.num_bytes << " bytes."
<< scrub_cstat.sum.num_bytes << "/" << info.stats.stats.sum.num_bytes << " bytes,"
<< scrub_cstat.sum.num_bytes_hit_set_archive << "/" << info.stats.stats.sum.num_bytes_hit_set_archive << " hit_set_archive bytes."
<< dendl;
if (scrub_cstat.sum.num_objects != info.stats.stats.sum.num_objects ||
@ -12146,6 +12153,8 @@ void ReplicatedPG::_scrub_finish()
!info.stats.omap_stats_invalid) ||
(scrub_cstat.sum.num_objects_hit_set_archive != info.stats.stats.sum.num_objects_hit_set_archive &&
!info.stats.hitset_stats_invalid) ||
(scrub_cstat.sum.num_bytes_hit_set_archive != info.stats.stats.sum.num_bytes_hit_set_archive &&
!info.stats.hitset_bytes_stats_invalid) ||
scrub_cstat.sum.num_whiteouts != info.stats.stats.sum.num_whiteouts ||
scrub_cstat.sum.num_bytes != info.stats.stats.sum.num_bytes) {
osd->clog->error() << info.pgid << " " << mode
@ -12156,7 +12165,8 @@ void ReplicatedPG::_scrub_finish()
<< scrub_cstat.sum.num_objects_omap << "/" << info.stats.stats.sum.num_objects_omap << " omap, "
<< scrub_cstat.sum.num_objects_hit_set_archive << "/" << info.stats.stats.sum.num_objects_hit_set_archive << " hit_set_archive, "
<< scrub_cstat.sum.num_whiteouts << "/" << info.stats.stats.sum.num_whiteouts << " whiteouts, "
<< scrub_cstat.sum.num_bytes << "/" << info.stats.stats.sum.num_bytes << " bytes.\n";
<< scrub_cstat.sum.num_bytes << "/" << info.stats.stats.sum.num_bytes << " bytes,"
<< scrub_cstat.sum.num_bytes_hit_set_archive << "/" << info.stats.stats.sum.num_bytes_hit_set_archive << " hit_set_archive bytes.\n";
++scrubber.shallow_errors;
if (repair) {
@ -12165,6 +12175,7 @@ void ReplicatedPG::_scrub_finish()
info.stats.dirty_stats_invalid = false;
info.stats.omap_stats_invalid = false;
info.stats.hitset_stats_invalid = false;
info.stats.hitset_bytes_stats_invalid = false;
publish_stats_to_osd();
share_pg_info();
}

View File

@ -1405,11 +1405,12 @@ void object_stat_sum_t::dump(Formatter *f) const
f->dump_int("num_keys_recovered", num_keys_recovered);
f->dump_int("num_objects_omap", num_objects_omap);
f->dump_int("num_objects_hit_set_archive", num_objects_hit_set_archive);
f->dump_int("num_bytes_hit_set_archive", num_bytes_hit_set_archive);
}
void object_stat_sum_t::encode(bufferlist& bl) const
{
ENCODE_START(10, 3, bl);
ENCODE_START(11, 3, bl);
::encode(num_bytes, bl);
::encode(num_objects, bl);
::encode(num_object_clones, bl);
@ -1432,12 +1433,13 @@ void object_stat_sum_t::encode(bufferlist& bl) const
::encode(num_objects_omap, bl);
::encode(num_objects_hit_set_archive, bl);
::encode(num_objects_misplaced, bl);
::encode(num_bytes_hit_set_archive, bl);
ENCODE_FINISH(bl);
}
void object_stat_sum_t::decode(bufferlist::iterator& bl)
{
DECODE_START_LEGACY_COMPAT_LEN(10, 3, 3, bl);
DECODE_START_LEGACY_COMPAT_LEN(11, 3, 3, bl);
::decode(num_bytes, bl);
if (struct_v < 3) {
uint64_t num_kb;
@ -1496,13 +1498,17 @@ void object_stat_sum_t::decode(bufferlist::iterator& bl)
} else {
num_objects_misplaced = 0;
}
if (struct_v >= 11) {
::decode(num_bytes_hit_set_archive, bl);
} else {
num_bytes_hit_set_archive = 0;
}
DECODE_FINISH(bl);
}
void object_stat_sum_t::generate_test_instances(list<object_stat_sum_t*>& o)
{
object_stat_sum_t a;
o.push_back(new object_stat_sum_t(a));
a.num_bytes = 1;
a.num_objects = 3;
@ -1522,6 +1528,8 @@ void object_stat_sum_t::generate_test_instances(list<object_stat_sum_t*>& o)
a.num_objects_dirty = 21;
a.num_whiteouts = 22;
a.num_objects_misplaced = 1232;
a.num_objects_hit_set_archive = 2;
a.num_bytes_hit_set_archive = 27;
o.push_back(new object_stat_sum_t(a));
}
@ -1549,6 +1557,7 @@ void object_stat_sum_t::add(const object_stat_sum_t& o)
num_whiteouts += o.num_whiteouts;
num_objects_omap += o.num_objects_omap;
num_objects_hit_set_archive += o.num_objects_hit_set_archive;
num_bytes_hit_set_archive += o.num_bytes_hit_set_archive;
}
void object_stat_sum_t::sub(const object_stat_sum_t& o)
@ -1575,6 +1584,7 @@ void object_stat_sum_t::sub(const object_stat_sum_t& o)
num_whiteouts -= o.num_whiteouts;
num_objects_omap -= o.num_objects_omap;
num_objects_hit_set_archive -= o.num_objects_hit_set_archive;
num_bytes_hit_set_archive -= o.num_bytes_hit_set_archive;
}
@ -1691,7 +1701,7 @@ void pg_stat_t::dump_brief(Formatter *f) const
void pg_stat_t::encode(bufferlist &bl) const
{
ENCODE_START(19, 8, bl);
ENCODE_START(20, 8, bl);
::encode(version, bl);
::encode(reported_seq, bl);
::encode(reported_epoch, bl);
@ -1728,12 +1738,13 @@ void pg_stat_t::encode(bufferlist &bl) const
::encode(blocked_by, bl);
::encode(last_undegraded, bl);
::encode(last_fullsized, bl);
::encode(hitset_bytes_stats_invalid, bl);
ENCODE_FINISH(bl);
}
void pg_stat_t::decode(bufferlist::iterator &bl)
{
DECODE_START_LEGACY_COMPAT_LEN(19, 8, 8, bl);
DECODE_START_LEGACY_COMPAT_LEN(20, 8, 8, bl);
::decode(version, bl);
::decode(reported_seq, bl);
::decode(reported_epoch, bl);
@ -1853,6 +1864,13 @@ void pg_stat_t::decode(bufferlist::iterator &bl)
last_undegraded = utime_t();
last_fullsized = utime_t();
}
if (struct_v >= 20) {
::decode(hitset_bytes_stats_invalid, bl);
} else {
// if we are decoding an old encoding of this object, then the
// encoder may not have supported num_bytes_hit_set_archive accounting.
hitset_bytes_stats_invalid = true;
}
DECODE_FINISH(bl);
}

View File

@ -1183,6 +1183,7 @@ struct object_stat_sum_t {
int64_t num_whiteouts;
int64_t num_objects_omap;
int64_t num_objects_hit_set_archive;
int64_t num_bytes_hit_set_archive;
object_stat_sum_t()
: num_bytes(0),
@ -1199,7 +1200,8 @@ struct object_stat_sum_t {
num_objects_dirty(0),
num_whiteouts(0),
num_objects_omap(0),
num_objects_hit_set_archive(0)
num_objects_hit_set_archive(0),
num_bytes_hit_set_archive(0)
{}
void floor(int64_t f) {
@ -1226,6 +1228,7 @@ struct object_stat_sum_t {
FLOOR(num_whiteouts);
FLOOR(num_objects_omap);
FLOOR(num_objects_hit_set_archive);
FLOOR(num_bytes_hit_set_archive);
#undef FLOOR
}
@ -1260,6 +1263,7 @@ struct object_stat_sum_t {
SPLIT(num_whiteouts);
SPLIT(num_objects_omap);
SPLIT(num_objects_hit_set_archive);
SPLIT(num_bytes_hit_set_archive);
#undef SPLIT
}
@ -1396,6 +1400,7 @@ struct pg_stat_t {
bool dirty_stats_invalid;
bool omap_stats_invalid;
bool hitset_stats_invalid;
bool hitset_bytes_stats_invalid;
/// up, acting primaries
int32_t up_primary;
@ -1413,6 +1418,7 @@ struct pg_stat_t {
dirty_stats_invalid(false),
omap_stats_invalid(false),
hitset_stats_invalid(false),
hitset_bytes_stats_invalid(false),
up_primary(-1),
acting_primary(-1)
{ }