Merge pull request #1614 from ceph/wip-7964

Wip 7964

Reviewed-by: Sage Weil <sage@inktank.com>
This commit is contained in:
Sage Weil 2014-04-07 14:01:58 -07:00
commit 008ce85d19
4 changed files with 111 additions and 10 deletions

View File

@ -2290,6 +2290,8 @@ ReplicatedPG::RepGather *ReplicatedPG::trim_object(const hobject_t &coid)
delta.num_objects--;
if (coi.is_dirty())
delta.num_objects_dirty--;
if (coi.is_omap())
delta.num_objects_omap--;
if (coi.is_whiteout()) {
dout(20) << __func__ << " trimming whiteout on " << coid << dendl;
delta.num_whiteouts--;
@ -4521,6 +4523,8 @@ inline int ReplicatedPG::_delete_oid(OpContext *ctx, bool no_whiteout)
}
ctx->delta_stats.num_objects--;
if (soid.is_snap())
ctx->delta_stats.num_object_clones--;
if (oi.is_whiteout()) {
dout(20) << __func__ << " deleting whiteout on " << soid << dendl;
ctx->delta_stats.num_whiteouts--;
@ -4694,6 +4698,19 @@ void ReplicatedPG::make_writeable(OpContext *ctx)
}
}
if ((ctx->new_obs.exists &&
ctx->new_obs.oi.is_omap()) &&
(!ctx->obc->obs.exists ||
!ctx->obc->obs.oi.is_omap())) {
++ctx->delta_stats.num_objects_omap;
}
if ((!ctx->new_obs.exists ||
!ctx->new_obs.oi.is_omap()) &&
(ctx->obc->obs.exists &&
ctx->obc->obs.oi.is_omap())) {
--ctx->delta_stats.num_objects_omap;
}
// use newer snapc?
if (ctx->new_snapset.seq > snapc.seq) {
snapc.seq = ctx->new_snapset.seq;
@ -4751,6 +4768,8 @@ void ReplicatedPG::make_writeable(OpContext *ctx)
dout(20) << __func__ << " cloning whiteout on " << soid << " to " << coid << dendl;
ctx->delta_stats.num_whiteouts++;
}
if (snap_oi->is_omap())
ctx->delta_stats.num_objects_omap++;
ctx->delta_stats.num_object_clones++;
ctx->new_snapset.clones.push_back(coid.snap);
ctx->new_snapset.clone_size[coid.snap] = ctx->obs->oi.size;
@ -5475,6 +5494,9 @@ void ReplicatedPG::process_copy_chunk(hobject_t oid, ceph_tid_t tid, int r)
return;
}
if (cop->omap.size())
cop->results.has_omap = true;
if (r >= 0 && pool.info.require_rollback() && cop->omap.size()) {
r = -EOPNOTSUPP;
}
@ -5671,6 +5693,14 @@ void ReplicatedPG::finish_copyfrom(OpContext *ctx)
--ctx->delta_stats.num_whiteouts;
}
if (cb->results->has_omap) {
dout(10) << __func__ << " setting omap flag on " << obs.oi.soid << dendl;
obs.oi.set_flag(object_info_t::FLAG_OMAP);
} else {
dout(10) << __func__ << " clearing omap flag on " << obs.oi.soid << dendl;
obs.oi.clear_flag(object_info_t::FLAG_OMAP);
}
interval_set<uint64_t> ch;
if (obs.oi.size > 0)
ch.insert(0, obs.oi.size);
@ -5747,6 +5777,12 @@ void ReplicatedPG::finish_promote(int r, OpRequestRef op,
dout(20) << __func__ << " creating whiteout on " << soid << dendl;
osd->logger->inc(l_osd_tier_whiteout);
} else {
if (results->has_omap) {
dout(10) << __func__ << " setting omap flag on " << soid << dendl;
tctx->new_obs.oi.set_flag(object_info_t::FLAG_OMAP);
++tctx->delta_stats.num_objects_omap;
}
tctx->op_t->append(results->final_tx);
delete results->final_tx;
results->final_tx = NULL;
@ -7059,6 +7095,8 @@ void ReplicatedPG::add_object_context_to_pg_stat(ObjectContextRef obc, pg_stat_t
stat.num_objects_dirty++;
if (oi.is_whiteout())
stat.num_whiteouts++;
if (oi.is_omap())
stat.num_objects_omap++;
if (oi.soid.snap && oi.soid.snap != CEPH_NOSNAP && oi.soid.snap != CEPH_SNAPDIR) {
stat.num_object_clones++;
@ -10900,6 +10938,8 @@ bool ReplicatedPG::agent_maybe_evict(ObjectContextRef& obc)
ctx->at_version = get_next_version();
assert(ctx->new_obs.exists);
int r = _delete_oid(ctx, true);
if (obc->obs.oi.is_omap())
ctx->delta_stats.num_objects_omap--;
assert(r == 0);
finish_ctx(ctx, pg_log_entry_t::DELETE);
simple_repop_submit(repop);
@ -10922,15 +10962,45 @@ void ReplicatedPG::agent_choose_mode()
{
uint64_t divisor = pool.info.get_pg_num_divisor(info.pgid.pgid);
uint64_t num_user_objects = info.stats.stats.sum.num_objects;
// adjust (effective) user objects down based on the (max) number
// of HitSet objects, which should not count toward our total since
// they cannot be flushed.
uint64_t num_user_objects = info.stats.stats.sum.num_objects;
if (num_user_objects > pool.info.hit_set_count)
num_user_objects -= pool.info.hit_set_count;
uint64_t unflushable = pool.info.hit_set_count;
// also exclude omap objects if ec backing pool
const pg_pool_t *base_pool = get_osdmap()->get_pg_pool(pool.info.tier_of);
assert(base_pool);
if (base_pool->is_erasure())
unflushable += info.stats.stats.sum.num_objects_omap;
if (num_user_objects > unflushable)
num_user_objects -= unflushable;
else
num_user_objects = 0;
// also reduce the num_dirty by num_objects_omap
int64_t num_dirty = info.stats.stats.sum.num_objects_dirty;
if (base_pool->is_erasure()) {
if (num_dirty > info.stats.stats.sum.num_objects_omap)
num_dirty -= info.stats.stats.sum.num_objects_omap;
else
num_dirty = 0;
}
dout(10) << __func__ << ": "
<< " num_objects: " << info.stats.stats.sum.num_objects
<< " num_bytes: " << info.stats.stats.sum.num_bytes
<< " num_objects_dirty: " << info.stats.stats.sum.num_objects_dirty
<< " num_objects_omap: " << info.stats.stats.sum.num_objects_omap
<< " num_dirty: " << num_dirty
<< " num_user_objects: " << num_user_objects
<< " pool.info.target_max_bytes: " << pool.info.target_max_bytes
<< " pool.info.target_max_objects: " << pool.info.target_max_objects
<< dendl;
// get dirty, full ratios
uint64_t dirty_micro = 0;
uint64_t full_micro = 0;
@ -10938,15 +11008,15 @@ void ReplicatedPG::agent_choose_mode()
uint64_t avg_size = info.stats.stats.sum.num_bytes /
info.stats.stats.sum.num_objects;
dirty_micro =
info.stats.stats.sum.num_objects_dirty * avg_size * 1000000 /
num_dirty * avg_size * 1000000 /
(pool.info.target_max_bytes / divisor);
full_micro =
info.stats.stats.sum.num_bytes * 1000000 /
num_user_objects * avg_size * 1000000 /
(pool.info.target_max_bytes / divisor);
}
if (pool.info.target_max_objects) {
uint64_t dirty_objects_micro =
info.stats.stats.sum.num_objects_dirty * 1000000 /
num_dirty * 1000000 /
(pool.info.target_max_objects / divisor);
if (dirty_objects_micro > dirty_micro)
dirty_micro = dirty_objects_micro;
@ -11182,6 +11252,8 @@ void ReplicatedPG::_scrub(ScrubMap& scrubmap)
++stat.num_objects_dirty;
if (oi.is_whiteout())
++stat.num_whiteouts;
if (oi.is_omap())
++stat.num_objects_omap;
}
//bufferlist data;
@ -11307,6 +11379,7 @@ void ReplicatedPG::_scrub_finish()
<< scrub_cstat.sum.num_objects << "/" << info.stats.stats.sum.num_objects << " objects, "
<< scrub_cstat.sum.num_object_clones << "/" << info.stats.stats.sum.num_object_clones << " clones, "
<< scrub_cstat.sum.num_objects_dirty << "/" << info.stats.stats.sum.num_objects_dirty << " dirty, "
<< scrub_cstat.sum.num_objects_omap << "/" << info.stats.stats.sum.num_objects_omap << " omap, "
<< scrub_cstat.sum.num_bytes << "/" << info.stats.stats.sum.num_bytes << " bytes."
<< dendl;
@ -11314,6 +11387,8 @@ void ReplicatedPG::_scrub_finish()
scrub_cstat.sum.num_object_clones != info.stats.stats.sum.num_object_clones ||
(scrub_cstat.sum.num_objects_dirty != info.stats.stats.sum.num_objects_dirty &&
!info.stats.dirty_stats_invalid) ||
(scrub_cstat.sum.num_objects_omap != info.stats.stats.sum.num_objects_omap &&
!info.stats.omap_stats_invalid) ||
scrub_cstat.sum.num_whiteouts != info.stats.stats.sum.num_whiteouts ||
scrub_cstat.sum.num_bytes != info.stats.stats.sum.num_bytes) {
osd->clog.error() << info.pgid << " " << mode
@ -11321,6 +11396,7 @@ void ReplicatedPG::_scrub_finish()
<< scrub_cstat.sum.num_objects << "/" << info.stats.stats.sum.num_objects << " objects, "
<< scrub_cstat.sum.num_object_clones << "/" << info.stats.stats.sum.num_object_clones << " clones, "
<< scrub_cstat.sum.num_objects_dirty << "/" << info.stats.stats.sum.num_objects_dirty << " dirty, "
<< scrub_cstat.sum.num_objects_omap << "/" << info.stats.stats.sum.num_objects_omap << " omap, "
<< scrub_cstat.sum.num_whiteouts << "/" << info.stats.stats.sum.num_whiteouts << " whiteouts, "
<< scrub_cstat.sum.num_bytes << "/" << info.stats.stats.sum.num_bytes << " bytes.\n";
++scrubber.shallow_errors;
@ -11329,6 +11405,7 @@ void ReplicatedPG::_scrub_finish()
++scrubber.fixed;
info.stats.stats = scrub_cstat;
info.stats.dirty_stats_invalid = false;
info.stats.omap_stats_invalid = false;
publish_stats_to_osd();
share_pg_info();
}

View File

@ -123,9 +123,11 @@ public:
librados::snap_set_t snapset; ///< src snapset (if head)
bool mirror_snapset;
map<string, bufferlist> attrs; ///< src user attrs
bool has_omap;
CopyResults() : object_size(0), started_temp_obj(false),
final_tx(NULL), user_version(0),
should_requeue(false), mirror_snapset(false) {}
should_requeue(false), mirror_snapset(false),
has_omap(false) {}
};
struct CopyOp {

View File

@ -1306,11 +1306,12 @@ void object_stat_sum_t::dump(Formatter *f) const
f->dump_int("num_objects_recovered", num_objects_recovered);
f->dump_int("num_bytes_recovered", num_bytes_recovered);
f->dump_int("num_keys_recovered", num_keys_recovered);
f->dump_int("num_objects_omap", num_objects_omap);
}
void object_stat_sum_t::encode(bufferlist& bl) const
{
ENCODE_START(7, 3, bl);
ENCODE_START(8, 3, bl);
::encode(num_bytes, bl);
::encode(num_objects, bl);
::encode(num_object_clones, bl);
@ -1330,6 +1331,7 @@ void object_stat_sum_t::encode(bufferlist& bl) const
::encode(num_deep_scrub_errors, bl);
::encode(num_objects_dirty, bl);
::encode(num_whiteouts, bl);
::encode(num_objects_omap, bl);
ENCODE_FINISH(bl);
}
@ -1379,6 +1381,11 @@ void object_stat_sum_t::decode(bufferlist::iterator& bl)
num_objects_dirty = 0;
num_whiteouts = 0;
}
if (struct_v >= 8) {
::decode(num_objects_omap, bl);
} else {
num_objects_omap = 0;
}
DECODE_FINISH(bl);
}
@ -1592,12 +1599,13 @@ void pg_stat_t::encode(bufferlist &bl) const
::encode(dirty_stats_invalid, bl);
::encode(up_primary, bl);
::encode(acting_primary, bl);
::encode(omap_stats_invalid, bl);
ENCODE_FINISH(bl);
}
void pg_stat_t::decode(bufferlist::iterator &bl)
{
DECODE_START_LEGACY_COMPAT_LEN(15, 8, 8, bl);
DECODE_START_LEGACY_COMPAT_LEN(16, 8, 8, bl);
::decode(version, bl);
::decode(reported_seq, bl);
::decode(reported_epoch, bl);
@ -1691,6 +1699,13 @@ void pg_stat_t::decode(bufferlist::iterator &bl)
up_primary = up.size() ? up[0] : -1;
acting_primary = acting.size() ? acting[0] : -1;
}
if (struct_v >= 16) {
::decode(omap_stats_invalid, bl);
} else {
// if we are decoding an old encoding of this object, then the
// encoder may not have supported num_objects_omap accounting.
omap_stats_invalid = true;
}
DECODE_FINISH(bl);
}

View File

@ -1124,6 +1124,7 @@ struct object_stat_sum_t {
int64_t num_keys_recovered;
int64_t num_objects_dirty;
int64_t num_whiteouts;
int64_t num_objects_omap;
object_stat_sum_t()
: num_bytes(0),
@ -1136,7 +1137,8 @@ struct object_stat_sum_t {
num_bytes_recovered(0),
num_keys_recovered(0),
num_objects_dirty(0),
num_whiteouts(0)
num_whiteouts(0),
num_objects_omap(0)
{}
void floor(int64_t f) {
@ -1290,6 +1292,7 @@ struct pg_stat_t {
/// true if num_objects_dirty is not accurate (because it was not
/// maintained starting from pool creation)
bool dirty_stats_invalid;
bool omap_stats_invalid;
/// up, acting primaries
int up_primary;
@ -1305,6 +1308,7 @@ struct pg_stat_t {
log_size(0), ondisk_log_size(0),
mapping_epoch(0),
dirty_stats_invalid(false),
omap_stats_invalid(false),
up_primary(-1),
acting_primary(-1)
{ }
@ -2553,6 +2557,9 @@ struct object_info_t {
/// Report whether FLAG_DIRTY is currently set on this object_info_t,
/// as determined by test_flag().
bool is_dirty() const {
  const bool dirty_flag_set = test_flag(FLAG_DIRTY);
  return dirty_flag_set;
}
/// Report whether FLAG_OMAP is currently set on this object_info_t,
/// as determined by test_flag().
bool is_omap() const {
  const bool omap_flag_set = test_flag(FLAG_OMAP);
  return omap_flag_set;
}
void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& bl);