osd: add refcounting chunks for snapshotted manifest object

To prevent removing a manifest object in use (has the reference count), the chunk maps in clone is
introduced.

Signed-off-by: Myoungwon Oh <ohmyoungwon@gmail.com>
This commit is contained in:
myoungwon oh 2019-07-25 01:37:37 +09:00
parent daccc7a0af
commit 51719f6895
4 changed files with 192 additions and 68 deletions

View File

@ -2588,6 +2588,7 @@ int PrimaryLogPG::do_manifest_flush(OpRequestRef op, ObjectContextRef obc, Flush
}();
bufferlist in;
if (fp_oid != tgt_soid.oid) {
// TODO: don't retain reference to dirty extents
// decrement old chunk's reference count
ObjectOperation dec_op;
cls_cas_chunk_put_ref_op put_call;
@ -3457,24 +3458,95 @@ void PrimaryLogPG::cancel_manifest_ops(bool requeue, vector<ceph_tid_t> *tids)
}
}
void PrimaryLogPG::refcount_manifest(ObjectContextRef obc, object_locator_t oloc, hobject_t soid,
SnapContext snapc, bool get, RefCountCallback *cb, uint64_t offset)
void PrimaryLogPG::dec_refcount_non_intersection(ObjectContextRef obc, const object_info_t& oi,
set<uint64_t> intersection_set)
{
for (auto c : oi.manifest.chunk_map) {
auto iter = intersection_set.find(c.first);
if (intersection_set.end() == iter) {
dout(10) << __func__ << " need dereference. " << " offset: " << c.first
<< " length: " << c.second.length << " oid: " << c.second.oid << dendl;
object_locator_t target_oloc(c.second.oid);
refcount_manifest(obc, obc->obs.oi.soid, target_oloc, c.second.oid,
SnapContext(), refcount_t::DECREMENT_REF, NULL);
}
}
}
void PrimaryLogPG::dec_all_refcount_head_manifest(object_info_t& oi, OpContext* ctx)
{
SnapSetContext* ssc = ctx->obc->ssc;
ceph_assert(oi.has_manifest());
ceph_assert(oi.soid.is_head());
// has snapshot
if (ssc && ssc->snapset.clones.size() > 0) {
dout(15) << __func__ << " has snapset " << dendl;
interval_set<uint64_t> &newest_overlap =
ssc->snapset.clone_overlap.rbegin()->second;
set<uint64_t> refs;
ceph_assert(oi.manifest.is_chunked());
ceph_assert(!oi.manifest.is_redirect());
hobject_t clone_oid = oi.soid;
clone_oid.snap = ssc->snapset.clone_overlap.rbegin()->first;
ObjectContextRef cobc = get_object_context(clone_oid, false, NULL);
if (!cobc) {
dout(0) << __func__ << ": Can not find clone obc " << clone_oid << dendl;;
return;
}
object_info_t& coi = cobc->obs.oi;
oi.manifest.build_intersection_set(coi.manifest.chunk_map, refs, &newest_overlap);
ctx->register_on_commit(
[oi, ctx, this, refs](){
dec_refcount_non_intersection(ctx->obc, oi, refs);
});
return;
}
// no snapshot
if ((oi.flags & object_info_t::FLAG_REDIRECT_HAS_REFERENCE) && oi.manifest.is_redirect()) {
ctx->register_on_commit(
[oi, ctx, this](){
object_locator_t target_oloc(oi.manifest.redirect_target);
refcount_manifest(ctx->obc, oi.soid, target_oloc, oi.manifest.redirect_target,
SnapContext(), refcount_t::DECREMENT_REF, NULL);
});
} else if (oi.manifest.is_chunked()) {
ctx->register_on_commit(
[oi, ctx, this](){
for (auto p : oi.manifest.chunk_map) {
if (p.second.has_reference()) {
object_locator_t target_oloc(p.second.oid);
refcount_manifest(ctx->obc, oi.soid, target_oloc, p.second.oid,
SnapContext(), refcount_t::DECREMENT_REF, NULL);
}
}
});
} else {
ceph_abort_msg("unrecognized manifest type");
}
}
void PrimaryLogPG::refcount_manifest(ObjectContextRef obc, hobject_t src_soid, object_locator_t oloc,
hobject_t tgt_soid, SnapContext snapc, refcount_t type, RefCountCallback* cb)
{
unsigned flags = CEPH_OSD_FLAG_IGNORE_CACHE | CEPH_OSD_FLAG_IGNORE_OVERLAY |
CEPH_OSD_FLAG_RWORDERED;
dout(10) << __func__ << " Start refcount for " << soid << dendl;
dout(10) << __func__ << " Start refcount from " << src_soid
<< " to " << tgt_soid << dendl;
ObjectOperation obj_op;
bufferlist in;
if (get) {
if (type == refcount_t::INCREMENT_REF) {
cls_cas_chunk_get_ref_op call;
call.source = obc->obs.oi.soid;
call.source = src_soid;
::encode(call, in);
obj_op.call("cas", "chunk_get_ref", in);
} else {
} else if (type == refcount_t::DECREMENT_REF) {
cls_cas_chunk_put_ref_op call;
call.source = obc->obs.oi.soid;
call.source = src_soid;
::encode(call, in);
obj_op.call("cas", "chunk_put_ref", in);
}
@ -3487,7 +3559,7 @@ void PrimaryLogPG::refcount_manifest(ObjectContextRef obc, object_locator_t oloc
}
auto tid = osd->objecter->mutate(
soid.oid, oloc, obj_op, snapc,
tgt_soid.oid, oloc, obj_op, snapc,
ceph::real_clock::from_ceph_timespec(obc->obs.oi.mtime),
flags, c);
if (cb) {
@ -4449,8 +4521,31 @@ int PrimaryLogPG::trim_object(
ctx->delta_stats.num_object_clones--;
if (coi.is_cache_pinned())
ctx->delta_stats.num_objects_pinned--;
if (coi.has_manifest())
if (coi.has_manifest()) {
set<uint64_t> refs;
for (auto p : snapset.clones) {
hobject_t clone_oid = coid;
if (clone_oid.snap == p) {
continue;
}
clone_oid.snap = p;
ObjectContextRef cobc = get_object_context(clone_oid, false, NULL);
if (!cobc) {
close_op_ctx(ctx.release());
dout(0) << __func__ << ": Can not find obc " << coid << dendl;;
return -ENOENT;
}
// check if the references is still used
object_info_t& oi = cobc->obs.oi;
if (oi.has_manifest()) {
coi.manifest.build_intersection_set(oi.manifest.chunk_map, refs, NULL);
}
}
dec_refcount_non_intersection(head_obc, coi, refs);
ctx->delta_stats.num_objects_manifest--;
}
obc->obs.exists = false;
snapset.clones.erase(p);
@ -4549,8 +4644,10 @@ int PrimaryLogPG::trim_object(
if (oi.is_cache_pinned()) {
ctx->delta_stats.num_objects_pinned--;
}
if (coi.has_manifest())
if (oi.has_manifest()) {
ctx->delta_stats.num_objects_manifest--;
dec_all_refcount_head_manifest(oi, ctx.get());
}
head_obc->obs.exists = false;
head_obc->obs.oi = object_info_t(head_oid);
t->remove(head_oid);
@ -6637,27 +6734,6 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
result = 0;
tracepoint(osd, do_osd_op_pre_delete, soid.oid.name.c_str(), soid.snap.val);
{
if (oi.has_manifest()) {
if ((oi.flags & object_info_t::FLAG_REDIRECT_HAS_REFERENCE) && oi.manifest.is_redirect()) {
ctx->register_on_commit(
[oi, ctx, this](){
object_locator_t target_oloc(oi.manifest.redirect_target);
refcount_manifest(ctx->obc, target_oloc, oi.manifest.redirect_target,
SnapContext(), false, NULL, 0);
});
} else if (oi.manifest.is_chunked()) {
ctx->register_on_commit(
[oi, ctx, this](){
for (auto p : oi.manifest.chunk_map) {
if (p.second.has_reference()) {
object_locator_t target_oloc(p.second.oid);
refcount_manifest(ctx->obc, target_oloc, p.second.oid,
SnapContext(), false, NULL, p.first);
}
}
});
}
}
result = _delete_oid(ctx, false, ctx->ignore_cache);
}
break;
@ -6847,8 +6923,8 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
ctx->op_finishers[ctx->current_osd_subop_num].reset(
new SetManifestFinisher(osd_op));
RefCountCallback *fin = new RefCountCallback(ctx, osd_op);
refcount_manifest(ctx->obc, target_oloc, target, SnapContext(),
true, fin, 0);
refcount_manifest(ctx->obc, ctx->obc->obs.oi.soid, target_oloc, target,
SnapContext(), refcount_t::INCREMENT_REF, fin);
result = -EINPROGRESS;
} else {
// finish
@ -6918,6 +6994,10 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
result = -EOPNOTSUPP;
break;
}
if (soid.has_snapset()) {
result = -EOPNOTSUPP;
break;
}
object_locator_t tgt_oloc;
uint64_t src_offset, src_length, tgt_offset;
@ -6972,8 +7052,8 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
ctx->op_finishers[ctx->current_osd_subop_num].reset(
new SetManifestFinisher(osd_op));
RefCountCallback *fin = new RefCountCallback(ctx, osd_op);
refcount_manifest(ctx->obc, tgt_oloc, target, SnapContext(),
true, fin, src_offset);
refcount_manifest(ctx->obc, ctx->obc->obs.oi.soid, tgt_oloc, target,
SnapContext(), refcount_t::INCREMENT_REF, fin);
result = -EINPROGRESS;
} else {
if (op_finisher) {
@ -6985,7 +7065,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
chunk_info.set_flag(chunk_info_t::FLAG_MISSING);
chunk_info.oid = target;
chunk_info.offset = tgt_offset;
chunk_info.length= src_length;
chunk_info.length = src_length;
oi.manifest.chunk_map[src_offset] = chunk_info;
if (!oi.has_manifest() && !oi.manifest.is_chunked())
ctx->delta_stats.num_objects_manifest++;
@ -7037,15 +7117,14 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
if (obs.oi.manifest.is_chunked()) {
src_hoid = obs.oi.soid;
cb = new PromoteManifestCallback(ctx->obc, this, ctx);
} else if (obs.oi.manifest.is_redirect()) {
object_locator_t src_oloc(obs.oi.manifest.redirect_target);
my_oloc = src_oloc;
src_hoid = obs.oi.manifest.redirect_target;
cb = new PromoteManifestCallback(ctx->obc, this, ctx);
} else {
ceph_abort_msg("unrecognized manifest type");
}
cb = new PromoteManifestCallback(ctx->obc, this, ctx);
ctx->op_finishers[ctx->current_osd_subop_num].reset(
new PromoteFinisher(cb));
unsigned flags = CEPH_OSD_COPY_FROM_FLAG_IGNORE_OVERLAY |
@ -7130,29 +7209,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
break;
}
if (oi.manifest.is_redirect()) {
if ((oi.flags & object_info_t::FLAG_REDIRECT_HAS_REFERENCE)) {
ctx->register_on_commit(
[oi, ctx, this](){
object_locator_t target_oloc(oi.manifest.redirect_target);
refcount_manifest(ctx->obc, target_oloc, oi.manifest.redirect_target,
SnapContext(), false, NULL, 0);
});
}
} else if (oi.manifest.is_chunked()) {
ctx->register_on_commit(
[oi, ctx, this](){
for (auto p : oi.manifest.chunk_map) {
if (p.second.flags & chunk_info_t::FLAG_HAS_REFERENCE) {
object_locator_t target_oloc(p.second.oid);
refcount_manifest(ctx->obc, target_oloc, p.second.oid,
SnapContext(), false, NULL, p.first);
}
}
});
} else {
ceph_abort_msg("unrecognized manifest type");
}
dec_all_refcount_head_manifest(oi, ctx);
oi.clear_flag(object_info_t::FLAG_MANIFEST);
oi.manifest = object_manifest_t();
@ -7935,6 +7992,10 @@ inline int PrimaryLogPG::_delete_oid(
dout(20) << __func__ << " setting whiteout on " << soid << dendl;
oi.set_flag(object_info_t::FLAG_WHITEOUT);
ctx->delta_stats.num_whiteouts++;
if (oi.has_manifest()) {
ctx->delta_stats.num_objects_manifest--;
dec_all_refcount_head_manifest(oi, ctx);
}
t->create(soid);
osd->logger->inc(l_osd_tier_whiteout);
return 0;
@ -7954,6 +8015,7 @@ inline int PrimaryLogPG::_delete_oid(
}
if (oi.has_manifest()) {
ctx->delta_stats.num_objects_manifest--;
dec_all_refcount_head_manifest(oi, ctx);
}
obs.exists = false;
return 0;
@ -8205,6 +8267,20 @@ void PrimaryLogPG::make_writeable(OpContext *ctx)
if (pool.info.is_erasure())
ctx->clone_obc->attr_cache = ctx->obc->attr_cache;
snap_oi = &ctx->clone_obc->obs.oi;
if (ctx->obc->obs.oi.has_manifest()) {
if ((ctx->obc->obs.oi.flags & object_info_t::FLAG_REDIRECT_HAS_REFERENCE) &&
ctx->obc->obs.oi.manifest.is_redirect()) {
snap_oi->set_flag(object_info_t::FLAG_MANIFEST);
snap_oi->manifest.type = object_manifest_t::TYPE_REDIRECT;
snap_oi->manifest.redirect_target = ctx->obc->obs.oi.manifest.redirect_target;
} else if (ctx->obc->obs.oi.manifest.is_chunked()) {
snap_oi->set_flag(object_info_t::FLAG_MANIFEST);
snap_oi->manifest.type = object_manifest_t::TYPE_CHUNKED;
snap_oi->manifest.chunk_map = ctx->obc->obs.oi.manifest.chunk_map;
} else {
ceph_abort_msg("unrecognized manifest type");
}
}
bool got = ctx->lock_manager.get_write_greedy(
coid,
ctx->clone_obc,
@ -9135,10 +9211,6 @@ void PrimaryLogPG::_copy_some_manifest(ObjectContextRef obc, CopyOpRef cop, uint
ObjectOperation op;
op.dup(sub_cop->chunk_ops);
dout(20) << __func__ << " tgt_oid: " << soid.oid << " tgt_offset: "
<< manifest->chunk_map[iter->first].offset
<< " length: " << length << " pool id: " << oloc.pool << dendl;
if (cop->results.user_version) {
op.assert_version(cop->results.user_version);
} else {
@ -9161,6 +9233,12 @@ void PrimaryLogPG::_copy_some_manifest(ObjectContextRef obc, CopyOpRef cop, uint
sub_cop->results.user_version ? NULL : &sub_cop->results.user_version);
fin->tid = tid;
sub_cop->objecter_tid = tid;
dout(20) << __func__ << " tgt_oid: " << soid.oid << " tgt_offset: "
<< manifest->chunk_map[iter->first].offset
<< " length: " << length << " pool id: " << oloc.pool
<< " tid: " << tid << dendl;
if (last_offset < iter->first) {
break;
}

View File

@ -1476,6 +1476,10 @@ protected:
friend struct C_ProxyWrite_Commit;
// -- chunkop --
enum class refcount_t {
INCREMENT_REF,
DECREMENT_REF,
};
void do_proxy_chunked_op(OpRequestRef op, const hobject_t& missing_oid,
ObjectContextRef obc, bool write_ordered);
void do_proxy_chunked_read(OpRequestRef op, ObjectContextRef obc, int op_index,
@ -1494,9 +1498,12 @@ protected:
uint64_t last_offset);
void handle_manifest_flush(hobject_t oid, ceph_tid_t tid, int r,
uint64_t offset, uint64_t last_offset, epoch_t lpr);
void cancel_manifest_ops(bool requeue, std::vector<ceph_tid_t> *tids);
void refcount_manifest(ObjectContextRef obc, object_locator_t oloc, hobject_t soid,
SnapContext snapc, bool get, RefCountCallback *cb, uint64_t offset);
void cancel_manifest_ops(bool requeue, vector<ceph_tid_t> *tids);
void refcount_manifest(ObjectContextRef obc, hobject_t src_soid, object_locator_t oloc,
hobject_t tgt_soid, SnapContext snapc, refcount_t type, RefCountCallback* cb);
void dec_all_refcount_head_manifest(object_info_t& oi, OpContext* ctx);
void dec_refcount_non_intersection(ObjectContextRef obc, const object_info_t& oi,
set<uint64_t> intersection_set);
friend struct C_ProxyChunkRead;
friend class PromoteManifestCallback;

View File

@ -5810,6 +5810,23 @@ void chunk_info_t::dump(Formatter *f) const
f->dump_unsigned("flags", flags);
}
bool chunk_info_t::operator==(const chunk_info_t& cit) const
{
if (has_fingerprint()) {
if (oid.oid.name == cit.oid.oid.name) {
return true;
}
} else {
if (offset == cit.offset && length == cit.length &&
oid.oid.name == cit.oid.oid.name) {
return true;
}
}
return false;
}
ostream& operator<<(ostream& out, const chunk_info_t& ci)
{
return out << "(len: " << ci.length << " oid: " << ci.oid
@ -5819,6 +5836,25 @@ ostream& operator<<(ostream& out, const chunk_info_t& ci)
// -- object_manifest_t --
void object_manifest_t::build_intersection_set(std::map<uint64_t, chunk_info_t>& map,
set<uint64_t>& intersection, interval_set<uint64_t>* check_intersection)
{
for (auto p : chunk_map) {
if (check_intersection) {
if (check_intersection->intersects(p.first, p.second.length)) {
intersection.insert(p.first);
continue;
}
}
for (auto c : map) {
if (p.second == c.second) {
intersection.insert(p.first);
}
}
}
}
void object_manifest_t::encode(ceph::buffer::list& bl) const
{
ENCODE_START(1, 1, bl);

View File

@ -5485,6 +5485,7 @@ struct chunk_info_t {
void decode(ceph::buffer::list::const_iterator &bl);
void dump(ceph::Formatter *f) const;
friend std::ostream& operator<<(std::ostream& out, const chunk_info_t& ci);
bool operator==(const chunk_info_t& cit) const;
};
WRITE_CLASS_ENCODER(chunk_info_t)
std::ostream& operator<<(std::ostream& out, const chunk_info_t& ci);
@ -5529,6 +5530,8 @@ struct object_manifest_t {
redirect_target = hobject_t();
chunk_map.clear();
}
void build_intersection_set(std::map<uint64_t, chunk_info_t>& map,
set<uint64_t>& intersection, interval_set<uint64_t>* check_intersection);
static void generate_test_instances(std::list<object_manifest_t*>& o);
void encode(ceph::buffer::list &bl) const;
void decode(ceph::buffer::list::const_iterator &bl);