From fea77682a6cf9c7571573bc9791c03373d1d976d Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 21 Feb 2013 13:28:47 -0800 Subject: [PATCH 1/6] osdc/Objecter: unwatch is a mutation, not a read This was causing librados to unblock after the ACK on unwatch, which meant that librbd users raced and tried to delete the image before the unwatch change was committed..and got EBUSY. See #3958. The watch operation has a similar problem. Signed-off-by: Sage Weil --- src/librados/IoCtxImpl.cc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/librados/IoCtxImpl.cc b/src/librados/IoCtxImpl.cc index 800e27f90b6..5f09dd47248 100644 --- a/src/librados/IoCtxImpl.cc +++ b/src/librados/IoCtxImpl.cc @@ -1392,7 +1392,7 @@ void librados::IoCtxImpl::set_sync_op_version(eversion_t& ver) int librados::IoCtxImpl::watch(const object_t& oid, uint64_t ver, uint64_t *cookie, librados::WatchCtx *ctx) { - ::ObjectOperation rd; + ::ObjectOperation wr; Mutex mylock("IoCtxImpl::watch::mylock"); Cond cond; bool done; @@ -1404,11 +1404,11 @@ int librados::IoCtxImpl::watch(const object_t& oid, uint64_t ver, WatchContext *wc = new WatchContext(this, oid, ctx); client->register_watcher(wc, cookie); - prepare_assert_ops(&rd); - rd.watch(*cookie, ver, 1); + prepare_assert_ops(&wr); + wr.watch(*cookie, ver, 1); bufferlist bl; wc->linger_id = objecter->linger( - oid, oloc, rd, snap_seq, bl, NULL, + oid, oloc, wr, snap_seq, bl, NULL, CEPH_OSD_FLAG_WRITE, NULL, onfinish, &objver); lock->Unlock(); @@ -1452,16 +1452,16 @@ int librados::IoCtxImpl::unwatch(const object_t& oid, uint64_t cookie) Cond cond; bool done; int r; - Context *onack = new C_SafeCond(&mylock, &cond, &done, &r); + Context *oncommit = new C_SafeCond(&mylock, &cond, &done, &r); eversion_t ver; lock->Lock(); client->unregister_watcher(cookie); - ::ObjectOperation rd; - prepare_assert_ops(&rd); - rd.watch(cookie, 0, 0); - objecter->read(oid, oloc, rd, snap_seq, &outbl, 0, onack, &ver); + ::ObjectOperation wr; + prepare_assert_ops(&wr); + wr.watch(cookie, 0, 0); + objecter->mutate(oid, oloc, wr, snapc, ceph_clock_now(client->cct), 0, NULL, oncommit, &ver); lock->Unlock(); mylock.Lock(); From de4fa95f03b99a55b5713911c364d7e2a4588679 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 21 Feb 2013 15:31:08 -0800 Subject: [PATCH 2/6] osd: make watch OSDOp print sanely Signed-off-by: Sage Weil --- src/osd/osd_types.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index cd75bedfed9..7a059b7b176 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -2974,6 +2974,10 @@ ostream& operator<<(ostream& out, const OSDOp& op) case CEPH_OSD_OP_ROLLBACK: out << " " << snapid_t(op.op.snap.snapid); break; + case CEPH_OSD_OP_WATCH: + out << (op.op.watch.flag ? " add":" remove") + << " cookie " << op.op.watch.cookie << " ver " << op.op.watch.ver; + break; default: out << " " << op.op.extent.offset << "~" << op.op.extent.length; if (op.op.extent.truncate_seq) From 6c08c7c1c6d354d090eb16df279d4b63ca7a355a Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 21 Feb 2013 15:44:19 -0800 Subject: [PATCH 3/6] objecter: separate out linger_read() and linger_mutate() A watch is a mutation, while a notify is a read. The mutations need to pass in a proper snap context to be fully correct. Also, make the WRITE flag implicit so the caller doesn't need to pass it in. Signed-off-by: Sage Weil --- src/librados/IoCtxImpl.cc | 12 +++++------ src/osdc/Objecter.cc | 45 +++++++++++++++++++++++++++++++++------ src/osdc/Objecter.h | 19 ++++++++++++----- 3 files changed, 59 insertions(+), 17 deletions(-) diff --git a/src/librados/IoCtxImpl.cc b/src/librados/IoCtxImpl.cc index 5f09dd47248..626999fa75d 100644 --- a/src/librados/IoCtxImpl.cc +++ b/src/librados/IoCtxImpl.cc @@ -1407,10 +1407,10 @@ int librados::IoCtxImpl::watch(const object_t& oid, uint64_t ver, prepare_assert_ops(&wr); wr.watch(*cookie, ver, 1); bufferlist bl; - wc->linger_id = objecter->linger( - oid, oloc, wr, snap_seq, bl, NULL, - CEPH_OSD_FLAG_WRITE, - NULL, onfinish, &objver); + wc->linger_id = objecter->linger_mutate(oid, oloc, wr, + snapc, ceph_clock_now(NULL), bl, + 0, + NULL, onfinish, &objver); lock->Unlock(); mylock.Lock(); @@ -1500,8 +1500,8 @@ int librados::IoCtxImpl::notify(const object_t& oid, uint64_t ver, bufferlist& b ::encode(timeout, inbl); ::encode(bl, inbl); rd.notify(cookie, ver, inbl); - wc->linger_id = objecter->linger(oid, oloc, rd, snap_seq, inbl, NULL, - 0, onack, NULL, &objver); + wc->linger_id = objecter->linger_read(oid, oloc, rd, snap_seq, inbl, NULL, 0, + onack, &objver); lock->Unlock(); mylock.Lock(); diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index 21d9df7f3d6..2cb6e555e77 100644 --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -265,6 +265,8 @@ void Objecter::send_linger(LingerOp *info) onack, oncommit, info->pobjver); o->snapid = info->snap; + o->snapc = info->snapc; + o->mtime = info->mtime; // do not resend this; we will send a new op to reregister o->should_resend = false; @@ -335,11 +337,43 @@ void Objecter::unregister_linger(uint64_t linger_id) } } -tid_t Objecter::linger(const object_t& oid, const object_locator_t& oloc, - ObjectOperation& op, - snapid_t snap, bufferlist& inbl, bufferlist *poutbl, int flags, - Context *onack, Context *onfinish, - eversion_t *objver) +tid_t Objecter::linger_mutate(const object_t& oid, const object_locator_t& oloc, + ObjectOperation& op, + const SnapContext& snapc, utime_t mtime, + bufferlist& inbl, int flags, + Context *onack, Context *oncommit, + eversion_t *objver) +{ + LingerOp *info = new LingerOp; + info->oid = oid; + info->oloc = oloc; + if (info->oloc.key == oid) + info->oloc.key.clear(); + info->snapc = snapc; + info->mtime = mtime; + info->flags = flags | CEPH_OSD_FLAG_WRITE; + info->ops = op.ops; + info->inbl = inbl; + info->poutbl = NULL; + info->pobjver = objver; + info->on_reg_ack = onack; + info->on_reg_commit = oncommit; + + info->linger_id = ++max_linger_id; + linger_ops[info->linger_id] = info; + + logger->set(l_osdc_linger_active, linger_ops.size()); + + send_linger(info); + + return info->linger_id; +} + +tid_t Objecter::linger_read(const object_t& oid, const object_locator_t& oloc, + ObjectOperation& op, + snapid_t snap, bufferlist& inbl, bufferlist *poutbl, int flags, + Context *onfinish, + eversion_t *objver) { LingerOp *info = new LingerOp; info->oid = oid; @@ -352,7 +386,6 @@ tid_t Objecter::linger(const object_t& oid, const object_locator_t& oloc, info->inbl = inbl; info->poutbl = poutbl; info->pobjver = objver; - info->on_reg_ack = onack; info->on_reg_commit = onfinish; info->linger_id = ++max_linger_id; diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h index 9ff02f6ab93..a31cac2a03c 100644 --- a/src/osdc/Objecter.h +++ b/src/osdc/Objecter.h @@ -803,6 +803,9 @@ public: vector acting; snapid_t snap; + SnapContext snapc; + utime_t mtime; + int flags; vector ops; bufferlist inbl; @@ -1069,11 +1072,17 @@ private: o->out_rval.swap(op.out_rval); return op_submit(o); } - tid_t linger(const object_t& oid, const object_locator_t& oloc, - ObjectOperation& op, - snapid_t snap, bufferlist& inbl, bufferlist *poutbl, int flags, - Context *onack, Context *onfinish, - eversion_t *objver); + tid_t linger_mutate(const object_t& oid, const object_locator_t& oloc, + ObjectOperation& op, + const SnapContext& snapc, utime_t mtime, + bufferlist& inbl, int flags, + Context *onack, Context *onfinish, + eversion_t *objver); + tid_t linger_read(const object_t& oid, const object_locator_t& oloc, + ObjectOperation& op, + snapid_t snap, bufferlist& inbl, bufferlist *poutbl, int flags, + Context *onack, + eversion_t *objver); void unregister_linger(uint64_t linger_id); /** From 94ae72546507799667197fd941633bb1fd2520c2 Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Thu, 21 Feb 2013 17:39:19 -0800 Subject: [PATCH 4/6] test_librbd_fsx: fix image closing Always close the image we opened in check_clone(), and check the return code of the rbd_close() called before cloning. Refs: #3958 Signed-off-by: Josh Durgin --- src/test/librbd/fsx.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/test/librbd/fsx.c b/src/test/librbd/fsx.c index d884173b0cf..725c20886fa 100644 --- a/src/test/librbd/fsx.c +++ b/src/test/librbd/fsx.c @@ -845,7 +845,12 @@ do_clone() simple_err("do_clone: rbd clone", ret); exit(165); } - rbd_close(image); + + if ((ret = rbd_close(image)) < 0) { + simple_err("do_clone: rbd close", ret); + exit(174); + } + if ((ret = rbd_open(ioctx, imagename, &image, NULL)) < 0) { simple_err("do_clone: rbd open", ret); exit(166); @@ -896,6 +901,10 @@ check_clone(int clonenum) exit(171); } close(fd); + if ((ret = rbd_close(cur_image)) < 0) { + simple_err("check_clone: rbd close", ret); + exit(174); + } check_buffers(good_buf, temp_buf, 0, file_info.st_size); unlink(filename); From 15bb9ba9fbb4185708399ed6deee070d888ef6d2 Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Thu, 21 Feb 2013 23:22:59 -0800 Subject: [PATCH 5/6] objecter: initialize linger op snapid Since they are write ops now, it must be CEPH_NOSNAP or the OSD returns EINVAL. Signed-off-by: Josh Durgin --- src/osdc/Objecter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h index a31cac2a03c..4105e6cc74b 100644 --- a/src/osdc/Objecter.h +++ b/src/osdc/Objecter.h @@ -821,7 +821,8 @@ public: tid_t register_tid; epoch_t map_dne_bound; - LingerOp() : linger_id(0), flags(0), poutbl(NULL), pobjver(NULL), + LingerOp() : linger_id(0), snap(CEPH_NOSNAP), flags(0), + poutbl(NULL), pobjver(NULL), registered(false), on_reg_ack(NULL), on_reg_commit(NULL), session(NULL), session_item(this), From 3105034067dd4afba6ebaa9e30c6782854c9d1ad Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Thu, 21 Feb 2013 23:31:21 -0800 Subject: [PATCH 6/6] objecter: don't resend linger ops unnecessarily recalc_linger_op_target() was checking and then setting linger_op->pgid and linger_op->active, but these were only set by recalc_linger_op_target(). This was only called by handle_osd_map(), so the first osdmap after a watch was established would cause a resend of the watch. Analogous to the normal Op, set this information by calling recalc_linger_op_target in send_linger(). Signed-off-by: Josh Durgin --- src/osdc/Objecter.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index 2cb6e555e77..4bd34b5ef32 100644 --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -287,6 +287,9 @@ void Objecter::send_linger(LingerOp *info) info->register_tid = _op_submit(o); } else { // first send + // populate info->pgid and info->acting so we + // don't resend the linger op on the next osdmap update + recalc_linger_op_target(info); info->register_tid = op_submit(o); }