diff --git a/src/librbd/AioRequest.cc b/src/librbd/AioRequest.cc index 35ba2e6bd89..ceacc6f0e8a 100644 --- a/src/librbd/AioRequest.cc +++ b/src/librbd/AioRequest.cc @@ -246,9 +246,6 @@ namespace librbd { m_state(LIBRBD_AIO_WRITE_FLAT), m_snap_seq(snapc.seq.val), m_entire_object(NULL) { - m_io_ctx.dup(ictx->data_ctx); - m_io_ctx.snap_set_read(CEPH_NOSNAP); - m_object_image_extents = objectx; m_parent_overlap = object_overlap; } diff --git a/src/librbd/AioRequest.h b/src/librbd/AioRequest.h index a5da41edbef..ae8c855038a 100644 --- a/src/librbd/AioRequest.h +++ b/src/librbd/AioRequest.h @@ -192,8 +192,6 @@ namespace librbd { } private: - librados::IoCtx m_io_ctx; - bool send_pre(); bool send_post(); void send_write(); diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc index 116cb92aa8a..9fbc04ca489 100644 --- a/src/librbd/ImageCtx.cc +++ b/src/librbd/ImageCtx.cc @@ -299,7 +299,7 @@ namespace librbd { data_ctx.snap_set_read(snap_id); if (object_map != NULL) { - object_map->refresh(); + object_map->refresh(in_snap_id); } return 0; } @@ -314,7 +314,7 @@ namespace librbd { data_ctx.snap_set_read(snap_id); if (object_map != NULL) { - object_map->refresh(); + object_map->refresh(CEPH_NOSNAP); } } diff --git a/src/librbd/ImageWatcher.cc b/src/librbd/ImageWatcher.cc index 60b4756affa..8c88434f611 100644 --- a/src/librbd/ImageWatcher.cc +++ b/src/librbd/ImageWatcher.cc @@ -348,7 +348,7 @@ int ImageWatcher::lock() { unlock(); return r; } - m_image_ctx.object_map->refresh(); + m_image_ctx.object_map->refresh(CEPH_NOSNAP); } bufferlist bl; @@ -396,9 +396,8 @@ void ImageWatcher::release_lock() RWLock::WLocker l(m_image_ctx.owner_lock); { RWLock::WLocker l2(m_image_ctx.md_lock); - m_image_ctx.flush_cache(); + librbd::_flush(&m_image_ctx); } - m_image_ctx.data_ctx.aio_flush(); unlock(); } diff --git a/src/librbd/ObjectMap.cc b/src/librbd/ObjectMap.cc index a3eec40a995..3bdf232a43e 100644 --- a/src/librbd/ObjectMap.cc +++ b/src/librbd/ObjectMap.cc @@ -8,6 +8,7 @@ #include
"common/errno.h" #include "include/stringify.h" #include "cls/lock/cls_lock_client.h" +#include <sstream> #define dout_subsys ceph_subsys_rbd #undef dout_prefix @@ -20,6 +21,18 @@ ObjectMap::ObjectMap(ImageCtx &image_ctx) { } +std::string ObjectMap::object_map_name(const std::string &image_id, + uint64_t snap_id) { + std::string oid(RBD_OBJECT_MAP_PREFIX + image_id); + if (snap_id != CEPH_NOSNAP) { + std::stringstream snap_suffix; + snap_suffix << "." << std::setfill('0') << std::setw(16) << std::hex + << snap_id; + oid += snap_suffix.str(); + } + return oid; +} + int ObjectMap::lock() { if ((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) == 0) { @@ -29,10 +42,10 @@ int ObjectMap::lock() int r; bool broke_lock = false; CephContext *cct = m_image_ctx.cct; + std::string oid(object_map_name(m_image_ctx.id, CEPH_NOSNAP)); while (true) { ldout(cct, 10) << &m_image_ctx << " locking object map" << dendl; - r = rados::cls::lock::lock(&m_image_ctx.md_ctx, - object_map_name(m_image_ctx.id), + r = rados::cls::lock::lock(&m_image_ctx.md_ctx, oid, RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", "", "", utime_t(), 0); if (r == 0) { @@ -47,8 +60,7 @@ int ObjectMap::lock() lockers_t lockers; ClsLockType lock_type; std::string lock_tag; - int r = rados::cls::lock::get_lock_info(&m_image_ctx.md_ctx, - object_map_name(m_image_ctx.id), + int r = rados::cls::lock::get_lock_info(&m_image_ctx.md_ctx, oid, RBD_LOCK_NAME, &lockers, &lock_type, &lock_tag); if (r == -ENOENT) { @@ -63,8 +75,7 @@ int ObjectMap::lock() for (lockers_t::iterator it = lockers.begin(); it != lockers.end(); ++it) { const rados::cls::lock::locker_id_t &locker = it->first; - r = rados::cls::lock::break_lock(&m_image_ctx.md_ctx, - object_map_name(m_image_ctx.id), + r = rados::cls::lock::break_lock(&m_image_ctx.md_ctx, oid, RBD_LOCK_NAME, locker.cookie, locker.locker); if (r < 0 && r != -ENOENT) { @@ -89,8 +100,8 @@ int ObjectMap::unlock() ldout(m_image_ctx.cct, 10) << &m_image_ctx << " unlocking object map" << dendl; - int r =
rados::cls::lock::unlock(&m_image_ctx.md_ctx, - object_map_name(m_image_ctx.id), + std::string oid = object_map_name(m_image_ctx.id, CEPH_NOSNAP); + int r = rados::cls::lock::unlock(&m_image_ctx.md_ctx, oid, RBD_LOCK_NAME, ""); if (r < 0 && r != -ENOENT) { lderr(m_image_ctx.cct) << "failed to release object map lock: " @@ -108,52 +119,113 @@ bool ObjectMap::object_may_exist(uint64_t object_no) const } RWLock::RLocker l(m_image_ctx.object_map_lock); - assert(object_no < object_map.size()); + assert(object_no < m_object_map.size()); - bool exists = (object_map[object_no] == OBJECT_EXISTS || - object_map[object_no] == OBJECT_PENDING); + bool exists = (m_object_map[object_no] == OBJECT_EXISTS || + m_object_map[object_no] == OBJECT_PENDING); ldout(m_image_ctx.cct, 20) << &m_image_ctx << " object_may_exist: " << "object_no=" << object_no << " r=" << exists << dendl; return exists; } -int ObjectMap::refresh() +void ObjectMap::refresh(uint64_t snap_id) { if ((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) == 0) { - return 0; + return; } CephContext *cct = m_image_ctx.cct; ldout(cct, 10) << &m_image_ctx << " refreshing object map" << dendl; RWLock::WLocker l(m_image_ctx.object_map_lock); - int r = cls_client::object_map_load(&m_image_ctx.data_ctx, - object_map_name(m_image_ctx.id), - &object_map); + std::string oid(object_map_name(m_image_ctx.id, snap_id)); + int r = cls_client::object_map_load(&m_image_ctx.md_ctx, oid, + &m_object_map); if (r < 0) { lderr(cct) << "error refreshing object map: " << cpp_strerror(r) << dendl; invalidate(); - object_map.clear(); - return r; + m_object_map.clear(); + return; } - ldout(cct, 20) << "refreshed object map: " << object_map.size() + ldout(cct, 20) << "refreshed object map: " << m_object_map.size() << dendl; uint64_t num_objs = Striper::get_num_objects( - m_image_ctx.layout, m_image_ctx.get_image_size(m_image_ctx.snap_id)); - if (object_map.size() != num_objs) { + m_image_ctx.layout, m_image_ctx.get_image_size(snap_id)); + if
(m_object_map.size() != num_objs) { // resize op might have been interrupted - lderr(cct) << "incorrect object map size: " << object_map.size() + lderr(cct) << "incorrect object map size: " << m_object_map.size() << " != " << num_objs << dendl; invalidate(); - return -EINVAL; } - return 0; } +void ObjectMap::rollback(uint64_t snap_id) { + if ((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) == 0) { + return; + } + + CephContext *cct = m_image_ctx.cct; + ldout(cct, 10) << &m_image_ctx << " rollback object map" << dendl; + + RWLock::WLocker l(m_image_ctx.object_map_lock); + + std::string snap_oid(object_map_name(m_image_ctx.id, snap_id)); + bufferlist bl; + int r = m_image_ctx.md_ctx.read(snap_oid, bl, 0, 0); + if (r < 0) { + lderr(cct) << "unable to load snapshot object map '" << snap_oid << "': " + << cpp_strerror(r) << dendl; + invalidate(); + return; + } + + librados::ObjectWriteOperation op; + rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", ""); + op.write_full(bl); + + std::string oid(object_map_name(m_image_ctx.id, CEPH_NOSNAP)); + r = m_image_ctx.md_ctx.operate(oid, &op); + if (r < 0) { + lderr(cct) << "unable to rollback object map: " << cpp_strerror(r) + << dendl; + invalidate(); + } +} + +void ObjectMap::snapshot(uint64_t snap_id) { + if ((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) == 0) { + return; + } + + CephContext *cct = m_image_ctx.cct; + ldout(cct, 10) << &m_image_ctx << " snapshot object map" << dendl; + + int r; + bufferlist bl; + { + RWLock::RLocker l(m_image_ctx.object_map_lock); + std::string oid(object_map_name(m_image_ctx.id, CEPH_NOSNAP)); + r = m_image_ctx.md_ctx.read(oid, bl, 0, 0); + if (r < 0) { + lderr(cct) << "unable to load object map: " << cpp_strerror(r) + << dendl; + invalidate(); + return; + } + } + + std::string snap_oid(object_map_name(m_image_ctx.id, snap_id)); + r = m_image_ctx.md_ctx.write_full(snap_oid, bl); + if (r < 0) { + lderr(cct) << "unable to snapshot object map '" << snap_oid << "': " + <<
cpp_strerror(r) << dendl; + invalidate(); + } +} void ObjectMap::aio_resize(uint64_t new_size, uint8_t default_object_state, Context *on_finish) { @@ -190,15 +262,15 @@ bool ObjectMap::aio_update(uint64_t start_object_no, uint64_t end_object_no, ldout(cct, 20) << &m_image_ctx << " aio_update: start=" << start_object_no << ", end=" << end_object_no << ", new_state=" << static_cast<uint32_t>(new_state) << dendl; - if (end_object_no > object_map.size()) { + if (end_object_no > m_object_map.size()) { ldout(cct, 20) << "skipping update of invalid object map" << dendl; return false; } for (uint64_t object_no = start_object_no; object_no < end_object_no; ++object_no) { - if ((!current_state || object_map[object_no] == *current_state) && - object_map[object_no] != new_state) { + if ((!current_state || m_object_map[object_no] == *current_state) && + m_object_map[object_no] != new_state) { UpdateRequest *req = new UpdateRequest(m_image_ctx, start_object_no, end_object_no, new_state, current_state, on_finish); @@ -307,8 +379,8 @@ void ObjectMap::ResizeRequest::send() { cls_client::object_map_resize(&op, m_num_objs, m_default_object_state); librados::AioCompletion *rados_completion = create_callback_completion(); - int r = m_image_ctx.data_ctx.aio_operate(object_map_name(m_image_ctx.id), - rados_completion, &op); + std::string oid(object_map_name(m_image_ctx.id, CEPH_NOSNAP)); + int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op); assert(r == 0); rados_completion->release(); } @@ -318,10 +390,11 @@ void ObjectMap::ResizeRequest::finish(ObjectMap *object_map) { ldout(cct, 5) << &m_image_ctx << " resizing in-memory object map: " << m_num_objs << dendl; - size_t orig_object_map_size = object_map->object_map.size(); - object_map->object_map.resize(m_num_objs); - for (uint64_t i = orig_object_map_size; i < object_map->object_map.size(); ++i) { - object_map->object_map[i] = m_default_object_state; + size_t orig_object_map_size = object_map->m_object_map.size(); +
object_map->m_object_map.resize(m_num_objs); + for (uint64_t i = orig_object_map_size; + i < object_map->m_object_map.size(); ++i) { + object_map->m_object_map[i] = m_default_object_state; } } @@ -341,8 +414,8 @@ void ObjectMap::UpdateRequest::send() { m_new_state, m_current_state); librados::AioCompletion *rados_completion = create_callback_completion(); - int r = m_image_ctx.data_ctx.aio_operate(object_map_name(m_image_ctx.id), - rados_completion, &op); + std::string oid(object_map_name(m_image_ctx.id, CEPH_NOSNAP)); + int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op); assert(r == 0); rados_completion->release(); } @@ -352,11 +425,11 @@ void ObjectMap::UpdateRequest::finish(ObjectMap *object_map) { ldout(cct, 20) << &m_image_ctx << " updating in-memory object map" << dendl; for (uint64_t object_no = m_start_object_no; - object_no < MIN(m_end_object_no, object_map->object_map.size()); + object_no < MIN(m_end_object_no, object_map->m_object_map.size()); ++object_no) { if (!m_current_state || - object_map->object_map[object_no] == *m_current_state) { - object_map->object_map[object_no] = m_new_state; + object_map->m_object_map[object_no] == *m_current_state) { + object_map->m_object_map[object_no] = m_new_state; } } } diff --git a/src/librbd/ObjectMap.h b/src/librbd/ObjectMap.h index 0c476448d3c..2af1db2f057 100644 --- a/src/librbd/ObjectMap.h +++ b/src/librbd/ObjectMap.h @@ -24,6 +24,9 @@ public: ObjectMap(ImageCtx &image_ctx); + static std::string object_map_name(const std::string &image_id, + uint64_t snap_id); + int lock(); int unlock(); @@ -39,7 +42,9 @@ public: const boost::optional<uint8_t> &current_state, Context *on_finish); - int refresh(); + void refresh(uint64_t snap_id); + void rollback(uint64_t snap_id); + void snapshot(uint64_t snap_id); private: @@ -113,7 +118,7 @@ private: ImageCtx &m_image_ctx; - ceph::BitVector<2> object_map; + ceph::BitVector<2> m_object_map; void invalidate(); diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index
a34d310828f..542ad434dc3 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -69,11 +69,6 @@ namespace librbd { return image_name + RBD_SUFFIX; } - const string object_map_name(const string &image_id) - { - return RBD_OBJECT_MAP_PREFIX + image_id; - } - int detect_format(IoCtx &io_ctx, const string &name, bool *old_format, uint64_t *size) { @@ -315,7 +310,6 @@ namespace librbd { rollback_object(ictx, snap_id, ictx->get_object_name(i), throttle); prog_ctx.update_progress(i * bsize, numseg * bsize); } - rollback_object(ictx, snap_id, object_map_name(ictx->id), throttle); r = throttle.wait_for_ret(); if (r < 0) { @@ -323,6 +317,13 @@ namespace librbd { << cpp_strerror(r) << dendl; return r; } + + { + RWLock::RLocker l(ictx->md_lock); + if (ictx->object_map != NULL) { + ictx->object_map->rollback(snap_id); + } + } return 0; } @@ -478,11 +479,13 @@ namespace librbd { if (r < 0) return r; + bool lock_owner = false; while (ictx->image_watcher->is_lock_supported()) { r = prepare_image_update(ictx); if (r < 0) { return -EROFS; } else if (ictx->image_watcher->is_lock_owner()) { + lock_owner = true; break; } @@ -493,13 +496,19 @@ namespace librbd { ldout(ictx->cct, 5) << "snap_create timed out notifying lock owner" << dendl; } - RWLock::RLocker l2(ictx->md_lock); + RWLock::WLocker l2(ictx->md_lock); + r = _flush(ictx); + if (r < 0) { + return r; + } + do { - r = add_snap(ictx, snap_name); + r = add_snap(ictx, snap_name, lock_owner); } while (r == -ESTALE); - if (r < 0) + if (r < 0) { return r; + } if (notify) { notify_change(ictx->md_ctx, ictx->header_oid, ictx); @@ -566,12 +575,20 @@ namespace librbd { } } + if (ictx->object_map != NULL) { + r = ictx->md_ctx.remove(ObjectMap::object_map_name(ictx->id, snap_id)); + if (r < 0 && r != -ENOENT) { + lderr(ictx->cct) << "snap_remove: failed to remove snapshot object map" + << dendl; + return r; + } + } + r = rm_snap(ictx, snap_name); if (r < 0) return r; r = ictx->data_ctx.selfmanaged_snap_remove(snap_id); - if (r
< 0) return r; @@ -886,7 +903,7 @@ reprotect_and_return_err: librados::ObjectWriteOperation op; cls_client::object_map_resize(&op, Striper::get_num_objects(layout, size), OBJECT_NONEXISTENT); - r = io_ctx.operate(object_map_name(id), &op); + r = io_ctx.operate(ObjectMap::object_map_name(id, CEPH_NOSNAP), &op); if (r < 0) { goto err_remove_header; } @@ -1564,7 +1581,7 @@ reprotect_and_return_err: } } if (!old_format) { - r = io_ctx.remove(object_map_name(id)); + r = io_ctx.remove(ObjectMap::object_map_name(id, CEPH_NOSNAP)); if (r < 0 && r != -ENOENT) { lderr(cct) << "error removing image object map" << dendl; } @@ -1726,9 +1743,10 @@ reprotect_and_return_err: } - int add_snap(ImageCtx *ictx, const char *snap_name) + int add_snap(ImageCtx *ictx, const char *snap_name, bool lock_owner) { assert(ictx->owner_lock.is_locked()); + assert(ictx->md_lock.is_wlocked()); uint64_t snap_id; int r = ictx->md_ctx.selfmanaged_snap_create(&snap_id); @@ -1757,6 +1775,24 @@ reprotect_and_return_err: return r; } + if (!ictx->old_format) { + if (ictx->object_map != NULL) { + ictx->object_map->snapshot(snap_id); + } + if (lock_owner) { + // immediately start using the new snap context if we + // own the exclusive lock + std::vector snaps; + snaps.push_back(snap_id); + snaps.insert(snaps.end(), ictx->snapc.snaps.begin(), + ictx->snapc.snaps.end()); + + ictx->snapc.seq = snap_id; + ictx->snapc.snaps.swap(snaps); + ictx->data_ctx.selfmanaged_snap_set_write_ctx(ictx->snapc.seq, + ictx->snaps); + } + } return 0; } @@ -2016,7 +2052,7 @@ reprotect_and_return_err: } else { ictx->object_map = new ObjectMap(*ictx); if (ictx->snap_exists) { - ictx->object_map->refresh(); + ictx->object_map->refresh(ictx->snap_id); } } @@ -3228,6 +3264,9 @@ reprotect_and_return_err: return r; } + RWLock::RLocker owner_locker(ictx->owner_lock); + RWLock::RLocker md_locker(ictx->md_lock); + ictx->snap_lock.get_read(); snapid_t snap_id = ictx->snap_id; ::SnapContext snapc = ictx->snapc; @@ -3246,7 +3285,6 @@ 
reprotect_and_return_err: c->get(); c->init_time(ictx, AIO_TYPE_WRITE); - RWLock::RLocker l(ictx->owner_lock); if (ictx->image_watcher->is_lock_supported() && !ictx->image_watcher->is_lock_owner()) { c->put(); @@ -3322,6 +3360,9 @@ reprotect_and_return_err: return r; } + RWLock::RLocker owner_locker(ictx->owner_lock); + RWLock::RLocker md_locker(ictx->md_lock); + // TODO: check for snap ictx->snap_lock.get_read(); snapid_t snap_id = ictx->snap_id; @@ -3339,7 +3380,6 @@ reprotect_and_return_err: c->get(); c->init_time(ictx, AIO_TYPE_DISCARD); - RWLock::RLocker l(ictx->owner_lock); if (ictx->image_watcher->is_lock_supported() && !ictx->image_watcher->is_lock_owner()) { c->put(); diff --git a/src/librbd/internal.h b/src/librbd/internal.h index 1e6f790b2da..d164f02ff43 100644 --- a/src/librbd/internal.h +++ b/src/librbd/internal.h @@ -76,7 +76,6 @@ namespace librbd { const std::string id_obj_name(const std::string &name); const std::string header_name(const std::string &image_id); const std::string old_header_name(const std::string &image_name); - const std::string object_map_name(const std::string &image_id); int detect_format(librados::IoCtx &io_ctx, const std::string &name, bool *old_format, uint64_t *size); @@ -120,7 +119,7 @@ namespace librbd { int snap_unprotect(ImageCtx *ictx, const char *snap_name); int snap_is_protected(ImageCtx *ictx, const char *snap_name, bool *is_protected); - int add_snap(ImageCtx *ictx, const char *snap_name); + int add_snap(ImageCtx *ictx, const char *snap_name, bool lock_owner); int rm_snap(ImageCtx *ictx, const char *snap_name); int refresh_parent(ImageCtx *ictx); int ictx_check(ImageCtx *ictx); diff --git a/src/test/librados_test_stub/LibradosTestStub.cc b/src/test/librados_test_stub/LibradosTestStub.cc index fc8b80cf9ac..f9cf32bb079 100644 --- a/src/test/librados_test_stub/LibradosTestStub.cc +++ b/src/test/librados_test_stub/LibradosTestStub.cc @@ -5,6 +5,7 @@ #include "common/ceph_argparse.h" #include "common/common_init.h" 
#include "common/config.h" +#include "common/debug.h" #include "common/snap_types.h" #include "global/global_context.h" #include "librados/AioCompletionImpl.h" @@ -17,6 +18,9 @@ #include #include #include +#include "include/assert.h" + +#define dout_subsys ceph_subsys_rados static librados::TestClassHandler *get_class_handler() { static librados::TestClassHandler *s_class_handler = NULL; @@ -524,6 +528,11 @@ int IoCtx::write(const std::string& oid, bufferlist& bl, size_t len, return ctx->write(oid, bl, len, off); } +int IoCtx::write_full(const std::string& oid, bufferlist& bl) { + TestIoCtxImpl *ctx = reinterpret_cast<TestIoCtxImpl*>(io_ctx_impl); + return ctx->write_full(oid, bl); +} + static int save_operation_result(int result, int *pval) { if (pval != NULL) { *pval = result; @@ -956,6 +965,19 @@ int cls_cxx_write_full(cls_method_context_t hctx, bufferlist *inbl) { } int cls_log(int level, const char *format, ...) { + int size = 256; + va_list ap; + while (1) { + char buf[size]; + va_start(ap, format); + int n = vsnprintf(buf, size, format, ap); + va_end(ap); + if ((n > -1 && n < size) || size > 8196) { + dout(level) << buf << dendl; + return n; + } + size *= 2; + } return 0; }