Merge branch 'wip-librbd-snaps-object-map' into hammer

Reviewed-by: Josh Durgin <jdurgin@redhat.com>
This commit is contained in:
Josh Durgin 2015-02-16 14:27:44 -08:00
commit ac75ff3c2b
9 changed files with 200 additions and 68 deletions

View File

@ -246,9 +246,6 @@ namespace librbd {
m_state(LIBRBD_AIO_WRITE_FLAT), m_snap_seq(snapc.seq.val),
m_entire_object(NULL)
{
m_io_ctx.dup(ictx->data_ctx);
m_io_ctx.snap_set_read(CEPH_NOSNAP);
m_object_image_extents = objectx;
m_parent_overlap = object_overlap;
}

View File

@ -192,8 +192,6 @@ namespace librbd {
}
private:
librados::IoCtx m_io_ctx;
bool send_pre();
bool send_post();
void send_write();

View File

@ -299,7 +299,7 @@ namespace librbd {
data_ctx.snap_set_read(snap_id);
if (object_map != NULL) {
object_map->refresh();
object_map->refresh(in_snap_id);
}
return 0;
}
@ -314,7 +314,7 @@ namespace librbd {
data_ctx.snap_set_read(snap_id);
if (object_map != NULL) {
object_map->refresh();
object_map->refresh(CEPH_NOSNAP);
}
}

View File

@ -348,7 +348,7 @@ int ImageWatcher::lock() {
unlock();
return r;
}
m_image_ctx.object_map->refresh();
m_image_ctx.object_map->refresh(CEPH_NOSNAP);
}
bufferlist bl;
@ -396,9 +396,8 @@ void ImageWatcher::release_lock()
RWLock::WLocker l(m_image_ctx.owner_lock);
{
RWLock::WLocker l2(m_image_ctx.md_lock);
m_image_ctx.flush_cache();
librbd::_flush(&m_image_ctx);
}
m_image_ctx.data_ctx.aio_flush();
unlock();
}

View File

@ -8,6 +8,7 @@
#include "common/errno.h"
#include "include/stringify.h"
#include "cls/lock/cls_lock_client.h"
#include <sstream>
#define dout_subsys ceph_subsys_rbd
#undef dout_prefix
@ -20,6 +21,18 @@ ObjectMap::ObjectMap(ImageCtx &image_ctx)
{
}
/**
 * Build the name of the RADOS object that stores an image's object map.
 *
 * @param image_id id of the image the object map belongs to
 * @param snap_id  snapshot whose object map is wanted, or CEPH_NOSNAP for
 *                 the head object map
 * @return RBD_OBJECT_MAP_PREFIX + image_id for the head; for a snapshot the
 *         same name followed by "." and the snap id rendered as a
 *         zero-padded, 16 character wide hexadecimal number
 */
std::string ObjectMap::object_map_name(const std::string &image_id,
                                       uint64_t snap_id) {
  const std::string head_oid(RBD_OBJECT_MAP_PREFIX + image_id);
  if (snap_id == CEPH_NOSNAP) {
    // the head object map carries no snapshot suffix
    return head_oid;
  }

  std::stringstream snap_oid;
  snap_oid << head_oid << ".";
  snap_oid << std::setfill('0') << std::setw(16) << std::hex << snap_id;
  return snap_oid.str();
}
int ObjectMap::lock()
{
if ((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) == 0) {
@ -29,10 +42,10 @@ int ObjectMap::lock()
int r;
bool broke_lock = false;
CephContext *cct = m_image_ctx.cct;
std::string oid(object_map_name(m_image_ctx.id, CEPH_NOSNAP));
while (true) {
ldout(cct, 10) << &m_image_ctx << " locking object map" << dendl;
r = rados::cls::lock::lock(&m_image_ctx.md_ctx,
object_map_name(m_image_ctx.id),
r = rados::cls::lock::lock(&m_image_ctx.md_ctx, oid,
RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", "", "",
utime_t(), 0);
if (r == 0) {
@ -47,8 +60,7 @@ int ObjectMap::lock()
lockers_t lockers;
ClsLockType lock_type;
std::string lock_tag;
int r = rados::cls::lock::get_lock_info(&m_image_ctx.md_ctx,
object_map_name(m_image_ctx.id),
int r = rados::cls::lock::get_lock_info(&m_image_ctx.md_ctx, oid,
RBD_LOCK_NAME, &lockers,
&lock_type, &lock_tag);
if (r == -ENOENT) {
@ -63,8 +75,7 @@ int ObjectMap::lock()
for (lockers_t::iterator it = lockers.begin();
it != lockers.end(); ++it) {
const rados::cls::lock::locker_id_t &locker = it->first;
r = rados::cls::lock::break_lock(&m_image_ctx.md_ctx,
object_map_name(m_image_ctx.id),
r = rados::cls::lock::break_lock(&m_image_ctx.md_ctx, oid,
RBD_LOCK_NAME, locker.cookie,
locker.locker);
if (r < 0 && r != -ENOENT) {
@ -89,8 +100,8 @@ int ObjectMap::unlock()
ldout(m_image_ctx.cct, 10) << &m_image_ctx << " unlocking object map"
<< dendl;
int r = rados::cls::lock::unlock(&m_image_ctx.md_ctx,
object_map_name(m_image_ctx.id),
std::string oid = object_map_name(m_image_ctx.id, CEPH_NOSNAP);
int r = rados::cls::lock::unlock(&m_image_ctx.md_ctx, oid,
RBD_LOCK_NAME, "");
if (r < 0 && r != -ENOENT) {
lderr(m_image_ctx.cct) << "failed to release object map lock: "
@ -108,52 +119,112 @@ bool ObjectMap::object_may_exist(uint64_t object_no) const
}
RWLock::RLocker l(m_image_ctx.object_map_lock);
assert(object_no < object_map.size());
assert(object_no < m_object_map.size());
bool exists = (object_map[object_no] == OBJECT_EXISTS ||
object_map[object_no] == OBJECT_PENDING);
bool exists = (m_object_map[object_no] == OBJECT_EXISTS ||
m_object_map[object_no] == OBJECT_PENDING);
ldout(m_image_ctx.cct, 20) << &m_image_ctx << " object_may_exist: "
<< "object_no=" << object_no << " r=" << exists
<< dendl;
return exists;
}
int ObjectMap::refresh()
void ObjectMap::refresh(uint64_t snap_id)
{
if ((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) == 0) {
return 0;
return;
}
CephContext *cct = m_image_ctx.cct;
ldout(cct, 10) << &m_image_ctx << " refreshing object map" << dendl;
RWLock::WLocker l(m_image_ctx.object_map_lock);
int r = cls_client::object_map_load(&m_image_ctx.data_ctx,
object_map_name(m_image_ctx.id),
&object_map);
std::string oid(object_map_name(m_image_ctx.id, snap_id));
int r = cls_client::object_map_load(&m_image_ctx.md_ctx, oid,
&m_object_map);
if (r < 0) {
lderr(cct) << "error refreshing object map: " << cpp_strerror(r)
<< dendl;
invalidate();
object_map.clear();
return r;
m_object_map.clear();
return;
}
ldout(cct, 20) << "refreshed object map: " << object_map.size()
ldout(cct, 20) << "refreshed object map: " << m_object_map.size()
<< dendl;
uint64_t num_objs = Striper::get_num_objects(
m_image_ctx.layout, m_image_ctx.get_image_size(m_image_ctx.snap_id));
if (object_map.size() != num_objs) {
m_image_ctx.layout, m_image_ctx.get_image_size(snap_id));
if (m_object_map.size() != num_objs) {
// resize op might have been interrupted
lderr(cct) << "incorrect object map size: " << object_map.size()
lderr(cct) << "incorrect object map size: " << m_object_map.size()
<< " != " << num_objs << dendl;
invalidate();
return -EINVAL;
}
return 0;
}
void ObjectMap::rollback(uint64_t snap_id) {
if ((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) == 0) {
return;
}
CephContext *cct = m_image_ctx.cct;
ldout(cct, 10) << &m_image_ctx << " rollback object map" << dendl;
RWLock::WLocker l(m_image_ctx.object_map_lock);
std::string snap_oid(object_map_name(m_image_ctx.id, snap_id));
bufferlist bl;
int r = m_image_ctx.md_ctx.read(snap_oid, bl, 0, 0);
if (r < 0) {
lderr(cct) << "unable to load snapshot object map '" << snap_oid << "': "
<< cpp_strerror(r) << dendl;
invalidate();
return;
}
librados::ObjectWriteOperation op;
rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", "");
op.write_full(bl);
std::string oid(object_map_name(m_image_ctx.id, CEPH_NOSNAP));
r = m_image_ctx.md_ctx.operate(oid, &op);
if (r < 0) {
lderr(cct) << "unable to rollback object map: " << cpp_strerror(r)
<< dendl;
invalidate();
}
}
/**
 * Save a copy of the head object map as the object map of a snapshot.
 *
 * Does nothing when the image does not have RBD_FEATURE_OBJECT_MAP set.
 * Failures are not reported to the caller: they are logged and
 * invalidate() is called.
 *
 * @param snap_id snapshot the copied object map is stored under
 */
void ObjectMap::snapshot(uint64_t snap_id) {
  if ((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) == 0) {
    return;
  }

  CephContext *cct = m_image_ctx.cct;
  ldout(cct, 10) << &m_image_ctx << " snapshot object map" << dendl;

  int r;
  bufferlist bl;
  {
    // the read lock only covers loading the head object map
    RWLock::RLocker l(m_image_ctx.object_map_lock);
    std::string oid(object_map_name(m_image_ctx.id, CEPH_NOSNAP));
    // NOTE(review): a length of 0 is presumably "read the whole object" --
    // confirm against the librados read semantics
    r = m_image_ctx.md_ctx.read(oid, bl, 0, 0);
    if (r < 0) {
      lderr(cct) << "unable to load object map: " << cpp_strerror(r)
                 << dendl;
      invalidate();
      // bail out: without the head contents there is nothing valid to
      // store, and writing the empty buffer would create a bogus
      // snapshot object map
      return;
    }
  }

  std::string snap_oid(object_map_name(m_image_ctx.id, snap_id));
  r = m_image_ctx.md_ctx.write_full(snap_oid, bl);
  if (r < 0) {
    lderr(cct) << "unable to snapshot object map '" << snap_oid << "': "
               << cpp_strerror(r) << dendl;
    invalidate();
  }
}
void ObjectMap::aio_resize(uint64_t new_size, uint8_t default_object_state,
Context *on_finish) {
@ -190,15 +261,15 @@ bool ObjectMap::aio_update(uint64_t start_object_no, uint64_t end_object_no,
ldout(cct, 20) << &m_image_ctx << " aio_update: start=" << start_object_no
<< ", end=" << end_object_no << ", new_state="
<< static_cast<uint32_t>(new_state) << dendl;
if (end_object_no > object_map.size()) {
if (end_object_no > m_object_map.size()) {
ldout(cct, 20) << "skipping update of invalid object map" << dendl;
return false;
}
for (uint64_t object_no = start_object_no; object_no < end_object_no;
++object_no) {
if ((!current_state || object_map[object_no] == *current_state) &&
object_map[object_no] != new_state) {
if ((!current_state || m_object_map[object_no] == *current_state) &&
m_object_map[object_no] != new_state) {
UpdateRequest *req = new UpdateRequest(m_image_ctx, start_object_no,
end_object_no, new_state,
current_state, on_finish);
@ -307,8 +378,8 @@ void ObjectMap::ResizeRequest::send() {
cls_client::object_map_resize(&op, m_num_objs, m_default_object_state);
librados::AioCompletion *rados_completion = create_callback_completion();
int r = m_image_ctx.data_ctx.aio_operate(object_map_name(m_image_ctx.id),
rados_completion, &op);
std::string oid(object_map_name(m_image_ctx.id, CEPH_NOSNAP));
int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op);
assert(r == 0);
rados_completion->release();
}
@ -318,10 +389,11 @@ void ObjectMap::ResizeRequest::finish(ObjectMap *object_map) {
ldout(cct, 5) << &m_image_ctx << " resizing in-memory object map: "
<< m_num_objs << dendl;
size_t orig_object_map_size = object_map->object_map.size();
object_map->object_map.resize(m_num_objs);
for (uint64_t i = orig_object_map_size; i < object_map->object_map.size(); ++i) {
object_map->object_map[i] = m_default_object_state;
size_t orig_object_map_size = object_map->m_object_map.size();
object_map->m_object_map.resize(m_num_objs);
for (uint64_t i = orig_object_map_size;
i < object_map->m_object_map.size(); ++i) {
object_map->m_object_map[i] = m_default_object_state;
}
}
@ -341,8 +413,8 @@ void ObjectMap::UpdateRequest::send() {
m_new_state, m_current_state);
librados::AioCompletion *rados_completion = create_callback_completion();
int r = m_image_ctx.data_ctx.aio_operate(object_map_name(m_image_ctx.id),
rados_completion, &op);
std::string oid(object_map_name(m_image_ctx.id, CEPH_NOSNAP));
int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op);
assert(r == 0);
rados_completion->release();
}
@ -352,11 +424,11 @@ void ObjectMap::UpdateRequest::finish(ObjectMap *object_map) {
ldout(cct, 20) << &m_image_ctx << " updating in-memory object map" << dendl;
for (uint64_t object_no = m_start_object_no;
object_no < MIN(m_end_object_no, object_map->object_map.size());
object_no < MIN(m_end_object_no, object_map->m_object_map.size());
++object_no) {
if (!m_current_state ||
object_map->object_map[object_no] == *m_current_state) {
object_map->object_map[object_no] = m_new_state;
object_map->m_object_map[object_no] == *m_current_state) {
object_map->m_object_map[object_no] = m_new_state;
}
}
}

View File

@ -24,6 +24,9 @@ public:
ObjectMap(ImageCtx &image_ctx);
static std::string object_map_name(const std::string &image_id,
uint64_t snap_id);
int lock();
int unlock();
@ -39,7 +42,9 @@ public:
const boost::optional<uint8_t> &current_state,
Context *on_finish);
int refresh();
void refresh(uint64_t snap_id);
void rollback(uint64_t snap_id);
void snapshot(uint64_t snap_id);
private:
@ -113,7 +118,7 @@ private:
ImageCtx &m_image_ctx;
ceph::BitVector<2> object_map;
ceph::BitVector<2> m_object_map;
void invalidate();

View File

@ -69,11 +69,6 @@ namespace librbd {
return image_name + RBD_SUFFIX;
}
const string object_map_name(const string &image_id)
{
return RBD_OBJECT_MAP_PREFIX + image_id;
}
int detect_format(IoCtx &io_ctx, const string &name,
bool *old_format, uint64_t *size)
{
@ -315,7 +310,6 @@ namespace librbd {
rollback_object(ictx, snap_id, ictx->get_object_name(i), throttle);
prog_ctx.update_progress(i * bsize, numseg * bsize);
}
rollback_object(ictx, snap_id, object_map_name(ictx->id), throttle);
r = throttle.wait_for_ret();
if (r < 0) {
@ -323,6 +317,13 @@ namespace librbd {
<< cpp_strerror(r) << dendl;
return r;
}
{
RWLock::RLocker l(ictx->md_lock);
if (ictx->object_map != NULL) {
ictx->object_map->rollback(snap_id);
}
}
return 0;
}
@ -478,11 +479,13 @@ namespace librbd {
if (r < 0)
return r;
bool lock_owner = false;
while (ictx->image_watcher->is_lock_supported()) {
r = prepare_image_update(ictx);
if (r < 0) {
return -EROFS;
} else if (ictx->image_watcher->is_lock_owner()) {
lock_owner = true;
break;
}
@ -493,13 +496,19 @@ namespace librbd {
ldout(ictx->cct, 5) << "snap_create timed out notifying lock owner" << dendl;
}
RWLock::RLocker l2(ictx->md_lock);
RWLock::WLocker l2(ictx->md_lock);
r = _flush(ictx);
if (r < 0) {
return r;
}
do {
r = add_snap(ictx, snap_name);
r = add_snap(ictx, snap_name, lock_owner);
} while (r == -ESTALE);
if (r < 0)
if (r < 0) {
return r;
}
if (notify) {
notify_change(ictx->md_ctx, ictx->header_oid, ictx);
@ -566,12 +575,20 @@ namespace librbd {
}
}
if (ictx->object_map != NULL) {
r = ictx->md_ctx.remove(ObjectMap::object_map_name(ictx->id, snap_id));
if (r < 0 && r != -ENOENT) {
lderr(ictx->cct) << "snap_remove: failed to remove snapshot object map"
<< dendl;
return 0;
}
}
r = rm_snap(ictx, snap_name);
if (r < 0)
return r;
r = ictx->data_ctx.selfmanaged_snap_remove(snap_id);
if (r < 0)
return r;
@ -886,7 +903,7 @@ reprotect_and_return_err:
librados::ObjectWriteOperation op;
cls_client::object_map_resize(&op, Striper::get_num_objects(layout, size),
OBJECT_NONEXISTENT);
r = io_ctx.operate(object_map_name(id), &op);
r = io_ctx.operate(ObjectMap::object_map_name(id, CEPH_NOSNAP), &op);
if (r < 0) {
goto err_remove_header;
}
@ -1564,7 +1581,7 @@ reprotect_and_return_err:
}
}
if (!old_format) {
r = io_ctx.remove(object_map_name(id));
r = io_ctx.remove(ObjectMap::object_map_name(id, CEPH_NOSNAP));
if (r < 0 && r != -ENOENT) {
lderr(cct) << "error removing image object map" << dendl;
}
@ -1726,9 +1743,10 @@ reprotect_and_return_err:
}
int add_snap(ImageCtx *ictx, const char *snap_name)
int add_snap(ImageCtx *ictx, const char *snap_name, bool lock_owner)
{
assert(ictx->owner_lock.is_locked());
assert(ictx->md_lock.is_wlocked());
uint64_t snap_id;
int r = ictx->md_ctx.selfmanaged_snap_create(&snap_id);
@ -1757,6 +1775,24 @@ reprotect_and_return_err:
return r;
}
if (!ictx->old_format) {
if (ictx->object_map != NULL) {
ictx->object_map->snapshot(snap_id);
}
if (lock_owner) {
// immediately start using the new snap context if we
// own the exclusive lock
std::vector<snapid_t> snaps;
snaps.push_back(snap_id);
snaps.insert(snaps.end(), ictx->snapc.snaps.begin(),
ictx->snapc.snaps.end());
ictx->snapc.seq = snap_id;
ictx->snapc.snaps.swap(snaps);
ictx->data_ctx.selfmanaged_snap_set_write_ctx(ictx->snapc.seq,
ictx->snaps);
}
}
return 0;
}
@ -2016,7 +2052,7 @@ reprotect_and_return_err:
} else {
ictx->object_map = new ObjectMap(*ictx);
if (ictx->snap_exists) {
ictx->object_map->refresh();
ictx->object_map->refresh(ictx->snap_id);
}
}
@ -3228,6 +3264,9 @@ reprotect_and_return_err:
return r;
}
RWLock::RLocker owner_locker(ictx->owner_lock);
RWLock::RLocker md_locker(ictx->md_lock);
ictx->snap_lock.get_read();
snapid_t snap_id = ictx->snap_id;
::SnapContext snapc = ictx->snapc;
@ -3246,7 +3285,6 @@ reprotect_and_return_err:
c->get();
c->init_time(ictx, AIO_TYPE_WRITE);
RWLock::RLocker l(ictx->owner_lock);
if (ictx->image_watcher->is_lock_supported() &&
!ictx->image_watcher->is_lock_owner()) {
c->put();
@ -3322,6 +3360,9 @@ reprotect_and_return_err:
return r;
}
RWLock::RLocker owner_locker(ictx->owner_lock);
RWLock::RLocker md_locker(ictx->md_lock);
// TODO: check for snap
ictx->snap_lock.get_read();
snapid_t snap_id = ictx->snap_id;
@ -3339,7 +3380,6 @@ reprotect_and_return_err:
c->get();
c->init_time(ictx, AIO_TYPE_DISCARD);
RWLock::RLocker l(ictx->owner_lock);
if (ictx->image_watcher->is_lock_supported() &&
!ictx->image_watcher->is_lock_owner()) {
c->put();

View File

@ -76,7 +76,6 @@ namespace librbd {
const std::string id_obj_name(const std::string &name);
const std::string header_name(const std::string &image_id);
const std::string old_header_name(const std::string &image_name);
const std::string object_map_name(const std::string &image_id);
int detect_format(librados::IoCtx &io_ctx, const std::string &name,
bool *old_format, uint64_t *size);
@ -120,7 +119,7 @@ namespace librbd {
int snap_unprotect(ImageCtx *ictx, const char *snap_name);
int snap_is_protected(ImageCtx *ictx, const char *snap_name,
bool *is_protected);
int add_snap(ImageCtx *ictx, const char *snap_name);
int add_snap(ImageCtx *ictx, const char *snap_name, bool lock_owner);
int rm_snap(ImageCtx *ictx, const char *snap_name);
int refresh_parent(ImageCtx *ictx);
int ictx_check(ImageCtx *ictx);

View File

@ -5,6 +5,7 @@
#include "common/ceph_argparse.h"
#include "common/common_init.h"
#include "common/config.h"
#include "common/debug.h"
#include "common/snap_types.h"
#include "global/global_context.h"
#include "librados/AioCompletionImpl.h"
@ -17,6 +18,9 @@
#include <deque>
#include <list>
#include <vector>
#include "include/assert.h"
#define dout_subsys ceph_subsys_rados
static librados::TestClassHandler *get_class_handler() {
static librados::TestClassHandler *s_class_handler = NULL;
@ -524,6 +528,11 @@ int IoCtx::write(const std::string& oid, bufferlist& bl, size_t len,
return ctx->write(oid, bl, len, off);
}
// Forwards a full-object write to the TestIoCtxImpl stored in io_ctx_impl
// and returns that call's result unchanged.
int IoCtx::write_full(const std::string& oid, bufferlist& bl) {
  return reinterpret_cast<TestIoCtxImpl*>(io_ctx_impl)->write_full(oid, bl);
}
static int save_operation_result(int result, int *pval) {
if (pval != NULL) {
*pval = result;
@ -956,6 +965,19 @@ int cls_cxx_write_full(cls_method_context_t hctx, bufferlist *inbl) {
}
/**
 * printf-style logging entry point: formats the message and emits it
 * through dout() at the given level.
 *
 * The message is formatted into a buffer that starts at 256 bytes and is
 * doubled until the text fits. Once the buffer size has grown past 8196
 * the message is emitted as-is (truncated) instead of growing further.
 *
 * @param level  level passed to dout()
 * @param format printf-style format string, followed by its arguments
 * @return the value returned by the last vsnprintf() call
 */
int cls_log(int level, const char *format, ...) {
  int size = 256;
  while (true) {
    // heap buffer instead of a variable-length array (not standard C++);
    // it is zero-filled, so it is always a valid C string even if
    // vsnprintf() fails and writes nothing
    std::vector<char> buf(size);

    va_list ap;
    va_start(ap, format);
    int n = vsnprintf(&buf[0], size, format, ap);
    va_end(ap);

    if ((n > -1 && n < size) || size > 8196) {
      dout(level) << &buf[0] << dendl;
      return n;
    }
    size *= 2;
  }
}