1
0
mirror of https://github.com/ceph/ceph synced 2025-04-01 23:02:17 +00:00

librbd: use assert_exists() to simplify copyup check

Previously we would explicitly STAT the object to see if it exists before
sending the write to the OSD.  Instead, send the write optimistically, and
assert that the object already exists.  This avoids an extra round trip in
the optimistic/common case, and makes the existence check in the initial
first-write case more expensive because we send the data payload along.

Signed-off-by: Sage Weil <sage@inktank.com>
This commit is contained in:
Sage Weil 2012-10-22 17:57:08 -07:00
parent e609c8ac54
commit 4240846f5c
2 changed files with 37 additions and 45 deletions

View File

@ -110,7 +110,7 @@ namespace librbd {
/** read **/
AbstractWrite::AbstractWrite()
: m_state(LIBRBD_AIO_WRITE_FINAL),
: m_state(LIBRBD_AIO_WRITE_FLAT),
m_parent_overlap(0) {}
AbstractWrite::AbstractWrite(ImageCtx *ictx, const std::string &oid,
uint64_t object_no, uint64_t object_off, uint64_t len,
@ -119,10 +119,9 @@ namespace librbd {
const ::SnapContext &snapc, librados::snap_t snap_id,
Context *completion,
bool hide_enoent)
: AioRequest(ictx, oid, object_no, object_off, len, snap_id, completion, hide_enoent)
: AioRequest(ictx, oid, object_no, object_off, len, snap_id, completion, hide_enoent),
m_state(LIBRBD_AIO_WRITE_FLAT)
{
m_state = LIBRBD_AIO_WRITE_FINAL;
m_object_image_extents = objectx;
m_parent_overlap = object_overlap;
@ -138,13 +137,10 @@ namespace librbd {
void AbstractWrite::guard_write()
{
if (has_parent()) {
m_state = LIBRBD_AIO_WRITE_CHECK_EXISTS;
m_read.stat(NULL, NULL, NULL);
m_state = LIBRBD_AIO_WRITE_GUARD;
m_write.assert_exists();
ldout(m_ictx->cct, 20) << __func__ << " guarding write" << dendl;
}
ldout(m_ictx->cct, 20) << __func__ << " has_parent = " << has_parent()
<< " m_state = " << m_state << " check exists = "
<< LIBRBD_AIO_WRITE_CHECK_EXISTS << dendl;
}
bool AbstractWrite::should_complete(int r)
@ -154,13 +150,9 @@ namespace librbd {
bool finished = true;
switch (m_state) {
case LIBRBD_AIO_WRITE_CHECK_EXISTS:
ldout(m_ictx->cct, 20) << "WRITE_CHECK_EXISTS" << dendl;
if (r < 0 && r != -ENOENT) {
ldout(m_ictx->cct, 20) << "error checking for object existence" << dendl;
break;
}
finished = false;
case LIBRBD_AIO_WRITE_GUARD:
ldout(m_ictx->cct, 20) << "WRITE_CHECK_GUARD" << dendl;
if (r == -ENOENT) {
Mutex::Locker l(m_ictx->snap_lock);
Mutex::Locker l2(m_ictx->parent_lock);
@ -171,24 +163,29 @@ namespace librbd {
m_state = LIBRBD_AIO_WRITE_COPYUP;
read_from_parent(m_object_image_extents);
finished = false;
break;
}
if (r < 0) {
ldout(m_ictx->cct, 20) << "error checking for object existence" << dendl;
break;
}
ldout(m_ictx->cct, 20) << "no need to read from parent" << dendl;
m_state = LIBRBD_AIO_WRITE_FINAL;
send();
break;
case LIBRBD_AIO_WRITE_COPYUP:
ldout(m_ictx->cct, 20) << "WRITE_COPYUP" << dendl;
m_state = LIBRBD_AIO_WRITE_FINAL;
m_state = LIBRBD_AIO_WRITE_GUARD;
if (r < 0)
return should_complete(r);
send_copyup();
finished = false;
break;
case LIBRBD_AIO_WRITE_FINAL:
ldout(m_ictx->cct, 20) << "WRITE_FINAL" << dendl;
case LIBRBD_AIO_WRITE_FLAT:
ldout(m_ictx->cct, 20) << "WRITE_FLAT" << dendl;
// nothing to do
break;
default:
lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl;
assert(0);
@ -201,13 +198,8 @@ namespace librbd {
librados::AioCompletion *rados_completion =
librados::Rados::aio_create_completion(this, NULL, rados_req_cb);
int r;
if (m_state == LIBRBD_AIO_WRITE_CHECK_EXISTS) {
assert(m_read.size());
r = m_ioctx.aio_operate(m_oid, rados_completion, &m_read, &m_read_data);
} else {
assert(m_write.size());
r = m_ioctx.aio_operate(m_oid, rados_completion, &m_write);
}
assert(m_write.size());
r = m_ioctx.aio_operate(m_oid, rados_completion, &m_write);
rados_completion->release();
return r;
}

View File

@ -109,24 +109,25 @@ namespace librbd {
private:
/**
* Writes go through the following state machine to
* deal with layering:
* need copyup
* LIBRBD_AIO_CHECK_EXISTS ---------------> LIBRBD_AIO_WRITE_COPYUP
* | |
* | no overlap or object exists | parent data read
* | |
* v |
* LIBRBD_AIO_WRITE_FINAL <--------------------------/
* Writes go through the following state machine to deal with
* layering:
*
* By default images start in LIBRBD_AIO_WRITE_FINAL.
* If the write may need a copyup, it will start in
* LIBRBD_AIO_WRITE_CHECK_EXISTS instead.
* need copyup
* LIBRBD_AIO_WRITE_GUARD ---------------> LIBRBD_AIO_WRITE_COPYUP
* | ^ |
* v \------------------------------/
* done
* ^
* |
* LIBRBD_AIO_WRITE_FLAT
*
* Writes start in LIBRBD_AIO_WRITE_GUARD or _FLAT, depending on whether
* there is a parent or not.
*/
enum write_state_d {
LIBRBD_AIO_WRITE_CHECK_EXISTS,
LIBRBD_AIO_WRITE_GUARD,
LIBRBD_AIO_WRITE_COPYUP,
LIBRBD_AIO_WRITE_FINAL
LIBRBD_AIO_WRITE_FLAT
};
protected:
@ -135,7 +136,6 @@ namespace librbd {
write_state_d m_state;
vector<pair<uint64_t,uint64_t> > m_object_image_extents;
uint64_t m_parent_overlap;
librados::ObjectReadOperation m_read;
librados::ObjectWriteOperation m_write;
librados::ObjectWriteOperation m_copyup;