mirror of
https://github.com/ceph/ceph
synced 2025-04-01 23:02:17 +00:00
librbd: use assert_exists() to simplify copyup check
Previously we would explicitly STAT the object to see if it exists before sending the write to the OSD. Instead, send the write optimistically, and assert that the object already exists. This avoids an extra round trip in the optimistic/common case, and makes the existence check in the initial first-write case more expensive because we send the data payload along. Signed-off-by: Sage Weil <sage@inktank.com>
This commit is contained in:
parent
e609c8ac54
commit
4240846f5c
src/librbd
@ -110,7 +110,7 @@ namespace librbd {
|
||||
/** read **/
|
||||
|
||||
AbstractWrite::AbstractWrite()
|
||||
: m_state(LIBRBD_AIO_WRITE_FINAL),
|
||||
: m_state(LIBRBD_AIO_WRITE_FLAT),
|
||||
m_parent_overlap(0) {}
|
||||
AbstractWrite::AbstractWrite(ImageCtx *ictx, const std::string &oid,
|
||||
uint64_t object_no, uint64_t object_off, uint64_t len,
|
||||
@ -119,10 +119,9 @@ namespace librbd {
|
||||
const ::SnapContext &snapc, librados::snap_t snap_id,
|
||||
Context *completion,
|
||||
bool hide_enoent)
|
||||
: AioRequest(ictx, oid, object_no, object_off, len, snap_id, completion, hide_enoent)
|
||||
: AioRequest(ictx, oid, object_no, object_off, len, snap_id, completion, hide_enoent),
|
||||
m_state(LIBRBD_AIO_WRITE_FLAT)
|
||||
{
|
||||
m_state = LIBRBD_AIO_WRITE_FINAL;
|
||||
|
||||
m_object_image_extents = objectx;
|
||||
m_parent_overlap = object_overlap;
|
||||
|
||||
@ -138,13 +137,10 @@ namespace librbd {
|
||||
void AbstractWrite::guard_write()
|
||||
{
|
||||
if (has_parent()) {
|
||||
m_state = LIBRBD_AIO_WRITE_CHECK_EXISTS;
|
||||
m_read.stat(NULL, NULL, NULL);
|
||||
m_state = LIBRBD_AIO_WRITE_GUARD;
|
||||
m_write.assert_exists();
|
||||
ldout(m_ictx->cct, 20) << __func__ << " guarding write" << dendl;
|
||||
}
|
||||
ldout(m_ictx->cct, 20) << __func__ << " has_parent = " << has_parent()
|
||||
<< " m_state = " << m_state << " check exists = "
|
||||
<< LIBRBD_AIO_WRITE_CHECK_EXISTS << dendl;
|
||||
|
||||
}
|
||||
|
||||
bool AbstractWrite::should_complete(int r)
|
||||
@ -154,13 +150,9 @@ namespace librbd {
|
||||
|
||||
bool finished = true;
|
||||
switch (m_state) {
|
||||
case LIBRBD_AIO_WRITE_CHECK_EXISTS:
|
||||
ldout(m_ictx->cct, 20) << "WRITE_CHECK_EXISTS" << dendl;
|
||||
if (r < 0 && r != -ENOENT) {
|
||||
ldout(m_ictx->cct, 20) << "error checking for object existence" << dendl;
|
||||
break;
|
||||
}
|
||||
finished = false;
|
||||
case LIBRBD_AIO_WRITE_GUARD:
|
||||
ldout(m_ictx->cct, 20) << "WRITE_CHECK_GUARD" << dendl;
|
||||
|
||||
if (r == -ENOENT) {
|
||||
Mutex::Locker l(m_ictx->snap_lock);
|
||||
Mutex::Locker l2(m_ictx->parent_lock);
|
||||
@ -171,24 +163,29 @@ namespace librbd {
|
||||
|
||||
m_state = LIBRBD_AIO_WRITE_COPYUP;
|
||||
read_from_parent(m_object_image_extents);
|
||||
finished = false;
|
||||
break;
|
||||
}
|
||||
if (r < 0) {
|
||||
ldout(m_ictx->cct, 20) << "error checking for object existence" << dendl;
|
||||
break;
|
||||
}
|
||||
ldout(m_ictx->cct, 20) << "no need to read from parent" << dendl;
|
||||
m_state = LIBRBD_AIO_WRITE_FINAL;
|
||||
send();
|
||||
break;
|
||||
|
||||
case LIBRBD_AIO_WRITE_COPYUP:
|
||||
ldout(m_ictx->cct, 20) << "WRITE_COPYUP" << dendl;
|
||||
m_state = LIBRBD_AIO_WRITE_FINAL;
|
||||
m_state = LIBRBD_AIO_WRITE_GUARD;
|
||||
if (r < 0)
|
||||
return should_complete(r);
|
||||
send_copyup();
|
||||
finished = false;
|
||||
break;
|
||||
case LIBRBD_AIO_WRITE_FINAL:
|
||||
ldout(m_ictx->cct, 20) << "WRITE_FINAL" << dendl;
|
||||
|
||||
case LIBRBD_AIO_WRITE_FLAT:
|
||||
ldout(m_ictx->cct, 20) << "WRITE_FLAT" << dendl;
|
||||
// nothing to do
|
||||
break;
|
||||
|
||||
default:
|
||||
lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl;
|
||||
assert(0);
|
||||
@ -201,13 +198,8 @@ namespace librbd {
|
||||
librados::AioCompletion *rados_completion =
|
||||
librados::Rados::aio_create_completion(this, NULL, rados_req_cb);
|
||||
int r;
|
||||
if (m_state == LIBRBD_AIO_WRITE_CHECK_EXISTS) {
|
||||
assert(m_read.size());
|
||||
r = m_ioctx.aio_operate(m_oid, rados_completion, &m_read, &m_read_data);
|
||||
} else {
|
||||
assert(m_write.size());
|
||||
r = m_ioctx.aio_operate(m_oid, rados_completion, &m_write);
|
||||
}
|
||||
assert(m_write.size());
|
||||
r = m_ioctx.aio_operate(m_oid, rados_completion, &m_write);
|
||||
rados_completion->release();
|
||||
return r;
|
||||
}
|
||||
|
@ -109,24 +109,25 @@ namespace librbd {
|
||||
|
||||
private:
|
||||
/**
|
||||
* Writes go through the following state machine to
|
||||
* deal with layering:
|
||||
* need copyup
|
||||
* LIBRBD_AIO_CHECK_EXISTS ---------------> LIBRBD_AIO_WRITE_COPYUP
|
||||
* | |
|
||||
* | no overlap or object exists | parent data read
|
||||
* | |
|
||||
* v |
|
||||
* LIBRBD_AIO_WRITE_FINAL <--------------------------/
|
||||
* Writes go through the following state machine to deal with
|
||||
* layering:
|
||||
*
|
||||
* By default images start in LIBRBD_AIO_WRITE_FINAL.
|
||||
* If the write may need a copyup, it will start in
|
||||
* LIBRBD_AIO_WRITE_CHECK_EXISTS instead.
|
||||
* need copyup
|
||||
* LIBRBD_AIO_WRITE_GUARD ---------------> LIBRBD_AIO_WRITE_COPYUP
|
||||
* | ^ |
|
||||
* v \------------------------------/
|
||||
* done
|
||||
* ^
|
||||
* |
|
||||
* LIBRBD_AIO_WRITE_FLAT
|
||||
*
|
||||
* Writes start in LIBRBD_AIO_WRITE_GUARD or _FLAT, depending on whether
|
||||
* there is a parent or not.
|
||||
*/
|
||||
enum write_state_d {
|
||||
LIBRBD_AIO_WRITE_CHECK_EXISTS,
|
||||
LIBRBD_AIO_WRITE_GUARD,
|
||||
LIBRBD_AIO_WRITE_COPYUP,
|
||||
LIBRBD_AIO_WRITE_FINAL
|
||||
LIBRBD_AIO_WRITE_FLAT
|
||||
};
|
||||
|
||||
protected:
|
||||
@ -135,7 +136,6 @@ namespace librbd {
|
||||
write_state_d m_state;
|
||||
vector<pair<uint64_t,uint64_t> > m_object_image_extents;
|
||||
uint64_t m_parent_overlap;
|
||||
librados::ObjectReadOperation m_read;
|
||||
librados::ObjectWriteOperation m_write;
|
||||
librados::ObjectWriteOperation m_copyup;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user