Summary of this patch (free-form text before the first "diff --git" header):

  * qa/workunits/rbd/rbd_mirror_stress.sh: the "rbd bench-write" and
    "rbd snap create" invocations now pass --debug-rbd=20 and
    --debug-journaler=20 and redirect stderr to log files under ${TEMPDIR}.
  * src/journal/JournalPlayer.cc: in JournalPlayer::process_playback() the
    get_object_player() call moves from the top of the function into the
    only branch that uses its result.
  * src/librbd/LibrbdWriteback.cc/.h, src/osdc/WritebackHandler.h,
    src/osdc/ObjectCacher.cc: overwrite_extent() takes the overwritten
    event's tid (original_journal_tid) plus the tid of the event replacing
    it (new_journal_tid).  When new_journal_tid is non-zero, each extent of
    the original event is committed from a C_CommitIOEventExtent callback
    passed to flush_event(new_journal_tid, ...); when it is zero the extent
    is committed immediately with result 0, as before.

Reconstruction notes:

  * Line breaks were restored from a copy in which the whole patch had been
    collapsed onto a few lines; every hunk was re-checked against the line
    counts in its "@@" header.
  * Leading whitespace was not recoverable and is re-created with spaces, and
    blank context lines are left empty; the tool used to apply this may need
    to be told to ignore whitespace differences -- TODO confirm against the
    original commit.
  * The "Extents" typedef had lost its template arguments; they are restored
    as std::pair<uint64_t,uint64_t> to match the it->first / it->second uses.
  * In the "@@ -248,8 +269,8 @@" hunk the removed and added lines are textually
    identical as extracted (presumably a whitespace-only change) -- TODO
    confirm against the original commit.

diff --git a/qa/workunits/rbd/rbd_mirror_stress.sh b/qa/workunits/rbd/rbd_mirror_stress.sh
index b9f13021590..0fe5af994fd 100755
--- a/qa/workunits/rbd/rbd_mirror_stress.sh
+++ b/qa/workunits/rbd/rbd_mirror_stress.sh
@@ -325,7 +325,8 @@ write_image()
     local duration=$(($RANDOM % 35 + 15))

     timeout ${duration}s rbd --cluster ${cluster} -p ${POOL} bench-write \
-        ${image} --io-size 4096 --io-threads 8 --io-total 10G --io-pattern rand || true
+        ${image} --io-size 4096 --io-threads 8 --io-total 10G --io-pattern rand \
+        --debug-rbd=20 --debug-journaler=20 2> ${TEMPDIR}/rbd-bench-write.log || true
 }

 create_snap()
@@ -334,7 +335,8 @@ create_snap()
     local image=$2
     local snap_name=$3

-    rbd --cluster ${cluster} -p ${POOL} snap create ${image}@${snap_name}
+    rbd --cluster ${cluster} -p ${POOL} snap create ${image}@${snap_name} \
+        --debug-rbd=20 --debug-journaler=20 2> ${TEMPDIR}/rbd-snap-create.log
 }

 wait_for_snap()
diff --git a/src/journal/JournalPlayer.cc b/src/journal/JournalPlayer.cc
index 426624c0ad3..03b70e77128 100644
--- a/src/journal/JournalPlayer.cc
+++ b/src/journal/JournalPlayer.cc
@@ -351,13 +351,13 @@ int JournalPlayer::process_playback(uint64_t object_number) {
   ldout(m_cct, 10) << __func__ << ": object_num=" << object_number << dendl;
   assert(m_lock.is_locked());

-  ObjectPlayerPtr object_player = get_object_player();
   if (verify_playback_ready()) {
     notify_entries_available();
   } else if (is_object_set_ready()) {
     if (m_watch_enabled) {
       schedule_watch();
     } else {
+      ObjectPlayerPtr object_player = get_object_player();
       uint8_t splay_width = m_journal_metadata->get_splay_width();
       uint64_t active_set = m_journal_metadata->get_active_set();
       uint64_t object_set = object_player->get_object_number() / splay_width;
diff --git a/src/librbd/LibrbdWriteback.cc b/src/librbd/LibrbdWriteback.cc
index 231c9b12e6d..8551820cf90 100644
--- a/src/librbd/LibrbdWriteback.cc
+++ b/src/librbd/LibrbdWriteback.cc
@@ -167,6 +167,27 @@ namespace librbd {
     }
   };

+  struct C_CommitIOEventExtent : public Context {
+    ImageCtx *image_ctx;
+    uint64_t journal_tid;
+    uint64_t offset;
+    uint64_t length;
+
+    C_CommitIOEventExtent(ImageCtx *image_ctx, uint64_t journal_tid,
+                          uint64_t offset, uint64_t length)
+      : image_ctx(image_ctx), journal_tid(journal_tid), offset(offset),
+        length(length) {
+    }
+
+    virtual void finish(int r) {
+      // all IO operations are flushed prior to closing the journal
+      assert(image_ctx->journal != nullptr);
+
+      image_ctx->journal->commit_io_event_extent(journal_tid, offset, length,
+                                                 r);
+    }
+  };
+
   LibrbdWriteback::LibrbdWriteback(ImageCtx *ictx, Mutex& lock)
     : m_tid(0), m_lock(lock), m_ictx(ictx) {
   }
@@ -248,8 +269,8 @@ namespace librbd {
     assert(journal_tid == 0 || m_ictx->journal != NULL);
     if (journal_tid != 0) {
       m_ictx->journal->flush_event(
-        journal_tid, new C_WriteJournalCommit(m_ictx, oid.name, object_no, off,
-                                              bl, snapc, req_comp,
+        journal_tid, new C_WriteJournalCommit(m_ictx, oid.name, object_no, off,
+                                              bl, snapc, req_comp,
                                               journal_tid));
     } else {
       AioObjectWrite *req = new AioObjectWrite(m_ictx, oid.name, object_no,
@@ -262,22 +283,32 @@ namespace librbd {

   void LibrbdWriteback::overwrite_extent(const object_t& oid, uint64_t off,
                                          uint64_t len,
-                                         ceph_tid_t journal_tid) {
+                                         ceph_tid_t original_journal_tid,
+                                         ceph_tid_t new_journal_tid) {
     typedef std::vector<std::pair<uint64_t,uint64_t> > Extents;

     assert(m_ictx->owner_lock.is_locked());
     uint64_t object_no = oid_to_object_no(oid.name, m_ictx->object_prefix);

     // all IO operations are flushed prior to closing the journal
-    assert(journal_tid != 0 && m_ictx->journal != NULL);
+    assert(original_journal_tid != 0 && m_ictx->journal != NULL);

     Extents file_extents;
     Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, object_no, off,
                             len, file_extents);
     for (Extents::iterator it = file_extents.begin();
          it != file_extents.end(); ++it) {
-      m_ictx->journal->commit_io_event_extent(journal_tid, it->first,
-                                              it->second, 0);
+      if (new_journal_tid != 0) {
+        // ensure new journal event is safely committed to disk before
+        // committing old event
+        m_ictx->journal->flush_event(
+          new_journal_tid, new C_CommitIOEventExtent(m_ictx,
+                                                     original_journal_tid,
+                                                     it->first, it->second));
+      } else {
+        m_ictx->journal->commit_io_event_extent(original_journal_tid, it->first,
+                                                it->second, 0);
+      }
     }
   }

diff --git a/src/librbd/LibrbdWriteback.h b/src/librbd/LibrbdWriteback.h
index 018b04367a3..ef5fa75127b 100644
--- a/src/librbd/LibrbdWriteback.h
+++ b/src/librbd/LibrbdWriteback.h
@@ -42,7 +42,8 @@ namespace librbd {
     using WritebackHandler::write;

     virtual void overwrite_extent(const object_t& oid, uint64_t off,
-                                  uint64_t len, ceph_tid_t journal_tid);
+                                  uint64_t len, ceph_tid_t original_journal_tid,
+                                  ceph_tid_t new_journal_tid);

     virtual void get_client_lock();
     virtual void put_client_lock();
diff --git a/src/osdc/ObjectCacher.cc b/src/osdc/ObjectCacher.cc
index 837c944388d..6fc815d617c 100644
--- a/src/osdc/ObjectCacher.cc
+++ b/src/osdc/ObjectCacher.cc
@@ -462,7 +462,7 @@ void ObjectCacher::Object::replace_journal_tid(BufferHead *bh,
   if (bh_tid != 0 && bh_tid != tid) {
     // inform journal that it should not expect a writeback from this extent
     oc->writeback_handler.overwrite_extent(get_oid(), bh->start(),
-                                           bh->length(), bh_tid);
+                                           bh->length(), bh_tid, tid);
   }
   bh->set_journal_tid(tid);
 }
diff --git a/src/osdc/WritebackHandler.h b/src/osdc/WritebackHandler.h
index f0efd200784..842ae54ae5a 100644
--- a/src/osdc/WritebackHandler.h
+++ b/src/osdc/WritebackHandler.h
@@ -37,7 +37,8 @@ class WritebackHandler {
                            ceph_tid_t journal_tid, Context *oncommit) = 0;

   virtual void overwrite_extent(const object_t& oid, uint64_t off, uint64_t len,
-                                ceph_tid_t journal_tid) {}
+                                ceph_tid_t original_journal_tid,
+                                ceph_tid_t new_journal_tid) {}

   virtual bool can_scattered_write() { return false; }
   virtual ceph_tid_t write(const object_t& oid, const object_locator_t& oloc,