journal: possible race condition between flush and append callback

When notifying the journal recorder of an overflow or if the object
close request has completed due to no more in-flight IO, it was
possible for a race between a flush request and the processing of
an append completion to attempt to kick off duplicate notifications.
Since the overflowed and closed callbacks are properly protected from
duplicates, use a counter instead of a boolean to track possible
in-flight handler callbacks.

Fixes: https://tracker.ceph.com/issues/47880
Signed-off-by: Jason Dillaman <dillaman@redhat.com>
This commit is contained in:
Jason Dillaman 2020-10-16 11:25:39 -04:00
parent f5adeeb621
commit 458ab997fe
2 changed files with 13 additions and 9 deletions

View File

@ -97,7 +97,7 @@ void ObjectRecorder::flush(Context *on_safe) {
// if currently handling flush notifications, wait so that
// we notify in the correct order (since lock is dropped on
// callback)
while (m_in_flight_callbacks) {
while (m_in_flight_callbacks > 0) {
m_in_flight_callbacks_cond.wait(locker);
}
@ -138,7 +138,7 @@ void ObjectRecorder::flush(const ceph::ref_t<FutureImpl>& future) {
}
if (!m_object_closed && !m_overflowed && send_appends(true, future)) {
m_in_flight_callbacks = true;
++m_in_flight_callbacks;
notify_handler_unlock(locker, true);
}
}
@ -169,7 +169,7 @@ bool ObjectRecorder::close() {
ceph_assert(!m_object_closed);
m_object_closed = true;
if (!m_in_flight_tids.empty() || m_in_flight_callbacks) {
if (!m_in_flight_tids.empty() || m_in_flight_callbacks > 0) {
m_object_closed_notify = true;
return false;
}
@ -181,7 +181,7 @@ void ObjectRecorder::handle_append_flushed(uint64_t tid, int r) {
ldout(m_cct, 20) << "tid=" << tid << ", r=" << r << dendl;
std::unique_lock locker{*m_lock};
m_in_flight_callbacks = true;
++m_in_flight_callbacks;
auto tid_iter = m_in_flight_tids.find(tid);
ceph_assert(tid_iter != m_in_flight_tids.end());
@ -235,7 +235,9 @@ void ObjectRecorder::handle_append_flushed(uint64_t tid, int r) {
ldout(m_cct, 20) << "pending tids=" << m_in_flight_tids << dendl;
// all remaining unsent appends should be redirected to new object
// notify of overflow if one just occurred or indicate that all in-flight
// appends have completed on a closed object (or wake up stalled flush
// requests that was waiting for this strand to complete).
notify_handler_unlock(locker, notify_overflowed);
}
@ -379,14 +381,16 @@ bool ObjectRecorder::send_appends(bool force, ceph::ref_t<FutureImpl> flush_futu
void ObjectRecorder::wake_up_flushes() {
ceph_assert(ceph_mutex_is_locked(*m_lock));
m_in_flight_callbacks = false;
m_in_flight_callbacks_cond.notify_all();
--m_in_flight_callbacks;
if (m_in_flight_callbacks == 0) {
m_in_flight_callbacks_cond.notify_all();
}
}
void ObjectRecorder::notify_handler_unlock(
std::unique_lock<ceph::mutex>& locker, bool notify_overflowed) {
ceph_assert(ceph_mutex_is_locked(*m_lock));
ceph_assert(m_in_flight_callbacks);
ceph_assert(m_in_flight_callbacks > 0);
if (!m_object_closed && notify_overflowed) {
// TODO need to delay completion until after aio_notify completes

View File

@ -143,7 +143,7 @@ private:
bufferlist m_prefetch_bl;
bool m_in_flight_callbacks = false;
uint32_t m_in_flight_callbacks = 0;
ceph::condition_variable m_in_flight_callbacks_cond;
uint64_t m_in_flight_bytes = 0;