From edc9ddfde956a4f737d63df98777b0ee2c2f6bcc Mon Sep 17 00:00:00 2001 From: Greg Farnum Date: Mon, 8 Apr 2013 09:10:35 -0700 Subject: [PATCH 1/2] mds: fix journaler to set temp_fetch_len appropriately and read the requested amount The _prefetch() function, which reads temp_fetch_len, interprets it as the amount of data we need from read_pos, which is the beginning of read_buf. So by setting it to the amount *more* we needed, we were getting stuck forever if we actually hit this condition. Fix it by setting temp_fetch_len based on the amount of data we need in aggregate. Furthermore, we were previously rounding *down* the requested amount in order to read only full log segments. Round up instead! Fixes #4618 Signed-off-by: Greg Farnum --- src/osdc/Journaler.cc | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/osdc/Journaler.cc b/src/osdc/Journaler.cc index 544ac8c49c4..0bda7ce9058 100644 --- a/src/osdc/Journaler.cc +++ b/src/osdc/Journaler.cc @@ -835,6 +835,7 @@ void Journaler::_issue_read(uint64_t len) void Journaler::_prefetch() { + ldout(cct, 10) << "_prefetch" << dendl; // prefetch uint64_t pf; if (temp_fetch_len) { @@ -847,9 +848,11 @@ void Journaler::_prefetch() uint64_t raw_target = read_pos + pf; - // only read full log segments + // read full log segments, so increase if necessary uint64_t period = get_layout_period(); - uint64_t target = raw_target - (raw_target % period); + uint64_t remainder = raw_target % period; + uint64_t adjustment = remainder ? period - remainder : 0; + uint64_t target = raw_target + adjustment; // don't read past the log tail if (target > write_pos) @@ -883,7 +886,9 @@ bool Journaler::_is_readable() read_buf.length() >= sizeof(s) + s) return true; // yep, next entry is ready. - // darn it! 
+ ldout (cct, 10) << "_is_readable read_buf.length() == " << read_buf.length() + << ", but need " << s + sizeof(s) + << " for next entry; fetch_len is " << fetch_len << dendl; // partial fragment at the end? if (received_pos == write_pos) { @@ -902,12 +907,14 @@ bool Journaler::_is_readable() return false; } - uint64_t need = (sizeof(s)+s-read_buf.length()); + uint64_t need = sizeof(s) + s; if (need > fetch_len) { + temp_fetch_len = sizeof(s) + s; ldout(cct, 10) << "_is_readable noting temp_fetch_len " << temp_fetch_len << " for len " << s << " entry" << dendl; - temp_fetch_len = need; } + + ldout(cct, 10) << "_is_readable: not readable, returning false" << dendl; return false; } From 4cb18b5a6f17f6fd412d36e079de990a99ea5da3 Mon Sep 17 00:00:00 2001 From: Greg Farnum Date: Mon, 8 Apr 2013 14:09:23 -0700 Subject: [PATCH 2/2] journaler: remove the unused prefetch_from member variable Signed-off-by: Greg Farnum --- src/osdc/Journaler.cc | 1 - src/osdc/Journaler.h | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/osdc/Journaler.cc b/src/osdc/Journaler.cc index 0bda7ce9058..cd9b9edc4c7 100644 --- a/src/osdc/Journaler.cc +++ b/src/osdc/Journaler.cc @@ -64,7 +64,6 @@ void Journaler::set_layout(ceph_file_layout *l) if (periods < 2) periods = 2; // we need at least 2 periods to make progress. 
fetch_len = layout.fl_stripe_count * layout.fl_object_size * periods; - prefetch_from = fetch_len / 2; } diff --git a/src/osdc/Journaler.h b/src/osdc/Journaler.h index 81a39d21a5b..dfc2115c18b 100644 --- a/src/osdc/Journaler.h +++ b/src/osdc/Journaler.h @@ -194,7 +194,6 @@ private: uint64_t fetch_len; // how much to read at a time uint64_t temp_fetch_len; - uint64_t prefetch_from; // how far from end do we read next chunk // for wait_for_readable() Context *on_readable; @@ -251,7 +250,7 @@ public: prezeroing_pos(0), prezero_pos(0), write_pos(0), flush_pos(0), safe_pos(0), waiting_for_zero(false), read_pos(0), requested_pos(0), received_pos(0), - fetch_len(0), temp_fetch_len(0), prefetch_from(0), + fetch_len(0), temp_fetch_len(0), on_readable(0), on_write_error(NULL), expire_pos(0), trimming_pos(0), trimmed_pos(0) { @@ -273,7 +272,6 @@ public: requested_pos = 0; received_pos = 0; fetch_len = 0; - prefetch_from = 0; assert(!on_readable); expire_pos = 0; trimming_pos = 0;