From 7d804cfac44bee7f464b2230dc72696ae127fca8 Mon Sep 17 00:00:00 2001 From: Sage Weil <sage@redhat.com> Date: Thu, 11 Feb 2016 17:17:06 -0500 Subject: [PATCH 1/4] osd/ReplicatedPG: fix whitespace Signed-off-by: Sage Weil <sage@redhat.com> --- src/osd/ReplicatedPG.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index adeafd230fd..d46bcc48690 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -2147,7 +2147,7 @@ ReplicatedPG::cache_result_t ReplicatedPG::maybe_handle_cache_detail( return cache_result_t::BLOCKED_FULL; } - if (!hit_set && (must_promote || !op->need_skip_promote()) ) { + if (!hit_set && (must_promote || !op->need_skip_promote())) { promote_object(obc, missing_oid, oloc, op, promote_obc); return cache_result_t::BLOCKED_PROMOTE; } else if (op->may_write() || op->may_cache()) { From bab48c2c11a01bf2300691bb3033eb42438db3b1 Mon Sep 17 00:00:00 2001 From: Sage Weil <sage@redhat.com> Date: Thu, 11 Feb 2016 17:28:09 -0500 Subject: [PATCH 2/4] ceph_test_rados_api_tier: verify class op forces promotion A class op should force a promotion to the base tier. If it doesn't, we'll get EOPNOTSUPP from the base EC tier, which e.g. breaks rbd_id.* objects (and lots of other stuff, presumably). 
Signed-off-by: Sage Weil <sage@redhat.com> --- src/test/librados/tier.cc | 116 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/src/test/librados/tier.cc b/src/test/librados/tier.cc index 3c57c7ffc4c..7d67994ffe1 100755 --- a/src/test/librados/tier.cc +++ b/src/test/librados/tier.cc @@ -4500,6 +4500,122 @@ TEST_F(LibRadosTwoPoolsECPP, TryFlushReadRace) { test_lock.Unlock(); } +TEST_F(LibRadosTierECPP, CallForcesPromote) { + Rados cluster; + std::string pool_name = get_temp_pool_name(); + std::string cache_pool_name = pool_name + "-cache"; + ASSERT_EQ("", create_one_ec_pool_pp(pool_name, cluster)); + ASSERT_EQ(0, cluster.pool_create(cache_pool_name.c_str())); + IoCtx cache_ioctx; + ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx)); + IoCtx ioctx; + ASSERT_EQ(0, cluster.ioctx_create(pool_name.c_str(), ioctx)); + + // configure cache + bufferlist inbl; + ASSERT_EQ(0, cluster.mon_command( + "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name + + "\", \"tierpool\": \"" + cache_pool_name + "\"}", + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name + + "\", \"overlaypool\": \"" + cache_pool_name + "\"}", + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name + + "\", \"mode\": \"writeback\"}", + inbl, NULL, NULL)); + + // set things up such that the op would normally be proxied + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "hit_set_count", 2), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "hit_set_period", 600), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "hit_set_type", + "explicit_object"), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "min_read_recency_for_promote", + "4"), + inbl, NULL, NULL)); + + // 
wait for maps to settle + cluster.wait_for_latest_osdmap(); + + // create/dirty object + bufferlist bl; + bl.append("hi there"); + { + ObjectWriteOperation op; + op.write_full(bl); + ASSERT_EQ(0, ioctx.operate("foo", &op)); + } + + // flush + { + ObjectReadOperation op; + op.cache_flush(); + librados::AioCompletion *completion = cluster.aio_create_completion(); + ASSERT_EQ(0, cache_ioctx.aio_operate( + "foo", completion, &op, + librados::OPERATION_IGNORE_OVERLAY, NULL)); + completion->wait_for_safe(); + ASSERT_EQ(0, completion->get_return_value()); + completion->release(); + } + + // evict + { + ObjectReadOperation op; + op.cache_evict(); + librados::AioCompletion *completion = cluster.aio_create_completion(); + ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op, + librados::OPERATION_IGNORE_CACHE, + NULL)); + completion->wait_for_safe(); + ASSERT_EQ(0, completion->get_return_value()); + completion->release(); + } + + // call + { + ObjectReadOperation op; + bufferlist bl; + op.exec("rbd", "get_id", bl); + bufferlist out; + // should get EIO (not an rbd object), not -EOPNOTSUPP (we didn't promote) + ASSERT_EQ(-5, ioctx.operate("foo", &op, &out)); + } + + // make sure foo is back in the cache tier + { + NObjectIterator it = cache_ioctx.nobjects_begin(); + ASSERT_TRUE(it != cache_ioctx.nobjects_end()); + ASSERT_TRUE(it->get_oid() == string("foo")); + ++it; + ASSERT_TRUE(it == cache_ioctx.nobjects_end()); + } + + // tear down tiers + ASSERT_EQ(0, cluster.mon_command( + "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name + + "\"}", + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name + + "\", \"tierpool\": \"" + cache_pool_name + "\"}", + inbl, NULL, NULL)); + + // wait for maps to settle before next test + cluster.wait_for_latest_osdmap(); + + ASSERT_EQ(0, cluster.pool_delete(cache_pool_name.c_str())); + ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster)); +} + 
TEST_F(LibRadosTierECPP, HitSetNone) { { list< pair<time_t,time_t> > ls; From a0858a5890eb7c6d2998493a187f820ae103940f Mon Sep 17 00:00:00 2001 From: Sage Weil <sage@redhat.com> Date: Fri, 12 Feb 2016 07:13:19 -0500 Subject: [PATCH 3/4] osd/ReplicatedPG: respect must_promote in READFORWARD and READPROXY Signed-off-by: Sage Weil <sage@redhat.com> --- src/osd/ReplicatedPG.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index d46bcc48690..2de4b8308a7 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -2227,7 +2227,7 @@ ReplicatedPG::cache_result_t ReplicatedPG::maybe_handle_cache_detail( case pg_pool_t::CACHEMODE_READFORWARD: // Do writeback to the cache tier for writes - if (op->may_write() || write_ordered) { + if (op->may_write() || write_ordered || must_promote) { if (agent_state && agent_state->evict_mode == TierAgentState::EVICT_MODE_FULL) { dout(20) << __func__ << " cache pool full, waiting" << dendl; @@ -2244,7 +2244,7 @@ ReplicatedPG::cache_result_t ReplicatedPG::maybe_handle_cache_detail( case pg_pool_t::CACHEMODE_READPROXY: // Do writeback to the cache tier for writes - if (op->may_write() || write_ordered) { + if (op->may_write() || write_ordered || must_promote) { if (agent_state && agent_state->evict_mode == TierAgentState::EVICT_MODE_FULL) { dout(20) << __func__ << " cache pool full, waiting" << dendl; From 7dbce5bac4945faad761408d5a28adcd5862374c Mon Sep 17 00:00:00 2001 From: Sage Weil <sage@redhat.com> Date: Fri, 12 Feb 2016 07:17:26 -0500 Subject: [PATCH 4/4] osd/ReplicatedPG: respect must_promote in WRITEBACK cache mode Force a promotion if the op requires it. This bug was easily masked because the defaultish cache parameters would often promote anyway (e.g., if min read recency was 0). This was broken during the refactor in 1a2689f8d74537b105cdcf2933f080a2bee9f190. 
Fixes: #14745 Signed-off-by: Sage Weil <sage@redhat.com> --- src/osd/ReplicatedPG.cc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 2de4b8308a7..c355d603fd5 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -2147,11 +2147,13 @@ ReplicatedPG::cache_result_t ReplicatedPG::maybe_handle_cache_detail( return cache_result_t::BLOCKED_FULL; } - if (!hit_set && (must_promote || !op->need_skip_promote())) { + if (must_promote || (!hit_set && !op->need_skip_promote())) { promote_object(obc, missing_oid, oloc, op, promote_obc); return cache_result_t::BLOCKED_PROMOTE; - } else if (op->may_write() || op->may_cache()) { - if (can_proxy_write && !must_promote) { + } + + if (op->may_write() || op->may_cache()) { + if (can_proxy_write) { do_proxy_write(op, missing_oid); } else { // promote if can't proxy the write @@ -2169,7 +2171,7 @@ ReplicatedPG::cache_result_t ReplicatedPG::maybe_handle_cache_detail( return cache_result_t::HANDLED_PROXY; } else { bool did_proxy_read = false; - if (can_proxy_read && !must_promote) { + if (can_proxy_read) { do_proxy_read(op); did_proxy_read = true; } else {