Merge pull request #3975 from wonzhq/skip-cache

osd: flush/evict op fixes

Reviewed-by: Sage Weil <sage@redhat.com>
This commit is contained in:
Sage Weil 2015-05-01 09:16:05 -07:00
commit eba5956986
7 changed files with 42 additions and 36 deletions

View File

@@ -8694,6 +8694,24 @@ int OSD::init_op_flags(OpRequestRef& op)
break;
}
case CEPH_OSD_OP_DELETE:
// if we get a delete with FAILOK we can skip promote. without
// FAILOK we still need to promote (or do something smarter) to
// determine whether to return ENOENT or 0.
if (iter == m->ops.begin() &&
iter->op.flags == CEPH_OSD_OP_FLAG_FAILOK) {
op->set_skip_promote();
}
break;
case CEPH_OSD_OP_CACHE_TRY_FLUSH:
case CEPH_OSD_OP_CACHE_FLUSH:
case CEPH_OSD_OP_CACHE_EVICT:
// If try_flush/flush/evict is the only op, no need to promote.
if (m->ops.size() == 1) {
op->set_skip_promote();
}
default:
break;
}

View File

@@ -99,7 +99,10 @@ bool OpRequest::need_class_write_cap() {
return check_rmw(CEPH_OSD_RMW_FLAG_CLASS_WRITE);
}
bool OpRequest::need_promote() {
return check_rmw(CEPH_OSD_RMW_FLAG_PROMOTE);
return check_rmw(CEPH_OSD_RMW_FLAG_FORCE_PROMOTE);
}
bool OpRequest::need_skip_promote() {
return check_rmw(CEPH_OSD_RMW_FLAG_SKIP_PROMOTE);
}
void OpRequest::set_rmw_flags(int flags) {
@@ -118,7 +121,8 @@ void OpRequest::set_class_read() { set_rmw_flags(CEPH_OSD_RMW_FLAG_CLASS_READ);
void OpRequest::set_class_write() { set_rmw_flags(CEPH_OSD_RMW_FLAG_CLASS_WRITE); }
void OpRequest::set_pg_op() { set_rmw_flags(CEPH_OSD_RMW_FLAG_PGOP); }
void OpRequest::set_cache() { set_rmw_flags(CEPH_OSD_RMW_FLAG_CACHE); }
void OpRequest::set_promote() { set_rmw_flags(CEPH_OSD_RMW_FLAG_PROMOTE); }
void OpRequest::set_promote() { set_rmw_flags(CEPH_OSD_RMW_FLAG_FORCE_PROMOTE); }
void OpRequest::set_skip_promote() { set_rmw_flags(CEPH_OSD_RMW_FLAG_SKIP_PROMOTE); }
void OpRequest::mark_flag_point(uint8_t flag, const string& s) {
#ifdef WITH_LTTNG

View File

@@ -66,6 +66,7 @@ struct OpRequest : public TrackedOp {
bool need_class_read_cap();
bool need_class_write_cap();
bool need_promote();
bool need_skip_promote();
void set_read();
void set_write();
void set_cache();
@@ -73,6 +74,7 @@ struct OpRequest : public TrackedOp {
void set_class_write();
void set_pg_op();
void set_promote();
void set_skip_promote();
void _dump(utime_t now, Formatter *f) const;

View File

@@ -1803,7 +1803,7 @@ bool ReplicatedPG::maybe_handle_cache(OpRequestRef op,
osd->logger->inc(l_osd_op_cache_hit);
return false;
}
MOSDOp *m = static_cast<MOSDOp*>(op->get_req());
const object_locator_t& oloc = m->get_object_locator();
@@ -1812,6 +1812,10 @@ bool ReplicatedPG::maybe_handle_cache(OpRequestRef op,
return true;
}
if (op->need_skip_promote()) {
return false;
}
// older versions do not proxy the feature bits.
bool can_proxy_read = get_osdmap()->get_up_osd_features() &
CEPH_FEATURE_OSD_PROXY_FEATURES;
@@ -1835,9 +1839,7 @@ bool ReplicatedPG::maybe_handle_cache(OpRequestRef op,
waiting_for_cache_not_full.push_back(op);
return true;
}
if (can_skip_promote(op)) {
return false;
}
if (op->may_write() || write_ordered || !hit_set) {
promote_object(obc, missing_oid, oloc, op);
return true;
@@ -1923,9 +1925,6 @@ bool ReplicatedPG::maybe_handle_cache(OpRequestRef op,
waiting_for_cache_not_full.push_back(op);
return true;
}
if (can_skip_promote(op)) {
return false;
}
promote_object(obc, missing_oid, oloc, op);
return true;
}
@@ -1943,9 +1942,6 @@ bool ReplicatedPG::maybe_handle_cache(OpRequestRef op,
waiting_for_cache_not_full.push_back(op);
return true;
}
if (can_skip_promote(op)) {
return false;
}
promote_object(obc, missing_oid, oloc, op);
return true;
}
@@ -1960,20 +1956,6 @@ bool ReplicatedPG::maybe_handle_cache(OpRequestRef op,
return false;
}
bool ReplicatedPG::can_skip_promote(OpRequestRef op)
{
MOSDOp *m = static_cast<MOSDOp*>(op->get_req());
if (m->ops.empty())
return false;
// if we get a delete with FAILOK we can skip promote. without
// FAILOK we still need to promote (or do something smarter) to
// determine whether to return ENOENT or 0.
if (m->ops[0].op.op == CEPH_OSD_OP_DELETE &&
(m->ops[0].op.flags & CEPH_OSD_OP_FLAG_FAILOK))
return true;
return false;
}
void ReplicatedPG::do_cache_redirect(OpRequestRef op)
{
MOSDOp *m = static_cast<MOSDOp*>(op->get_req());
@@ -3801,6 +3783,10 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
result = -EINVAL;
break;
}
if (!obs.exists) {
result = 0;
break;
}
if (oi.is_dirty()) {
result = -EBUSY;
break;

View File

@@ -1170,11 +1170,6 @@ protected:
const object_locator_t& oloc, ///< locator for obc|oid
OpRequestRef op); ///< [optional] client op
/**
* Check if the op is such that we can skip promote (e.g., DELETE)
*/
bool can_skip_promote(OpRequestRef op);
int prepare_transaction(OpContext *ctx);
list<pair<OpRequestRef, OpContext*> > in_progress_async_reads;
void complete_read_ctx(int result, OpContext *ctx);

View File

@@ -241,7 +241,8 @@ enum {
CEPH_OSD_RMW_FLAG_CLASS_WRITE = (1 << 4),
CEPH_OSD_RMW_FLAG_PGOP = (1 << 5),
CEPH_OSD_RMW_FLAG_CACHE = (1 << 6),
CEPH_OSD_RMW_FLAG_PROMOTE = (1 << 7),
CEPH_OSD_RMW_FLAG_FORCE_PROMOTE = (1 << 7),
CEPH_OSD_RMW_FLAG_SKIP_PROMOTE = (1 << 8),
};

View File

@@ -786,10 +786,10 @@ TEST_F(LibRadosTwoPoolsPP, Evict) {
op.cache_evict();
librados::AioCompletion *completion = cluster.aio_create_completion();
ASSERT_EQ(0, cache_ioctx.aio_operate(
"fooberdoodle", completion, &op,
"foo", completion, &op,
librados::OPERATION_IGNORE_CACHE, NULL));
completion->wait_for_safe();
ASSERT_EQ(-ENOENT, completion->get_return_value());
ASSERT_EQ(0, completion->get_return_value());
completion->release();
}
{
@@ -2988,10 +2988,10 @@ TEST_F(LibRadosTwoPoolsECPP, Evict) {
op.cache_evict();
librados::AioCompletion *completion = cluster.aio_create_completion();
ASSERT_EQ(0, cache_ioctx.aio_operate(
"fooberdoodle", completion, &op,
"foo", completion, &op,
librados::OPERATION_IGNORE_CACHE, NULL));
completion->wait_for_safe();
ASSERT_EQ(-ENOENT, completion->get_return_value());
ASSERT_EQ(0, completion->get_return_value());
completion->release();
}
{