diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index 7994e3e8fb4..e7604db6f39 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -646,6 +646,7 @@ OPTION(osd_failsafe_full_ratio, OPT_FLOAT, .97) // what % full makes an OSD "ful
 OPTION(osd_failsafe_nearfull_ratio, OPT_FLOAT, .90) // what % full makes an OSD near full (failsafe)
 
 OPTION(osd_pg_object_context_cache_count, OPT_INT, 64)
+OPTION(osd_enable_degraded_writes, OPT_BOOL, true)
 
 // determines whether PGLog::check() compares written out log to stored log
 OPTION(osd_debug_pg_log_writeout, OPT_BOOL, false)
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 6ab9c5d07b0..965fee542fa 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -440,6 +440,36 @@ void ReplicatedPG::wait_for_all_missing(OpRequestRef op)
   op->mark_delayed("waiting for all missing");
 }
 
+bool ReplicatedPG::is_degraded_object(const hobject_t &soid, int *healthy_copies)
+{
+  bool degraded = false;
+  assert(healthy_copies);  // the out-parameter is mandatory
+  *healthy_copies = 0;
+
+  if (pg_log.get_missing().missing.count(soid)) {  // local missing set
+    degraded = true;
+  } else {
+    *healthy_copies += 1;
+  }
+
+  for (set<pg_shard_t>::iterator i = actingbackfill.begin();
+       i != actingbackfill.end();
+       ++i) {
+    if (*i == get_primary()) continue;  // only non-primary shards are peers
+    pg_shard_t peer = *i;
+    if (peer_missing.count(peer) &&
+        peer_missing[peer].missing.count(soid)) {
+      degraded = true;
+      continue;  // a peer missing the object is never counted as healthy
+    }
+
+    assert(peer_info.count(peer));
+    if (!peer_info[peer].is_incomplete())  // incomplete peers are not counted
+      *healthy_copies += 1;
+  }
+  return degraded;
+}
+
 bool ReplicatedPG::is_degraded_or_backfilling_object(const hobject_t& soid)
 {
   if (pg_log.get_missing().missing.count(soid))
@@ -1453,10 +1483,13 @@ void ReplicatedPG::do_op(OpRequestRef& op)
    *
    * We also block if our peers do not support DEGRADED_WRITES.
    */
-  if ((pool.info.ec_pool() ||
-       !(get_min_peer_features() & CEPH_FEATURE_OSD_DEGRADED_WRITES)) &&
-      write_ordered &&
-      is_degraded_or_backfilling_object(head)) {
+  int valid_copies = 0;
+  if (write_ordered &&
+      is_degraded_object(head, &valid_copies) &&
+      (valid_copies < pool.info.min_size ||
+       pool.info.ec_pool() ||
+       !cct->_conf->osd_enable_degraded_writes ||
+       !(get_min_peer_features() & CEPH_FEATURE_OSD_DEGRADED_WRITES))) {
     wait_for_degraded_object(head, op);
     return;
   }
diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h
index 50190a7fd0b..f128c71e8d4 100644
--- a/src/osd/ReplicatedPG.h
+++ b/src/osd/ReplicatedPG.h
@@ -1511,6 +1511,11 @@ public:
   void wait_for_all_missing(OpRequestRef op);
 
   bool is_degraded_or_backfilling_object(const hobject_t& oid);
+
+  /* true if oid is in the local missing set or in the missing set of any
+   * actingbackfill peer; *healthy_copies (must be non-null) is set to the
+   * number of copies not missing it, counting a peer only if not incomplete */
+  bool is_degraded_object(const hobject_t &oid, int *healthy_copies);
   void wait_for_degraded_object(const hobject_t& oid, OpRequestRef op);
 
   bool maybe_await_blocked_snapset(const hobject_t &soid, OpRequestRef op);