osd/PG: force auth_log_shard to be primary when appropriate

So if there are a lot of missing objects on the primary, we can
make use of auth_log_shard to restore client I/O quickly.

Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
This commit is contained in:
xie xingguo 2018-08-21 16:37:41 +08:00
parent 7de35629f5
commit 22786cffa8
4 changed files with 39 additions and 1 deletions

View File

@ -26,6 +26,8 @@ function run() {
export CEPH_ARGS
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
CEPH_ARGS+="--mon-host=$CEPH_MON "
# set the threshold far above the object count so we will not force auth_log_shard to be acting_primary
CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 "
export margin=10
export objects=200
export poolname=test

View File

@ -3217,6 +3217,10 @@ std::vector<Option> get_global_options() {
.set_default(100)
.set_description(""),
Option("osd_force_auth_primary_missing_objects", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(100)
.set_description("Approximate missing objects above which to force auth_log_shard to be primary temporarily"),
Option("osd_async_recovery_min_pg_log_entries", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(100)
.set_description("Number of entries difference above which to use asynchronous recovery when appropriate"),

View File

@ -1312,6 +1312,7 @@ void PG::calc_ec_acting(
*/
void PG::calc_replicated_acting(
map<pg_shard_t, pg_info_t>::const_iterator auth_log_shard,
uint64_t force_auth_primary_missing_objects,
unsigned size,
const vector<int> &acting,
const vector<int> &up,
@ -1321,6 +1322,7 @@ void PG::calc_replicated_acting(
vector<int> *want,
set<pg_shard_t> *backfill,
set<pg_shard_t> *acting_backfill,
const OSDMapRef osdmap,
ostream &ss)
{
pg_shard_t auth_log_shard_id = auth_log_shard->first;
@ -1335,7 +1337,32 @@ void PG::calc_replicated_acting(
!primary->second.is_incomplete() &&
primary->second.last_update >=
auth_log_shard->second.log_tail) {
ss << "up_primary: " << up_primary << ") selected as primary" << std::endl;
if (HAVE_FEATURE(osdmap->get_up_osd_features(), SERVER_NAUTILUS)) {
auto approx_missing_objects =
primary->second.stats.stats.sum.num_objects_missing;
auto auth_version = auth_log_shard->second.last_update.version;
auto primary_version = primary->second.last_update.version;
if (auth_version > primary_version) {
approx_missing_objects += auth_version - primary_version;
} else {
approx_missing_objects += primary_version - auth_version;
}
if ((uint64_t)approx_missing_objects >
force_auth_primary_missing_objects) {
primary = auth_log_shard;
ss << "up_primary: " << up_primary << ") has approximate "
<< approx_missing_objects
<< "(>" << force_auth_primary_missing_objects <<") "
<< "missing objects, osd." << auth_log_shard_id
<< " selected as primary instead"
<< std::endl;
} else {
ss << "up_primary: " << up_primary << ") selected as primary"
<< std::endl;
}
} else {
ss << "up_primary: " << up_primary << ") selected as primary" << std::endl;
}
} else {
ceph_assert(!auth_log_shard->second.is_incomplete());
ss << "up[0] needs backfill, osd." << auth_log_shard_id
@ -1670,6 +1697,8 @@ bool PG::choose_acting(pg_shard_t &auth_log_shard_id,
if (!pool.info.is_erasure())
calc_replicated_acting(
auth_log_shard,
cct->_conf.get_val<uint64_t>(
"osd_force_auth_primary_missing_objects"),
get_osdmap()->get_pg_size(info.pgid.pgid),
acting,
up,
@ -1679,6 +1708,7 @@ bool PG::choose_acting(pg_shard_t &auth_log_shard_id,
&want,
&want_backfill,
&want_acting_backfill,
get_osdmap(),
ss);
else
calc_ec_acting(

View File

@ -1441,6 +1441,7 @@ protected:
ostream &ss);
static void calc_replicated_acting(
map<pg_shard_t, pg_info_t>::const_iterator auth_log_shard,
uint64_t force_auth_primary_missing_objects,
unsigned size,
const vector<int> &acting,
const vector<int> &up,
@ -1450,6 +1451,7 @@ protected:
vector<int> *want,
set<pg_shard_t> *backfill,
set<pg_shard_t> *acting_backfill,
const OSDMapRef osdmap,
ostream &ss);
void choose_async_recovery_ec(const map<pg_shard_t, pg_info_t> &all_info,
const pg_info_t &auth_info,