osd, mon: New pool recovery priority range -10 to 10

Use OSD_POOL_PRIORITY_MAX and OSD_POOL_PRIORITY_MIN constants
Scale legacy priorities if exceeds maximum

Signed-off-by: David Zafman <dzafman@redhat.com>
This commit is contained in:
David Zafman 2019-04-11 16:54:52 -07:00
parent f0e7202cff
commit 444aa9f9fe
9 changed files with 92 additions and 17 deletions

View File

@ -601,8 +601,10 @@ You may set values for the following keys:
``recovery_priority``
:Description: When a value is set it will boost the computed reservation priority
by this amount. This value should be less than 30.
:Description: When a value is set it will increase or decrease the computed
reservation priority. This value must be in the range -10 to
10. Use a negative priority for less important pools so they
have lower priority than any new pools.
:Type: Integer
:Default: ``0``

View File

@ -30,8 +30,8 @@ function run() {
export objects=50
export poolprefix=test
export FORCE_PRIO="254" # See OSD_BACKFILL_PRIORITY_FORCED
export DEGRADED_PRIO="140" # See OSD_BACKFILL_DEGRADED_PRIORITY_BASE
export NORMAL_PRIO="100" # See OSD_BACKFILL_PRIORITY_BASE
export DEGRADED_PRIO="150" # See OSD_BACKFILL_DEGRADED_PRIORITY_BASE + 10
export NORMAL_PRIO="110" # See OSD_BACKFILL_PRIORITY_BASE + 10
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
for func in $funcs ; do

View File

@ -29,7 +29,7 @@ function run() {
export objects=200
export poolprefix=test
export FORCE_PRIO="255" # See OSD_RECOVERY_PRIORITY_FORCED
export NORMAL_PRIO="180" # See OSD_RECOVERY_PRIORITY_BASE
export NORMAL_PRIO="190" # See OSD_RECOVERY_PRIORITY_BASE + 10
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
for func in $funcs ; do

View File

@ -2037,10 +2037,12 @@ function test_mon_osd_pool_set()
ceph osd pool get $TEST_POOL_GETSET recovery_priority | expect_false grep '.'
ceph osd pool set $TEST_POOL_GETSET recovery_priority 5
ceph osd pool get $TEST_POOL_GETSET recovery_priority | grep 'recovery_priority: 5'
ceph osd pool set $TEST_POOL_GETSET recovery_priority -5
ceph osd pool get $TEST_POOL_GETSET recovery_priority | grep 'recovery_priority: -5'
ceph osd pool set $TEST_POOL_GETSET recovery_priority 0
ceph osd pool get $TEST_POOL_GETSET recovery_priority | expect_false grep '.'
expect_false ceph osd pool set $TEST_POOL_GETSET recovery_priority -1
expect_false ceph osd pool set $TEST_POOL_GETSET recovery_priority 30
expect_false ceph osd pool set $TEST_POOL_GETSET recovery_priority -11
expect_false ceph osd pool set $TEST_POOL_GETSET recovery_priority 11
ceph osd pool get $TEST_POOL_GETSET recovery_op_priority | expect_false grep '.'
ceph osd pool set $TEST_POOL_GETSET recovery_op_priority 5

View File

@ -652,6 +652,11 @@ void OSDMonitor::on_active()
if (mon->is_leader()) {
mon->clog->debug() << "osdmap " << osdmap;
if (!priority_convert) {
// Only do this once at start-up
convert_pool_priorities();
priority_convert = true;
}
} else {
list<MonOpRequestRef> ls;
take_all_failures(ls);
@ -7620,12 +7625,9 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap,
ss << "error parsing int value '" << val << "': " << interr;
return -EINVAL;
}
if (n < 0) {
ss << "pool recovery_priority can not be negative";
return -EINVAL;
} else if (n >= 30) {
ss << "pool recovery_priority should be less than 30 due to "
<< "Ceph internal implementation restrictions";
if (n > OSD_POOL_PRIORITY_MAX || n < OSD_POOL_PRIORITY_MIN) {
ss << "pool recovery_priority must be between " << OSD_POOL_PRIORITY_MIN
<< " and " << OSD_POOL_PRIORITY_MAX;
return -EINVAL;
}
} else if (var == "pg_autoscale_bias") {
@ -13178,3 +13180,55 @@ void OSDMonitor::_pool_op_reply(MonOpRequestRef op,
ret, epoch, get_last_committed(), blp);
mon->send_reply(op, reply);
}
void OSDMonitor::convert_pool_priorities(void)
{
pool_opts_t::key_t key = pool_opts_t::get_opt_desc("recovery_priority").key;
int64_t max_prio = 0;
int64_t min_prio = 0;
for (const auto &i : osdmap.get_pools()) {
const auto &pool = i.second;
if (pool.opts.is_set(key)) {
int64_t prio;
pool.opts.get(key, &prio);
if (prio > max_prio)
max_prio = prio;
if (prio < min_prio)
min_prio = prio;
}
}
if (max_prio <= OSD_POOL_PRIORITY_MAX && min_prio >= OSD_POOL_PRIORITY_MIN) {
dout(20) << __func__ << " nothing to fix" << dendl;
return;
}
// Current pool priorities exceeds new maximum
for (const auto &i : osdmap.get_pools()) {
const auto pool_id = i.first;
pg_pool_t pool = i.second;
int64_t prio = 0;
pool.opts.get(key, &prio);
int64_t n;
if (prio > 0 && max_prio > OSD_POOL_PRIORITY_MAX) { // Likely scenario
// Scaled priority range 0 to OSD_POOL_PRIORITY_MAX
n = (float)prio / max_prio * OSD_POOL_PRIORITY_MAX;
} else if (prio < 0 && min_prio < OSD_POOL_PRIORITY_MIN) {
// Scaled priority range OSD_POOL_PRIORITY_MIN to 0
n = (float)prio / min_prio * OSD_POOL_PRIORITY_MIN;
} else {
continue;
}
if (n == 0) {
pool.opts.unset(key);
} else {
pool.opts.set(key, static_cast<int64_t>(n));
}
dout(10) << __func__ << " pool " << pool_id
<< " recovery_priority adjusted "
<< prio << " to " << n << dendl;
pool.last_change = pending_inc.epoch;
pending_inc.new_pools[pool_id] = pool;
}
}

View File

@ -220,6 +220,7 @@ public:
set<int> pending_metadata_rm;
map<int, failure_info_t> failure_info;
map<int,utime_t> down_pending_out; // osd down -> out
bool priority_convert = false;
map<int,double> osd_weight;
@ -701,6 +702,7 @@ public:
pending_inc.new_flags &= ~flag;
}
}
void convert_pool_priorities(void);
};
#endif

View File

@ -2453,11 +2453,22 @@ bool PG::set_force_backfill(bool b)
return did;
}
inline int PG::clamp_recovery_priority(int priority)
int PG::clamp_recovery_priority(int priority, int pool_recovery_priority)
{
static_assert(OSD_RECOVERY_PRIORITY_MIN < OSD_RECOVERY_PRIORITY_MAX, "Invalid priority range");
static_assert(OSD_RECOVERY_PRIORITY_MIN >= 0, "Priority range must match unsigned type");
// User can't set this too high anymore, but might be a legacy value
if (pool_recovery_priority > OSD_POOL_PRIORITY_MAX)
pool_recovery_priority = OSD_POOL_PRIORITY_MAX;
if (pool_recovery_priority < OSD_POOL_PRIORITY_MIN)
pool_recovery_priority = OSD_POOL_PRIORITY_MIN;
// Shift range from min to max to 0 to max - min
pool_recovery_priority += (0 - OSD_POOL_PRIORITY_MIN);
ceph_assert(pool_recovery_priority >= 0 && pool_recovery_priority <= (OSD_POOL_PRIORITY_MAX - OSD_POOL_PRIORITY_MIN));
priority += pool_recovery_priority;
// Clamp to valid range
if (priority > OSD_RECOVERY_PRIORITY_MAX) {
return OSD_RECOVERY_PRIORITY_MAX;
@ -2485,7 +2496,7 @@ unsigned PG::get_recovery_priority()
int64_t pool_recovery_priority = 0;
pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &pool_recovery_priority);
ret = clamp_recovery_priority(pool_recovery_priority + ret);
ret = clamp_recovery_priority(ret, pool_recovery_priority);
}
dout(20) << __func__ << " recovery priority is " << ret << dendl;
return static_cast<unsigned>(ret);
@ -2516,7 +2527,7 @@ unsigned PG::get_backfill_priority()
int64_t pool_recovery_priority = 0;
pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &pool_recovery_priority);
ret = clamp_recovery_priority(pool_recovery_priority + ret);
ret = clamp_recovery_priority(ret, pool_recovery_priority);
}
dout(20) << __func__ << " backfill priority is " << ret << dendl;

View File

@ -1432,7 +1432,7 @@ protected:
bool needs_backfill() const;
/// clip calculated priority to reasonable range
inline int clamp_recovery_priority(int priority);
int clamp_recovery_priority(int prio, int pool_recovery_prio);
/// get log recovery reservation priority
unsigned get_recovery_priority();
/// get backfill reservation priority

View File

@ -68,6 +68,10 @@
#define CEPH_OSD_FEATURE_INCOMPAT_RECOVERY_DELETES CompatSet::Feature(16, "deletes in missing set")
/// pool priority range set by user
#define OSD_POOL_PRIORITY_MAX 10
#define OSD_POOL_PRIORITY_MIN -OSD_POOL_PRIORITY_MAX
/// min recovery priority for MBackfillReserve
#define OSD_RECOVERY_PRIORITY_MIN 0