mirror of
https://github.com/ceph/ceph
synced 2025-02-23 11:07:35 +00:00
Merge pull request #39140 from sseshasa/wip-dmclock-profiles-improvements
osd: Refinements to mclock built-in profiles implementation. Reviewed-by: Josh Durgin <jdurgin@redhat.com> Reviewed-by: Sunny Kumar <sunkumar@redhat.com>
This commit is contained in:
commit
afb6b8d109
@ -3081,7 +3081,7 @@ std::vector<Option> get_global_options() {
|
||||
.set_flag(Option::FLAG_RUNTIME),
|
||||
|
||||
Option("osd_mclock_profile", Option::TYPE_STR, Option::LEVEL_ADVANCED)
|
||||
.set_default("balanced")
|
||||
.set_default("high_client_ops")
|
||||
.set_enum_allowed( { "balanced", "high_recovery_ops", "high_client_ops", "custom" } )
|
||||
.set_description("Which mclock profile to use")
|
||||
.set_long_description("This option specifies the mclock profile to enable - one among the set of built-in profiles or a custom profile. Only considered for osd_op_queue = mclock_scheduler")
|
||||
|
@ -16,7 +16,6 @@
|
||||
#include <memory>
|
||||
#include <functional>
|
||||
|
||||
#include "include/stringify.h"
|
||||
#include "osd/scheduler/mClockScheduler.h"
|
||||
#include "common/dout.h"
|
||||
|
||||
@ -46,18 +45,10 @@ mClockScheduler::mClockScheduler(CephContext *cct,
|
||||
{
|
||||
cct->_conf.add_observer(this);
|
||||
ceph_assert(num_shards > 0);
|
||||
// Set default blocksize and cost for all op types.
|
||||
for (op_type_t op_type = op_type_t::client_op;
|
||||
op_type <= op_type_t::bg_pg_delete;
|
||||
op_type = op_type_t(static_cast<size_t>(op_type) + 1)) {
|
||||
client_cost_infos[op_type] = 4 * 1024;
|
||||
client_scaled_cost_infos[op_type] = 1;
|
||||
}
|
||||
set_max_osd_capacity();
|
||||
set_osd_mclock_cost_per_io();
|
||||
mclock_profile = cct->_conf.get_val<std::string>("osd_mclock_profile");
|
||||
set_client_allocations();
|
||||
enable_mclock_profile();
|
||||
set_mclock_profile();
|
||||
enable_mclock_profile_settings();
|
||||
client_registry.update_from_config(cct->_conf);
|
||||
}
|
||||
|
||||
@ -119,6 +110,8 @@ void mClockScheduler::set_max_osd_capacity()
|
||||
cct->_conf.get_val<double>("osd_mclock_max_capacity_iops_ssd");
|
||||
}
|
||||
}
|
||||
// Set max osd bandwidth across all shards (at 4KiB blocksize)
|
||||
max_osd_bandwidth = max_osd_capacity * 4 * 1024;
|
||||
// Set per op-shard iops limit
|
||||
max_osd_capacity /= num_shards;
|
||||
}
|
||||
@ -139,45 +132,156 @@ void mClockScheduler::set_osd_mclock_cost_per_io()
|
||||
}
|
||||
}
|
||||
|
||||
void mClockScheduler::set_client_allocations()
|
||||
void mClockScheduler::set_mclock_profile()
|
||||
{
|
||||
// Set profile specific client capacity allocations
|
||||
if (mclock_profile == "balanced") {
|
||||
double capacity = std::round(0.5 * max_osd_capacity);
|
||||
client_allocs[op_scheduler_class::client] = capacity;
|
||||
client_allocs[op_scheduler_class::background_recovery] = capacity;
|
||||
} else if (mclock_profile == "high_recovery_ops") {
|
||||
client_allocs[op_scheduler_class::client] =
|
||||
std::round(0.25 * max_osd_capacity);
|
||||
client_allocs[op_scheduler_class::background_recovery] =
|
||||
std::round(0.75 * max_osd_capacity);
|
||||
} else if (mclock_profile == "high_client_ops") {
|
||||
client_allocs[op_scheduler_class::client] =
|
||||
std::round(0.75 * max_osd_capacity);
|
||||
client_allocs[op_scheduler_class::background_recovery] =
|
||||
std::round(0.25 * max_osd_capacity);
|
||||
} else {
|
||||
ceph_assert("Invalid mclock profile" == 0);
|
||||
return;
|
||||
}
|
||||
mclock_profile = cct->_conf.get_val<std::string>("osd_mclock_profile");
|
||||
}
|
||||
|
||||
double mClockScheduler::get_client_allocation(op_type_t op_type)
|
||||
std::string mClockScheduler::get_mclock_profile()
|
||||
{
|
||||
double default_allocation = 1.0;
|
||||
|
||||
switch (op_type) {
|
||||
case op_type_t::client_op:
|
||||
return client_allocs[op_scheduler_class::client];
|
||||
case op_type_t::bg_recovery:
|
||||
return client_allocs[op_scheduler_class::background_recovery];
|
||||
default:
|
||||
// TODO for other op types.
|
||||
return default_allocation;
|
||||
}
|
||||
return mclock_profile;
|
||||
}
|
||||
|
||||
void mClockScheduler::enable_mclock_profile()
|
||||
void mClockScheduler::set_balanced_profile_allocations()
|
||||
{
|
||||
// Client Allocation:
|
||||
// reservation: 40% | weight: 1 | limit: 100% |
|
||||
// Background Recovery Allocation:
|
||||
// reservation: 40% | weight: 1 | limit: 150% |
|
||||
// Background Best Effort Allocation:
|
||||
// reservation: 20% | weight: 2 | limit: max |
|
||||
|
||||
// Client
|
||||
uint64_t client_res = static_cast<uint64_t>(
|
||||
std::round(0.40 * max_osd_capacity));
|
||||
uint64_t client_lim = static_cast<uint64_t>(
|
||||
std::round(max_osd_capacity));
|
||||
uint64_t client_wgt = default_min;
|
||||
|
||||
// Background Recovery
|
||||
uint64_t rec_res = static_cast<uint64_t>(
|
||||
std::round(0.40 * max_osd_capacity));
|
||||
uint64_t rec_lim = static_cast<uint64_t>(
|
||||
std::round(1.5 * max_osd_capacity));
|
||||
uint64_t rec_wgt = default_min;
|
||||
|
||||
// Background Best Effort
|
||||
uint64_t best_effort_res = static_cast<uint64_t>(
|
||||
std::round(0.20 * max_osd_capacity));
|
||||
uint64_t best_effort_lim = default_max;
|
||||
uint64_t best_effort_wgt = 2;
|
||||
|
||||
// Set the allocations for the mclock clients
|
||||
client_allocs[
|
||||
static_cast<size_t>(op_scheduler_class::client)].update(
|
||||
client_res,
|
||||
client_wgt,
|
||||
client_lim);
|
||||
client_allocs[
|
||||
static_cast<size_t>(op_scheduler_class::background_recovery)].update(
|
||||
rec_res,
|
||||
rec_wgt,
|
||||
rec_lim);
|
||||
client_allocs[
|
||||
static_cast<size_t>(op_scheduler_class::background_best_effort)].update(
|
||||
best_effort_res,
|
||||
best_effort_wgt,
|
||||
best_effort_lim);
|
||||
}
|
||||
|
||||
void mClockScheduler::set_high_recovery_ops_profile_allocations()
|
||||
{
|
||||
// Client Allocation:
|
||||
// reservation: 30% | weight: 1 | limit: 80% |
|
||||
// Background Recovery Allocation:
|
||||
// reservation: 60% | weight: 2 | limit: 200% |
|
||||
// Background Best Effort Allocation:
|
||||
// reservation: 1 | weight: 2 | limit: max |
|
||||
|
||||
// Client
|
||||
uint64_t client_res = static_cast<uint64_t>(
|
||||
std::round(0.30 * max_osd_capacity));
|
||||
uint64_t client_lim = static_cast<uint64_t>(
|
||||
std::round(0.80 * max_osd_capacity));
|
||||
uint64_t client_wgt = default_min;
|
||||
|
||||
// Background Recovery
|
||||
uint64_t rec_res = static_cast<uint64_t>(
|
||||
std::round(0.60 * max_osd_capacity));
|
||||
uint64_t rec_lim = static_cast<uint64_t>(
|
||||
std::round(2.0 * max_osd_capacity));
|
||||
uint64_t rec_wgt = 2;
|
||||
|
||||
// Background Best Effort
|
||||
uint64_t best_effort_res = default_min;
|
||||
uint64_t best_effort_lim = default_max;
|
||||
uint64_t best_effort_wgt = 2;
|
||||
|
||||
// Set the allocations for the mclock clients
|
||||
client_allocs[
|
||||
static_cast<size_t>(op_scheduler_class::client)].update(
|
||||
client_res,
|
||||
client_wgt,
|
||||
client_lim);
|
||||
client_allocs[
|
||||
static_cast<size_t>(op_scheduler_class::background_recovery)].update(
|
||||
rec_res,
|
||||
rec_wgt,
|
||||
rec_lim);
|
||||
client_allocs[
|
||||
static_cast<size_t>(op_scheduler_class::background_best_effort)].update(
|
||||
best_effort_res,
|
||||
best_effort_wgt,
|
||||
best_effort_lim);
|
||||
}
|
||||
|
||||
void mClockScheduler::set_high_client_ops_profile_allocations()
|
||||
{
|
||||
// Client Allocation:
|
||||
// reservation: 50% | weight: 2 | limit: max |
|
||||
// Background Recovery Allocation:
|
||||
// reservation: 25% | weight: 1 | limit: 100% |
|
||||
// Background Best Effort Allocation:
|
||||
// reservation: 25% | weight: 2 | limit: max |
|
||||
|
||||
// Client
|
||||
uint64_t client_res = static_cast<uint64_t>(
|
||||
std::round(0.50 * max_osd_capacity));
|
||||
uint64_t client_wgt = 2;
|
||||
uint64_t client_lim = default_max;
|
||||
|
||||
// Background Recovery
|
||||
uint64_t rec_res = static_cast<uint64_t>(
|
||||
std::round(0.25 * max_osd_capacity));
|
||||
uint64_t rec_lim = static_cast<uint64_t>(
|
||||
std::round(max_osd_capacity));
|
||||
uint64_t rec_wgt = default_min;
|
||||
|
||||
// Background Best Effort
|
||||
uint64_t best_effort_res = static_cast<uint64_t>(
|
||||
std::round(0.25 * max_osd_capacity));
|
||||
uint64_t best_effort_lim = default_max;
|
||||
uint64_t best_effort_wgt = 2;
|
||||
|
||||
// Set the allocations for the mclock clients
|
||||
client_allocs[
|
||||
static_cast<size_t>(op_scheduler_class::client)].update(
|
||||
client_res,
|
||||
client_wgt,
|
||||
client_lim);
|
||||
client_allocs[
|
||||
static_cast<size_t>(op_scheduler_class::background_recovery)].update(
|
||||
rec_res,
|
||||
rec_wgt,
|
||||
rec_lim);
|
||||
client_allocs[
|
||||
static_cast<size_t>(op_scheduler_class::background_best_effort)].update(
|
||||
best_effort_res,
|
||||
best_effort_wgt,
|
||||
best_effort_lim);
|
||||
}
|
||||
|
||||
void mClockScheduler::enable_mclock_profile_settings()
|
||||
{
|
||||
// Nothing to do for "custom" profile
|
||||
if (mclock_profile == "custom") {
|
||||
@ -186,92 +290,54 @@ void mClockScheduler::enable_mclock_profile()
|
||||
|
||||
// Set mclock and ceph config options for the chosen profile
|
||||
if (mclock_profile == "balanced") {
|
||||
set_balanced_profile_config();
|
||||
set_balanced_profile_allocations();
|
||||
} else if (mclock_profile == "high_recovery_ops") {
|
||||
set_high_recovery_ops_profile_config();
|
||||
set_high_recovery_ops_profile_allocations();
|
||||
} else if (mclock_profile == "high_client_ops") {
|
||||
set_high_client_ops_profile_config();
|
||||
set_high_client_ops_profile_allocations();
|
||||
} else {
|
||||
ceph_assert("Invalid choice of mclock profile" == 0);
|
||||
return;
|
||||
}
|
||||
|
||||
// Set the mclock config parameters
|
||||
set_profile_config();
|
||||
// Set recovery specific Ceph options
|
||||
set_global_recovery_options();
|
||||
}
|
||||
|
||||
std::string mClockScheduler::get_mclock_profile()
|
||||
void mClockScheduler::set_profile_config()
|
||||
{
|
||||
return mclock_profile;
|
||||
}
|
||||
|
||||
void mClockScheduler::set_balanced_profile_config()
|
||||
{
|
||||
double client_lim = get_client_allocation(op_type_t::client_op);
|
||||
double rec_lim = get_client_allocation(op_type_t::bg_recovery);
|
||||
int client_wgt = 10;
|
||||
ClientAllocs client = client_allocs[
|
||||
static_cast<size_t>(op_scheduler_class::client)];
|
||||
ClientAllocs rec = client_allocs[
|
||||
static_cast<size_t>(op_scheduler_class::background_recovery)];
|
||||
ClientAllocs best_effort = client_allocs[
|
||||
static_cast<size_t>(op_scheduler_class::background_best_effort)];
|
||||
|
||||
// Set external client params
|
||||
cct->_conf.set_val(
|
||||
"osd_mclock_scheduler_client_res", stringify(default_min));
|
||||
cct->_conf.set_val(
|
||||
"osd_mclock_scheduler_client_wgt", stringify(client_wgt));
|
||||
cct->_conf.set_val(
|
||||
"osd_mclock_scheduler_client_lim", stringify(client_lim));
|
||||
cct->_conf.set_val("osd_mclock_scheduler_client_res",
|
||||
std::to_string(client.res));
|
||||
cct->_conf.set_val("osd_mclock_scheduler_client_wgt",
|
||||
std::to_string(client.wgt));
|
||||
cct->_conf.set_val("osd_mclock_scheduler_client_lim",
|
||||
std::to_string(client.lim));
|
||||
|
||||
// Set background recovery client params
|
||||
cct->_conf.set_val(
|
||||
"osd_mclock_scheduler_background_recovery_res", stringify(default_min));
|
||||
cct->_conf.set_val(
|
||||
"osd_mclock_scheduler_background_recovery_wgt", stringify(default_min));
|
||||
cct->_conf.set_val(
|
||||
"osd_mclock_scheduler_background_recovery_lim", stringify(rec_lim));
|
||||
}
|
||||
cct->_conf.set_val("osd_mclock_scheduler_background_recovery_res",
|
||||
std::to_string(rec.res));
|
||||
cct->_conf.set_val("osd_mclock_scheduler_background_recovery_wgt",
|
||||
std::to_string(rec.wgt));
|
||||
cct->_conf.set_val("osd_mclock_scheduler_background_recovery_lim",
|
||||
std::to_string(rec.lim));
|
||||
|
||||
void mClockScheduler::set_high_recovery_ops_profile_config()
|
||||
{
|
||||
double client_lim = get_client_allocation(op_type_t::client_op);
|
||||
double rec_lim = get_client_allocation(op_type_t::bg_recovery);
|
||||
int rec_wgt = 10;
|
||||
|
||||
// Set external client params
|
||||
cct->_conf.set_val(
|
||||
"osd_mclock_scheduler_client_res", stringify(default_min));
|
||||
cct->_conf.set_val(
|
||||
"osd_mclock_scheduler_client_wgt", stringify(default_min));
|
||||
cct->_conf.set_val(
|
||||
"osd_mclock_scheduler_client_lim", stringify(client_lim));
|
||||
|
||||
// Set background recovery client params
|
||||
cct->_conf.set_val(
|
||||
"osd_mclock_scheduler_background_recovery_res", stringify(default_min));
|
||||
cct->_conf.set_val(
|
||||
"osd_mclock_scheduler_background_recovery_wgt", stringify(rec_wgt));
|
||||
cct->_conf.set_val(
|
||||
"osd_mclock_scheduler_background_recovery_lim", stringify(rec_lim));
|
||||
}
|
||||
|
||||
void mClockScheduler::set_high_client_ops_profile_config()
|
||||
{
|
||||
double client_lim = get_client_allocation(op_type_t::client_op);
|
||||
double rec_lim = get_client_allocation(op_type_t::bg_recovery);
|
||||
int client_wgt = 10;
|
||||
|
||||
// Set external client params
|
||||
cct->_conf.set_val(
|
||||
"osd_mclock_scheduler_client_res", stringify(default_min));
|
||||
cct->_conf.set_val(
|
||||
"osd_mclock_scheduler_client_wgt", stringify(client_wgt));
|
||||
cct->_conf.set_val(
|
||||
"osd_mclock_scheduler_client_lim", stringify(client_lim));
|
||||
|
||||
// Set background recovery client params
|
||||
cct->_conf.set_val(
|
||||
"osd_mclock_scheduler_background_recovery_res", stringify(default_min));
|
||||
cct->_conf.set_val(
|
||||
"osd_mclock_scheduler_background_recovery_wgt", stringify(default_min));
|
||||
cct->_conf.set_val(
|
||||
"osd_mclock_scheduler_background_recovery_lim", stringify(rec_lim));
|
||||
// Set background best effort client params
|
||||
cct->_conf.set_val("osd_mclock_scheduler_background_best_effort_res",
|
||||
std::to_string(best_effort.res));
|
||||
cct->_conf.set_val("osd_mclock_scheduler_background_best_effort_wgt",
|
||||
std::to_string(best_effort.wgt));
|
||||
cct->_conf.set_val("osd_mclock_scheduler_background_best_effort_lim",
|
||||
std::to_string(best_effort.lim));
|
||||
}
|
||||
|
||||
void mClockScheduler::set_global_recovery_options()
|
||||
@ -279,32 +345,23 @@ void mClockScheduler::set_global_recovery_options()
|
||||
// Set high value for recovery max active and max backfill
|
||||
int rec_max_active = 1000;
|
||||
int max_backfills = 1000;
|
||||
cct->_conf.set_val("osd_recovery_max_active", stringify(rec_max_active));
|
||||
cct->_conf.set_val("osd_max_backfills", stringify(max_backfills));
|
||||
cct->_conf.set_val("osd_recovery_max_active", std::to_string(rec_max_active));
|
||||
cct->_conf.set_val("osd_max_backfills", std::to_string(max_backfills));
|
||||
|
||||
// Disable recovery sleep
|
||||
cct->_conf.set_val("osd_recovery_sleep", stringify(0));
|
||||
cct->_conf.set_val("osd_recovery_sleep_hdd", stringify(0));
|
||||
cct->_conf.set_val("osd_recovery_sleep_ssd", stringify(0));
|
||||
cct->_conf.set_val("osd_recovery_sleep_hybrid", stringify(0));
|
||||
cct->_conf.set_val("osd_recovery_sleep", std::to_string(0));
|
||||
cct->_conf.set_val("osd_recovery_sleep_hdd", std::to_string(0));
|
||||
cct->_conf.set_val("osd_recovery_sleep_ssd", std::to_string(0));
|
||||
cct->_conf.set_val("osd_recovery_sleep_hybrid", std::to_string(0));
|
||||
|
||||
// Apply the changes
|
||||
cct->_conf.apply_changes(nullptr);
|
||||
}
|
||||
|
||||
int mClockScheduler::calc_scaled_cost(op_type_t op_type, int cost)
|
||||
int mClockScheduler::calc_scaled_cost(int cost)
|
||||
{
|
||||
double client_alloc = get_client_allocation(op_type);
|
||||
if (client_alloc == 1.0) {
|
||||
// Client not yet supported, return default cost.
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Calculate bandwidth from max osd capacity (at 4KiB blocksize).
|
||||
double max_osd_bandwidth = max_osd_capacity * num_shards * 4 * 1024;
|
||||
|
||||
// Calculate scaled cost based on item cost
|
||||
double scaled_cost = (cost / max_osd_bandwidth) * client_alloc;
|
||||
// Calculate scaled cost in msecs based on item cost
|
||||
int scaled_cost = std::floor((cost / max_osd_bandwidth) * 1000);
|
||||
|
||||
// Scale the cost down by an additional cost factor if specified
|
||||
// to account for different device characteristics (hdd, ssd).
|
||||
@ -314,45 +371,7 @@ int mClockScheduler::calc_scaled_cost(op_type_t op_type, int cost)
|
||||
scaled_cost *= osd_mclock_cost_per_io_msec / 1000.0;
|
||||
}
|
||||
|
||||
return std::floor(scaled_cost);
|
||||
}
|
||||
|
||||
bool mClockScheduler::maybe_update_client_cost_info(
|
||||
op_type_t op_type, int new_cost)
|
||||
{
|
||||
int capped_item_cost = 4 * 1024 * 1024;
|
||||
|
||||
if (new_cost == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// The mclock params represented in terms of the per-osd capacity
|
||||
// are scaled up or down according to the cost associated with
|
||||
// item cost and updated within the dmclock server.
|
||||
int cur_cost = client_cost_infos[op_type];
|
||||
|
||||
// Note: Cap the scaling of item cost to ~4MiB as the tag increments
|
||||
// beyond this point are too long causing performance issues. This may
|
||||
// need to be in place until benchmark data is available or a better
|
||||
// scaling model can be put in place. This is a TODO.
|
||||
if (new_cost >= capped_item_cost) {
|
||||
new_cost = capped_item_cost;
|
||||
}
|
||||
|
||||
bool cost_changed =
|
||||
((new_cost >= (cur_cost << 1)) || (cur_cost >= (new_cost << 1)));
|
||||
|
||||
if (cost_changed) {
|
||||
client_cost_infos[op_type] = new_cost;
|
||||
// Update client scaled cost info
|
||||
int scaled_cost = std::max(calc_scaled_cost(op_type, new_cost), 1);
|
||||
if (scaled_cost != client_scaled_cost_infos[op_type]) {
|
||||
client_scaled_cost_infos[op_type] = scaled_cost;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
return std::max(scaled_cost, 1);
|
||||
}
|
||||
|
||||
void mClockScheduler::dump(ceph::Formatter &f) const
|
||||
@ -362,18 +381,13 @@ void mClockScheduler::dump(ceph::Formatter &f) const
|
||||
void mClockScheduler::enqueue(OpSchedulerItem&& item)
|
||||
{
|
||||
auto id = get_scheduler_id(item);
|
||||
auto op_type = item.get_op_type();
|
||||
int cost = client_scaled_cost_infos[op_type];
|
||||
|
||||
// Re-calculate the scaled cost for the client if the item cost changed
|
||||
if (maybe_update_client_cost_info(op_type, item.get_cost())) {
|
||||
cost = client_scaled_cost_infos[op_type];
|
||||
}
|
||||
|
||||
// TODO: move this check into OpSchedulerItem, handle backwards compat
|
||||
if (op_scheduler_class::immediate == item.get_scheduler_class()) {
|
||||
if (op_scheduler_class::immediate == id.class_id) {
|
||||
immediate.push_front(std::move(item));
|
||||
} else {
|
||||
int cost = calc_scaled_cost(item.get_cost());
|
||||
// Add item to scheduler queue
|
||||
scheduler.add_request(
|
||||
std::move(item),
|
||||
id,
|
||||
@ -448,12 +462,15 @@ void mClockScheduler::handle_conf_change(
|
||||
changed.count("osd_mclock_max_capacity_iops_hdd") ||
|
||||
changed.count("osd_mclock_max_capacity_iops_ssd")) {
|
||||
set_max_osd_capacity();
|
||||
enable_mclock_profile();
|
||||
client_registry.update_from_config(conf);
|
||||
if (mclock_profile != "custom") {
|
||||
enable_mclock_profile_settings();
|
||||
client_registry.update_from_config(conf);
|
||||
}
|
||||
}
|
||||
if (changed.count("osd_mclock_profile")) {
|
||||
enable_mclock_profile();
|
||||
set_mclock_profile();
|
||||
if (mclock_profile != "custom") {
|
||||
enable_mclock_profile_settings();
|
||||
client_registry.update_from_config(conf);
|
||||
}
|
||||
}
|
||||
@ -469,4 +486,9 @@ void mClockScheduler::handle_conf_change(
|
||||
}
|
||||
}
|
||||
|
||||
// Unregisters this scheduler from the config it was added to as an
// observer in the constructor (cct->_conf.add_observer(this)).
mClockScheduler::~mClockScheduler()
{
  cct->_conf.remove_observer(this);
}
|
||||
|
||||
}
|
||||
|
@ -38,7 +38,6 @@ constexpr uint64_t default_max = 999999;
|
||||
|
||||
using client_id_t = uint64_t;
|
||||
using profile_id_t = uint64_t;
|
||||
using op_type_t = OpSchedulerItem::OpQueueable::op_type_t;
|
||||
|
||||
struct client_profile_id_t {
|
||||
client_id_t client_id;
|
||||
@ -68,11 +67,34 @@ class mClockScheduler : public OpScheduler, md_config_obs_t {
|
||||
const uint32_t num_shards;
|
||||
bool is_rotational;
|
||||
double max_osd_capacity;
|
||||
double max_osd_bandwidth;
|
||||
uint64_t osd_mclock_cost_per_io_msec;
|
||||
std::string mclock_profile = "balanced";
|
||||
std::map<op_scheduler_class, double> client_allocs;
|
||||
std::map<op_type_t, int> client_cost_infos;
|
||||
std::map<op_type_t, int> client_scaled_cost_infos;
|
||||
std::string mclock_profile = "high_client_ops";
|
||||
// Reservation / weight / limit triple held per scheduler class. The values
// are written by the set_*_profile_allocations() helpers and copied into the
// osd_mclock_scheduler_*_{res,wgt,lim} config options.
struct ClientAllocs {
  uint64_t res;  // reservation
  uint64_t wgt;  // weight
  uint64_t lim;  // limit

  // Initialize all three fields directly instead of assigning them in the
  // constructor body.
  ClientAllocs(uint64_t _res, uint64_t _wgt, uint64_t _lim)
    : res(_res), wgt(_wgt), lim(_lim) {}

  // Overwrite all three fields at once.
  inline void update(uint64_t _res, uint64_t _wgt, uint64_t _lim) {
    res = _res;
    wgt = _wgt;
    lim = _lim;
  }
};
|
||||
std::array<
|
||||
ClientAllocs,
|
||||
static_cast<size_t>(op_scheduler_class::client) + 1
|
||||
> client_allocs = {
|
||||
// Placeholder, get replaced with configured values
|
||||
ClientAllocs(1, 1, 1), // background_recovery
|
||||
ClientAllocs(1, 1, 1), // background_best_effort
|
||||
ClientAllocs(1, 1, 1), // immediate (not used)
|
||||
ClientAllocs(1, 1, 1) // client
|
||||
};
|
||||
class ClientRegistry {
|
||||
std::array<
|
||||
crimson::dmclock::ClientInfo,
|
||||
@ -115,6 +137,7 @@ class mClockScheduler : public OpScheduler, md_config_obs_t {
|
||||
|
||||
public:
|
||||
mClockScheduler(CephContext *cct, uint32_t num_shards, bool is_rotational);
|
||||
~mClockScheduler() override;
|
||||
|
||||
// Set the max osd capacity in iops
|
||||
void set_max_osd_capacity();
|
||||
@ -122,35 +145,32 @@ public:
|
||||
// Set the cost per io for the osd
|
||||
void set_osd_mclock_cost_per_io();
|
||||
|
||||
// Set the mclock related config params based on the profile
|
||||
void enable_mclock_profile();
|
||||
// Set the mclock profile type to enable
|
||||
void set_mclock_profile();
|
||||
|
||||
// Get the active mclock profile
|
||||
std::string get_mclock_profile();
|
||||
|
||||
// Set client capacity allocations based on profile
|
||||
void set_client_allocations();
|
||||
// Set "balanced" profile allocations
|
||||
void set_balanced_profile_allocations();
|
||||
|
||||
// Get client allocation
|
||||
double get_client_allocation(op_type_t op_type);
|
||||
// Set "high_recovery_ops" profile allocations
|
||||
void set_high_recovery_ops_profile_allocations();
|
||||
|
||||
// Set "balanced" profile parameters
|
||||
void set_balanced_profile_config();
|
||||
// Set "high_client_ops" profile allocations
|
||||
void set_high_client_ops_profile_allocations();
|
||||
|
||||
// Set "high_recovery_ops" profile parameters
|
||||
void set_high_recovery_ops_profile_config();
|
||||
// Set the mclock related config params based on the profile
|
||||
void enable_mclock_profile_settings();
|
||||
|
||||
// Set "high_client_ops" profile parameters
|
||||
void set_high_client_ops_profile_config();
|
||||
// Set mclock config parameter based on allocations
|
||||
void set_profile_config();
|
||||
|
||||
// Set recovery specific Ceph settings for profiles
|
||||
void set_global_recovery_options();
|
||||
|
||||
// Calculate scale cost per item
|
||||
int calc_scaled_cost(op_type_t op_type, int cost);
|
||||
|
||||
// Update mclock client cost info
|
||||
bool maybe_update_client_cost_info(op_type_t op_type, int new_cost);
|
||||
int calc_scaled_cost(int cost);
|
||||
|
||||
// Enqueue op in the back of the regular queue
|
||||
void enqueue(OpSchedulerItem &&item) final;
|
||||
|
@ -93,7 +93,7 @@ TEST_F(mClockSchedulerTest, TestEmpty) {
|
||||
|
||||
for (unsigned i = 100; i < 105; i+=2) {
|
||||
q.enqueue(create_item(i, client1, op_scheduler_class::client));
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(1));
|
||||
}
|
||||
|
||||
ASSERT_FALSE(q.empty());
|
||||
@ -126,7 +126,7 @@ TEST_F(mClockSchedulerTest, TestSingleClientOrderedEnqueueDequeue) {
|
||||
|
||||
for (unsigned i = 100; i < 105; ++i) {
|
||||
q.enqueue(create_item(i, client1, op_scheduler_class::client));
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(1));
|
||||
}
|
||||
|
||||
auto r = get_item(q.dequeue());
|
||||
@ -150,6 +150,7 @@ TEST_F(mClockSchedulerTest, TestMultiClientOrderedEnqueueDequeue) {
|
||||
for (unsigned i = 0; i < NUM; ++i) {
|
||||
for (auto &&c: {client1, client2, client3}) {
|
||||
q.enqueue(create_item(i, c));
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(1));
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user