osd/: remove legacy schedulers

The consensus seems to be that PrioritizedQueue is strictly worse than
WeightedPriorityQueue.

mClockClientQueue and mClockOpClassQueue are superseded by
mClockScheduler.

Signed-off-by: Samuel Just <sjust@redhat.com>
Samuel Just 2019-09-26 15:04:09 -07:00
parent 9ec24d3bfe
commit 2157ac7d14
17 changed files with 8 additions and 1575 deletions
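After this commit, osd_op_queue recognizes only "wpq" and "mclock_scheduler". A minimal sketch of the resulting dispatch — simplified from the OpScheduler.cc hunk below, with stand-in types, not the verbatim Ceph code:

```cpp
// Hedged sketch: the scheduler selection that survives this commit.
// OpScheduler/WpqScheduler/MClockScheduler here are illustrative stubs.
#include <memory>
#include <stdexcept>
#include <string>

struct OpScheduler { virtual ~OpScheduler() = default; };
struct WpqScheduler final : OpScheduler {};     // WeightedPriorityQueue-backed
struct MClockScheduler final : OpScheduler {};  // dmclock-backed

std::unique_ptr<OpScheduler> make_scheduler(const std::string& type) {
  if (type == "wpq") {  // the default
    return std::make_unique<WpqScheduler>();
  } else if (type == "mclock_scheduler") {
    return std::make_unique<MClockScheduler>();
  }
  throw std::invalid_argument("invalid osd_op_queue: " + type);
}
```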


@@ -314,9 +314,6 @@ class ClusterConfigurationTest(DashboardTestCase):
'osd_deep_scrub_randomize_ratio', # osd-pg-scrub
'osd_deep_scrub_stride', # osd-pg-scrub
'osd_deep_scrub_update_digest_min_age', # osd-pg-scrub
'osd_op_queue_mclock_scrub_lim', # osd-pg-scrub
'osd_op_queue_mclock_scrub_res', # osd-pg-scrub
'osd_op_queue_mclock_scrub_wgt', # osd-pg-scrub
'osd_requested_scrub_priority', # osd-pg-scrub
'osd_scrub_backoff_ratio', # osd-pg-scrub
'osd_scrub_chunk_max', # osd-pg-scrub


@@ -621,30 +621,6 @@ OPTION(osd_op_queue, OPT_STR)
OPTION(osd_op_queue_cut_off, OPT_STR) // Min priority to go to strict queue. (low, high)
// mClock priority queue parameters for five types of ops
OPTION(osd_op_queue_mclock_client_op_res, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_client_op_wgt, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_client_op_lim, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_osd_rep_op_res, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_osd_rep_op_wgt, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_osd_rep_op_lim, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_snap_res, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_snap_wgt, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_snap_lim, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_recov_res, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_recov_wgt, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_recov_lim, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_scrub_res, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_scrub_wgt, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_scrub_lim, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_pg_delete_res, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_pg_delete_wgt, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_pg_delete_lim, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_peering_event_res, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_peering_event_wgt, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_peering_event_lim, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_anticipation_timeout, OPT_DOUBLE)
OPTION(osd_ignore_stale_divergent_priors, OPT_BOOL) // do not assert on divergent_prior entries which aren't in the log and whose on-disk objects are newer
// Set to true for testing. Users should NOT set this.
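Each removed op class above carried an mClock (reservation, weight, limit) triple. A hedged sketch of what those three numbers mean to the scheduler, using the client-op defaults from the options.cc hunk further down (res 1000, wgt 500, lim 0); the struct name is illustrative:

```cpp
// Illustrative only: the semantics of the res/wgt/lim triple that each
// removed osd_op_queue_mclock_*_{res,wgt,lim} option fed into dmclock.
struct ClientInfo {
  double reservation;  // minimum guaranteed service rate
  double weight;       // proportional share once reservations are met
  double limit;        // hard cap on service rate (0.0 = uncapped here)
};

// Defaults removed by this commit for the client-op class:
constexpr ClientInfo client_op{1000.0, 500.0, 0.0};
```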


@@ -2872,461 +2872,10 @@ std::vector<Option> get_global_options() {
.set_long_description("Only considered for osd_op_queue = mClockScheduler")
.add_see_also("osd_op_queue"),
Option("osd_op_queue_mclock_client_op_res", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(1000.0)
.set_description("mclock reservation of client operator requests")
.set_long_description("mclock reservation of client operator requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the reservation")
.add_see_also("osd_op_queue")
.add_see_also("osd_op_queue_mclock_client_op_wgt")
.add_see_also("osd_op_queue_mclock_client_op_lim")
.add_see_also("osd_op_queue_mclock_osd_rep_op_res")
.add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
.add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
.add_see_also("osd_op_queue_mclock_snap_res")
.add_see_also("osd_op_queue_mclock_snap_wgt")
.add_see_also("osd_op_queue_mclock_snap_lim")
.add_see_also("osd_op_queue_mclock_recov_res")
.add_see_also("osd_op_queue_mclock_recov_wgt")
.add_see_also("osd_op_queue_mclock_recov_lim")
.add_see_also("osd_op_queue_mclock_scrub_res")
.add_see_also("osd_op_queue_mclock_scrub_wgt")
.add_see_also("osd_op_queue_mclock_scrub_lim")
.add_see_also("osd_op_queue_mclock_anticipation_timeout"),
Option("osd_op_queue_mclock_client_op_wgt", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(500.0)
.set_description("mclock weight of client operator requests")
.set_long_description("mclock weight of client operator requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the weight")
.add_see_also("osd_op_queue")
.add_see_also("osd_op_queue_mclock_client_op_res")
.add_see_also("osd_op_queue_mclock_client_op_lim")
.add_see_also("osd_op_queue_mclock_osd_rep_op_res")
.add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
.add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
.add_see_also("osd_op_queue_mclock_snap_res")
.add_see_also("osd_op_queue_mclock_snap_wgt")
.add_see_also("osd_op_queue_mclock_snap_lim")
.add_see_also("osd_op_queue_mclock_recov_res")
.add_see_also("osd_op_queue_mclock_recov_wgt")
.add_see_also("osd_op_queue_mclock_recov_lim")
.add_see_also("osd_op_queue_mclock_scrub_res")
.add_see_also("osd_op_queue_mclock_scrub_wgt")
.add_see_also("osd_op_queue_mclock_scrub_lim")
.add_see_also("osd_op_queue_mclock_anticipation_timeout"),
Option("osd_op_queue_mclock_client_op_lim", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(0.0)
.set_description("mclock limit of client operator requests")
.set_long_description("mclock limit of client operator requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the limit")
.add_see_also("osd_op_queue")
.add_see_also("osd_op_queue_mclock_client_op_res")
.add_see_also("osd_op_queue_mclock_client_op_wgt")
.add_see_also("osd_op_queue_mclock_osd_rep_op_res")
.add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
.add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
.add_see_also("osd_op_queue_mclock_snap_res")
.add_see_also("osd_op_queue_mclock_snap_wgt")
.add_see_also("osd_op_queue_mclock_snap_lim")
.add_see_also("osd_op_queue_mclock_recov_res")
.add_see_also("osd_op_queue_mclock_recov_wgt")
.add_see_also("osd_op_queue_mclock_recov_lim")
.add_see_also("osd_op_queue_mclock_scrub_res")
.add_see_also("osd_op_queue_mclock_scrub_wgt")
.add_see_also("osd_op_queue_mclock_scrub_lim")
.add_see_also("osd_op_queue_mclock_anticipation_timeout"),
Option("osd_op_queue_mclock_osd_rep_op_res", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(1000.0)
.set_description("mclock reservation of osd replication operation requests and replies")
.set_long_description("mclock reservation of replication operation requests and replies when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the reservation")
.add_see_also("osd_op_queue")
.add_see_also("osd_op_queue_mclock_client_op_res")
.add_see_also("osd_op_queue_mclock_client_op_wgt")
.add_see_also("osd_op_queue_mclock_client_op_lim")
.add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
.add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
.add_see_also("osd_op_queue_mclock_snap_res")
.add_see_also("osd_op_queue_mclock_snap_wgt")
.add_see_also("osd_op_queue_mclock_snap_lim")
.add_see_also("osd_op_queue_mclock_recov_res")
.add_see_also("osd_op_queue_mclock_recov_wgt")
.add_see_also("osd_op_queue_mclock_recov_lim")
.add_see_also("osd_op_queue_mclock_scrub_res")
.add_see_also("osd_op_queue_mclock_scrub_wgt")
.add_see_also("osd_op_queue_mclock_scrub_lim")
.add_see_also("osd_op_queue_mclock_anticipation_timeout"),
Option("osd_op_queue_mclock_osd_rep_op_wgt", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(500.0)
.set_description("mclock weight of osd replication operation requests and replies")
.set_long_description("mclock weight of osd replication operation requests and replies when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the weight")
.add_see_also("osd_op_queue")
.add_see_also("osd_op_queue_mclock_client_op_res")
.add_see_also("osd_op_queue_mclock_client_op_wgt")
.add_see_also("osd_op_queue_mclock_client_op_lim")
.add_see_also("osd_op_queue_mclock_osd_rep_op_res")
.add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
.add_see_also("osd_op_queue_mclock_snap_res")
.add_see_also("osd_op_queue_mclock_snap_wgt")
.add_see_also("osd_op_queue_mclock_snap_lim")
.add_see_also("osd_op_queue_mclock_recov_res")
.add_see_also("osd_op_queue_mclock_recov_wgt")
.add_see_also("osd_op_queue_mclock_recov_lim")
.add_see_also("osd_op_queue_mclock_scrub_res")
.add_see_also("osd_op_queue_mclock_scrub_wgt")
.add_see_also("osd_op_queue_mclock_scrub_lim")
.add_see_also("osd_op_queue_mclock_anticipation_timeout"),
Option("osd_op_queue_mclock_osd_rep_op_lim", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(0.0)
.set_description("mclock limit of osd replication operation requests and replies")
.set_long_description("mclock limit of osd sub-operation requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the limit")
.add_see_also("osd_op_queue")
.add_see_also("osd_op_queue_mclock_client_op_res")
.add_see_also("osd_op_queue_mclock_client_op_wgt")
.add_see_also("osd_op_queue_mclock_client_op_lim")
.add_see_also("osd_op_queue_mclock_osd_rep_op_res")
.add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
.add_see_also("osd_op_queue_mclock_snap_res")
.add_see_also("osd_op_queue_mclock_snap_wgt")
.add_see_also("osd_op_queue_mclock_snap_lim")
.add_see_also("osd_op_queue_mclock_recov_res")
.add_see_also("osd_op_queue_mclock_recov_wgt")
.add_see_also("osd_op_queue_mclock_recov_lim")
.add_see_also("osd_op_queue_mclock_scrub_res")
.add_see_also("osd_op_queue_mclock_scrub_wgt")
.add_see_also("osd_op_queue_mclock_scrub_lim")
.add_see_also("osd_op_queue_mclock_anticipation_timeout"),
Option("osd_op_queue_mclock_snap_res", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(0.0)
.set_description("mclock reservation of snaptrim requests")
.set_long_description("mclock reservation of snaptrim requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the reservation")
.add_see_also("osd_op_queue")
.add_see_also("osd_op_queue_mclock_client_op_res")
.add_see_also("osd_op_queue_mclock_client_op_wgt")
.add_see_also("osd_op_queue_mclock_client_op_lim")
.add_see_also("osd_op_queue_mclock_osd_rep_op_res")
.add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
.add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
.add_see_also("osd_op_queue_mclock_snap_wgt")
.add_see_also("osd_op_queue_mclock_snap_lim")
.add_see_also("osd_op_queue_mclock_recov_res")
.add_see_also("osd_op_queue_mclock_recov_wgt")
.add_see_also("osd_op_queue_mclock_recov_lim")
.add_see_also("osd_op_queue_mclock_scrub_res")
.add_see_also("osd_op_queue_mclock_scrub_wgt")
.add_see_also("osd_op_queue_mclock_scrub_lim")
.add_see_also("osd_op_queue_mclock_anticipation_timeout"),
Option("osd_op_queue_mclock_snap_wgt", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(1.0)
.set_description("mclock weight of snaptrim requests")
.set_long_description("mclock weight of snaptrim requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the weight")
.add_see_also("osd_op_queue")
.add_see_also("osd_op_queue_mclock_client_op_res")
.add_see_also("osd_op_queue_mclock_client_op_wgt")
.add_see_also("osd_op_queue_mclock_client_op_lim")
.add_see_also("osd_op_queue_mclock_osd_rep_op_res")
.add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
.add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
.add_see_also("osd_op_queue_mclock_snap_res")
.add_see_also("osd_op_queue_mclock_snap_lim")
.add_see_also("osd_op_queue_mclock_recov_res")
.add_see_also("osd_op_queue_mclock_recov_wgt")
.add_see_also("osd_op_queue_mclock_recov_lim")
.add_see_also("osd_op_queue_mclock_scrub_res")
.add_see_also("osd_op_queue_mclock_scrub_wgt")
.add_see_also("osd_op_queue_mclock_scrub_lim")
.add_see_also("osd_op_queue_mclock_anticipation_timeout"),
Option("osd_op_queue_mclock_snap_lim", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(0.001)
.set_description("")
.set_description("mclock limit of snaptrim requests")
.set_long_description("mclock limit of snaptrim requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the limit")
.add_see_also("osd_op_queue_mclock_client_op_res")
.add_see_also("osd_op_queue_mclock_client_op_wgt")
.add_see_also("osd_op_queue_mclock_client_op_lim")
.add_see_also("osd_op_queue_mclock_osd_rep_op_res")
.add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
.add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
.add_see_also("osd_op_queue_mclock_snap_res")
.add_see_also("osd_op_queue_mclock_snap_wgt")
.add_see_also("osd_op_queue_mclock_recov_res")
.add_see_also("osd_op_queue_mclock_recov_wgt")
.add_see_also("osd_op_queue_mclock_recov_lim")
.add_see_also("osd_op_queue_mclock_scrub_res")
.add_see_also("osd_op_queue_mclock_scrub_wgt")
.add_see_also("osd_op_queue_mclock_scrub_lim")
.add_see_also("osd_op_queue_mclock_anticipation_timeout"),
Option("osd_op_queue_mclock_recov_res", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(0.0)
.set_description("mclock reservation of recovery requests")
.set_long_description("mclock reservation of recovery requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the reservation")
.add_see_also("osd_op_queue")
.add_see_also("osd_op_queue_mclock_client_op_res")
.add_see_also("osd_op_queue_mclock_client_op_wgt")
.add_see_also("osd_op_queue_mclock_client_op_lim")
.add_see_also("osd_op_queue_mclock_osd_rep_op_res")
.add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
.add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
.add_see_also("osd_op_queue_mclock_snap_res")
.add_see_also("osd_op_queue_mclock_snap_wgt")
.add_see_also("osd_op_queue_mclock_snap_lim")
.add_see_also("osd_op_queue_mclock_recov_wgt")
.add_see_also("osd_op_queue_mclock_recov_lim")
.add_see_also("osd_op_queue_mclock_scrub_res")
.add_see_also("osd_op_queue_mclock_scrub_wgt")
.add_see_also("osd_op_queue_mclock_scrub_lim")
.add_see_also("osd_op_queue_mclock_anticipation_timeout"),
Option("osd_op_queue_mclock_recov_wgt", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(1.0)
.set_description("mclock weight of recovery requests")
.set_long_description("mclock weight of recovery requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the weight")
.add_see_also("osd_op_queue")
.add_see_also("osd_op_queue_mclock_client_op_res")
.add_see_also("osd_op_queue_mclock_client_op_wgt")
.add_see_also("osd_op_queue_mclock_client_op_lim")
.add_see_also("osd_op_queue_mclock_osd_rep_op_res")
.add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
.add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
.add_see_also("osd_op_queue_mclock_snap_res")
.add_see_also("osd_op_queue_mclock_snap_wgt")
.add_see_also("osd_op_queue_mclock_snap_lim")
.add_see_also("osd_op_queue_mclock_recov_res")
.add_see_also("osd_op_queue_mclock_recov_lim")
.add_see_also("osd_op_queue_mclock_scrub_res")
.add_see_also("osd_op_queue_mclock_scrub_wgt")
.add_see_also("osd_op_queue_mclock_scrub_lim")
.add_see_also("osd_op_queue_mclock_anticipation_timeout"),
Option("osd_op_queue_mclock_recov_lim", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(0.001)
.set_description("mclock limit of recovery requests")
.set_long_description("mclock limit of recovery requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the limit")
.add_see_also("osd_op_queue")
.add_see_also("osd_op_queue_mclock_client_op_res")
.add_see_also("osd_op_queue_mclock_client_op_wgt")
.add_see_also("osd_op_queue_mclock_client_op_lim")
.add_see_also("osd_op_queue_mclock_osd_rep_op_res")
.add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
.add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
.add_see_also("osd_op_queue_mclock_snap_res")
.add_see_also("osd_op_queue_mclock_snap_wgt")
.add_see_also("osd_op_queue_mclock_snap_lim")
.add_see_also("osd_op_queue_mclock_recov_res")
.add_see_also("osd_op_queue_mclock_recov_wgt")
.add_see_also("osd_op_queue_mclock_scrub_res")
.add_see_also("osd_op_queue_mclock_scrub_wgt")
.add_see_also("osd_op_queue_mclock_scrub_lim")
.add_see_also("osd_op_queue_mclock_anticipation_timeout"),
Option("osd_op_queue_mclock_scrub_res", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(0.0)
.set_description("mclock reservation of scrub requests")
.set_long_description("mclock reservation of scrub requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the reservation")
.add_see_also("osd_op_queue")
.add_see_also("osd_op_queue_mclock_client_op_res")
.add_see_also("osd_op_queue_mclock_client_op_wgt")
.add_see_also("osd_op_queue_mclock_client_op_lim")
.add_see_also("osd_op_queue_mclock_osd_rep_op_res")
.add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
.add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
.add_see_also("osd_op_queue_mclock_snap_res")
.add_see_also("osd_op_queue_mclock_snap_wgt")
.add_see_also("osd_op_queue_mclock_snap_lim")
.add_see_also("osd_op_queue_mclock_recov_res")
.add_see_also("osd_op_queue_mclock_recov_wgt")
.add_see_also("osd_op_queue_mclock_recov_lim")
.add_see_also("osd_op_queue_mclock_scrub_wgt")
.add_see_also("osd_op_queue_mclock_scrub_lim")
.add_see_also("osd_op_queue_mclock_anticipation_timeout"),
Option("osd_op_queue_mclock_scrub_wgt", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(1.0)
.set_description("mclock weight of scrub requests")
.set_long_description("mclock weight of scrub requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the weight")
.add_see_also("osd_op_queue")
.add_see_also("osd_op_queue_mclock_client_op_res")
.add_see_also("osd_op_queue_mclock_client_op_wgt")
.add_see_also("osd_op_queue_mclock_client_op_lim")
.add_see_also("osd_op_queue_mclock_osd_rep_op_res")
.add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
.add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
.add_see_also("osd_op_queue_mclock_snap_res")
.add_see_also("osd_op_queue_mclock_snap_wgt")
.add_see_also("osd_op_queue_mclock_snap_lim")
.add_see_also("osd_op_queue_mclock_recov_res")
.add_see_also("osd_op_queue_mclock_recov_wgt")
.add_see_also("osd_op_queue_mclock_recov_lim")
.add_see_also("osd_op_queue_mclock_scrub_res")
.add_see_also("osd_op_queue_mclock_scrub_lim")
.add_see_also("osd_op_queue_mclock_anticipation_timeout"),
Option("osd_op_queue_mclock_scrub_lim", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(0.001)
.set_description("mclock weight of limit requests")
.set_long_description("mclock weight of limit requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the limit")
.add_see_also("osd_op_queue")
.add_see_also("osd_op_queue_mclock_client_op_res")
.add_see_also("osd_op_queue_mclock_client_op_wgt")
.add_see_also("osd_op_queue_mclock_client_op_lim")
.add_see_also("osd_op_queue_mclock_osd_rep_op_res")
.add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
.add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
.add_see_also("osd_op_queue_mclock_snap_res")
.add_see_also("osd_op_queue_mclock_snap_wgt")
.add_see_also("osd_op_queue_mclock_snap_lim")
.add_see_also("osd_op_queue_mclock_recov_res")
.add_see_also("osd_op_queue_mclock_recov_wgt")
.add_see_also("osd_op_queue_mclock_recov_lim")
.add_see_also("osd_op_queue_mclock_scrub_res")
.add_see_also("osd_op_queue_mclock_scrub_wgt")
.add_see_also("osd_op_queue_mclock_anticipation_timeout"),
Option("osd_op_queue_mclock_anticipation_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
Option("osd_mclock_scheduler_anticipation_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(0.0)
.set_description("mclock anticipation timeout in seconds")
.set_long_description("the amount of time that mclock waits until the unused resource is forfeited")
.add_see_also("osd_op_queue")
.add_see_also("osd_op_queue_mclock_client_op_res")
.add_see_also("osd_op_queue_mclock_client_op_wgt")
.add_see_also("osd_op_queue_mclock_client_op_lim")
.add_see_also("osd_op_queue_mclock_osd_rep_op_res")
.add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
.add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
.add_see_also("osd_op_queue_mclock_snap_res")
.add_see_also("osd_op_queue_mclock_snap_wgt")
.add_see_also("osd_op_queue_mclock_snap_lim")
.add_see_also("osd_op_queue_mclock_recov_res")
.add_see_also("osd_op_queue_mclock_recov_wgt")
.add_see_also("osd_op_queue_mclock_recov_lim")
.add_see_also("osd_op_queue_mclock_scrub_res")
.add_see_also("osd_op_queue_mclock_scrub_wgt")
.add_see_also("osd_op_queue_mclock_scrub_lim"),
Option("osd_op_queue_mclock_pg_delete_res", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(0.0)
.set_description("mclock reservation of pg delete work")
.set_long_description("mclock reservation of pg delete work when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the reservation")
.add_see_also("osd_op_queue")
.add_see_also("osd_op_queue_mclock_client_op_res")
.add_see_also("osd_op_queue_mclock_client_op_wgt")
.add_see_also("osd_op_queue_mclock_client_op_lim")
.add_see_also("osd_op_queue_mclock_osd_rep_op_res")
.add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
.add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
.add_see_also("osd_op_queue_mclock_snap_res")
.add_see_also("osd_op_queue_mclock_snap_wgt")
.add_see_also("osd_op_queue_mclock_snap_lim")
.add_see_also("osd_op_queue_mclock_recov_res")
.add_see_also("osd_op_queue_mclock_recov_wgt")
.add_see_also("osd_op_queue_mclock_recov_lim")
.add_see_also("osd_op_queue_mclock_scrub_wgt")
.add_see_also("osd_op_queue_mclock_scrub_lim"),
Option("osd_op_queue_mclock_pg_delete_wgt", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(1.0)
.set_description("mclock weight of pg delete work")
.set_long_description("mclock weight of pg delete work when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the weight")
.add_see_also("osd_op_queue")
.add_see_also("osd_op_queue_mclock_client_op_res")
.add_see_also("osd_op_queue_mclock_client_op_wgt")
.add_see_also("osd_op_queue_mclock_client_op_lim")
.add_see_also("osd_op_queue_mclock_osd_rep_op_res")
.add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
.add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
.add_see_also("osd_op_queue_mclock_snap_res")
.add_see_also("osd_op_queue_mclock_snap_wgt")
.add_see_also("osd_op_queue_mclock_snap_lim")
.add_see_also("osd_op_queue_mclock_recov_res")
.add_see_also("osd_op_queue_mclock_recov_wgt")
.add_see_also("osd_op_queue_mclock_recov_lim")
.add_see_also("osd_op_queue_mclock_scrub_res")
.add_see_also("osd_op_queue_mclock_scrub_lim"),
Option("osd_op_queue_mclock_pg_delete_lim", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(0.001)
.set_description("mclock weight of pg delete work limit requests")
.set_long_description("mclock weight of limit pg delete work when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the limit")
.add_see_also("osd_op_queue")
.add_see_also("osd_op_queue_mclock_client_op_res")
.add_see_also("osd_op_queue_mclock_client_op_wgt")
.add_see_also("osd_op_queue_mclock_client_op_lim")
.add_see_also("osd_op_queue_mclock_osd_rep_op_res")
.add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
.add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
.add_see_also("osd_op_queue_mclock_snap_res")
.add_see_also("osd_op_queue_mclock_snap_wgt")
.add_see_also("osd_op_queue_mclock_snap_lim")
.add_see_also("osd_op_queue_mclock_recov_res")
.add_see_also("osd_op_queue_mclock_recov_wgt")
.add_see_also("osd_op_queue_mclock_recov_lim")
.add_see_also("osd_op_queue_mclock_scrub_res")
.add_see_also("osd_op_queue_mclock_scrub_wgt"),
Option("osd_op_queue_mclock_peering_event_res", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(0.0)
.set_description("mclock reservation of peering events")
.set_long_description("mclock reservation of scrub requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the reservation")
.add_see_also("osd_op_queue")
.add_see_also("osd_op_queue_mclock_client_op_res")
.add_see_also("osd_op_queue_mclock_client_op_wgt")
.add_see_also("osd_op_queue_mclock_client_op_lim")
.add_see_also("osd_op_queue_mclock_osd_rep_op_res")
.add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
.add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
.add_see_also("osd_op_queue_mclock_snap_res")
.add_see_also("osd_op_queue_mclock_snap_wgt")
.add_see_also("osd_op_queue_mclock_snap_lim")
.add_see_also("osd_op_queue_mclock_recov_res")
.add_see_also("osd_op_queue_mclock_recov_wgt")
.add_see_also("osd_op_queue_mclock_recov_lim")
.add_see_also("osd_op_queue_mclock_scrub_wgt")
.add_see_also("osd_op_queue_mclock_scrub_lim"),
Option("osd_op_queue_mclock_peering_event_wgt", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(1.0)
.set_description("mclock weight of peering events")
.set_long_description("mclock weight of scrub requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the weight")
.add_see_also("osd_op_queue")
.add_see_also("osd_op_queue_mclock_client_op_res")
.add_see_also("osd_op_queue_mclock_client_op_wgt")
.add_see_also("osd_op_queue_mclock_client_op_lim")
.add_see_also("osd_op_queue_mclock_osd_rep_op_res")
.add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
.add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
.add_see_also("osd_op_queue_mclock_snap_res")
.add_see_also("osd_op_queue_mclock_snap_wgt")
.add_see_also("osd_op_queue_mclock_snap_lim")
.add_see_also("osd_op_queue_mclock_recov_res")
.add_see_also("osd_op_queue_mclock_recov_wgt")
.add_see_also("osd_op_queue_mclock_recov_lim")
.add_see_also("osd_op_queue_mclock_scrub_res")
.add_see_also("osd_op_queue_mclock_scrub_lim"),
Option("osd_op_queue_mclock_peering_event_lim", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(0.001)
.set_description("mclock weight of limit peering events")
.set_long_description("mclock weight of limit requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the limit")
.add_see_also("osd_op_queue")
.add_see_also("osd_op_queue_mclock_client_op_res")
.add_see_also("osd_op_queue_mclock_client_op_wgt")
.add_see_also("osd_op_queue_mclock_client_op_lim")
.add_see_also("osd_op_queue_mclock_osd_rep_op_res")
.add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
.add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
.add_see_also("osd_op_queue_mclock_snap_res")
.add_see_also("osd_op_queue_mclock_snap_wgt")
.add_see_also("osd_op_queue_mclock_snap_lim")
.add_see_also("osd_op_queue_mclock_recov_res")
.add_see_also("osd_op_queue_mclock_recov_wgt")
.add_see_also("osd_op_queue_mclock_recov_lim")
.add_see_also("osd_op_queue_mclock_scrub_res")
.add_see_also("osd_op_queue_mclock_scrub_wgt"),
.set_long_description("the amount of time that mclock waits until the unused resource is forfeited"),
Option("osd_ignore_stale_divergent_priors", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(false)
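Of the tunables above, only the anticipation timeout survives, renamed to osd_mclock_scheduler_anticipation_timeout; the mClockScheduler.cc hunk below fetches it with a typed get_val. A self-contained sketch of that lookup style — Conf here is a map-backed stand-in for Ceph's config, not the real md_config_t:

```cpp
// Hedged sketch of a typed config lookup in the style of
// cct->_conf.get_val<double>(...) used in the mClockScheduler.cc hunk below.
#include <map>
#include <string>

struct Conf {
  std::map<std::string, double> vals;
  template <typename T>
  T get_val(const std::string& key) const {
    return static_cast<T>(vals.at(key));
  }
};

double anticipation_of(const Conf& conf) {
  return conf.get_val<double>("osd_mclock_scheduler_anticipation_timeout");
}
```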


@@ -29,9 +29,6 @@ set(osd_srcs
osd_types.cc
ECUtil.cc
ExtentCache.cc
mClockOpClassSupport.cc
mClockOpClassQueue.cc
mClockClientQueue.cc
scheduler/OpScheduler.cc
scheduler/OpSchedulerItem.cc
scheduler/mClockScheduler.cc


@@ -1,97 +0,0 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
* Copyright (C) 2016 Red Hat Inc.
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software
* Foundation. See file COPYING.
*
*/
#include <memory>
#include "osd/mClockClientQueue.h"
#include "common/dout.h"
namespace dmc = crimson::dmclock;
using namespace std::placeholders;
#define dout_context cct
#define dout_subsys ceph_subsys_osd
#undef dout_prefix
#define dout_prefix *_dout
namespace ceph {
/*
* class mClockClientQueue
*/
mClockClientQueue::mClockClientQueue(CephContext *cct) :
queue(std::bind(&mClockClientQueue::op_class_client_info_f, this, _1),
cct->_conf->osd_op_queue_mclock_anticipation_timeout),
client_info_mgr(cct)
{
// empty
}
const dmc::ClientInfo* mClockClientQueue::op_class_client_info_f(
const mClockClientQueue::InnerClient& client)
{
return client_info_mgr.get_client_info(client.second);
}
mClockClientQueue::InnerClient
inline mClockClientQueue::get_inner_client(const Client& cl,
const Request& request) {
return InnerClient(cl, client_info_mgr.osd_op_type(request));
}
// Formatted output of the queue
inline void mClockClientQueue::dump(ceph::Formatter *f) const {
queue.dump(f);
}
inline void mClockClientQueue::enqueue_strict(Client cl,
unsigned priority,
Request&& item) {
queue.enqueue_strict(get_inner_client(cl, item), priority,
std::move(item));
}
// Enqueue op in the front of the strict queue
inline void mClockClientQueue::enqueue_strict_front(Client cl,
unsigned priority,
Request&& item) {
queue.enqueue_strict_front(get_inner_client(cl, item), priority,
std::move(item));
}
// Enqueue op in the back of the regular queue
inline void mClockClientQueue::enqueue(Client cl,
unsigned priority,
unsigned cost,
Request&& item) {
queue.enqueue(get_inner_client(cl, item), priority, 1u, std::move(item));
}
// Enqueue the op in the front of the regular queue
inline void mClockClientQueue::enqueue_front(Client cl,
unsigned priority,
unsigned cost,
Request&& item) {
queue.enqueue_front(get_inner_client(cl, item), priority, 1u,
std::move(item));
}
// Return an op to be dispatched
inline Request mClockClientQueue::dequeue() {
return queue.dequeue();
}
} // namespace ceph


@@ -1,115 +0,0 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
* Copyright (C) 2016 Red Hat Inc.
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software
* Foundation. See file COPYING.
*
*/
#pragma once
#include <ostream>
#include "boost/variant.hpp"
#include "common/config.h"
#include "common/ceph_context.h"
#include "common/mClockPriorityQueue.h"
#include "osd/scheduler/OpSchedulerItem.h"
#include "osd/mClockOpClassSupport.h"
namespace ceph {
using Request = ceph::osd::scheduler::OpSchedulerItem;
using Client = uint64_t;
// This class exists to bridge the ceph code, which treats the class
// as the client, and the queue, where the class is
// osd_op_type_t. So this adapter class will transform calls
// appropriately.
class mClockClientQueue : public OpQueue<Request, Client> {
using osd_op_type_t = ceph::mclock::osd_op_type_t;
using InnerClient = std::pair<uint64_t,osd_op_type_t>;
using queue_t = mClockQueue<Request, InnerClient>;
queue_t queue;
ceph::mclock::OpClassClientInfoMgr client_info_mgr;
public:
mClockClientQueue(CephContext *cct);
const crimson::dmclock::ClientInfo* op_class_client_info_f(const InnerClient& client);
inline unsigned get_size_slow() const {
return queue.get_size_slow();
}
// Ops of this priority should be deleted immediately
inline void remove_by_class(Client cl,
std::list<Request> *out) override final {
queue.remove_by_filter(
[&cl, out] (Request&& r) -> bool {
if (cl == r.get_owner()) {
out->push_front(std::move(r));
return true;
} else {
return false;
}
});
}
void enqueue_strict(Client cl,
unsigned priority,
Request&& item) override final;
// Enqueue op in the front of the strict queue
void enqueue_strict_front(Client cl,
unsigned priority,
Request&& item) override final;
// Enqueue op in the back of the regular queue
void enqueue(Client cl,
unsigned priority,
unsigned cost,
Request&& item) override final;
// Enqueue the op in the front of the regular queue
void enqueue_front(Client cl,
unsigned priority,
unsigned cost,
Request&& item) override final;
// Return an op to be dispatch
Request dequeue() override final;
// Returns if the queue is empty
inline bool empty() const override final {
return queue.empty();
}
// Formatted output of the queue
void dump(ceph::Formatter *f) const override final;
void print(std::ostream &ostream) const final {
ostream << "mClockClientQueue";
}
protected:
InnerClient get_inner_client(const Client& cl, const Request& request);
}; // class mClockClientQueue
} // namespace ceph
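The bridging described in the header comment above amounts to widening the queue key from a bare client id to a (client id, op class) pair, so each class can carry its own QoS parameters. A self-contained sketch of that adapter idea (types are illustrative, not the removed Ceph ones):

```cpp
// Illustrative adapter: callers key by client id; the inner mClock queue
// keys by (client, op class), mirroring mClockClientQueue::InnerClient.
#include <cstdint>
#include <utility>

enum class osd_op_type_t { client_op, osd_rep_op, bg_snaptrim, bg_recovery };
using Client = std::uint64_t;
using InnerClient = std::pair<Client, osd_op_type_t>;

// Stand-in for client_info_mgr.osd_op_type(request) classification:
InnerClient make_inner_client(Client cl, osd_op_type_t type) {
  return {cl, type};
}
```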


@@ -1,54 +0,0 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
* Copyright (C) 2016 Red Hat Inc.
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software
* Foundation. See file COPYING.
*
*/
#include <memory>
#include "osd/mClockOpClassQueue.h"
#include "common/dout.h"
namespace dmc = crimson::dmclock;
using namespace std::placeholders;
#define dout_context cct
#define dout_subsys ceph_subsys_osd
#undef dout_prefix
#define dout_prefix *_dout
namespace ceph {
/*
* class mClockOpClassQueue
*/
mClockOpClassQueue::mClockOpClassQueue(CephContext *cct) :
queue(std::bind(&mClockOpClassQueue::op_class_client_info_f, this, _1),
cct->_conf->osd_op_queue_mclock_anticipation_timeout),
client_info_mgr(cct)
{
// empty
}
const dmc::ClientInfo* mClockOpClassQueue::op_class_client_info_f(
const osd_op_type_t& op_type)
{
return client_info_mgr.get_client_info(op_type);
}
// Formatted output of the queue
void mClockOpClassQueue::dump(ceph::Formatter *f) const {
queue.dump(f);
}
} // namespace ceph


@@ -1,129 +0,0 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
* Copyright (C) 2016 Red Hat Inc.
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software
* Foundation. See file COPYING.
*
*/
#pragma once
#include <ostream>
#include "boost/variant.hpp"
#include "boost/container/flat_set.hpp"
#include "common/config.h"
#include "common/ceph_context.h"
#include "common/mClockPriorityQueue.h"
#include "osd/scheduler/OpSchedulerItem.h"
#include "osd/mClockOpClassSupport.h"
namespace ceph {
using Request = ceph::osd::scheduler::OpSchedulerItem;
using Client = uint64_t;
// This class exists to bridge the ceph code, which treats the class
// as the client, and the queue, where the class is
// osd_op_type_t. So this adapter class will transform calls
// appropriately.
class mClockOpClassQueue : public OpQueue<Request, Client> {
using osd_op_type_t = ceph::mclock::osd_op_type_t;
using queue_t = mClockQueue<Request, osd_op_type_t>;
queue_t queue;
ceph::mclock::OpClassClientInfoMgr client_info_mgr;
public:
mClockOpClassQueue(CephContext *cct);
const crimson::dmclock::ClientInfo*
op_class_client_info_f(const osd_op_type_t& op_type);
inline unsigned get_size_slow() const {
return queue.get_size_slow();
}
// Ops of this priority should be deleted immediately
inline void remove_by_class(Client cl,
std::list<Request> *out) override final {
queue.remove_by_filter(
[&cl, out] (Request&& r) -> bool {
if (cl == r.get_owner()) {
out->push_front(std::move(r));
return true;
} else {
return false;
}
});
}
inline void enqueue_strict(Client cl,
unsigned priority,
Request&& item) override final {
queue.enqueue_strict(client_info_mgr.osd_op_type(item),
priority,
std::move(item));
}
// Enqueue op in the front of the strict queue
inline void enqueue_strict_front(Client cl,
unsigned priority,
Request&& item) override final {
queue.enqueue_strict_front(client_info_mgr.osd_op_type(item),
priority,
std::move(item));
}
// Enqueue op in the back of the regular queue
inline void enqueue(Client cl,
unsigned priority,
unsigned cost,
Request&& item) override final {
queue.enqueue(client_info_mgr.osd_op_type(item),
priority,
1u,
std::move(item));
}
// Enqueue the op in the front of the regular queue
inline void enqueue_front(Client cl,
unsigned priority,
unsigned cost,
Request&& item) override final {
queue.enqueue_front(client_info_mgr.osd_op_type(item),
priority,
1u,
std::move(item));
}
// Returns if the queue is empty
inline bool empty() const override final {
return queue.empty();
}
// Return an op to be dispatch
inline Request dequeue() override final {
return queue.dequeue();
}
// Formatted output of the queue
void dump(ceph::Formatter *f) const override final;
void print(std::ostream &ostream) const final {
ostream << "mClockOpClassQueue";
}
}; // class mClockOpClassQueue
} // namespace ceph


@@ -1,112 +0,0 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
* Copyright (C) 2017 Red Hat Inc.
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software
* Foundation. See file COPYING.
*
*/
#include "osd/mClockOpClassSupport.h"
#include "common/dout.h"
#include "include/ceph_assert.h"
namespace ceph {
namespace mclock {
OpClassClientInfoMgr::OpClassClientInfoMgr(CephContext *cct) :
client_op(cct->_conf->osd_op_queue_mclock_client_op_res,
cct->_conf->osd_op_queue_mclock_client_op_wgt,
cct->_conf->osd_op_queue_mclock_client_op_lim),
osd_rep_op(cct->_conf->osd_op_queue_mclock_osd_rep_op_res,
cct->_conf->osd_op_queue_mclock_osd_rep_op_wgt,
cct->_conf->osd_op_queue_mclock_osd_rep_op_lim),
snaptrim(cct->_conf->osd_op_queue_mclock_snap_res,
cct->_conf->osd_op_queue_mclock_snap_wgt,
cct->_conf->osd_op_queue_mclock_snap_lim),
recov(cct->_conf->osd_op_queue_mclock_recov_res,
cct->_conf->osd_op_queue_mclock_recov_wgt,
cct->_conf->osd_op_queue_mclock_recov_lim),
scrub(cct->_conf->osd_op_queue_mclock_scrub_res,
cct->_conf->osd_op_queue_mclock_scrub_wgt,
cct->_conf->osd_op_queue_mclock_scrub_lim),
pg_delete(cct->_conf->osd_op_queue_mclock_pg_delete_res,
cct->_conf->osd_op_queue_mclock_pg_delete_wgt,
cct->_conf->osd_op_queue_mclock_pg_delete_lim),
peering_event(cct->_conf->osd_op_queue_mclock_peering_event_res,
cct->_conf->osd_op_queue_mclock_peering_event_wgt,
cct->_conf->osd_op_queue_mclock_peering_event_lim)
{
constexpr int rep_ops[] = {
MSG_OSD_REPOP,
MSG_OSD_REPOPREPLY,
MSG_OSD_PG_UPDATE_LOG_MISSING,
MSG_OSD_PG_UPDATE_LOG_MISSING_REPLY,
MSG_OSD_EC_WRITE,
MSG_OSD_EC_WRITE_REPLY,
MSG_OSD_EC_READ,
MSG_OSD_EC_READ_REPLY
};
for (auto op : rep_ops) {
add_rep_op_msg(op);
}
lgeneric_subdout(cct, osd, 20) <<
"mClock OpClass settings:: " <<
"client_op:" << client_op <<
"; osd_rep_op:" << osd_rep_op <<
"; snaptrim:" << snaptrim <<
"; recov:" << recov <<
"; scrub:" << scrub <<
dendl;
lgeneric_subdout(cct, osd, 30) <<
"mClock OpClass message bit set:: " <<
rep_op_msg_bitset.to_string() << dendl;
}
void OpClassClientInfoMgr::add_rep_op_msg(int message_code) {
ceph_assert(message_code >= 0 && message_code < int(rep_op_msg_bitset_size));
rep_op_msg_bitset.set(message_code);
}
osd_op_type_t
OpClassClientInfoMgr::osd_op_type(
const ceph::osd::scheduler::OpSchedulerItem& op) const {
osd_op_type_t type = convert_op_type(op.get_op_type());
if (osd_op_type_t::client_op != type) {
return type;
} else {
std::optional<OpRequestRef> op_ref_maybe = op.maybe_get_op();
ceph_assert(op_ref_maybe);
__u16 mtype = (*op_ref_maybe)->get_req()->get_header().type;
if (rep_op_msg_bitset.test(mtype)) {
return osd_op_type_t::osd_rep_op;
} else {
return osd_op_type_t::client_op;
}
}
}
// used for debugging since faster implementation can be done
// with rep_op_msg_bitmap
bool OpClassClientInfoMgr::is_rep_op(uint16_t mtype) {
return
MSG_OSD_REPOP == mtype ||
MSG_OSD_REPOPREPLY == mtype ||
MSG_OSD_PG_UPDATE_LOG_MISSING == mtype ||
MSG_OSD_PG_UPDATE_LOG_MISSING_REPLY == mtype ||
MSG_OSD_EC_WRITE == mtype ||
MSG_OSD_EC_WRITE_REPLY == mtype ||
MSG_OSD_EC_READ == mtype ||
MSG_OSD_EC_READ_REPLY == mtype;
}
} // namespace mclock
} // namespace ceph
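The removed manager classifies replication messages with a bitset so the hot path is a single O(1) test rather than the chain of comparisons kept in is_rep_op for debugging. A self-contained sketch of the same technique:

```cpp
// Same technique as the removed rep_op_msg_bitset: register message codes
// once at startup, then classify with one O(1) bitset test.
#include <bitset>
#include <cstdint>

constexpr std::size_t kBitsetSize = 128;  // matches rep_op_msg_bitset_size
static std::bitset<kBitsetSize> rep_op_msgs;

void add_rep_op_msg(int code) {
  // mirrors the ceph_assert bounds check in the removed code
  if (code >= 0 && code < static_cast<int>(kBitsetSize)) {
    rep_op_msgs.set(code);
  }
}

bool is_rep_op_msg(std::uint16_t type) {
  return type < kBitsetSize && rep_op_msgs.test(type);
}
```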


@@ -1,105 +0,0 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
* Copyright (C) 2017 Red Hat Inc.
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software
* Foundation. See file COPYING.
*
*/
#pragma once
#include <bitset>
#include "dmclock/src/dmclock_server.h"
#include "osd/OpRequest.h"
#include "osd/scheduler/OpSchedulerItem.h"
namespace ceph {
namespace mclock {
using op_item_type_t =
ceph::osd::scheduler::OpSchedulerItem::OpQueueable::op_type_t;
enum class osd_op_type_t {
client_op, osd_rep_op, bg_snaptrim, bg_recovery, bg_scrub, bg_pg_delete,
peering_event
};
class OpClassClientInfoMgr {
crimson::dmclock::ClientInfo client_op;
crimson::dmclock::ClientInfo osd_rep_op;
crimson::dmclock::ClientInfo snaptrim;
crimson::dmclock::ClientInfo recov;
crimson::dmclock::ClientInfo scrub;
crimson::dmclock::ClientInfo pg_delete;
crimson::dmclock::ClientInfo peering_event;
static constexpr std::size_t rep_op_msg_bitset_size = 128;
std::bitset<rep_op_msg_bitset_size> rep_op_msg_bitset;
void add_rep_op_msg(int message_code);
public:
OpClassClientInfoMgr(CephContext *cct);
inline const crimson::dmclock::ClientInfo*
get_client_info(osd_op_type_t type) {
switch(type) {
case osd_op_type_t::client_op:
return &client_op;
case osd_op_type_t::osd_rep_op:
return &osd_rep_op;
case osd_op_type_t::bg_snaptrim:
return &snaptrim;
case osd_op_type_t::bg_recovery:
return &recov;
case osd_op_type_t::bg_scrub:
return &scrub;
case osd_op_type_t::bg_pg_delete:
return &pg_delete;
case osd_op_type_t::peering_event:
return &peering_event;
default:
ceph_abort();
return nullptr;
}
}
// converts operation type from op queue internal to mclock
// equivalent
inline static osd_op_type_t convert_op_type(op_item_type_t t) {
switch(t) {
case op_item_type_t::client_op:
return osd_op_type_t::client_op;
case op_item_type_t::bg_snaptrim:
return osd_op_type_t::bg_snaptrim;
case op_item_type_t::bg_recovery:
return osd_op_type_t::bg_recovery;
case op_item_type_t::bg_scrub:
return osd_op_type_t::bg_scrub;
case op_item_type_t::bg_pg_delete:
return osd_op_type_t::bg_pg_delete;
case op_item_type_t::peering_event:
return osd_op_type_t::peering_event;
default:
ceph_abort();
}
}
osd_op_type_t osd_op_type(
const ceph::osd::scheduler::OpSchedulerItem&) const;
// used for debugging since faster implementation can be done
// with rep_op_msg_bitmap
static bool is_rep_op(uint16_t);
}; // OpClassClientInfoMgr
} // namespace mclock
} // namespace ceph
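get_client_info above maps each op class to its own ClientInfo via a switch; an equivalent table-driven sketch, filled with the removed defaults from the options.cc hunk (illustrative, not the removed implementation):

```cpp
// Illustrative per-class QoS lookup in the spirit of get_client_info();
// values are the {res, wgt, lim} defaults removed by this commit.
#include <array>
#include <cstddef>

struct ClientInfo { double reservation, weight, limit; };
enum class osd_op_type_t : std::size_t {
  client_op, osd_rep_op, bg_snaptrim, bg_recovery, bg_scrub,
  bg_pg_delete, peering_event, COUNT
};

constexpr std::array<ClientInfo,
                     static_cast<std::size_t>(osd_op_type_t::COUNT)>
    qos_table{{
        {1000.0, 500.0, 0.0},  // client_op
        {1000.0, 500.0, 0.0},  // osd_rep_op
        {0.0, 1.0, 0.001},     // bg_snaptrim
        {0.0, 1.0, 0.001},     // bg_recovery
        {0.0, 1.0, 0.001},     // bg_scrub
        {0.0, 1.0, 0.001},     // bg_pg_delete
        {0.0, 1.0, 0.001},     // peering_event
    }};

const ClientInfo* get_client_info(osd_op_type_t t) {
  return &qos_table[static_cast<std::size_t>(t)];
}
```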


@@ -16,11 +16,8 @@
#include "osd/scheduler/OpScheduler.h"
#include "common/PrioritizedQueue.h"
#include "common/WeightedPriorityQueue.h"
#include "osd/scheduler/mClockScheduler.h"
#include "osd/mClockClientQueue.h"
#include "osd/mClockOpClassQueue.h"
namespace ceph::osd::scheduler {
@@ -28,38 +25,14 @@ OpSchedulerRef make_scheduler(CephContext *cct)
{
const std::string *type = &cct->_conf->osd_op_queue;
if (*type == "debug_random") {
static const std::string index_lookup[] = { "prioritized",
"mclock_opclass",
"mclock_client",
"mclock_scheduler",
static const std::string index_lookup[] = { "mclock_scheduler",
"wpq" };
srand(time(NULL));
unsigned which = rand() % (sizeof(index_lookup) / sizeof(index_lookup[0]));
type = &index_lookup[which];
}
if (*type == "prioritized") {
return std::make_unique<
ClassedOpQueueScheduler<PrioritizedQueue<OpSchedulerItem, client>>>(
cct,
cct->_conf->osd_op_pq_max_tokens_per_priority,
cct->_conf->osd_op_pq_min_cost
);
} else if (*type == "mclock_opclass") {
return std::make_unique<
ClassedOpQueueScheduler<mClockOpClassQueue>>(
cct,
cct
);
} else if (*type == "mclock_client") {
return std::make_unique<
ClassedOpQueueScheduler<mClockClientQueue>>(
cct,
cct
);
} else if (*type == "mclock_scheduler") {
return std::make_unique<mClockScheduler>(cct);
} else if (*type == "wpq" ) {
if (*type == "wpq" ) {
// default is 'wpq'
return std::make_unique<
ClassedOpQueueScheduler<WeightedPriorityQueue<OpSchedulerItem, client>>>(
@@ -67,6 +40,8 @@ OpSchedulerRef make_scheduler(CephContext *cct)
cct->_conf->osd_op_pq_max_tokens_per_priority,
cct->_conf->osd_op_pq_min_cost
);
} else if (*type == "mclock_scheduler") {
return std::make_unique<mClockScheduler>(cct);
} else {
ceph_assert("Invalid choice of wq" == 0);
}


@@ -36,7 +36,7 @@ mClockScheduler::mClockScheduler(CephContext *cct) :
&client_registry,
_1),
dmc::AtLimit::Allow,
cct->_conf->osd_op_queue_mclock_anticipation_timeout)
cct->_conf.get_val<double>("osd_mclock_scheduler_anticipation_timeout"))
{
cct->_conf.add_observer(this);
client_registry.update_from_config(cct->_conf);
@@ -134,7 +134,7 @@ OpSchedulerItem mClockScheduler::dequeue()
ceph_assert(result.is_retn());
auto &retn = result.get_retn();
return std::move(*result.get_retn().request);
return std::move(*retn.request);
}
}
}


@@ -29,7 +29,6 @@
#include "common/ceph_context.h"
#include "common/mClockPriorityQueue.h"
#include "osd/scheduler/OpSchedulerItem.h"
#include "osd/mClockOpClassSupport.h"
namespace ceph::osd::scheduler {


@@ -23,9 +23,6 @@ export class OsdPgScrubModalOptions {
'osd_deep_scrub_randomize_ratio',
'osd_deep_scrub_stride',
'osd_deep_scrub_update_digest_min_age',
'osd_op_queue_mclock_scrub_lim',
'osd_op_queue_mclock_scrub_res',
'osd_op_queue_mclock_scrub_wgt',
'osd_requested_scrub_priority',
'osd_scrub_backoff_ratio',
'osd_scrub_chunk_max',


@@ -113,24 +113,6 @@ add_executable(unittest_ec_transaction
add_ceph_unittest(unittest_ec_transaction)
target_link_libraries(unittest_ec_transaction osd global ${BLKID_LIBRARIES})
# unittest_mclock_op_class_queue
add_executable(unittest_mclock_op_class_queue
TestMClockOpClassQueue.cc
)
add_ceph_unittest(unittest_mclock_op_class_queue)
target_link_libraries(unittest_mclock_op_class_queue
global osd dmclock os
)
# unittest_mclock_client_queue
add_executable(unittest_mclock_client_queue
TestMClockClientQueue.cc
)
add_ceph_unittest(unittest_mclock_client_queue)
target_link_libraries(unittest_mclock_client_queue
global osd dmclock os
)
# unittest_mclock_scheduler
add_executable(unittest_mclock_scheduler
TestMClockScheduler.cc


@@ -1,242 +0,0 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
#include <iostream>
#include "gtest/gtest.h"
#include "global/global_init.h"
#include "common/common_init.h"
#include "osd/mClockClientQueue.h"
using namespace ceph::osd::scheduler;
int main(int argc, char **argv) {
std::vector<const char*> args(argv, argv+argc);
auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_OSD,
CODE_ENVIRONMENT_UTILITY,
CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
common_init_finish(g_ceph_context);
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
class MClockClientQueueTest : public testing::Test {
public:
mClockClientQueue q;
uint64_t client1;
uint64_t client2;
uint64_t client3;
MClockClientQueueTest() :
q(g_ceph_context),
client1(1001),
client2(9999),
client3(100000001)
{}
struct MockDmclockItem : public PGOpQueueable {
MockDmclockItem() :
PGOpQueueable(spg_t()) {}
public:
op_type_t get_op_type() const final {
return op_type_t::client_op;
}
ostream &print(ostream &rhs) const final { return rhs; }
std::optional<OpRequestRef> maybe_get_op() const final {
return std::nullopt;
}
op_scheduler_class get_scheduler_class() const final {
return op_scheduler_class::client;
}
void run(OSD *osd, OSDShard *sdata, PGRef& pg, ThreadPool::TPHandle &handle) final {}
};
template <typename... Args>
Request create_dmclock(epoch_t e, uint64_t owner, Args... args) {
return Request(
OpSchedulerItem(
unique_ptr<OpSchedulerItem::OpQueueable>(
new MockDmclockItem(
std::forward<Args>(args)...)),
12, 12,
utime_t(), owner, e));
}
Request create_snaptrim(epoch_t e, uint64_t owner) {
return Request(OpSchedulerItem(unique_ptr<OpSchedulerItem::OpQueueable>(new PGSnapTrim(spg_t(), e)),
12, 12,
utime_t(), owner, e));
}
Request create_scrub(epoch_t e, uint64_t owner) {
return Request(OpSchedulerItem(unique_ptr<OpSchedulerItem::OpQueueable>(new PGScrub(spg_t(), e)),
12, 12,
utime_t(), owner, e));
}
Request create_recovery(epoch_t e, uint64_t owner) {
return Request(OpSchedulerItem(unique_ptr<OpSchedulerItem::OpQueueable>(new PGRecovery(spg_t(), e, 64)),
12, 12,
utime_t(), owner, e));
}
};
TEST_F(MClockClientQueueTest, TestSize) {
ASSERT_TRUE(q.empty());
ASSERT_EQ(0u, q.get_size_slow());
q.enqueue(client1, 12, 1u, create_snaptrim(100, client1));
q.enqueue_strict(client2, 12, create_snaptrim(101, client2));
q.enqueue(client2, 12, 1u, create_snaptrim(102, client2));
q.enqueue_strict(client3, 12, create_snaptrim(103, client3));
q.enqueue(client1, 12, 1u, create_snaptrim(104, client1));
ASSERT_FALSE(q.empty());
ASSERT_EQ(5u, q.get_size_slow());
std::list<Request> reqs;
reqs.push_back(q.dequeue());
reqs.push_back(q.dequeue());
reqs.push_back(q.dequeue());
ASSERT_FALSE(q.empty());
ASSERT_EQ(2u, q.get_size_slow());
q.enqueue_front(client2, 12, 1u, std::move(reqs.back()));
reqs.pop_back();
q.enqueue_strict_front(client3, 12, std::move(reqs.back()));
reqs.pop_back();
q.enqueue_strict_front(client2, 12, std::move(reqs.back()));
reqs.pop_back();
ASSERT_FALSE(q.empty());
ASSERT_EQ(5u, q.get_size_slow());
for (int i = 0; i < 5; ++i) {
(void) q.dequeue();
}
ASSERT_TRUE(q.empty());
ASSERT_EQ(0u, q.get_size_slow());
}
TEST_F(MClockClientQueueTest, TestEnqueue) {
q.enqueue(client1, 12, 1u, create_snaptrim(100, client1));
q.enqueue(client2, 12, 1u, create_snaptrim(101, client2));
q.enqueue(client2, 12, 1u, create_snaptrim(102, client2));
q.enqueue(client3, 12, 1u, create_snaptrim(103, client3));
q.enqueue(client1, 12, 1u, create_snaptrim(104, client1));
Request r = q.dequeue();
ASSERT_EQ(100u, r.get_map_epoch());
r = q.dequeue();
ASSERT_EQ(101u, r.get_map_epoch());
r = q.dequeue();
ASSERT_EQ(103u, r.get_map_epoch());
r = q.dequeue();
ASSERT_TRUE(r.get_map_epoch() == 102u ||
r.get_map_epoch() == 104u);
r = q.dequeue();
ASSERT_TRUE(r.get_map_epoch() == 102u ||
r.get_map_epoch() == 104u);
}
TEST_F(MClockClientQueueTest, TestDistributedEnqueue) {
Request r1 = create_snaptrim(100, client1);
Request r2 = create_snaptrim(101, client2);
Request r3 = create_snaptrim(102, client3);
Request r4 = create_dmclock(103, client1);
Request r5 = create_dmclock(104, client2);
Request r6 = create_dmclock(105, client3);
q.enqueue(client1, 12, 0, std::move(r1));
q.enqueue(client2, 12, 0, std::move(r2));
q.enqueue(client3, 12, 0, std::move(r3));
q.enqueue(client1, 12, 0, std::move(r4));
q.enqueue(client2, 12, 0, std::move(r5));
q.enqueue(client3, 12, 0, std::move(r6));
Request r = q.dequeue();
r = q.dequeue();
r = q.dequeue();
r = q.dequeue();
ASSERT_EQ(105u, r.get_map_epoch());
r = q.dequeue();
ASSERT_EQ(104u, r.get_map_epoch());
r = q.dequeue();
ASSERT_EQ(103u, r.get_map_epoch());
}
TEST_F(MClockClientQueueTest, TestEnqueueStrict) {
q.enqueue_strict(client1, 12, create_snaptrim(100, client1));
q.enqueue_strict(client2, 13, create_snaptrim(101, client2));
q.enqueue_strict(client2, 16, create_snaptrim(102, client2));
q.enqueue_strict(client3, 14, create_snaptrim(103, client3));
q.enqueue_strict(client1, 15, create_snaptrim(104, client1));
Request r = q.dequeue();
ASSERT_EQ(102u, r.get_map_epoch());
r = q.dequeue();
ASSERT_EQ(104u, r.get_map_epoch());
r = q.dequeue();
ASSERT_EQ(103u, r.get_map_epoch());
r = q.dequeue();
ASSERT_EQ(101u, r.get_map_epoch());
r = q.dequeue();
ASSERT_EQ(100u, r.get_map_epoch());
}
TEST_F(MClockClientQueueTest, TestRemoveByClass) {
q.enqueue(client1, 12, 1u, create_snaptrim(100, client1));
q.enqueue_strict(client2, 12, create_snaptrim(101, client2));
q.enqueue(client2, 12, 1u, create_snaptrim(102, client2));
q.enqueue_strict(client3, 12, create_snaptrim(103, client3));
q.enqueue(client1, 12, 1u, create_snaptrim(104, client1));
std::list<Request> filtered_out;
q.remove_by_class(client2, &filtered_out);
ASSERT_EQ(2u, filtered_out.size());
while (!filtered_out.empty()) {
auto e = filtered_out.front().get_map_epoch() ;
ASSERT_TRUE(e == 101 || e == 102);
filtered_out.pop_front();
}
ASSERT_EQ(3u, q.get_size_slow());
Request r = q.dequeue();
ASSERT_EQ(103u, r.get_map_epoch());
r = q.dequeue();
ASSERT_EQ(100u, r.get_map_epoch());
r = q.dequeue();
ASSERT_EQ(104u, r.get_map_epoch());
}


@@ -1,185 +0,0 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
#include <iostream>
#include "gtest/gtest.h"
#include "global/global_context.h"
#include "global/global_init.h"
#include "common/common_init.h"
#include "osd/mClockOpClassQueue.h"
using namespace ceph::osd::scheduler;
int main(int argc, char **argv) {
std::vector<const char*> args(argv, argv+argc);
auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_OSD,
CODE_ENVIRONMENT_UTILITY,
CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
common_init_finish(g_ceph_context);
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
class MClockOpClassQueueTest : public testing::Test {
public:
mClockOpClassQueue q;
uint64_t client1;
uint64_t client2;
uint64_t client3;
MClockOpClassQueueTest() :
q(g_ceph_context),
client1(1001),
client2(9999),
client3(100000001)
{}
#if 0 // more work needed here
Request create_client_op(epoch_t e, uint64_t owner) {
return Request(spg_t(), OpSchedulerItem(OpRequestRef(), e));
}
#endif
Request create_snaptrim(epoch_t e, uint64_t owner) {
return Request(OpSchedulerItem(unique_ptr<OpSchedulerItem::OpQueueable>(new PGSnapTrim(spg_t(), e)),
12, 12,
utime_t(), owner, e));
}
Request create_scrub(epoch_t e, uint64_t owner) {
return Request(OpSchedulerItem(unique_ptr<OpSchedulerItem::OpQueueable>(new PGScrub(spg_t(), e)),
12, 12,
utime_t(), owner, e));
}
Request create_recovery(epoch_t e, uint64_t owner) {
return Request(OpSchedulerItem(unique_ptr<OpSchedulerItem::OpQueueable>(new PGRecovery(spg_t(), e, 64)),
12, 12,
utime_t(), owner, e));
}
};
TEST_F(MClockOpClassQueueTest, TestSize) {
ASSERT_TRUE(q.empty());
ASSERT_EQ(0u, q.get_size_slow());
q.enqueue(client1, 12, 1, create_snaptrim(100, client1));
q.enqueue_strict(client2, 12, create_snaptrim(101, client2));
q.enqueue(client2, 12, 1, create_snaptrim(102, client2));
q.enqueue_strict(client3, 12, create_snaptrim(103, client3));
q.enqueue(client1, 12, 1, create_snaptrim(104, client1));
ASSERT_FALSE(q.empty());
ASSERT_EQ(5u, q.get_size_slow());
std::list<Request> reqs;
reqs.push_back(q.dequeue());
reqs.push_back(q.dequeue());
reqs.push_back(q.dequeue());
ASSERT_FALSE(q.empty());
ASSERT_EQ(2u, q.get_size_slow());
q.enqueue_front(client2, 12, 1, std::move(reqs.back()));
reqs.pop_back();
q.enqueue_strict_front(client3, 12, std::move(reqs.back()));
reqs.pop_back();
q.enqueue_strict_front(client2, 12, std::move(reqs.back()));
reqs.pop_back();
ASSERT_FALSE(q.empty());
ASSERT_EQ(5u, q.get_size_slow());
for (int i = 0; i < 5; ++i) {
(void) q.dequeue();
}
ASSERT_TRUE(q.empty());
ASSERT_EQ(0u, q.get_size_slow());
}
TEST_F(MClockOpClassQueueTest, TestEnqueue) {
q.enqueue(client1, 12, 1, create_snaptrim(100, client1));
q.enqueue(client2, 12, 1, create_snaptrim(101, client2));
q.enqueue(client2, 12, 1, create_snaptrim(102, client2));
q.enqueue(client3, 12, 1, create_snaptrim(103, client3));
q.enqueue(client1, 12, 1, create_snaptrim(104, client1));
Request r = q.dequeue();
ASSERT_EQ(100u, r.get_map_epoch());
r = q.dequeue();
ASSERT_EQ(101u, r.get_map_epoch());
r = q.dequeue();
ASSERT_EQ(102u, r.get_map_epoch());
r = q.dequeue();
ASSERT_EQ(103u, r.get_map_epoch());
r = q.dequeue();
ASSERT_EQ(104u, r.get_map_epoch());
}
TEST_F(MClockOpClassQueueTest, TestEnqueueStrict) {
q.enqueue_strict(client1, 12, create_snaptrim(100, client1));
q.enqueue_strict(client2, 13, create_snaptrim(101, client2));
q.enqueue_strict(client2, 16, create_snaptrim(102, client2));
q.enqueue_strict(client3, 14, create_snaptrim(103, client3));
q.enqueue_strict(client1, 15, create_snaptrim(104, client1));
Request r = q.dequeue();
ASSERT_EQ(102u, r.get_map_epoch());
r = q.dequeue();
ASSERT_EQ(104u, r.get_map_epoch());
r = q.dequeue();
ASSERT_EQ(103u, r.get_map_epoch());
r = q.dequeue();
ASSERT_EQ(101u, r.get_map_epoch());
r = q.dequeue();
ASSERT_EQ(100u, r.get_map_epoch());
}
TEST_F(MClockOpClassQueueTest, TestRemoveByClass) {
q.enqueue(client1, 12, 1, create_snaptrim(100, client1));
q.enqueue_strict(client2, 12, create_snaptrim(101, client2));
q.enqueue(client2, 12, 1, create_snaptrim(102, client2));
q.enqueue_strict(client3, 12, create_snaptrim(103, client3));
q.enqueue(client1, 12, 1, create_snaptrim(104, client1));
std::list<Request> filtered_out;
q.remove_by_class(client2, &filtered_out);
ASSERT_EQ(2u, filtered_out.size());
while (!filtered_out.empty()) {
auto e = filtered_out.front().get_map_epoch() ;
ASSERT_TRUE(e == 101 || e == 102);
filtered_out.pop_front();
}
ASSERT_EQ(3u, q.get_size_slow());
Request r = q.dequeue();
ASSERT_EQ(103u, r.get_map_epoch());
r = q.dequeue();
ASSERT_EQ(100u, r.get_map_epoch());
r = q.dequeue();
ASSERT_EQ(104u, r.get_map_epoch());
}