From 88a7fc2713f372ff95cff02f43f3dababfff756e Mon Sep 17 00:00:00 2001
From: Samuel Just <sjust@redhat.com>
Date: Thu, 16 Nov 2023 20:20:07 -0800
Subject: [PATCH] crush: add msr tunables

Signed-off-by: Samuel Just <sjust@redhat.com>
---
 src/crush/CrushCompiler.cc | 50 +++++++++++++++++++++++++++++++++++
 src/crush/CrushWrapper.cc  | 25 ++++++++++++++++++
 src/crush/CrushWrapper.h   | 53 +++++++++++++++++++++++++++++++++-----
 src/crush/crush.h          | 13 +++++++++-
 src/crush/grammar.h        |  8 ++++++
 src/osd/OSDMap.cc          |  2 ++
 6 files changed, 144 insertions(+), 7 deletions(-)

diff --git a/src/crush/CrushCompiler.cc b/src/crush/CrushCompiler.cc
index 740f003005b..c884caed00e 100644
--- a/src/crush/CrushCompiler.cc
+++ b/src/crush/CrushCompiler.cc
@@ -321,6 +321,13 @@ int CrushCompiler::decompile(ostream &out)
   if (crush.get_allowed_bucket_algs() != CRUSH_LEGACY_ALLOWED_BUCKET_ALGS)
     out << "tunable allowed_bucket_algs " << crush.get_allowed_bucket_algs()
 	<< "\n";
+  if (crush.has_nondefault_tunables_msr()) {
+    out << "tunable msr_descents " << crush.get_msr_descents()
+	<< "\n";
+    out << "tunable msr_collision_tries "
+	<< crush.get_msr_collision_tries()
+	<< "\n";
+  }
 
   out << "\n# devices\n";
   for (int i=0; i<crush.get_max_devices(); i++) {
@@ -428,6 +435,15 @@ int CrushCompiler::decompile(ostream &out)
 	out << "\tstep set_chooseleaf_stable " << crush.get_rule_arg1(i, j)
 	    << "\n";
 	break;
+      case CRUSH_RULE_SET_MSR_DESCENTS:
+	out << "\tstep set_msr_descents " << crush.get_rule_arg1(i, j)
+	    << "\n";
+	break;
+      case CRUSH_RULE_SET_MSR_COLLISION_TRIES:
+	out << "\tstep set_msr_collision_tries "
+	    << crush.get_rule_arg1(i, j)
+	    << "\n";
+	break;
       case CRUSH_RULE_CHOOSE_FIRSTN:
 	out << "\tstep choose firstn "
 	    << crush.get_rule_arg1(i, j) 
@@ -456,6 +472,13 @@ int CrushCompiler::decompile(ostream &out)
 	print_type_name(out, crush.get_rule_arg2(i, j), crush);
 	out << "\n";
 	break;
+      case CRUSH_RULE_CHOOSE_MSR:
+	out << "\tstep choosemsr "
+	    << crush.get_rule_arg1(i, j) 
+	    << " type ";
+	print_type_name(out, crush.get_rule_arg2(i, j), crush);
+	out << "\n";
+	break;
       }
     }
     out << "}\n";
@@ -538,6 +561,10 @@ int CrushCompiler::parse_tunable(iter_t const& i)
     crush.set_straw_calc_version(val);
   else if (name == "allowed_bucket_algs")
     crush.set_allowed_bucket_algs(val);
+  else if (name == "msr_descents")
+    crush.set_msr_descents(val);
+  else if (name == "msr_collision_tries")
+    crush.set_msr_collision_tries(val);
   else {
     err << "tunable " << name << " not recognized" << std::endl;
     return -1;
@@ -915,6 +942,18 @@ int CrushCompiler::parse_rule(iter_t const& i)
 	crush.set_rule_step_set_chooseleaf_stable(ruleno, step++, val);
       }
       break;
+    case crush_grammar::_step_set_msr_descents:
+      {
+	int val = int_node(s->children[1]);
+	crush.set_rule_step_set_msr_descents(ruleno, step++, val);
+      }
+      break;
+    case crush_grammar::_step_set_msr_collision_tries:
+      {
+	int val = int_node(s->children[1]);
+	crush.set_rule_step_set_msr_collision_tries(ruleno, step++, val);
+      }
+      break;
 
     case crush_grammar::_step_choose:
     case crush_grammar::_step_chooseleaf:
@@ -942,6 +981,17 @@ int CrushCompiler::parse_rule(iter_t const& i)
       }
       break;
 
+    case crush_grammar::_step_choose_msr:
+      {
+	string type = string_node(s->children[3]);
+	if (!type_id.count(type)) {
+	  err << "in rule '" << rname << "' type '" << type << "' not defined" << std::endl;
+	  return -1;
+	}
+	crush.set_rule_step_choose_msr(ruleno, step++, int_node(s->children[1]), type_id[type]);
+      }
+      break;
+
     case crush_grammar::_step_emit:
       crush.set_rule_step_emit(ruleno, step++);
       break;
diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc
index 5e3694c5193..69936a53d65 100644
--- a/src/crush/CrushWrapper.cc
+++ b/src/crush/CrushWrapper.cc
@@ -3103,6 +3103,10 @@ void CrushWrapper::encode(bufferlist& bl, uint64_t features) const
       }
     }
   }
+  if (HAVE_FEATURE(features, CRUSH_MSR)) {
+    encode(crush->msr_descents, bl);
+    encode(crush->msr_collision_tries, bl);
+  }
 }
 
 static void decode_32_or_64_string_map(map<int32_t,string>& m, bufferlist::const_iterator& blp)
@@ -3253,6 +3257,12 @@ void CrushWrapper::decode(bufferlist::const_iterator& blp)
 	choose_args[choose_args_index] = arg_map;
       }
     }
+    if (!blp.end()) {
+      decode(crush->msr_descents, blp);
+      decode(crush->msr_collision_tries, blp);
+    } else {
+      set_default_msr_tunables();
+    }
     update_choose_args(nullptr); // in case we decode a legacy "corrupted" map
     finalize();
   }
@@ -3508,6 +3518,8 @@ void CrushWrapper::dump_tunables(Formatter *f) const
   f->dump_int("chooseleaf_descend_once", get_chooseleaf_descend_once());
   f->dump_int("chooseleaf_vary_r", get_chooseleaf_vary_r());
   f->dump_int("chooseleaf_stable", get_chooseleaf_stable());
+  f->dump_int("msr_descents", get_msr_descents());
+  f->dump_int("msr_collision_tries", get_msr_collision_tries());
   f->dump_int("straw_calc_version", get_straw_calc_version());
   f->dump_int("allowed_bucket_algs", get_allowed_bucket_algs());
 
@@ -3637,6 +3649,11 @@ void CrushWrapper::dump_rule(int rule_id, Formatter *f) const
       f->dump_int("num", get_rule_arg1(rule_id, j));
       f->dump_string("type", get_type_name(get_rule_arg2(rule_id, j)));
       break;
+    case CRUSH_RULE_CHOOSE_MSR:
+      f->dump_string("op", "choosemsr");
+      f->dump_int("num", get_rule_arg1(rule_id, j));
+      f->dump_string("type", get_type_name(get_rule_arg2(rule_id, j)));
+      break;
     case CRUSH_RULE_SET_CHOOSE_TRIES:
       f->dump_string("op", "set_choose_tries");
       f->dump_int("num", get_rule_arg1(rule_id, j));
@@ -3645,6 +3662,14 @@ void CrushWrapper::dump_rule(int rule_id, Formatter *f) const
       f->dump_string("op", "set_chooseleaf_tries");
       f->dump_int("num", get_rule_arg1(rule_id, j));
       break;
+    case CRUSH_RULE_SET_MSR_DESCENTS:
+      f->dump_string("op", "set_msr_descents");
+      f->dump_int("num", get_rule_arg1(rule_id, j));
+      break;
+    case CRUSH_RULE_SET_MSR_COLLISION_TRIES:
+      f->dump_string("op", "set_msr_collision_tries");
+      f->dump_int("num", get_rule_arg1(rule_id, j));
+      break;
     default:
       f->dump_int("opcode", get_rule_op(rule_id, j));
       f->dump_int("arg1", get_rule_arg1(rule_id, j));
diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h
index e84a829a2d3..8419a92a410 100644
--- a/src/crush/CrushWrapper.h
+++ b/src/crush/CrushWrapper.h
@@ -125,6 +125,7 @@ public:
     crush->chooseleaf_vary_r = 0;
     crush->chooseleaf_stable = 0;
     crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
+    set_default_msr_tunables();
   }
   void set_tunables_bobtail() {
     crush->choose_local_tries = 0;
@@ -134,6 +135,7 @@ public:
     crush->chooseleaf_vary_r = 0;
     crush->chooseleaf_stable = 0;
     crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
+    set_default_msr_tunables();
   }
   void set_tunables_firefly() {
     crush->choose_local_tries = 0;
@@ -143,6 +145,7 @@ public:
     crush->chooseleaf_vary_r = 1;
     crush->chooseleaf_stable = 0;
     crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
+    set_default_msr_tunables();
   }
   void set_tunables_hammer() {
     crush->choose_local_tries = 0;
@@ -156,6 +159,7 @@ public:
       (1 << CRUSH_BUCKET_LIST) |
       (1 << CRUSH_BUCKET_STRAW) |
       (1 << CRUSH_BUCKET_STRAW2);
+    set_default_msr_tunables();
   }
   void set_tunables_jewel() {
     crush->choose_local_tries = 0;
@@ -169,6 +173,7 @@ public:
       (1 << CRUSH_BUCKET_LIST) |
       (1 << CRUSH_BUCKET_STRAW) |
       (1 << CRUSH_BUCKET_STRAW2);
+    set_default_msr_tunables();
   }
 
   void set_tunables_legacy() {
@@ -233,6 +238,24 @@ public:
     crush->straw_calc_version = n;
   }
 
+  int get_msr_descents() const {
+    return crush->msr_descents;
+  }
+  void set_msr_descents(int n) {
+    crush->msr_descents = n;
+  }
+
+  int get_msr_collision_tries() const {
+    return crush->msr_collision_tries;
+  }
+  void set_msr_collision_tries(int n) {
+    crush->msr_collision_tries = n;
+  }
+  void set_default_msr_tunables() {
+    set_msr_descents(100);
+    set_msr_collision_tries(100);
+  }
+
   unsigned get_allowed_bucket_algs() const {
     return crush->allowed_bucket_algs;
   }
@@ -248,7 +271,8 @@ public:
       crush->chooseleaf_descend_once == 0 &&
       crush->chooseleaf_vary_r == 0 &&
       crush->chooseleaf_stable == 0 &&
-      crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
+      crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS &&
+      !has_nondefault_tunables_msr();
   }
   bool has_bobtail_tunables() const {
     return
@@ -258,7 +282,8 @@ public:
       crush->chooseleaf_descend_once == 1 &&
       crush->chooseleaf_vary_r == 0 &&
       crush->chooseleaf_stable == 0 &&
-      crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
+      crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS &&
+      !has_nondefault_tunables_msr();
   }
   bool has_firefly_tunables() const {
     return
@@ -268,7 +293,8 @@ public:
       crush->chooseleaf_descend_once == 1 &&
       crush->chooseleaf_vary_r == 1 &&
       crush->chooseleaf_stable == 0 &&
-      crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
+      crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS &&
+      !has_nondefault_tunables_msr();
   }
   bool has_hammer_tunables() const {
     return
@@ -281,7 +307,8 @@ public:
       crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) |
 				      (1 << CRUSH_BUCKET_LIST) |
 				      (1 << CRUSH_BUCKET_STRAW) |
-				      (1 << CRUSH_BUCKET_STRAW2));
+				      (1 << CRUSH_BUCKET_STRAW2)) &&
+      !has_nondefault_tunables_msr();
   }
   bool has_jewel_tunables() const {
     return
@@ -294,7 +321,8 @@ public:
       crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) |
 				      (1 << CRUSH_BUCKET_LIST) |
 				      (1 << CRUSH_BUCKET_STRAW) |
-				      (1 << CRUSH_BUCKET_STRAW2));
+				      (1 << CRUSH_BUCKET_STRAW2)) &&
+      !has_nondefault_tunables_msr();
   }
 
   bool has_optimal_tunables() const {
@@ -322,6 +350,11 @@ public:
     return
         crush->chooseleaf_stable != 0;
   }
+  bool has_nondefault_tunables_msr() const {
+    return
+      crush->msr_descents != 100 ||
+      crush->msr_collision_tries != 100;
+  }
 
   bool has_v2_rules() const;
   bool has_v3_rules() const;
@@ -337,7 +370,7 @@ public:
   bool is_msr_rule(unsigned ruleid) const;
 
   std::string get_min_required_version() const {
-    if (has_msr_rules())
+    if (has_msr_rules() || has_nondefault_tunables_msr())
       return "squid";
     else if (has_v5_rules() || has_nondefault_tunables5())
       return "jewel";
@@ -1155,6 +1188,14 @@ public:
   int set_rule_step_set_chooseleaf_stable(unsigned ruleno, unsigned step, int val) {
     return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_STABLE, val, 0);
   }
+
+  int set_rule_step_set_msr_descents(unsigned ruleno, unsigned step, int val) {
+    return set_rule_step(ruleno, step, CRUSH_RULE_SET_MSR_DESCENTS, val, 0);
+  }
+  int set_rule_step_set_msr_collision_tries(unsigned ruleno, unsigned step, int val) {
+    return set_rule_step(ruleno, step, CRUSH_RULE_SET_MSR_COLLISION_TRIES, val, 0);
+  }
+
   int set_rule_step_choose_firstn(unsigned ruleno, unsigned step, int val, int type) {
     return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_FIRSTN, val, type);
   }
diff --git a/src/crush/crush.h b/src/crush/crush.h
index 263922615b6..236ffea419c 100644
--- a/src/crush/crush.h
+++ b/src/crush/crush.h
@@ -65,7 +65,12 @@ enum crush_opcodes {
 	CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10,
 	CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11,
 	CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12,
-	CRUSH_RULE_SET_CHOOSELEAF_STABLE = 13
+	CRUSH_RULE_SET_CHOOSELEAF_STABLE = 13,
+
+	/* set choose_msr_total_tries */
+	CRUSH_RULE_SET_MSR_DESCENTS = 14,
+	/* set choose_msr_local_collision_tries */
+	CRUSH_RULE_SET_MSR_COLLISION_TRIES = 15,
 };
 
 /*
@@ -415,6 +420,12 @@ struct crush_map {
          */
 	__u8 chooseleaf_stable;
 
+	/*! Sets total descents for MSR rules */
+	__u8 msr_descents;
+
+	/*! Sets local collision retries for MSR rules */
+	__u8 msr_collision_tries;
+
         /*! @cond INTERNAL */
 	/* This value is calculated after decode or construction by
 	   the builder. It is exposed here (rather than having a
diff --git a/src/crush/grammar.h b/src/crush/grammar.h
index 072b36b073a..9e955091c6a 100644
--- a/src/crush/grammar.h
+++ b/src/crush/grammar.h
@@ -50,6 +50,8 @@ struct crush_grammar : public boost::spirit::grammar<crush_grammar>
     _step_set_choose_tries,
     _step_set_choose_local_tries,
     _step_set_choose_local_fallback_tries,
+    _step_set_msr_descents,
+    _step_set_msr_collision_tries,
     _step_choose,
     _step_chooseleaf,
     _step_emit,
@@ -91,6 +93,8 @@ struct crush_grammar : public boost::spirit::grammar<crush_grammar>
     boost::spirit::rule<ScannerT, boost::spirit::parser_context<>, boost::spirit::parser_tag<_step_set_chooseleaf_tries> >    step_set_chooseleaf_tries;
     boost::spirit::rule<ScannerT, boost::spirit::parser_context<>, boost::spirit::parser_tag<_step_set_chooseleaf_vary_r> >    step_set_chooseleaf_vary_r;
     boost::spirit::rule<ScannerT, boost::spirit::parser_context<>, boost::spirit::parser_tag<_step_set_chooseleaf_stable> >    step_set_chooseleaf_stable;
+    boost::spirit::rule<ScannerT, boost::spirit::parser_context<>, boost::spirit::parser_tag<_step_set_msr_descents> >    step_set_msr_descents;
+    boost::spirit::rule<ScannerT, boost::spirit::parser_context<>, boost::spirit::parser_tag<_step_set_msr_collision_tries> >    step_set_msr_collision_tries;
     boost::spirit::rule<ScannerT, boost::spirit::parser_context<>, boost::spirit::parser_tag<_step_choose> >    step_choose;
     boost::spirit::rule<ScannerT, boost::spirit::parser_context<>, boost::spirit::parser_tag<_step_chooseleaf> >      step_chooseleaf;
     boost::spirit::rule<ScannerT, boost::spirit::parser_context<>, boost::spirit::parser_tag<_step_emit> >      step_emit;
@@ -149,6 +153,8 @@ struct crush_grammar : public boost::spirit::grammar<crush_grammar>
       step_set_chooseleaf_tries = str_p("set_chooseleaf_tries") >> posint;
       step_set_chooseleaf_vary_r = str_p("set_chooseleaf_vary_r") >> posint;
       step_set_chooseleaf_stable = str_p("set_chooseleaf_stable") >> posint;
+      step_set_msr_descents = str_p("set_msr_descents") >> posint;
+      step_set_msr_collision_tries = str_p("set_msr_collision_tries") >> posint;
       step_choose = str_p("choose")
 	>> ( str_p("indep") | str_p("firstn") )
 	>> integer
@@ -165,6 +171,8 @@ struct crush_grammar : public boost::spirit::grammar<crush_grammar>
 				step_set_chooseleaf_tries |
 				step_set_chooseleaf_vary_r |
 				step_set_chooseleaf_stable |
+				step_set_msr_descents |
+				step_set_msr_collision_tries |
 				step_choose |
 				step_chooseleaf |
 				step_emit );
diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc
index 30543107102..8b3d2ad3711 100644
--- a/src/osd/OSDMap.cc
+++ b/src/osd/OSDMap.cc
@@ -1766,6 +1766,8 @@ uint64_t OSDMap::get_features(int entity_type, uint64_t *pmask) const
     features |= CEPH_FEATURE_CRUSH_TUNABLES5;
   if (crush->has_incompat_choose_args())
     features |= CEPH_FEATUREMASK_CRUSH_CHOOSE_ARGS;
+  if (crush->has_nondefault_tunables_msr())
+    features |= CEPH_FEATURE_CRUSH_MSR;
   mask |= CEPH_FEATURES_CRUSH;
 
   if (!pg_upmap.empty() || !pg_upmap_items.empty() || !pg_upmap_primaries.empty())