diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index 3c1305c0ba0..64842ff8b73 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -393,6 +393,7 @@ OPTION(mon_keyvaluedb, OPT_STR, "rocksdb")   // type of keyvaluedb backend
 
 // UNSAFE -- TESTING ONLY! Allows addition of a cache tier with preexisting snaps
 OPTION(mon_debug_unsafe_allow_tier_with_nonempty_snaps, OPT_BOOL, false)
+OPTION(mon_osd_blacklist_default_expire, OPT_DOUBLE, 60*60) // default one hour
 
 OPTION(paxos_stash_full_interval, OPT_INT, 25)   // how often (in commits) to stash a full copy of the PaxosService state
 OPTION(paxos_max_join_drift, OPT_INT, 10) // max paxos iterations before we must first sync the monitor stores
diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h
index bf868d7687e..fff25a747ba 100644
--- a/src/mon/MonCommands.h
+++ b/src/mon/MonCommands.h
@@ -853,11 +853,11 @@ COMMAND("osd pool rename " \
 	"rename <srcpool> to <destpool>", "osd", "rw", "cli,rest")
 COMMAND("osd pool get " \
 	"name=pool,type=CephPoolname " \
-	"name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|auid|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block", \
+	"name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|auid|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block", \
 	"get pool parameter <var>", "osd", "r", "cli,rest")
 COMMAND("osd pool set " \
 	"name=pool,type=CephPoolname " \
-	"name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|debug_fake_ec_pool|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|auid|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites " \
+	"name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|auid|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites " \
 	"name=val,type=CephString " \
 	"name=force,type=CephChoices,strings=--yes-i-really-mean-it,req=false", \
 	"set pool parameter <var> to <val>", "osd", "rw", "cli,rest")
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 09d3d190572..f1a1c728555 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -5472,10 +5472,10 @@ int OSDMonitor::prepare_pool_stripe_width(const unsigned pool_type,
 }
 
 int OSDMonitor::prepare_pool_crush_rule(const unsigned pool_type,
-					const string &erasure_code_profile,
-					const string &rule_name,
-					int *crush_rule,
-					ostream *ss)
+                                        const string &erasure_code_profile,
+                                        const string &rule_name,
+                                        int *crush_rule,
+                                        ostream *ss)
 {
 
   if (*crush_rule < 0) {
@@ -5483,7 +5483,7 @@ int OSDMonitor::prepare_pool_crush_rule(const unsigned pool_type,
     case pg_pool_t::TYPE_REPLICATED:
       {
 	if (rule_name == "") {
-	  //Use default rule
+	  // Use default rule
 	  *crush_rule = osdmap.crush->get_osd_pool_default_crush_replicated_ruleset(g_ceph_context);
 	  if (*crush_rule < 0) {
 	    // Errors may happen e.g. if no valid rule is available
@@ -5534,8 +5534,8 @@ int OSDMonitor::prepare_pool_crush_rule(const unsigned pool_type,
 }
 
 int OSDMonitor::get_crush_rule(const string &rule_name,
-			       int *crush_rule,
-			       ostream *ss)
+                               int *crush_rule,
+                               ostream *ss)
 {
   int ret;
   ret = osdmap.crush->get_rule_id(rule_name);
@@ -5553,7 +5553,7 @@ int OSDMonitor::get_crush_rule(const string &rule_name,
 	       << " try again" << dendl;
       return -EAGAIN;
     } else {
-      //Cannot find it , return error
+      // Cannot find it , return error
       *ss << "specified rule " << rule_name << " doesn't exist";
       return ret;
     }
@@ -5623,9 +5623,9 @@ int OSDMonitor::prepare_new_pool(string& name, uint64_t auid,
       r = tester.test();
     } else {
       r = tester.test_with_crushtool(g_conf->crushtool.c_str(),
-				     osdmap.get_max_osd(),
-				     g_conf->mon_lease,
-				     crush_rule);
+                                     osdmap.get_max_osd(),
+                                     g_conf->mon_lease,
+                                     crush_rule);
     }
     if (r) {
       dout(10) << " tester.test_with_crushtool returns " << r
@@ -6021,6 +6021,12 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
       ss << "ec overwrites can only be enabled for an erasure coded pool";
       return -EINVAL;
     }
+    stringstream err;
+    if (!g_conf->mon_debug_no_require_bluestore_for_ec_overwrites &&
+        !is_pool_currently_all_bluestore(pool, p, &err)) {
+      ss << "pool must only be stored on bluestore for scrubbing to work: " << err.str();
+      return -EINVAL;
+    }
     if (val == "true" || (interr.empty() && n == 1)) {
       p.flags |= pg_pool_t::FLAG_EC_OVERWRITES;
     } else if (val == "false" || (interr.empty() && n == 0)) {
@@ -6030,12 +6036,6 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
       ss << "expecting value 'true', 'false', '0', or '1'";
       return -EINVAL;
     }
-    stringstream err;
-    if (!g_conf->mon_debug_no_require_bluestore_for_ec_overwrites &&
-        !is_pool_currently_all_bluestore(pool, p, &err)) {
-      ss << "pool must only be stored on bluestore for scrubbing to work: " << err.str();
-      return -EINVAL;
-    }
   } else if (var == "target_max_objects") {
     if (interr.length()) {
       ss << "error parsing int '" << val << "': " << interr;
@@ -7586,7 +7586,6 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
     } while (false);
 
   } else if (prefix == "osd crush reweight-all") {
-    // osd crush reweight
     CrushWrapper newcrush;
     _get_pending_crush(newcrush);
 
@@ -7729,7 +7728,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
     }
     if (tunable == "straw_calc_version") {
-      if (value < 0 || value > 1) {
+      if (value != 0 && value != 1) {
 	ss << "value must be 0 or 1; got " << value;
 	err = -EINVAL;
 	goto reply;
       }
@@ -8253,10 +8252,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
       return prepare_unset_flag(op, CEPH_OSDMAP_NODEEP_SCRUB);
     else if (key == "notieragent")
       return prepare_unset_flag(op, CEPH_OSDMAP_NOTIERAGENT);
-    else if (key == "sortbitwise") {
-      ss << "the sortbitwise flag is required and cannot be unset";
-      err = -EPERM;
-    } else {
+    else {
       ss << "unrecognized flag '" << key << "'";
       err = -EINVAL;
     }
@@ -8903,6 +8899,22 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
 	goto reply;
       }
 
+      int pool_min_size = osdmap.get_pg_pool_min_size(pgid);
+      if ((int)id_vec.size() < pool_min_size) {
+	ss << "num of osds (" << id_vec.size() <<") < pool min size ("
+	   << pool_min_size << ")";
+	err = -EINVAL;
+	goto reply;
+      }
+
+      int pool_size = osdmap.get_pg_pool_size(pgid);
+      if ((int)id_vec.size() > pool_size) {
+	ss << "num of osds (" << id_vec.size() <<") > pool size ("
+	   << pool_size << ")";
+	err = -EINVAL;
+	goto reply;
+      }
+
       vector<int32_t> new_pg_upmap;
       for (auto osd : id_vec) {
 	if (osd != CRUSH_ITEM_NONE && !osdmap.exists(osd)) {
@@ -8942,7 +8954,17 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
 	goto reply;
       }
 
+      int pool_size = osdmap.get_pg_pool_size(pgid);
+      if ((int)(id_vec.size() / 2) > pool_size) {
+	ss << "num of osd pairs (" << id_vec.size() / 2 <<") > pool size ("
+	   << pool_size << ")";
+	err = -EINVAL;
+	goto reply;
+      }
+
       vector<pair<int32_t,int32_t>> new_pg_upmap_items;
+      ostringstream items;
+      items << "[";
       for (auto p = id_vec.begin(); p != id_vec.end(); ++p) {
 	int from = *p++;
 	int to = *p;
@@ -8957,13 +8979,16 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
 	  goto reply;
 	}
 	new_pg_upmap_items.push_back(make_pair(from, to));
+	items << from << "->" << to << ",";
       }
+      string out(items.str());
+      out.resize(out.size() - 1); // drop last ','
+      out += "]";
 
       pending_inc.new_pg_upmap_items[pgid] =
 	mempool::osdmap::vector<pair<int32_t,int32_t>>(
 	  new_pg_upmap_items.begin(), new_pg_upmap_items.end());
-      ss << "set " << pgid << " pg_upmap_items mapping to "
-	 << new_pg_upmap_items;
+      ss << "set " << pgid << " pg_upmap_items mapping to " << out;
     }
     break;
 
@@ -9074,7 +9099,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
     wait_for_finished_proposal(
       op, new Monitor::C_Command(mon, op, 0, rs, rdata, get_last_committed() + 1));
-     return true;
+    return true;
   } else if (prefix == "osd lost") {
     int64_t id;
     if (!cmd_getval(g_ceph_context, cmdmap, "id", id)) {
@@ -9201,7 +9226,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
     // make sure authmon is writeable.
     if (!mon->authmon()->is_writeable()) {
       dout(10) << __func__ << " waiting for auth mon to be writeable for "
-	       << "osd destroy" << dendl;
+	       << "osd new" << dendl;
       mon->authmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
       return false;
     }
@@ -9339,10 +9364,21 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
 	utime_t expires = ceph_clock_now();
 	double d;
 	// default one hour
-	cmd_getval(g_ceph_context, cmdmap, "expire", d, double(60*60));
+	cmd_getval(g_ceph_context, cmdmap, "expire", d,
+		   g_conf->mon_osd_blacklist_default_expire);
 	expires += d;
 	pending_inc.new_blacklist[addr] = expires;
+
+	{
+	  // cancel any pending un-blacklisting request too
+	  auto it = std::find(pending_inc.old_blacklist.begin(),
+	    pending_inc.old_blacklist.end(), addr);
+	  if (it != pending_inc.old_blacklist.end()) {
+	    pending_inc.old_blacklist.erase(it);
+	  }
+	}
+
 	ss << "blacklisting " << addr << " until " << expires << " (" << d << " sec)";
 	getline(ss, rs);
 	wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs,
diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc
index 971e15d5aa0..aee8256e077 100644
--- a/src/osd/OSDMap.cc
+++ b/src/osd/OSDMap.cc
@@ -1947,7 +1947,6 @@ void OSDMap::_apply_upmap(const pg_pool_t& pi, pg_t raw_pg, vector<int> *raw) const
       }
       if (!exists && pos >= 0) {
 	(*raw)[pos] = r.second;
-	return;
       }
     }
   }
diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h
index 37fd980977b..8e236cf805b 100644
--- a/src/osd/OSDMap.h
+++ b/src/osd/OSDMap.h
@@ -1038,6 +1038,24 @@ public:
     return p && pgid.ps() < p->get_pg_num();
   }
 
+  int get_pg_pool_min_size(pg_t pgid) const {
+    if (!pg_exists(pgid)) {
+      return -ENOENT;
+    }
+    const pg_pool_t *p = get_pg_pool(pgid.pool());
+    assert(p);
+    return p->get_min_size();
+  }
+
+  int get_pg_pool_size(pg_t pgid) const {
+    if (!pg_exists(pgid)) {
+      return -ENOENT;
+    }
+    const pg_pool_t *p = get_pg_pool(pgid.pool());
+    assert(p);
+    return p->get_size();
+  }
+
 private:
   /// pg -> (raw osd list)
   void _pg_to_raw_osds(
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index 056cd12de4e..07ddc912b32 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -987,7 +987,7 @@ static opt_mapping_t opt_mapping = boost::assign::map_list_of
 	     pool_opts_t::CSUM_MIN_BLOCK, pool_opts_t::INT));
 
 bool pool_opts_t::is_opt_name(const std::string& name) {
-    return opt_mapping.find(name) != opt_mapping.end();
+    return opt_mapping.count(name);
 }
 
 pool_opts_t::opt_desc_t pool_opts_t::get_opt_desc(const std::string& name) {
@@ -997,7 +997,7 @@ pool_opts_t::opt_desc_t pool_opts_t::get_opt_desc(const std::string& name) {
 }
 
 bool pool_opts_t::is_set(pool_opts_t::key_t key) const {
-    return opts.find(key) != opts.end();
+    return opts.count(key);
 }
 
 const pool_opts_t::value_t& pool_opts_t::get(pool_opts_t::key_t key) const {
diff --git a/src/test/cli/osdmaptool/upmap.t b/src/test/cli/osdmaptool/upmap.t
index e86a56a4f97..17542046794 100644
--- a/src/test/cli/osdmaptool/upmap.t
+++ b/src/test/cli/osdmaptool/upmap.t
@@ -13,10 +13,10 @@
   ceph osd pg-upmap-items 0.89 8 13 54 50
   ceph osd pg-upmap-items 0.8d 219 223 210 209
   ceph osd pg-upmap-items 0.90 163 166 210 209 192 191
+  ceph osd pg-upmap-items 0.9e 210 209 27 28
   ceph osd pg-upmap-items 0.12b 54 50 227 225
   ceph osd pg-upmap-items 0.13f 54 50
   ceph osd pg-upmap-items 0.151 36 37 54 50
   ceph osd pg-upmap-items 0.1c0 78 83 43 48 54 50
   ceph osd pg-upmap-items 0.1e3 54 50 197 201
-  ceph osd pg-upmap-items 0.2c4 54 50
   $ rm -f om c
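
Reviewer note on the pg-upmap-items hunk above: instead of streaming the raw vector<pair<int32_t,int32_t>>, the patch now builds a human-readable "[from->to,...]" string by appending a comma after every pair and trimming the trailing one before closing the bracket. The standalone C++ sketch below only illustrates that output format; it is not code from the Ceph tree, format_upmap_items is a hypothetical helper name, and it uses a separator check rather than the patch's resize-and-trim.

// Standalone illustration of the "[from->to,...]" rendering, e.g. "[8->13,54->50]".
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

static std::string format_upmap_items(
    const std::vector<std::pair<int32_t, int32_t>>& items) {
  std::ostringstream out;
  out << "[";
  for (std::size_t i = 0; i < items.size(); ++i) {
    if (i > 0)
      out << ",";  // separator between pairs, so no trailing comma to trim
    out << items[i].first << "->" << items[i].second;
  }
  out << "]";
  return out.str();
}

int main() {
  // Pairs taken from the upmap.t fixture: remap OSD 8 to 13 and OSD 54 to 50.
  std::vector<std::pair<int32_t, int32_t>> items = {{8, 13}, {54, 50}};
  std::cout << format_upmap_items(items) << "\n";  // prints [8->13,54->50]
  return 0;
}

Either approach produces the same text that the monitor puts in its reply ("set ... pg_upmap_items mapping to [...]"); the separator check simply avoids touching the string after the loop.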