mirror of
https://github.com/ceph/ceph
synced 2025-01-03 01:22:53 +00:00
mon/OSDMonitor: batch OSDs nodown/noout support
This patch allows us to add a batch of OSDs (e.g., all OSDs of a specific host that is currently under maintenance) to a per-OSD nodown/noout list, so that they cannot be automatically marked down/out and are therefore excluded from data migration. This has the same effect as the global nodown/noout flag but is more fine-grained. Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
This commit is contained in:
parent
c41ab152e9
commit
4fec5b85f1
@ -1430,6 +1430,16 @@ function test_mon_osd()
|
||||
ceph osd dump | grep 'osd.0.*in'
|
||||
ceph osd find 0
|
||||
|
||||
ceph osd add-noout 0
|
||||
ceph health detail | grep 'noout osd(s).*0'
|
||||
ceph osd rm-noout 0
|
||||
# after rm-noout, osd.0 must no longer be listed under "noout osd(s)";
# the pattern has to match the health text exactly or this check is vacuous
! ceph health detail | grep 'noout osd(s).*0'
|
||||
|
||||
ceph osd add-nodown 0 1
|
||||
ceph health detail | grep 'nodown osd(s).*0.*1'
|
||||
ceph osd rm-nodown all
|
||||
! ceph health detail | grep 'nodown osd(s).*0.*1'
|
||||
|
||||
# make sure mark out preserves weight
|
||||
ceph osd reweight osd.0 .5
|
||||
ceph osd dump | grep ^osd.0 | grep 'weight 0.5'
|
||||
|
@ -46,6 +46,10 @@ const char *ceph_osd_state_name(int s)
|
||||
return "backfillfull";
|
||||
case CEPH_OSD_DESTROYED:
|
||||
return "destroyed";
|
||||
case CEPH_OSD_NODOWN:
|
||||
return "nodown";
|
||||
case CEPH_OSD_NOOUT:
|
||||
return "noout";
|
||||
default:
|
||||
return "???";
|
||||
}
|
||||
|
@ -110,14 +110,16 @@ struct ceph_eversion {
|
||||
*/
|
||||
|
||||
/* status bits */
|
||||
#define CEPH_OSD_EXISTS (1<<0)
|
||||
#define CEPH_OSD_UP (1<<1)
|
||||
#define CEPH_OSD_AUTOOUT (1<<2) /* osd was automatically marked out */
|
||||
#define CEPH_OSD_NEW (1<<3) /* osd is new, never marked in */
|
||||
#define CEPH_OSD_FULL (1<<4) /* osd is at or above full threshold */
|
||||
#define CEPH_OSD_NEARFULL (1<<5) /* osd is at or above nearfull threshold */
|
||||
#define CEPH_OSD_EXISTS (1<<0)
|
||||
#define CEPH_OSD_UP (1<<1)
|
||||
#define CEPH_OSD_AUTOOUT (1<<2) /* osd was automatically marked out */
|
||||
#define CEPH_OSD_NEW (1<<3) /* osd is new, never marked in */
|
||||
#define CEPH_OSD_FULL (1<<4) /* osd is at or above full threshold */
|
||||
#define CEPH_OSD_NEARFULL (1<<5) /* osd is at or above nearfull threshold */
|
||||
#define CEPH_OSD_BACKFILLFULL (1<<6) /* osd is at or above backfillfull threshold */
|
||||
#define CEPH_OSD_DESTROYED (1<<7) /* osd has been destroyed */
|
||||
#define CEPH_OSD_DESTROYED (1<<7) /* osd has been destroyed */
|
||||
#define CEPH_OSD_NODOWN (1<<8) /* osd can not be marked down */
|
||||
#define CEPH_OSD_NOOUT (1<<9) /* osd can not be marked out */
|
||||
|
||||
extern const char *ceph_osd_state_name(int s);
|
||||
|
||||
@ -132,24 +134,24 @@ extern const char *ceph_osd_state_name(int s);
|
||||
/*
|
||||
* osd map flag bits
|
||||
*/
|
||||
#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC) */
|
||||
#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC) */
|
||||
#define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */
|
||||
#define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */
|
||||
#define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */
|
||||
#define CEPH_OSDMAP_NOUP (1<<5) /* block osd boot */
|
||||
#define CEPH_OSDMAP_NODOWN (1<<6) /* block osd mark-down/failure */
|
||||
#define CEPH_OSDMAP_NOOUT (1<<7) /* block osd auto mark-out */
|
||||
#define CEPH_OSDMAP_NOIN (1<<8) /* block osd auto mark-in */
|
||||
#define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */
|
||||
#define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */
|
||||
#define CEPH_OSDMAP_NOSCRUB (1<<11) /* block periodic scrub */
|
||||
#define CEPH_OSDMAP_NODEEP_SCRUB (1<<12) /* block periodic deep-scrub */
|
||||
#define CEPH_OSDMAP_NOTIERAGENT (1<<13) /* disable tiering agent */
|
||||
#define CEPH_OSDMAP_NOREBALANCE (1<<14) /* block osd backfill unless pg is degraded */
|
||||
#define CEPH_OSDMAP_SORTBITWISE (1<<15) /* use bitwise hobject_t sort */
|
||||
#define CEPH_OSDMAP_REQUIRE_JEWEL (1<<16) /* require jewel for booting osds */
|
||||
#define CEPH_OSDMAP_REQUIRE_KRAKEN (1<<17) /* require kraken for booting osds */
|
||||
#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC) */
|
||||
#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC) */
|
||||
#define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */
|
||||
#define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */
|
||||
#define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */
|
||||
#define CEPH_OSDMAP_NOUP (1<<5) /* block osd boot */
|
||||
#define CEPH_OSDMAP_NODOWN (1<<6) /* block osd mark-down/failure */
|
||||
#define CEPH_OSDMAP_NOOUT (1<<7) /* block osd auto mark-out */
|
||||
#define CEPH_OSDMAP_NOIN (1<<8) /* block osd auto mark-in */
|
||||
#define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */
|
||||
#define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */
|
||||
#define CEPH_OSDMAP_NOSCRUB (1<<11) /* block periodic scrub */
|
||||
#define CEPH_OSDMAP_NODEEP_SCRUB (1<<12) /* block periodic deep-scrub */
|
||||
#define CEPH_OSDMAP_NOTIERAGENT (1<<13) /* disable tiering agent */
|
||||
#define CEPH_OSDMAP_NOREBALANCE (1<<14) /* block osd backfill unless pg is degraded */
|
||||
#define CEPH_OSDMAP_SORTBITWISE (1<<15) /* use bitwise hobject_t sort */
|
||||
#define CEPH_OSDMAP_REQUIRE_JEWEL (1<<16) /* require jewel for booting osds */
|
||||
#define CEPH_OSDMAP_REQUIRE_KRAKEN (1<<17) /* require kraken for booting osds */
|
||||
#define CEPH_OSDMAP_REQUIRE_LUMINOUS (1<<18) /* require l for booting osds */
|
||||
|
||||
/* these are hidden in 'ceph status' view */
|
||||
|
@ -650,6 +650,28 @@ COMMAND("osd in " \
|
||||
COMMAND("osd rm " \
|
||||
"name=ids,type=CephString,n=N", \
|
||||
"remove osd(s) <id> [<id>...] in", "osd", "rw", "cli,rest")
|
||||
/*
 * Per-osd noout/nodown commands. Each one takes a list of osd ids
 * ("ids", n=N); per the help strings, all|any|* may be given instead of
 * explicit ids.
 */
/* osd add-noout: mark the given osds as noout */
COMMAND("osd add-noout " \
|
||||
"name=ids,type=CephString,n=N", \
|
||||
"mark osd(s) <id> [<id>...] as noout, " \
|
||||
"or use <all|any|*> to mark all osds as noout", \
|
||||
"osd", "rw", "cli,rest")
|
||||
/* osd add-nodown: mark the given osds as nodown */
COMMAND("osd add-nodown " \
|
||||
"name=ids,type=CephString,n=N", \
|
||||
"mark osd(s) <id> [<id>...] as nodown, " \
|
||||
"or use <all|any|*> to mark all osds as nodown", \
|
||||
"osd", "rw", "cli,rest")
|
||||
/* osd rm-noout: drop the noout mark from the given osds */
COMMAND("osd rm-noout " \
|
||||
"name=ids,type=CephString,n=N", \
|
||||
"allow osd(s) <id> [<id>...] to be marked out " \
|
||||
"(if they are currently marked as noout), " \
|
||||
"can use <all|any|*> to automatically filter out all noout osds", \
|
||||
"osd", "rw", "cli,rest")
|
||||
/* osd rm-nodown: drop the nodown mark from the given osds */
COMMAND("osd rm-nodown " \
|
||||
"name=ids,type=CephString,n=N", \
|
||||
"allow osd(s) <id> [<id>...] to be marked down " \
|
||||
"(if they are currently marked as nodown), " \
|
||||
"can use <all|any|*> to automatically filter out all nodown osds", \
|
||||
"osd", "rw", "cli,rest")
|
||||
COMMAND("osd reweight " \
|
||||
"name=id,type=CephOsdName " \
|
||||
"type=CephFloat,name=weight,range=0.0|1.0", \
|
||||
|
@ -1888,18 +1888,26 @@ bool OSDMonitor::prepare_mark_me_down(MonOpRequestRef op)
|
||||
bool OSDMonitor::can_mark_down(int i)
|
||||
{
|
||||
if (osdmap.test_flag(CEPH_OSDMAP_NODOWN)) {
|
||||
dout(5) << "can_mark_down NODOWN flag set, will not mark osd." << i << " down" << dendl;
|
||||
dout(5) << __func__ << " NODOWN flag set, will not mark osd." << i
|
||||
<< " down" << dendl;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (osdmap.is_nodown(i)) {
|
||||
dout(5) << __func__ << " osd." << i << " is marked as nodown, "
|
||||
<< "will not mark it down" << dendl;
|
||||
return false;
|
||||
}
|
||||
|
||||
int num_osds = osdmap.get_num_osds();
|
||||
if (num_osds == 0) {
|
||||
dout(5) << "can_mark_down no osds" << dendl;
|
||||
dout(5) << __func__ << " no osds" << dendl;
|
||||
return false;
|
||||
}
|
||||
int up = osdmap.get_num_up_osds() - pending_inc.get_net_marked_down(&osdmap);
|
||||
float up_ratio = (float)up / (float)num_osds;
|
||||
if (up_ratio < g_conf->mon_osd_min_up_ratio) {
|
||||
dout(2) << "can_mark_down current up_ratio " << up_ratio << " < min "
|
||||
dout(2) << __func__ << " current up_ratio " << up_ratio << " < min "
|
||||
<< g_conf->mon_osd_min_up_ratio
|
||||
<< ", will not mark osd." << i << " down" << dendl;
|
||||
return false;
|
||||
@ -1926,6 +1934,13 @@ bool OSDMonitor::can_mark_out(int i)
|
||||
dout(5) << __func__ << " NOOUT flag set, will not mark osds out" << dendl;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (osdmap.is_noout(i)) {
|
||||
dout(5) << __func__ << " osd." << i << " is marked as noout, "
|
||||
<< "will not mark it out" << dendl;
|
||||
return false;
|
||||
}
|
||||
|
||||
int num_osds = osdmap.get_num_osds();
|
||||
if (num_osds == 0) {
|
||||
dout(5) << __func__ << " no osds" << dendl;
|
||||
@ -3787,6 +3802,32 @@ void OSDMonitor::get_health(list<pair<health_status_t,string> >& summary,
|
||||
detail->push_back(make_pair(HEALTH_WARN, ss.str()));
|
||||
}
|
||||
}
|
||||
|
||||
// warn if there is any nodown osds.
|
||||
vector<int> nodown_osds;
|
||||
osdmap.get_nodown_osds(&nodown_osds);
|
||||
if (nodown_osds.size()) {
|
||||
ostringstream ss;
|
||||
ss << nodown_osds.size() << " nodown osd(s)";
|
||||
summary.push_back(make_pair(HEALTH_WARN, ss.str()));
|
||||
if (detail) {
|
||||
ss << ": " << nodown_osds;
|
||||
detail->push_back(make_pair(HEALTH_WARN, ss.str()));
|
||||
}
|
||||
}
|
||||
|
||||
// warn if there is any noout osds.
|
||||
vector<int> noout_osds;
|
||||
osdmap.get_noout_osds(&noout_osds);
|
||||
if (noout_osds.size()) {
|
||||
ostringstream ss;
|
||||
ss << noout_osds.size() << " noout osd(s)";
|
||||
summary.push_back(make_pair(HEALTH_WARN, ss.str()));
|
||||
if (detail) {
|
||||
ss << ": " << noout_osds;
|
||||
detail->push_back(make_pair(HEALTH_WARN, ss.str()));
|
||||
}
|
||||
}
|
||||
}
|
||||
// note: we leave it to ceph-mgr to generate detailed health warnings
|
||||
// with actual osd utilizations
|
||||
@ -8417,6 +8458,198 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
|
||||
get_last_committed() + 1));
|
||||
return true;
|
||||
}
|
||||
} else if (prefix == "osd add-noout" ||
|
||||
prefix == "osd add-nodown") {
|
||||
|
||||
bool noout = prefix == "osd add-noout";
|
||||
bool any = false;
|
||||
bool stop = false;
|
||||
|
||||
vector<string> idvec;
|
||||
cmd_getval(g_ceph_context, cmdmap, "ids", idvec);
|
||||
for (unsigned j = 0; j < idvec.size() && !stop; j++) {
|
||||
|
||||
set<int> osds;
|
||||
|
||||
// wildcard?
|
||||
if (j == 0 &&
|
||||
(idvec[0] == "any" || idvec[0] == "all" || idvec[0] == "*")) {
|
||||
osdmap.get_all_osds(osds);
|
||||
stop = true;
|
||||
} else {
|
||||
// try traditional single osd way
|
||||
|
||||
long osd = parse_osd_id(idvec[j].c_str(), &ss);
|
||||
if (osd < 0) {
|
||||
// ss has reason for failure
|
||||
ss << ", unable to parse osd id:\"" << idvec[j] << "\". ";
|
||||
err = -EINVAL;
|
||||
continue;
|
||||
}
|
||||
|
||||
osds.insert(osd);
|
||||
}
|
||||
|
||||
for (auto &osd : osds) {
|
||||
|
||||
if (!osdmap.exists(osd)) {
|
||||
ss << "osd." << osd << " does not exist. ";
|
||||
continue;
|
||||
}
|
||||
|
||||
if (noout) {
|
||||
if (osdmap.is_out(osd)) {
|
||||
ss << "osd." << osd << " is already out. ";
|
||||
continue;
|
||||
}
|
||||
|
||||
if (osdmap.is_noout(osd)) { // already noout?
|
||||
// continue to check if there is any pending "rm-noout" request
|
||||
if (pending_inc.pending_osd_has_state(osd, CEPH_OSD_NOOUT)) {
|
||||
// cancel it
|
||||
pending_inc.pending_osd_state_clear(osd, CEPH_OSD_NOOUT);
|
||||
any = true;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
pending_inc.pending_osd_state_set(osd, CEPH_OSD_NOOUT);
|
||||
any = true;
|
||||
} else {
|
||||
// nodown
|
||||
|
||||
if (osdmap.is_down(osd)) {
|
||||
ss << "osd." << osd << " is already down. ";
|
||||
continue;
|
||||
}
|
||||
|
||||
if (osdmap.is_nodown(osd)) { // already nodown?
|
||||
// continue to check if there is any pending "rm-nodown" request
|
||||
if (pending_inc.pending_osd_has_state(osd, CEPH_OSD_NODOWN)) {
|
||||
// cancel it
|
||||
pending_inc.pending_osd_state_clear(osd, CEPH_OSD_NODOWN);
|
||||
any = true;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
pending_inc.pending_osd_state_set(osd, CEPH_OSD_NODOWN);
|
||||
any = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (any) {
|
||||
getline(ss, rs);
|
||||
wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, err, rs,
|
||||
get_last_committed() + 1));
|
||||
return true;
|
||||
}
|
||||
} else if (prefix == "osd rm-noout" ||
|
||||
prefix == "osd rm-nodown") {
|
||||
|
||||
bool any = false;
|
||||
bool stop = false;
|
||||
bool noout = prefix == "osd rm-noout";
|
||||
|
||||
vector<string> idvec;
|
||||
cmd_getval(g_ceph_context, cmdmap, "ids", idvec);
|
||||
|
||||
for (unsigned j = 0; j < idvec.size() && !stop; j++) {
|
||||
vector<int> osds;
|
||||
|
||||
// wildcard?
|
||||
if (j == 0 &&
|
||||
(idvec[0] == "any" || idvec[0] == "all" || idvec[0] == "*")) {
|
||||
|
||||
// touch previous noout/nodown osds only
|
||||
if (noout) {
|
||||
osdmap.get_noout_osds(&osds);
|
||||
} else {
|
||||
osdmap.get_nodown_osds(&osds);
|
||||
}
|
||||
|
||||
// cancel pending noout/nodown requests too,
|
||||
// if there is any
|
||||
vector<int> pending_state_osds;
|
||||
(void) pending_inc.get_pending_state_osds(&pending_state_osds);
|
||||
for (auto &p : pending_state_osds) {
|
||||
if (noout) {
|
||||
if (!osdmap.is_noout(p) &&
|
||||
pending_inc.pending_osd_has_state(p, CEPH_OSD_NOOUT)) {
|
||||
pending_inc.pending_osd_state_clear(p, CEPH_OSD_NOOUT);
|
||||
any = true;
|
||||
}
|
||||
} else {
|
||||
if (!osdmap.is_nodown(p) &&
|
||||
pending_inc.pending_osd_has_state(p, CEPH_OSD_NODOWN)) {
|
||||
pending_inc.pending_osd_state_clear(p, CEPH_OSD_NODOWN);
|
||||
any = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stop = true;
|
||||
} else {
|
||||
// try traditional single osd way
|
||||
|
||||
long osd = parse_osd_id(idvec[j].c_str(), &ss);
|
||||
if (osd < 0) {
|
||||
// ss has reason for failure
|
||||
ss << ", unable to parse osd id:\"" << idvec[j] << "\". ";
|
||||
err = -EINVAL;
|
||||
continue;
|
||||
}
|
||||
|
||||
osds.push_back(osd);
|
||||
}
|
||||
|
||||
for (auto &osd : osds) {
|
||||
|
||||
if (!osdmap.exists(osd)) {
|
||||
ss << "osd." << osd << " does not exist. ";
|
||||
continue;
|
||||
}
|
||||
|
||||
if (noout) {
|
||||
if (osdmap.is_noout(osd)) {
|
||||
pending_inc.pending_osd_state_set(osd, CEPH_OSD_NOOUT);
|
||||
any = true;
|
||||
} else {
|
||||
// noout flag is not set or has already been successfully cancelled
|
||||
// continue to check pending_inc
|
||||
if (pending_inc.pending_osd_has_state(osd, CEPH_OSD_NOOUT)) {
|
||||
// cancel pending noout flag
|
||||
pending_inc.pending_osd_state_clear(osd, CEPH_OSD_NOOUT);
|
||||
any = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// nodown
|
||||
if (osdmap.is_nodown(osd)) {
|
||||
pending_inc.pending_osd_state_set(osd, CEPH_OSD_NODOWN);
|
||||
any = true;
|
||||
} else {
|
||||
// nodown flag is not set or has already been successfully cancelled
|
||||
// continue to check pending_inc
|
||||
if (pending_inc.pending_osd_has_state(osd, CEPH_OSD_NODOWN)) {
|
||||
// cancel pending nodown flag
|
||||
pending_inc.pending_osd_state_clear(osd, CEPH_OSD_NODOWN);
|
||||
any = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (any) {
|
||||
getline(ss, rs);
|
||||
wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, err, rs,
|
||||
get_last_committed() + 1));
|
||||
return true;
|
||||
}
|
||||
} else if (prefix == "osd pg-temp") {
|
||||
string pgidstr;
|
||||
if (!cmd_getval(g_ceph_context, cmdmap, "pgid", pgidstr)) {
|
||||
|
@ -457,6 +457,31 @@ public:
|
||||
|
||||
/// propagate updated pools' snap metadata to any of their tiers
|
||||
int propagate_snaps_to_tiers(CephContext *cct, const OSDMap &base);
|
||||
|
||||
/// filter out osds with any pending state changing
|
||||
size_t get_pending_state_osds(vector<int> *osds) {
|
||||
assert(osds);
|
||||
osds->clear();
|
||||
|
||||
for (auto &p : new_state) {
|
||||
osds->push_back(p.first);
|
||||
}
|
||||
|
||||
return osds->size();
|
||||
}
|
||||
|
||||
bool pending_osd_has_state(int osd, unsigned state) {
|
||||
return new_state.count(osd) && (new_state[osd] & state) != 0;
|
||||
}
|
||||
|
||||
/// OR the bits of @p state into the new_state entry of @p osd.
/// operator[] creates the entry when the osd had none.
void pending_osd_state_set(int osd, unsigned state) {
  auto &pending = new_state[osd];
  pending |= state;
}
|
||||
|
||||
/// Remove the bits of @p state from the new_state entry of @p osd.
///
/// Unlike a plain `new_state[osd] &= ~state`, this never creates an entry
/// for an osd that had nothing pending, and it erases the entry when this
/// call clears its last bit. A leftover zero entry would still be listed
/// by get_pending_state_osds().
/// NOTE(review): a zero-valued new_state entry presumably also carries a
/// meaning of its own when the incremental is applied -- confirm against
/// OSDMap::apply_incremental.
void pending_osd_state_clear(int osd, unsigned state) {
  auto entry = new_state.find(osd);
  if (entry == new_state.end() || (entry->second & state) == 0) {
    // none of the requested bits is pending; leave new_state untouched
    return;
  }

  entry->second &= ~state;
  if (entry->second == 0) {
    // no pending change left for this osd
    new_state.erase(entry);
  }
}
|
||||
|
||||
};
|
||||
|
||||
private:
|
||||
@ -745,6 +770,36 @@ public:
|
||||
return !is_out(osd);
|
||||
}
|
||||
|
||||
bool is_nodown(int osd) const {
|
||||
return exists(osd) && (osd_state[osd] & CEPH_OSD_NODOWN);
|
||||
}
|
||||
|
||||
bool is_noout(int osd) const {
|
||||
return exists(osd) && (osd_state[osd] & CEPH_OSD_NOOUT);
|
||||
}
|
||||
|
||||
/// Fill @p osds with the ids in [0, max_osd) for which is_nodown() holds,
/// in ascending order.
///
/// @param osds  output vector, must be non-null; cleared before filling
void get_nodown_osds(vector<int> *osds) const {
  assert(osds);
  osds->clear();

  for (int id = 0; id < max_osd; ++id) {
    if (!is_nodown(id)) {
      continue;
    }
    osds->push_back(id);
  }
}
|
||||
|
||||
/// Fill @p osds with the ids in [0, max_osd) for which is_noout() holds,
/// in ascending order.
///
/// @param osds  output vector, must be non-null; cleared before filling
void get_noout_osds(vector<int> *osds) const {
  assert(osds);
  osds->clear();

  for (int id = 0; id < max_osd; ++id) {
    if (!is_noout(id)) {
      continue;
    }
    osds->push_back(id);
  }
}
|
||||
|
||||
/**
|
||||
* check if an entire crush subtree is down
|
||||
*/
|
||||
|
Loading…
Reference in New Issue
Block a user