diff --git a/src/messages/MOSDScrub2.h b/src/messages/MOSDScrub2.h new file mode 100644 index 00000000000..82a22923689 --- /dev/null +++ b/src/messages/MOSDScrub2.h @@ -0,0 +1,57 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "msg/Message.h" + +/* + * instruct an OSD to scrub some or all pg(s) + */ + +struct MOSDScrub2 : public Message { + + static const int HEAD_VERSION = 1; + static const int COMPAT_VERSION = 1; + + uuid_d fsid; + epoch_t epoch; + vector scrub_pgs; + bool repair = false; + bool deep = false; + + MOSDScrub2() : Message(MSG_OSD_SCRUB2, HEAD_VERSION, COMPAT_VERSION) {} + MOSDScrub2(const uuid_d& f, epoch_t e, vector& pgs, bool r, bool d) : + Message(MSG_OSD_SCRUB2, HEAD_VERSION, COMPAT_VERSION), + fsid(f), epoch(e), scrub_pgs(pgs), repair(r), deep(d) {} +private: + ~MOSDScrub2() override {} + +public: + const char *get_type_name() const override { return "scrub2"; } + void print(ostream& out) const override { + out << "scrub2(" << scrub_pgs; + if (repair) + out << " repair"; + if (deep) + out << " deep"; + out << ")"; + } + + void encode_payload(uint64_t features) override { + using ceph::encode; + encode(fsid, payload); + encode(epoch, payload); + encode(scrub_pgs, payload); + encode(repair, payload); + encode(deep, payload); + } + void decode_payload() override { + bufferlist::iterator p = payload.begin(); + decode(fsid, p); + decode(epoch, p); + decode(scrub_pgs, p); + decode(repair, p); + decode(deep, p); + } +}; diff --git a/src/mgr/DaemonServer.cc b/src/mgr/DaemonServer.cc index 5ae53c853ec..5684ec66964 100644 --- a/src/mgr/DaemonServer.cc +++ b/src/mgr/DaemonServer.cc @@ -30,6 +30,7 @@ #include "messages/MCommandReply.h" #include "messages/MPGStats.h" #include "messages/MOSDScrub.h" +#include "messages/MOSDScrub2.h" #include "messages/MOSDForceRecovery.h" #include "common/errno.h" @@ -838,6 +839,7 @@ bool DaemonServer::handle_command(MCommand *m) prefix == "pg deep-scrub") { string scrubop = prefix.substr(3, string::npos); pg_t pgid; + spg_t spgid; string pgidstr; cmd_getval(g_ceph_context, cmdctx->cmdmap, "pgid", pgidstr); if (!pgid.parse(pgidstr.c_str())) { @@ -855,8 +857,10 @@ bool DaemonServer::handle_command(MCommand *m) return true; } int acting_primary = -1; + epoch_t epoch; cluster_state.with_osdmap([&](const OSDMap& osdmap) { - acting_primary = osdmap.get_pg_acting_primary(pgid); + epoch = osdmap.get_epoch(); + osdmap.get_primary_shard(pgid, &acting_primary, &spgid); }); if (acting_primary == -1) { ss << "pg " << pgid << " has no primary osd"; @@ -869,14 +873,23 @@ bool DaemonServer::handle_command(MCommand *m) << " is not currently connected"; cmdctx->reply(-EAGAIN, ss); } - vector pgs = { pgid }; for (auto& con : p->second) { - con->send_message(new MOSDScrub(monc->get_fsid(), - pgs, - scrubop == "repair", - scrubop == "deep-scrub")); + if (HAVE_FEATURE(con->get_features(), SERVER_MIMIC)) { + vector pgs = { spgid }; + con->send_message(new MOSDScrub2(monc->get_fsid(), + epoch, + pgs, + scrubop == "repair", + scrubop == "deep-scrub")); + } else { + vector pgs = { pgid }; + con->send_message(new MOSDScrub(monc->get_fsid(), + pgs, + scrubop == "repair", + scrubop == "deep-scrub")); + } } - ss << "instructing pg " << pgid << " on osd." << acting_primary + ss << "instructing pg " << spgid << " on osd." << acting_primary << " to " << scrubop; cmdctx->reply(0, ss); return true; @@ -916,15 +929,41 @@ bool DaemonServer::handle_command(MCommand *m) } set sent_osds, failed_osds; for (auto osd : osds) { + vector spgs; + epoch_t epoch; + cluster_state.with_pgmap([&](const PGMap& pgmap) { + cluster_state.with_osdmap([&](const OSDMap& osdmap) { + epoch = osdmap.get_epoch(); + auto p = pgmap.pg_by_osd.find(osd); + if (p != pgmap.pg_by_osd.end()) { + for (auto pgid : p->second) { + int primary; + spg_t spg; + osdmap.get_primary_shard(pgid, &primary, &spg); + if (primary == osd) { + spgs.push_back(spg); + } + } + } + }); + }); auto p = osd_cons.find(osd); if (p == osd_cons.end()) { failed_osds.insert(osd); } else { sent_osds.insert(osd); for (auto& con : p->second) { - con->send_message(new MOSDScrub(monc->get_fsid(), - pvec.back() == "repair", - pvec.back() == "deep-scrub")); + if (HAVE_FEATURE(con->get_features(), SERVER_MIMIC)) { + con->send_message(new MOSDScrub2(monc->get_fsid(), + epoch, + spgs, + pvec.back() == "repair", + pvec.back() == "deep-scrub")); + } else { + con->send_message(new MOSDScrub(monc->get_fsid(), + pvec.back() == "repair", + pvec.back() == "deep-scrub")); + } } } } diff --git a/src/msg/Message.cc b/src/msg/Message.cc index a6283c7ba97..ea30911b7ee 100644 --- a/src/msg/Message.cc +++ b/src/msg/Message.cc @@ -81,6 +81,7 @@ #include "messages/MOSDPGCreate2.h" #include "messages/MOSDPGTrim.h" #include "messages/MOSDScrub.h" +#include "messages/MOSDScrub2.h" #include "messages/MOSDScrubReserve.h" #include "messages/MOSDRepScrub.h" #include "messages/MOSDRepScrubMap.h" @@ -524,6 +525,9 @@ Message *decode_message(CephContext *cct, int crcflags, case MSG_OSD_SCRUB: m = new MOSDScrub; break; + case MSG_OSD_SCRUB2: + m = new MOSDScrub2; + break; case MSG_OSD_SCRUB_RESERVE: m = new MOSDScrubReserve; break; diff --git a/src/msg/Message.h b/src/msg/Message.h index 547bfa7d14f..4c17ace1913 100644 --- a/src/msg/Message.h +++ b/src/msg/Message.h @@ -126,6 +126,7 @@ #define MSG_OSD_PG_RECOVERY_DELETE 118 #define MSG_OSD_PG_RECOVERY_DELETE_REPLY 119 #define MSG_OSD_PG_CREATE2 120 +#define MSG_OSD_SCRUB2 121 // *** MDS *** diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 5b188e1aec8..f07266da06b 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -111,6 +111,7 @@ #include "messages/MOSDAlive.h" #include "messages/MOSDScrub.h" +#include "messages/MOSDScrub2.h" #include "messages/MOSDScrubReserve.h" #include "messages/MOSDRepScrub.h" @@ -6545,6 +6546,9 @@ void OSD::ms_fast_dispatch(Message *m) case MSG_OSD_FORCE_RECOVERY: handle_fast_force_recovery(static_cast(m)); return; + case MSG_OSD_SCRUB2: + handle_fast_scrub(static_cast(m)); + return; case MSG_OSD_PG_CREATE2: return handle_fast_pg_create(static_cast(m)); @@ -6800,6 +6804,7 @@ void OSD::_dispatch(Message *m) } } +// remove me post-nautilus void OSD::handle_scrub(MOSDScrub *m) { dout(10) << "handle_scrub " << *m << dendl; @@ -6854,6 +6859,31 @@ void OSD::handle_scrub(MOSDScrub *m) m->put(); } +void OSD::handle_fast_scrub(MOSDScrub2 *m) +{ + dout(10) << __func__ << " " << *m << dendl; + if (!require_mon_or_mgr_peer(m)) { + m->put(); + return; + } + if (m->fsid != monc->get_fsid()) { + dout(0) << __func__ << " fsid " << m->fsid << " != " << monc->get_fsid() + << dendl; + m->put(); + return; + } + for (auto pgid : m->scrub_pgs) { + enqueue_peering_evt( + pgid, + PGPeeringEventRef( + std::make_shared( + m->epoch, + m->epoch, + PG::RequestScrub(m->deep, m->repair)))); + } + m->put(); +} + bool OSD::scrub_random_backoff() { bool coin_flip = (rand() / (double)RAND_MAX >= diff --git a/src/osd/OSD.h b/src/osd/OSD.h index ff02231e900..bac288101e2 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -2122,6 +2122,7 @@ private: case CEPH_MSG_PING: case CEPH_MSG_OSD_OP: case CEPH_MSG_OSD_BACKOFF: + case MSG_OSD_SCRUB2: case MSG_OSD_FORCE_RECOVERY: case MSG_MON_COMMAND: case MSG_COMMAND: @@ -2246,6 +2247,7 @@ private: uuid_d& cluster_fsid, uuid_d& osd_fsid, int whoami); void handle_scrub(struct MOSDScrub *m); + void handle_fast_scrub(struct MOSDScrub2 *m); void handle_osd_ping(class MOSDPing *m); int init_op_flags(OpRequestRef& op);