From 8fbefb00baf10f8e1e8e023ac13d5f5bb83e889d Mon Sep 17 00:00:00 2001 From: Sidharth Anupkrishnan Date: Thu, 21 May 2020 20:55:54 +0530 Subject: [PATCH] mds: add killpoints for directory fragmentation Defined a set of killpoints for testing directory fragmentation during cluster recovery after failure. Signed-off-by: Sidharth Anupkrishnan Signed-off-by: Patrick Donnelly --- src/common/options/mds.yaml.in | 8 ++++++++ src/mds/MDCache.cc | 23 +++++++++++++++++++++++ src/mds/MDCache.h | 14 ++++++++++++++ src/mds/MDSRank.cc | 1 + 4 files changed, 46 insertions(+) diff --git a/src/common/options/mds.yaml.in b/src/common/options/mds.yaml.in index cf85729e6e2..e16780a20c4 100644 --- a/src/common/options/mds.yaml.in +++ b/src/common/options/mds.yaml.in @@ -1106,6 +1106,14 @@ options: fmt_desc: Ceph will inject MDS failure in the subtree import code (for developers only). with_legacy: true +- name: mds_kill_dirfrag_at + type: int + level: dev + default: 0 + services: + - mds + flags: + - runtime - name: mds_kill_link_at type: int level: dev diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 696634c0ee0..2ba0d638af0 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -156,6 +156,7 @@ MDCache::MDCache(MDSRank *m, PurgeQueue &purge_queue_) : export_ephemeral_random_max = g_conf().get_val("mds_export_ephemeral_random_max"); symlink_recovery = g_conf().get_val("mds_symlink_recovery"); + kill_dirfrag_at = static_cast(g_conf().get_val("mds_kill_dirfrag_at")); kill_shutdown_at = g_conf().get_val("mds_kill_shutdown_at"); @@ -208,6 +209,11 @@ void MDCache::handle_conf_change(const std::set& changed, const MDS if (changed.count("mds_export_ephemeral_random_max")) { export_ephemeral_random_max = g_conf().get_val("mds_export_ephemeral_random_max"); } + + if (changed.count("mds_kill_dirfrag_at")) { + kill_dirfrag_at = static_cast(g_conf().get_val("mds_kill_dirfrag_at")); + } + if (changed.count("mds_health_cache_threshold")) cache_health_threshold = 
g_conf().get_val("mds_health_cache_threshold"); if (changed.count("mds_cache_mid")) @@ -11765,6 +11771,7 @@ void MDCache::merge_dir(CInode *diri, frag_t frag) void MDCache::fragment_freeze_dirs(const std::vector& dirs) { + ceph_assert(kill_dirfrag_at != dirfrag_killpoint::FRAGMENT_FREEZE); bool any_subtree = false, any_non_subtree = false; for (const auto& dir : dirs) { dir->auth_pin(dir); // until we mark and complete them @@ -12183,6 +12190,8 @@ void MDCache::_fragment_logged(const MDRequestRef& mdr) dout(10) << "fragment_logged " << basedirfrag << " bits " << info.bits << " on " << *diri << dendl; + ceph_assert(kill_dirfrag_at != dirfrag_killpoint::FRAGMENT_LOGGED); + mdr->mark_event("prepare logged"); mdr->apply(); // mark scatterlock @@ -12219,6 +12228,7 @@ void MDCache::_fragment_stored(const MDRequestRef& mdr) // tell peers mds_rank_t diri_auth = (first->is_subtree_root() && !diri->is_auth()) ? diri->authority().first : CDIR_AUTH_UNKNOWN; + dout(20) << " first dirfrag " << *first << " diri_auth=" << diri_auth << dendl; for (const auto &p : first->get_replicas()) { if (mds->mdsmap->get_state(p.first) < MDSMap::STATE_REJOIN || (mds->mdsmap->get_state(p.first) == MDSMap::STATE_REJOIN && @@ -12245,6 +12255,7 @@ void MDCache::_fragment_stored(const MDRequestRef& mdr) * So we need to ensure replicas have received the notify, then unlock * the dirfragtreelock. 
*/ + dout(20) << " ack wanted" << dendl; notify->mark_ack_wanted(); info.notify_ack_waiting.insert(p.first); } @@ -12255,6 +12266,7 @@ void MDCache::_fragment_stored(const MDRequestRef& mdr) } mds->send_message_mds(notify, p.first); + ceph_assert(kill_dirfrag_at != dirfrag_killpoint::FRAGMENT_STORED_POST_NOTIFY); } // journal commit @@ -12277,6 +12289,8 @@ void MDCache::_fragment_stored(const MDRequestRef& mdr) dir->unfreeze_dir(); } + ceph_assert(kill_dirfrag_at != dirfrag_killpoint::FRAGMENT_STORED_POST_JOURNAL); + if (info.notify_ack_waiting.empty()) { fragment_drop_locks(info); } else { @@ -12287,6 +12301,8 @@ void MDCache::_fragment_stored(const MDRequestRef& mdr) void MDCache::_fragment_committed(dirfrag_t basedirfrag, const MDRequestRef& mdr) { dout(10) << "fragment_committed " << basedirfrag << dendl; + ceph_assert(kill_dirfrag_at != dirfrag_killpoint::FRAGMENT_COMMITTED); + if (mdr) mdr->mark_event("commit logged"); @@ -12325,6 +12341,8 @@ void MDCache::_fragment_committed(dirfrag_t basedirfrag, const MDRequestRef& mdr void MDCache::_fragment_old_purged(dirfrag_t basedirfrag, int bits, const MDRequestRef& mdr) { dout(10) << "fragment_old_purged " << basedirfrag << dendl; + ceph_assert(kill_dirfrag_at != dirfrag_killpoint::FRAGMENT_OLD_PURGED); + if (mdr) mdr->mark_event("old frags purged"); @@ -12361,6 +12379,8 @@ void MDCache::fragment_drop_locks(fragment_info_t& info) void MDCache::fragment_maybe_finish(const fragment_info_iterator& it) { + ceph_assert(kill_dirfrag_at != dirfrag_killpoint::FRAGMENT_MAYBE_FINISH); + if (!it->second.finishing) return; @@ -12383,6 +12403,7 @@ void MDCache::fragment_maybe_finish(const fragment_info_iterator& it) void MDCache::handle_fragment_notify_ack(const cref_t &ack) { dout(10) << "handle_fragment_notify_ack " << *ack << " from " << ack->get_source() << dendl; + ceph_assert(kill_dirfrag_at != dirfrag_killpoint::FRAGMENT_HANDLE_NOTIFY_ACK); mds_rank_t from = mds_rank_t(ack->get_source().num()); if (mds->get_state() < 
MDSMap::STATE_ACTIVE) { @@ -12406,6 +12427,7 @@ void MDCache::handle_fragment_notify_ack(const cref_t &ac void MDCache::handle_fragment_notify(const cref_t &notify) { dout(10) << "handle_fragment_notify " << *notify << " from " << notify->get_source() << dendl; + ceph_assert(kill_dirfrag_at != dirfrag_killpoint::FRAGMENT_HANDLE_NOTIFY); mds_rank_t from = mds_rank_t(notify->get_source().num()); if (mds->get_state() < MDSMap::STATE_REJOIN) { @@ -12453,6 +12475,7 @@ void MDCache::handle_fragment_notify(const cref_t &notify) auto ack = make_message(notify->get_base_dirfrag(), notify->get_bits(), notify->get_tid()); mds->send_message_mds(ack, from); + ceph_assert(kill_dirfrag_at != dirfrag_killpoint::FRAGMENT_HANDLE_NOTIFY_POSTACK); } } diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index dc56e06d03c..18c848d941c 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -1370,6 +1370,19 @@ private: StrayManager stray_manager; private: + enum dirfrag_killpoint : std::int8_t { + FRAGMENT_FREEZE = 1, + FRAGMENT_HANDLE_NOTIFY, + FRAGMENT_HANDLE_NOTIFY_POSTACK, + FRAGMENT_STORED_POST_NOTIFY, + FRAGMENT_STORED_POST_JOURNAL, + FRAGMENT_HANDLE_NOTIFY_ACK, + FRAGMENT_MAYBE_FINISH, + FRAGMENT_LOGGED, + FRAGMENT_COMMITTED, + FRAGMENT_OLD_PURGED, + }; + std::set replay_taken_inos; // the inos have been taken when replaying // -- fragmenting -- @@ -1499,6 +1512,7 @@ private: // Stores the symlink target on the file object's head bool symlink_recovery; + enum dirfrag_killpoint kill_dirfrag_at; // File size recovery RecoveryQueue recovery_queue; diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 2878887e335..c517594469b 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -4072,6 +4072,7 @@ const char** MDSRankDispatcher::get_tracked_conf_keys() const "mds_inject_journal_corrupt_dentry_first", "mds_inject_migrator_session_race", "mds_inject_rename_corrupt_dentry_first", + "mds_kill_dirfrag_at", "mds_kill_shutdown_at", "mds_log_event_large_threshold",
"mds_log_events_per_segment",