Mirror of https://github.com/ceph/ceph (synced 2024-12-27)
Merge PR #43184 into main
* refs/pull/43184/head:
  qa: fix journal flush failure issue due to the MDS daemon crashes
  qa: add test support for the alloc ino failing
  mds: do not take the ino which has been used

Reviewed-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Venky Shankar <vshankar@redhat.com>
Reviewed-by: Patrick Donnelly <pdonnell@redhat.com>
commit 69882f5123
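The core of the change is easiest to see before wading into the hunks: during journal replay the MDS records every ino the journal shows as already consumed, and `Server::prepare_new_inode()` now loops, discarding any candidate ino found in that set. Below is a minimal, self-contained sketch of that idea; `next_free_ino` and `prepare_new_ino` are illustrative stand-ins, not the real MDS interfaces.

```cpp
// Hypothetical, simplified model of the fix -- not the actual MDS code.
#include <cassert>
#include <cstdint>
#include <iostream>
#include <set>

using inodeno_t = uint64_t;

std::set<inodeno_t> replay_taken_inos;  // inos consumed while replaying the journal
inodeno_t next_free_ino = 100;          // stand-in for the real inotable

// Records an ino that the journal shows as already used.
void insert_taken_inos(inodeno_t ino) { replay_taken_inos.insert(ino); }

// Returns true (and forgets the ino) if replay already consumed it.
bool test_and_clear_taken_inos(inodeno_t ino) {
  return replay_taken_inos.erase(ino) != 0;
}

// Keep allocating until we draw an ino that replay did not already take.
inodeno_t prepare_new_ino() {
  inodeno_t ino = 0;
  do {
    ino = next_free_ino++;
    if (test_and_clear_taken_inos(ino))
      ino = 0;  // already taken during replay; try again
  } while (!ino);
  return ino;
}

int main() {
  insert_taken_inos(100);  // pretend replay consumed ino 100
  inodeno_t ino = prepare_new_ino();
  assert(ino == 101);      // 100 is skipped, 101 is handed out
  std::cout << "allocated ino " << ino << "\n";
}
```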
@@ -57,6 +57,8 @@
 .. confval:: mds_kill_import_at
 .. confval:: mds_kill_link_at
 .. confval:: mds_kill_rename_at
+.. confval:: mds_inject_skip_replaying_inotable
+.. confval:: mds_kill_skip_replaying_inotable
 .. confval:: mds_wipe_sessions
 .. confval:: mds_wipe_ino_prealloc
 .. confval:: mds_skip_ino
@@ -11,3 +11,4 @@ overrides:
 - has not responded to cap revoke by MDS for over
 - MDS_CLIENT_LATE_RELEASE
 - responding to mclientcaps
+- RECENT_CRASH
@@ -593,3 +593,46 @@ class TestCacheDrop(CephFSTestCase):
         # particular operation causing this is journal flush which causes the
         # MDS to wait for cap revoke.
         self.mount_a.resume_netns()
+
+class TestSkipReplayInoTable(CephFSTestCase):
+    MDSS_REQUIRED = 1
+    CLIENTS_REQUIRED = 1
+
+    def test_alloc_cinode_assert(self):
+        """
+        Test alloc CInode assert.
+
+        See: https://tracker.ceph.com/issues/52280
+        """
+        # Create a directory; the MDS will journal this and then crash
+        self.mount_a.run_shell(["rm", "-rf", "test_alloc_ino"])
+        self.mount_a.run_shell(["mkdir", "test_alloc_ino"])
+
+        status = self.fs.status()
+        rank0 = self.fs.get_rank(rank=0, status=status)
+
+        self.fs.mds_asok(['config', 'set', 'mds_kill_skip_replaying_inotable', "true"])
+        # This will make the MDS crash; since we only have one MDS in the
+        # cluster, without "wait=False" it would get stuck here forever.
+        self.mount_a.run_shell(["mkdir", "test_alloc_ino/dir1"], wait=False)
+
+        # sleep 10 seconds to make sure the journal logs are flushed and
+        # the MDS crashes
+        time.sleep(10)
+
+        # Now set the MDS config to skip replaying the inotable
+        self.fs.set_ceph_conf('mds', 'mds_inject_skip_replaying_inotable', True)
+        self.fs.set_ceph_conf('mds', 'mds_wipe_sessions', True)
+
+        self.fs.mds_restart()
+        # sleep 5 seconds to make sure the mds tell command won't get stuck
+        time.sleep(5)
+        self.fs.wait_for_daemons()
+
+        self.delete_mds_coredump(rank0['name'])
+
+        self.mount_a.run_shell(["mkdir", "test_alloc_ino/dir2"])
+
+        ls_out = set(self.mount_a.ls("test_alloc_ino/"))
+        self.assertEqual(ls_out, {"dir1", "dir2"})
@@ -1085,6 +1085,26 @@ options:
   default: false
   services:
  - mds
+- name: mds_kill_skip_replaying_inotable
+  type: bool
+  level: dev
+  default: false
+  services:
+  - mds
+  fmt_desc: Ceph will skip replaying the inotable when replaying the journal, and
+    the primary MDS will crash, while the replacing MDS won't.
+    (for testing only).
+  with_legacy: true
+- name: mds_inject_skip_replaying_inotable
+  type: bool
+  level: dev
+  default: false
+  services:
+  - mds
+  fmt_desc: Ceph will skip replaying the inotable when replaying the journal, and
+    the primary MDS will crash, while the replacing MDS won't.
+    (for testing only).
+  with_legacy: true
 # percentage of MDS modify replies to skip sending the client a trace on [0-1]
 - name: mds_inject_traceless_reply_probability
   type: float
@@ -1950,6 +1950,7 @@ CDentry *CDir::_load_dentry(

   if (!undef_inode) {
     mdcache->add_inode(in); // add
+    mdcache->insert_taken_inos(in->ino());
     dn = add_primary_dentry(dname, in, std::move(alternate_name), first, last); // link
   }
   dout(12) << "_fetched got " << *dn << " " << *in << dendl;

@@ -309,6 +309,8 @@ void MDCache::remove_inode(CInode *o)
     snap_inode_map.erase(o->vino());
   }

+  clear_taken_inos(o->ino());
+
   if (o->ino() < MDS_INO_SYSTEM_BASE) {
     if (o == root) root = 0;
     if (o == myin) myin = 0;
@@ -201,6 +201,19 @@ class MDCache {
   explicit MDCache(MDSRank *m, PurgeQueue &purge_queue_);
   ~MDCache();

+  void insert_taken_inos(inodeno_t ino) {
+    replay_taken_inos.insert(ino);
+  }
+  void clear_taken_inos(inodeno_t ino) {
+    replay_taken_inos.erase(ino);
+  }
+  bool test_and_clear_taken_inos(inodeno_t ino) {
+    return replay_taken_inos.erase(ino) != 0;
+  }
+  bool is_taken_inos_empty(void) {
+    return replay_taken_inos.empty();
+  }
+
   uint64_t cache_limit_memory(void) {
     return cache_memory_limit;
   }
@@ -1237,6 +1250,8 @@ class MDCache {
   StrayManager stray_manager;

 private:
+  std::set<inodeno_t> replay_taken_inos; // inos that were taken while replaying
+
   // -- fragmenting --
   struct ufragment {
     ufragment() {}
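A small point worth noting in `test_and_clear_taken_inos()` above: `std::set::erase(key)` returns the number of elements removed, so a single call both answers "was this ino taken?" and clears the record. A quick standalone check:

```cpp
#include <cassert>
#include <set>

int main() {
  std::set<unsigned long> taken{42};
  assert(taken.erase(42) == 1);  // present: removed and reported
  assert(taken.erase(42) == 0);  // absent on the second call
}
```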
@@ -3359,17 +3359,36 @@ CInode* Server::prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino
   // while session is opening.
   bool allow_prealloc_inos = mdr->session->is_open();

+  inodeno_t _useino = useino;
+
   // assign ino
-  if (allow_prealloc_inos && (mdr->used_prealloc_ino = _inode->ino = mdr->session->take_ino(useino))) {
-    mds->sessionmap.mark_projected(mdr->session);
-    dout(10) << "prepare_new_inode used_prealloc " << mdr->used_prealloc_ino
-             << " (" << mdr->session->info.prealloc_inos.size() << " left)"
-             << dendl;
-  } else {
-    mdr->alloc_ino =
-      _inode->ino = mds->inotable->project_alloc_id(useino);
-    dout(10) << "prepare_new_inode alloc " << mdr->alloc_ino << dendl;
-  }
+  do {
+    if (allow_prealloc_inos && (mdr->used_prealloc_ino = _inode->ino = mdr->session->take_ino(_useino))) {
+      if (mdcache->test_and_clear_taken_inos(_inode->ino)) {
+        _inode->ino = 0;
+        dout(10) << "prepare_new_inode used_prealloc " << mdr->used_prealloc_ino
+                 << " (" << mdr->session->info.prealloc_inos.size() << " left)"
+                 << " but has been taken, will try again!" << dendl;
+      } else {
+        mds->sessionmap.mark_projected(mdr->session);
+        dout(10) << "prepare_new_inode used_prealloc " << mdr->used_prealloc_ino
+                 << " (" << mdr->session->info.prealloc_inos.size() << " left)"
+                 << dendl;
+      }
+    } else {
+      mdr->alloc_ino =
+        _inode->ino = mds->inotable->project_alloc_id(_useino);
+      if (mdcache->test_and_clear_taken_inos(_inode->ino)) {
+        mds->inotable->apply_alloc_id(_inode->ino);
+        _inode->ino = 0;
+        dout(10) << "prepare_new_inode alloc " << mdr->alloc_ino
+                 << " but has been taken, will try again!" << dendl;
+      } else {
+        dout(10) << "prepare_new_inode alloc " << mdr->alloc_ino << dendl;
+      }
+    }
+    _useino = 0;
+  } while (!_inode->ino);

   if (useino && useino != _inode->ino) {
     dout(0) << "WARNING: client specified " << useino << " and i allocated " << _inode->ino << dendl;
@@ -3378,7 +3397,7 @@ CInode* Server::prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino
            << " but mds." << mds->get_nodeid() << " allocated " << _inode->ino;
     //ceph_abort(); // just for now.
   }

   if (allow_prealloc_inos &&
       mdr->session->get_num_projected_prealloc_inos() < g_conf()->mds_client_prealloc_inos / 2) {
     int need = g_conf()->mds_client_prealloc_inos - mdr->session->get_num_projected_prealloc_inos();
@@ -4461,6 +4480,9 @@ public:
   void finish(int r) override {
     ceph_assert(r == 0);

+    // crash the current MDS; the replacing MDS will test the journal replay
+    ceph_assert(!g_conf()->mds_kill_skip_replaying_inotable);
+
     dn->pop_projected_linkage();

     // dirty inode, dn, dir
@@ -6831,6 +6853,9 @@ public:
   void finish(int r) override {
     ceph_assert(r == 0);

+    // crash the current MDS; the replacing MDS will test the journal replay
+    ceph_assert(!g_conf()->mds_kill_skip_replaying_inotable);
+
     // link the inode
     dn->pop_projected_linkage();

@@ -7137,6 +7162,11 @@ void Server::handle_client_symlink(MDRequestRef& mdr)

   journal_and_reply(mdr, newi, dn, le, new C_MDS_mknod_finish(this, mdr, dn, newi));
   mds->balancer->maybe_fragment(dir, false);
+
+  // flush the journal as soon as possible
+  if (g_conf()->mds_kill_skip_replaying_inotable) {
+    mdlog->flush();
+  }
 }

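The `ceph_assert(!g_conf()->mds_kill_skip_replaying_inotable)` lines above are deliberate kill points: once the journal entry commits, the assert turns the commit callback into a deterministic crash site, and the early `mdlog->flush()` makes sure the entry actually reaches the journal before the crash. A sketch of the pattern, with hypothetical names:

```cpp
// Sketch of a config-gated kill point -- hypothetical names, not the MDS code.
#include <cassert>
#include <iostream>

struct Conf { bool kill_at_commit = false; };

void flush_journal() { std::cout << "journal flushed\n"; }

void on_journal_commit(const Conf& conf) {
  // With the flag set, the process aborts here, right after the entry is
  // durable -- exactly the state the replay test wants to exercise.
  assert(!conf.kill_at_commit);
  std::cout << "commit callback continues normally\n";
}

int main() {
  Conf conf;  // flag left unset: the kill point is inert
  flush_journal();
  on_journal_commit(conf);
}
```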
@@ -601,7 +601,7 @@ private:
   }

   void update_segment(LogSegment *ls);
-  void replay(MDSRank *mds, LogSegment *ls, MDPeerUpdate *su=NULL);
+  void replay(MDSRank *mds, LogSegment *ls, int type, MDPeerUpdate *su=NULL);
 };
 WRITE_CLASS_ENCODER_FEATURES(EMetaBlob)
 WRITE_CLASS_ENCODER_FEATURES(EMetaBlob::fullbit)
@@ -1163,7 +1163,7 @@ void EMetaBlob::generate_test_instances(std::list<EMetaBlob*>& ls)
   ls.push_back(new EMetaBlob());
 }

-void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDPeerUpdate *peerup)
+void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, int type, MDPeerUpdate *peerup)
 {
   dout(10) << "EMetaBlob.replay " << lump_map.size() << " dirlumps by " << client_name << dendl;

@@ -1567,11 +1567,16 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDPeerUpdate *peerup)
       logseg->open_files.push_back(&in->item_open_file);
   }

+  bool skip_replaying_inotable = g_conf()->mds_inject_skip_replaying_inotable;
+
   // allocated_inos
   if (inotablev) {
-    if (mds->inotable->get_version() >= inotablev) {
+    if (mds->inotable->get_version() >= inotablev ||
+        unlikely(type == EVENT_UPDATE && skip_replaying_inotable)) {
       dout(10) << "EMetaBlob.replay inotable tablev " << inotablev
                << " <= table " << mds->inotable->get_version() << dendl;
+      if (allocated_ino)
+        mds->mdcache->insert_taken_inos(allocated_ino);
     } else {
       dout(10) << "EMetaBlob.replay inotable v " << inotablev
                << " - 1 == table " << mds->inotable->get_version()
@@ -1595,9 +1600,12 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDPeerUpdate *peerup)
     }
   }
   if (sessionmapv) {
-    if (mds->sessionmap.get_version() >= sessionmapv) {
+    if (mds->sessionmap.get_version() >= sessionmapv ||
+        unlikely(type == EVENT_UPDATE && skip_replaying_inotable)) {
       dout(10) << "EMetaBlob.replay sessionmap v " << sessionmapv
                << " <= table " << mds->sessionmap.get_version() << dendl;
+      if (used_preallocated_ino)
+        mds->mdcache->insert_taken_inos(used_preallocated_ino);
     } else {
       dout(10) << "EMetaBlob.replay sessionmap v " << sessionmapv
                << ", table " << mds->sessionmap.get_version()
@@ -2237,7 +2245,8 @@ void EUpdate::update_segment()
 void EUpdate::replay(MDSRank *mds)
 {
   auto&& segment = get_segment();
-  metablob.replay(mds, segment);
+  dout(10) << "EUpdate::replay" << dendl;
+  metablob.replay(mds, segment, EVENT_UPDATE);

   if (had_peers) {
     dout(10) << "EUpdate.replay " << reqid << " had peers, expecting a matching ECommitted" << dendl;
@@ -2320,7 +2329,7 @@ void EOpen::replay(MDSRank *mds)
 {
   dout(10) << "EOpen.replay " << dendl;
   auto&& segment = get_segment();
-  metablob.replay(mds, segment);
+  metablob.replay(mds, segment, EVENT_OPEN);

   // note which segments inodes belong to, so we don't have to start rejournaling them
   for (const auto &ino : inos) {
@@ -2636,7 +2645,7 @@ void EPeerUpdate::replay(MDSRank *mds)
     dout(10) << "EPeerUpdate.replay prepare " << reqid << " for mds." << leader
              << ": applying commit, saving rollback info" << dendl;
     su = new MDPeerUpdate(origop, rollback);
-    commit.replay(mds, segment, su);
+    commit.replay(mds, segment, EVENT_PEERUPDATE, su);
     mds->mdcache->add_uncommitted_peer(reqid, segment, leader, su);
     break;

@@ -2648,7 +2657,7 @@ void EPeerUpdate::replay(MDSRank *mds)
   case EPeerUpdate::OP_ROLLBACK:
     dout(10) << "EPeerUpdate.replay abort " << reqid << " for mds." << leader
              << ": applying rollback commit blob" << dendl;
-    commit.replay(mds, segment);
+    commit.replay(mds, segment, EVENT_PEERUPDATE);
     mds->mdcache->finish_uncommitted_peer(reqid, false);
     break;

@@ -2827,7 +2836,7 @@ void ESubtreeMap::replay(MDSRank *mds)

   // first, stick the spanning tree in my cache
   //metablob.print(*_dout);
-  metablob.replay(mds, get_segment());
+  metablob.replay(mds, get_segment(), EVENT_SUBTREEMAP);

   // restore import/export maps
   for (map<dirfrag_t, vector<dirfrag_t> >::iterator p = subtrees.begin();
@@ -2902,7 +2911,7 @@ void EFragment::replay(MDSRank *mds)
     ceph_abort();
   }

-  metablob.replay(mds, segment);
+  metablob.replay(mds, segment, EVENT_FRAGMENT);
   if (in && g_conf()->mds_debug_frag)
     in->verify_dirfrags();
 }
@@ -2986,7 +2995,7 @@ void EExport::replay(MDSRank *mds)
 {
   dout(10) << "EExport.replay " << base << dendl;
   auto&& segment = get_segment();
-  metablob.replay(mds, segment);
+  metablob.replay(mds, segment, EVENT_EXPORT);

   CDir *dir = mds->mdcache->get_dirfrag(base);
   ceph_assert(dir);
@@ -3065,7 +3074,7 @@ void EImportStart::replay(MDSRank *mds)
   dout(10) << "EImportStart.replay " << base << " bounds " << bounds << dendl;
   //metablob.print(*_dout);
   auto&& segment = get_segment();
-  metablob.replay(mds, segment);
+  metablob.replay(mds, segment, EVENT_IMPORTSTART);

   // put in ambiguous import list
   mds->mdcache->add_ambiguous_import(base, bounds);
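Threading the event type through `EMetaBlob::replay()` is what keeps the injection narrow: only `EVENT_UPDATE` honors `mds_inject_skip_replaying_inotable`, so subtree maps, imports, and the rest still replay their tables. A simplified model of that gating (enum values and names are illustrative):

```cpp
#include <iostream>

enum EventType { EVENT_UPDATE, EVENT_OPEN, EVENT_SUBTREEMAP, EVENT_IMPORTSTART };

// Mirrors the shape of the new check: skip table replay when the table is
// already up to date, or when the injection targets a plain update event.
bool should_skip_table_replay(EventType type, bool inject_skip,
                              unsigned table_v, unsigned journal_v) {
  return table_v >= journal_v || (type == EVENT_UPDATE && inject_skip);
}

int main() {
  // Injection on, table behind the journal:
  std::cout << should_skip_table_replay(EVENT_UPDATE, true, 5, 10)      // 1: skipped
            << should_skip_table_replay(EVENT_SUBTREEMAP, true, 5, 10)  // 0: still replays
            << "\n";
}
```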