/* Patch summary: introduces CDir::pop_lru_subdirs, a list of subdirectory inodes linked through CInode::item_pop_lru, and makes MDBalancer::find_exports() walk that list instead of every dentry of the dirfrag. */ diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 6fe0911277c..d0829daa394 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -203,6 +203,7 @@ CDir::CDir(CInode *in, frag_t fg, MDCache *mdcache, bool auth) : pop_nested(ceph_clock_now()), pop_auth_subtree(ceph_clock_now()), pop_auth_subtree_nested(ceph_clock_now()), + pop_lru_subdirs(member_offset(CInode, item_pop_lru)), /* constructed with the offset of CInode::item_pop_lru, the per-inode link used by this list */ num_dentries_nested(0), num_dentries_auth_subtree(0), num_dentries_auth_subtree_nested(0), dir_auth(CDIR_AUTH_DEFAULT) @@ -406,7 +407,6 @@ CDentry* CDir::add_primary_dentry(std::string_view dname, CInode *in, items[dn->key()] = dn; dn->get_linkage()->inode = in; - in->set_primary_parent(dn); link_inode_work(dn, in); @@ -553,7 +553,6 @@ void CDir::link_primary_inode(CDentry *dn, CInode *in) assert(dn->get_linkage()->is_null()); dn->get_linkage()->inode = in; - in->set_primary_parent(dn); link_inode_work(dn, in); @@ -578,7 +577,7 @@ void CDir::link_primary_inode(CDentry *dn, CInode *in) void CDir::link_inode_work( CDentry *dn, CInode *in) { assert(dn->get_linkage()->get_inode() == in); - assert(in->get_parent_dn() == dn); + in->set_primary_parent(dn); /* the primary parent is now set here for every caller; the per-caller calls are removed by this patch */ // set inode version //in->inode.version = dn->get_version(); @@ -676,9 +675,11 @@ void CDir::unlink_inode_work( CDentry *dn ) // unlink auth_pin count if (in->auth_pins + in->nested_auth_pins) dn->adjust_nested_auth_pins(0 - (in->auth_pins + in->nested_auth_pins), 0 - in->auth_pins, NULL); - + // detach inode in->remove_primary_parent(dn); + if (in->is_dir()) + in->item_pop_lru.remove_myself(); /* drop an unlinked directory from any pop_lru_subdirs list; find_exports() asserts that listed inodes still have the listing dirfrag as parent */ dn->get_linkage()->inode = 0; } else { assert(!dn->get_linkage()->is_null()); @@ -853,10 +854,13 @@ void CDir::steal_dentry(CDentry *dn) if (dn->get_linkage()->is_primary()) { CInode *in = dn->get_linkage()->get_inode(); auto pi = in->get_projected_inode(); - if (dn->get_linkage()->get_inode()->is_dir()) + if (in->is_dir()) { fnode.fragstat.nsubdirs++; - else + if (in->item_pop_lru.is_on_list()) + pop_lru_subdirs.push_back(&in->item_pop_lru); /* only directories already on some list are re-added to this dirfrag; NOTE(review): presumably push_back unlinks the item from its previous list first, confirm in elist */ + } else { 
fnode.fragstat.nfiles++; + } fnode.rstat.rbytes += pi->accounted_rstat.rbytes; fnode.rstat.rfiles += pi->accounted_rstat.rfiles; fnode.rstat.rsubdirs += pi->accounted_rstat.rsubdirs; diff --git a/src/mds/CDir.h b/src/mds/CDir.h index 472845042fc..632694c2412 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -379,6 +379,8 @@ protected: load_spread_t pop_spread; + elist pop_lru_subdirs; /* subdirectory inodes with recent popularity hits: hit_dir() pushes to the front, find_exports() walks the list and drops idle entries */ + // and to provide density int num_dentries_nested; int num_dentries_auth_subtree; diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 3a6c7658003..dc83aeea793 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -668,6 +668,7 @@ public: int auth_pin_freeze_allowance = 0; inode_load_vec_t pop; + elist::item item_pop_lru; /* link that places this inode on the pop_lru_subdirs list of its parent dirfrag */ // friends friend class Server; diff --git a/src/mds/MDBalancer.cc b/src/mds/MDBalancer.cc index 4acd44db507..87e044ff274 100644 --- a/src/mds/MDBalancer.cc +++ b/src/mds/MDBalancer.cc @@ -992,13 +992,18 @@ void MDBalancer::find_exports(CDir *dir, dout(7) << " find_exports in " << dir_pop << " " << *dir << " need " << need << " (" << needmin << " - " << needmax << ")" << dendl; double subdir_sum = 0; - for (auto it = dir->begin(); it != dir->end(); ++it) { - CInode *in = it->second->get_linkage()->get_inode(); - if (!in) continue; - if (!in->is_dir()) continue; + for (elist::iterator it = dir->pop_lru_subdirs.begin_use_current(); + !it.end(); ) { + CInode *in = *it; + ++it; /* step past this entry first, because the entry may be removed from the list at the end of the loop body */ + + assert(in->is_dir()); + assert(in->get_parent_dir() == dir); list dfls; in->get_nested_dirfrags(dfls); + + size_t num_idle_frags = 0; /* number of nested dirfrags whose pop is below minchunk */ for (list::iterator p = dfls.begin(); p != dfls.end(); ++p) { @@ -1017,7 +1022,10 @@ void MDBalancer::find_exports(CDir *dir, subdir_sum += pop; dout(15) << " subdir pop " << pop << " " << *subdir << dendl; - if (pop < minchunk) continue; + if (pop < minchunk) { + num_idle_frags++; + continue; + } // lucky find? 
if (pop > needmin && pop < needmax) { @@ -1035,6 +1043,8 @@ void MDBalancer::find_exports(CDir *dir, } else smaller.insert(pair(pop, subdir)); } + if (dfls.size() == num_idle_frags) + in->item_pop_lru.remove_myself(); /* every nested dirfrag was counted idle (or there were none): stop tracking this subdir until it is pushed onto the list again, e.g. by hit_dir() */ } dout(15) << " sum " << subdir_sum << " / " << dir_pop << dendl; @@ -1202,14 +1212,21 @@ void MDBalancer::hit_dir(const utime_t& now, CDir *dir, int type, int who, doubl bool hit_subtree_nested = dir->is_auth(); // all nested auth subtrees while (true) { + CDir *pdir = dir->inode->get_parent_dir(); /* parent dirfrag, may be null; reused for the list update and for stepping upward */ dir->pop_nested.get(type).hit(now, mds->mdcache->decayrate, amount); if (rd_adj != 0.0) dir->pop_nested.get(META_POP_IRD).adjust(now, mds->mdcache->decayrate, rd_adj); if (hit_subtree) { dir->pop_auth_subtree.get(type).hit(now, mds->mdcache->decayrate, amount); + if (rd_adj != 0.0) dir->pop_auth_subtree.get(META_POP_IRD).adjust(now, mds->mdcache->decayrate, rd_adj); + + if (dir->is_subtree_root()) + hit_subtree = false; // end of auth domain, stop hitting auth counters. + else if (pdir) + pdir->pop_lru_subdirs.push_front(&dir->get_inode()->item_pop_lru); /* a hit below the subtree root puts this directory at the front of the parent list */ } if (hit_subtree_nested) { @@ -1217,12 +1234,8 @@ void MDBalancer::hit_dir(const utime_t& now, CDir *dir, int type, int who, doubl if (rd_adj != 0.0) dir->pop_auth_subtree_nested.get(META_POP_IRD).adjust(now, mds->mdcache->decayrate, rd_adj); } - - if (dir->is_subtree_root()) - hit_subtree = false; // end of auth domain, stop hitting auth counters. 
- - if (dir->inode->get_parent_dn() == 0) break; - dir = dir->inode->get_parent_dn()->get_dir(); + if (!pdir) break; + dir = pdir; /* climb using the parent fetched at the top of the iteration */ } } @@ -1268,11 +1281,14 @@ void MDBalancer::adjust_pop_for_rename(CDir *pdir, CDir *dir, utime_t now, bool bool adjust_subtree_nest = dir->is_auth(); bool adjust_subtree = adjust_subtree_nest && !dir->is_subtree_root(); + CDir *cur = dir; /* dirfrag whose inode is linked into the list of pdir; follows pdir up the tree */ while (true) { if (inc) { pdir->pop_nested.add(now, rate, dir->pop_nested); - if (adjust_subtree) + if (adjust_subtree) { pdir->pop_auth_subtree.add(now, rate, dir->pop_auth_subtree); + pdir->pop_lru_subdirs.push_front(&cur->get_inode()->item_pop_lru); + } if (adjust_subtree_nest) pdir->pop_auth_subtree_nested.add(now, rate, dir->pop_auth_subtree_nested); @@ -1287,6 +1303,7 @@ void MDBalancer::adjust_pop_for_rename(CDir *pdir, CDir *dir, utime_t now, bool if (pdir->is_subtree_root()) adjust_subtree = false; + cur = pdir; pdir = pdir->inode->get_parent_dir(); if (!pdir) break; } diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index b58e394c612..52a1d3478bb 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -943,10 +943,13 @@ void MDCache::try_subtree_merge_at(CDir *dir, set *to_eval, bool adjust // adjust popularity? if (adjust_pop && dir->is_auth()) { utime_t now = ceph_clock_now(); + CDir *cur = dir; /* child dirfrag at the current level of the upward walk */ CDir *p = dir->get_parent_dir(); while (p) { p->pop_auth_subtree.add(now, decayrate, dir->pop_auth_subtree); + p->pop_lru_subdirs.push_front(&cur->get_inode()->item_pop_lru); /* each ancestor that receives the merged popularity also lists the child it came through */ if (p->is_subtree_root()) break; + cur = p; p = p->inode->get_parent_dir(); } } diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 2ed79d2124f..829d526fe5a 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -3003,6 +3003,9 @@ void Migrator::decode_import_inode(CDentry *dn, bufferlist::iterator& blp, assert(!dn->get_linkage()->get_inode()); dn->dir->link_primary_inode(dn, in); } + + if (in->is_dir()) + dn->dir->pop_lru_subdirs.push_back(&in->item_pop_lru); /* an imported directory is appended to the list of the dirfrag that holds its dentry */ // add inode? if (added) {