diff --git a/PendingReleaseNotes b/PendingReleaseNotes index f122be29c30..130b66e423d 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -157,6 +157,8 @@ * the callback function passed to LibRGWFS.readdir() now accepts a ``flags`` parameter. it will be the last parameter passed to ``readdir()` method. +* The 'cephfs-data-scan scan_links' command now automatically repairs inotables. + >=13.1.0 -------- diff --git a/doc/cephfs/disaster-recovery-experts.rst b/doc/cephfs/disaster-recovery-experts.rst index 1c585a664f3..75c03f0302c 100644 --- a/doc/cephfs/disaster-recovery-experts.rst +++ b/doc/cephfs/disaster-recovery-experts.rst @@ -183,14 +183,6 @@ The example below shows how to run 4 workers simultaneously: It is **important** to ensure that all workers have completed the scan_extents phase before any workers enter the scan_inodes phase. -Output of 'scan_links' command includes max used inode number for each -MDS rank. You may need to update InoTables of each MDS rank. - -:: - cephfs-table-tool recovery-fs:x show inode - cephfs-table-tool recovery-fs:x take_inos = (2 << 40): + break + self.mount_a.run_shell(["rm", "-f", "dir1/file1"]) + + self.mount_a.umount_wait() + + self.fs.mds_asok(["flush", "journal"], mds0_id) + self.fs.mds_asok(["flush", "journal"], mds1_id) + self.mds_cluster.mds_stop() + + self.fs.rados(["rm", "mds0_inotable"]) + self.fs.rados(["rm", "mds1_inotable"]) + + self.fs.data_scan(["scan_links", "--filesystem", self.fs.name]) + + mds0_inotable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "inode"])) + self.assertGreaterEqual( + mds0_inotable['0']['data']['inotable']['free'][0]['start'], dir_ino) + + mds1_inotable = json.loads(self.fs.table_tool([self.fs.name + ":1", "show", "inode"])) + self.assertGreaterEqual( + mds1_inotable['1']['data']['inotable']['free'][0]['start'], file_ino) diff --git a/src/mds/InoTable.cc b/src/mds/InoTable.cc index 803d5431c0d..43fe380c789 100644 --- a/src/mds/InoTable.cc +++ b/src/mds/InoTable.cc @@ 
-223,3 +223,23 @@ bool InoTable::repair(inodeno_t id) dout(10) << "repair: after status. ino = " << id << " pver =" << projected_version << " ver= " << version << dendl; return true; } + +/* Mark every free ino from the lowest free one up to and including 'ino' as used; returns true if the table was modified, false if the free set is empty or already starts above 'ino'. */ +bool InoTable::force_consume_to(inodeno_t ino) +{ + auto it = free.begin(); + if (it != free.end() && it.get_start() <= ino) { + inodeno_t min = it.get_start(); + derr << "erasing " << min << " to " << ino << dendl; + /* 'free' may be fragmented, or its first interval may end before 'ino', while erase() expects a range lying inside a single interval; so subtract only the part of [min, ino] that is actually free. */ + decltype(free) consumed; + consumed.insert(min, ino - min + 1); + consumed.intersection_of(free); + free.subtract(consumed); + projected_free = free; + projected_version = ++version; + return true; + } else { + return false; + } +} diff --git a/src/mds/InoTable.h b/src/mds/InoTable.h index cc228797ae3..0e26e1e9872 100644 --- a/src/mds/InoTable.h +++ b/src/mds/InoTable.h @@ -96,19 +96,7 @@ class InoTable : public MDSTable { * * @return true if the table was modified */ - bool force_consume_to(inodeno_t ino) - { - if (free.contains(ino)) { - inodeno_t min = free.begin().get_start(); - std::cerr << "Erasing " << min << " to " << ino << std::endl; - free.erase(min, ino - min + 1); - projected_free = free; - projected_version = ++version; - return true; - } else { - return false; - } - } + bool force_consume_to(inodeno_t ino); }; WRITE_CLASS_ENCODER(InoTable) diff --git a/src/mds/MDSTable.cc b/src/mds/MDSTable.cc index 8af30ac5471..7107ff3e4e2 100644 --- a/src/mds/MDSTable.cc +++ b/src/mds/MDSTable.cc @@ -144,7 +144,7 @@ object_t MDSTable::get_object_name() const { char n[50]; if (per_mds) - snprintf(n, sizeof(n), "mds%d_%s", int(mds->get_nodeid()), table_name); + snprintf(n, sizeof(n), "mds%d_%s", int(rank), table_name); else snprintf(n, sizeof(n), "mds_%s", table_name); return object_t(n); diff --git a/src/mds/MDSTable.h b/src/mds/MDSTable.h index a7a8502ca9d..65658687823 100644 --- a/src/mds/MDSTable.h +++ b/src/mds/MDSTable.h @@ -31,7 +31,6 @@ protected: bool per_mds; mds_rank_t rank; - object_t get_object_name() const; static const int STATE_UNDEF = 0; static const int STATE_OPENING = 1; @@ -80,6 +79,7 @@ public: if (is_active()) save(0); } + object_t 
get_object_name() const; void load(MDSInternalContextBase *onfinish); void load_2(int, bufferlist&, Context *onfinish); diff --git a/src/tools/cephfs/DataScan.cc b/src/tools/cephfs/DataScan.cc index 8cc94604736..5463af9d8d8 100644 --- a/src/tools/cephfs/DataScan.cc +++ b/src/tools/cephfs/DataScan.cc @@ -19,6 +19,7 @@ #include "include/util.h" #include "mds/CInode.h" +#include "mds/InoTable.h" #include "cls/cephfs/cls_cephfs_client.h" #include "PgFiles.h" @@ -1132,7 +1133,21 @@ int DataScan::scan_links() } for (auto& p : max_ino_map) { - std::cout << "mds." << p.first << " max used ino " << p.second << std::endl; + InoTable inotable(nullptr); + inotable.set_rank(p.first); + bool dirty = false; + int r = metadata_driver->load_table(&inotable); /* negative r: the table for rank p.first could not be read or decoded */ + if (r < 0) { /* any load failure leads to reset_state() and forces the save below */ + inotable.reset_state(); + dirty = true; + } + if (inotable.force_consume_to(p.second)) /* p.second is the value stored in max_ino_map for rank p.first */ + dirty = true; + if (dirty) { + r = metadata_driver->save_table(&inotable); + if (r < 0) + return r; + } } return 0; @@ -1373,6 +1388,48 @@ int MetadataTool::read_dentry(inodeno_t parent_ino, frag_t frag, return 0; } +int MetadataDriver::load_table(MDSTable *table) /* Read the object named by table->get_object_name() through metadata_io and decode a version_t followed by the table state into the given table; returns 0 on success, the negative read result, or -EIO when decoding throws buffer::error. */ +{ + object_t table_oid = table->get_object_name(); + + bufferlist table_bl; + int r = metadata_io.read(table_oid.name, table_bl, 0, 0); /* NOTE(review): length 0 presumably means "read the whole object" -- confirm against the IoCtx read semantics */ + if (r < 0) { + derr << "unable to read mds table '" << table_oid.name << "': " + << cpp_strerror(r) << dendl; + return r; + } + + try { + version_t table_ver; + auto p = table_bl.cbegin(); + decode(table_ver, p); + table->decode_state(p); + table->force_replay_version(table_ver); + } catch (const buffer::error &err) { + derr << "unable to decode mds table '" << table_oid.name << "': " + << err.what() << dendl; + return -EIO; + } + return 0; +} + +int MetadataDriver::save_table(MDSTable *table) /* Encode the table's version followed by its state and write the result with write_full to the object named by table->get_object_name(); returns 0 on success or the non-zero write_full result. */ +{ + object_t table_oid = table->get_object_name(); + + bufferlist table_bl; + encode(table->get_version(), table_bl); + table->encode_state(table_bl); + int r = metadata_io.write_full(table_oid.name, table_bl); + if (r != 0) { /* any non-zero result is treated as failure */ + derr << 
"error updating mds table " << table_oid.name + << ": " << cpp_strerror(r) << dendl; + return r; + } + return 0; +} + int MetadataDriver::inject_lost_and_found( inodeno_t ino, const InodeStore &dentry) { diff --git a/src/tools/cephfs/DataScan.h b/src/tools/cephfs/DataScan.h index e44d9a50795..007fe824c74 100644 --- a/src/tools/cephfs/DataScan.h +++ b/src/tools/cephfs/DataScan.h @@ -17,6 +17,7 @@ #include "include/rados/librados.hpp" class InodeStore; +class MDSTable; class RecoveryDriver { protected: @@ -232,6 +233,9 @@ class MetadataDriver : public RecoveryDriver, public MetadataTool int init_roots(int64_t data_pool_id) override; int check_roots(bool *result) override; + + int load_table(MDSTable *table); + int save_table(MDSTable *table); }; class DataScan : public MDSUtility, public MetadataTool