tools/cephfs: make 'cephfs-data-scan scan_links' update inotable

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
This commit is contained in:
Yan, Zheng 2018-10-11 18:04:57 +08:00
parent fa24a0312f
commit 01089652d3
9 changed files with 129 additions and 25 deletions

View File

@ -157,6 +157,8 @@
* the callback function passed to LibRGWFS.readdir() now accepts a ``flags``
parameter. It will be the last parameter passed to the ``readdir()`` method.
* The 'cephfs-data-scan scan_links' command now automatically repairs inotables.
>=13.1.0
--------

View File

@ -183,14 +183,6 @@ The example below shows how to run 4 workers simultaneously:
It is **important** to ensure that all workers have completed the
scan_extents phase before any workers enter the scan_inodes phase.
The output of the 'scan_links' command includes the max used inode number for
each MDS rank. You may need to update the InoTable of each MDS rank.
::
cephfs-table-tool recovery-fs:x show inode
cephfs-table-tool recovery-fs:x take_inos <max ino of mds.x>
After completing the metadata recovery, you may want to run the cleanup
operation to delete ancillary data generated during recovery.

View File

@ -6,6 +6,7 @@ import json
import logging
import os
import time
from textwrap import dedent
import traceback
from collections import namedtuple, defaultdict
@ -541,7 +542,7 @@ class TestDataScan(CephFSTestCase):
log.info("{0}: {1}".format(pg_str, lines))
self.assertSetEqual(set(lines), set(pgs_to_files[pg_str]))
def test_scan_links(self):
def test_rebuild_linkage(self):
"""
The scan_links command fixes linkage errors
"""
@ -596,3 +597,48 @@ class TestDataScan(CephFSTestCase):
# link count was adjusted?
file1_nlink = self.mount_a.path_to_nlink("testdir1/file1")
self.assertEqual(file1_nlink, 2)
def test_rebuild_inotable(self):
    """
    The scan_links command repairs inotables.

    Brings up two active MDS ranks, creates a directory and pins it to
    rank 1, then creates a file whose inode number falls in the range
    checked below (>= 2 << 40, treated here as "allocated by mds.1").
    After flushing both journals and stopping the MDS daemons, both
    per-rank inotable objects are removed and 'scan_links' is run.  The
    test passes when the first free interval of each regenerated
    inotable starts at or above the inode number that rank handed out.
    """
    self.fs.set_max_mds(2)
    self.fs.wait_for_daemons()

    active_mds_names = self.fs.get_active_names()
    mds0_id = active_mds_names[0]
    mds1_id = active_mds_names[1]

    self.mount_a.run_shell(["mkdir", "dir1"])
    dir_ino = self.mount_a.path_to_ino("dir1")
    self.mount_a.setfattr("dir1", "ceph.dir.pin", "1")

    # Wait for subtree migration.  The wait is bounded so that a migration
    # which never happens fails the test instead of hanging the whole run.
    max_attempts = 300
    for _ in range(max_attempts):
        time.sleep(1)
        # allocate an inode from mds.1
        self.mount_a.run_shell(["touch", "dir1/file1"])
        file_ino = self.mount_a.path_to_ino("dir1/file1")
        if file_ino >= (2 << 40):
            break
        # Inode still came from the lower range: drop the file and retry.
        self.mount_a.run_shell(["rm", "-f", "dir1/file1"])
    else:
        self.fail("dir1 was not migrated to mds.1 after {0} attempts".format(
            max_attempts))

    self.mount_a.umount_wait()

    # Persist journaled metadata before taking the MDS daemons down.
    self.fs.mds_asok(["flush", "journal"], mds0_id)
    self.fs.mds_asok(["flush", "journal"], mds1_id)
    self.mds_cluster.mds_stop()

    # Remove both inotable objects so scan_links has to rebuild them.
    self.fs.rados(["rm", "mds0_inotable"])
    self.fs.rados(["rm", "mds1_inotable"])

    self.fs.data_scan(["scan_links", "--filesystem", self.fs.name])

    mds0_inotable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "inode"]))
    self.assertGreaterEqual(
        mds0_inotable['0']['data']['inotable']['free'][0]['start'], dir_ino)

    mds1_inotable = json.loads(self.fs.table_tool([self.fs.name + ":1", "show", "inode"]))
    self.assertGreaterEqual(
        mds1_inotable['1']['data']['inotable']['free'][0]['start'], file_ino)

View File

@ -223,3 +223,18 @@ bool InoTable::repair(inodeno_t id)
dout(10) << "repair: after status. ino = " << id << " pver =" << projected_version << " ver= " << version << dendl;
return true;
}
// Consume every inode number from the start of the first free interval up
// to and including `ino`.
//
// Returns true if the table was modified; returns false (and changes
// nothing) when the free set is empty or its first interval starts above
// `ino`.
bool InoTable::force_consume_to(inodeno_t ino)
{
  auto first = free.begin();
  const bool can_consume = first != free.end() && first.get_start() <= ino;
  if (!can_consume) {
    return false;
  }

  inodeno_t lowest = first.get_start();
  derr << "erasing " << lowest << " to " << ino << dendl;

  // The second argument is a length: lowest..ino inclusive.
  free.erase(lowest, ino - lowest + 1);

  // Keep the projected view in step with the committed one.
  projected_free = free;
  projected_version = ++version;
  return true;
}

View File

@ -96,19 +96,7 @@ class InoTable : public MDSTable {
*
* @return true if the table was modified
*/
bool force_consume_to(inodeno_t ino)
{
if (free.contains(ino)) {
inodeno_t min = free.begin().get_start();
std::cerr << "Erasing " << min << " to " << ino << std::endl;
free.erase(min, ino - min + 1);
projected_free = free;
projected_version = ++version;
return true;
} else {
return false;
}
}
bool force_consume_to(inodeno_t ino);
};
WRITE_CLASS_ENCODER(InoTable)

View File

@ -144,7 +144,7 @@ object_t MDSTable::get_object_name() const
{
char n[50];
if (per_mds)
snprintf(n, sizeof(n), "mds%d_%s", int(mds->get_nodeid()), table_name);
snprintf(n, sizeof(n), "mds%d_%s", int(rank), table_name);
else
snprintf(n, sizeof(n), "mds_%s", table_name);
return object_t(n);

View File

@ -31,7 +31,6 @@ protected:
bool per_mds;
mds_rank_t rank;
object_t get_object_name() const;
static const int STATE_UNDEF = 0;
static const int STATE_OPENING = 1;
@ -80,6 +79,7 @@ public:
if (is_active()) save(0);
}
object_t get_object_name() const;
void load(MDSInternalContextBase *onfinish);
void load_2(int, bufferlist&, Context *onfinish);

View File

@ -19,6 +19,7 @@
#include "include/util.h"
#include "mds/CInode.h"
#include "mds/InoTable.h"
#include "cls/cephfs/cls_cephfs_client.h"
#include "PgFiles.h"
@ -1132,7 +1133,21 @@ int DataScan::scan_links()
}
// For every entry of max_ino_map (p.first is printed as the MDS rank,
// p.second as that rank's max used inode number), bring the rank's inode
// table up to date so that it no longer offers inode numbers at or below
// p.second.
for (auto& p : max_ino_map) {
std::cout << "mds." << p.first << " max used ino " << p.second << std::endl;
// No MDS is running here, so the table is constructed with a null owner
// and told explicitly which rank it belongs to.
InoTable inotable(nullptr);
inotable.set_rank(p.first);
bool dirty = false;
int r = metadata_driver->load_table(&inotable);
if (r < 0) {
// Any load failure (read or decode) falls back to reset_state() and
// forces a write-back.
// NOTE(review): presumably reset_state() yields a fresh default table
// for this rank -- confirm against InoTable.
inotable.reset_state();
dirty = true;
}
// force_consume_to() reports whether it changed the table.
if (inotable.force_consume_to(p.second))
dirty = true;
// Only write the table back when it was reset or modified; the first
// save error aborts the command with that error code.
if (dirty) {
r = metadata_driver->save_table(&inotable);
if (r < 0)
return r;
}
}
return 0;
@ -1373,6 +1388,48 @@ int MetadataTool::read_dentry(inodeno_t parent_ino, frag_t frag,
return 0;
}
// Read the object named by table->get_object_name() from the metadata pool
// and decode it into `table`.
//
// The object is decoded as a version followed by the table state; the
// decoded version is then applied with force_replay_version().
//
// Returns 0 on success, the negative error from the read on I/O failure,
// or -EIO if the object contents cannot be decoded.
int MetadataDriver::load_table(MDSTable *table)
{
  const object_t oid = table->get_object_name();

  bufferlist raw;
  const int rc = metadata_io.read(oid.name, raw, 0, 0);
  if (rc < 0) {
    derr << "unable to read mds table '" << oid.name << "': "
         << cpp_strerror(rc) << dendl;
    return rc;
  }

  try {
    auto cursor = raw.cbegin();
    version_t stored_version;
    decode(stored_version, cursor);
    table->decode_state(cursor);
    table->force_replay_version(stored_version);
  } catch (const buffer::error &err) {
    derr << "unable to decode mds table '" << oid.name << "': "
         << err.what() << dendl;
    return -EIO;
  }

  return 0;
}
// Encode `table` (its version followed by its state) and write the result
// with write_full() to the object named by table->get_object_name().
//
// Returns 0 on success, otherwise the non-zero result of the write.
int MetadataDriver::save_table(MDSTable *table)
{
  const object_t oid = table->get_object_name();

  bufferlist payload;
  encode(table->get_version(), payload);
  table->encode_state(payload);

  const int rc = metadata_io.write_full(oid.name, payload);
  if (rc == 0) {
    return 0;
  }

  derr << "error updating mds table " << oid.name
       << ": " << cpp_strerror(rc) << dendl;
  return rc;
}
int MetadataDriver::inject_lost_and_found(
inodeno_t ino, const InodeStore &dentry)
{

View File

@ -17,6 +17,7 @@
#include "include/rados/librados.hpp"
class InodeStore;
class MDSTable;
class RecoveryDriver {
protected:
@ -232,6 +233,9 @@ class MetadataDriver : public RecoveryDriver, public MetadataTool
int init_roots(int64_t data_pool_id) override;
int check_roots(bool *result) override;
int load_table(MDSTable *table);
int save_table(MDSTable *table);
};
class DataScan : public MDSUtility, public MetadataTool