commit ba548ffe26 in ceph/ceph (https://github.com/ceph/ceph)

Merge PR 16202 into master

* refs/remotes/upstream/pull/16202/head:
    doc: add some docs about 'cephfs-data-scan scan_links'
    mds/FSMap.cc: remember stopped mds when resetting filesystem
    tools/cephfs: handle removed dentries when replaying journal

Reviewed-by: Patrick Donnelly <pdonnell@redhat.com>
doc/cephfs/disaster-recovery.rst
@@ -130,18 +130,20 @@ objects.
 Finally, you can regenerate metadata objects for missing files
 and directories based on the contents of a data pool. This is
-a two-phase process. First, scanning *all* objects to calculate
+a three-phase process. First, scanning *all* objects to calculate
 size and mtime metadata for inodes. Second, scanning the first
-object from every file to collect this metadata and inject
-it into the metadata pool.
+object from every file to collect this metadata and inject it into
+the metadata pool. Third, checking inode linkages and fixing found
+errors.
 
 ::
 
     cephfs-data-scan scan_extents <data pool>
     cephfs-data-scan scan_inodes <data pool>
+    cephfs-data-scan scan_links
 
-This command may take a *very long* time if there are many
-files or very large files in the data pool.
+'scan_extents' and 'scan_inodes' commands may take a *very long* time
+if there are many files or very large files in the data pool.
 
 To accelerate the process, run multiple instances of the tool.
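As a rough illustration of running multiple instances: the tool can split the object range across parallel workers. The flag names below (--worker_n for the worker index, --worker_m for the total worker count) are quoted from memory rather than from this commit, and should be verified against cephfs-data-scan --help before use:

    cephfs-data-scan scan_extents --worker_n 0 --worker_m 2 <data pool>
    cephfs-data-scan scan_extents --worker_n 1 --worker_m 2 <data pool>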
@@ -246,7 +248,7 @@ it with empty file system data structures:
     ceph osd pool create recovery <pg-num> replicated <crush-ruleset-name>
     ceph fs new recovery-fs recovery <data pool> --allow-dangerous-metadata-overlay
     cephfs-data-scan init --force-init --filesystem recovery-fs --alternate-pool recovery
-    ceph fs reset recovery-fs --yes-i-realy-mean-it
+    ceph fs reset recovery-fs --yes-i-really-mean-it
     cephfs-table-tool recovery-fs:all reset session
     cephfs-table-tool recovery-fs:all reset snap
     cephfs-table-tool recovery-fs:all reset inode
@@ -256,8 +258,9 @@ results to the alternate pool:
 ::
 
-    cephfs-data-scan scan_extents --alternate-pool recovery --filesystem <original filesystem name>
+    cephfs-data-scan scan_extents --alternate-pool recovery --filesystem <original filesystem name> <original data pool name>
     cephfs-data-scan scan_inodes --alternate-pool recovery --filesystem <original filesystem name> --force-corrupt --force-init <original data pool name>
+    cephfs-data-scan scan_links --filesystem recovery-fs
 
 If the damaged filesystem contains dirty journal data, it may be recovered next
 with:
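For readability, here is the same sequence with hypothetical concrete names substituted for the placeholders (an original filesystem called cephfs whose data pool is cephfs_data; both names are illustrative only):

    cephfs-data-scan scan_extents --alternate-pool recovery --filesystem cephfs cephfs_data
    cephfs-data-scan scan_inodes --alternate-pool recovery --filesystem cephfs --force-corrupt --force-init cephfs_data
    cephfs-data-scan scan_links --filesystem recovery-fs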
@@ -267,10 +270,10 @@ with:
     cephfs-journal-tool --rank=<original filesystem name>:0 event recover_dentries list --alternate-pool recovery
     cephfs-journal-tool --rank recovery-fs:0 journal reset --force
 
-After recovery, some recovered directories will have incorrect link counts.
-Ensure the parameter mds_debug_scatterstat is set to false (the default) to
-prevent the MDS from checking the link counts, then run a forward scrub to
-repair them. Ensure you have an MDS running and issue:
+After recovery, some recovered directories will have incorrect statistics.
+Ensure the parameters mds_verify_scatter and mds_debug_scatterstat are set
+to false (the default) to prevent the MDS from checking the statistics, then
+run a forward scrub to repair them. Ensure you have an MDS running and issue:
 
 ::
 
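The forward scrub command itself falls outside this hunk's context lines. It is issued against a running MDS; the exact syntax varies by release, so treat the shape below as an assumption to check against the documentation for your version:

    ceph daemon mds.<id> scrub_path / recursive repair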
src/mds/FSMap.cc
@@ -295,6 +295,12 @@ void FSMap::reset_filesystem(fs_cluster_id_t fscid)
   new_fs->mds_map.standby_count_wanted = fs->mds_map.standby_count_wanted;
   new_fs->mds_map.enabled = true;
 
+  // Remember mds ranks that have ever started. (They should load old inotable
+  // instead of creating new one if they start again.)
+  new_fs->mds_map.stopped.insert(fs->mds_map.in.begin(), fs->mds_map.in.end());
+  new_fs->mds_map.stopped.insert(fs->mds_map.stopped.begin(), fs->mds_map.stopped.end());
+  new_fs->mds_map.stopped.erase(mds_rank_t(0));
+
   // Persist the new FSMap
   filesystems[new_fs->fscid] = new_fs;
 }
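A minimal standalone sketch of the bookkeeping this hunk adds, using a plain std::set<int> in place of the Ceph map and rank types (the simplified names are mine, not from the source): the reset map records every rank that was ever active or stopped, except rank 0, so a rank that starts again loads its old inotable rather than creating a new one.

// Standalone illustration, not Ceph code.
#include <cassert>
#include <set>

struct MiniMDSMap {
  std::set<int> in;       // currently active ranks
  std::set<int> stopped;  // ranks that ran before and were stopped
};

MiniMDSMap reset_map(const MiniMDSMap& old_map) {
  MiniMDSMap new_map;
  // Remember ranks that have ever started, so that a restarted rank
  // loads its old inotable instead of creating a fresh one.
  new_map.stopped.insert(old_map.in.begin(), old_map.in.end());
  new_map.stopped.insert(old_map.stopped.begin(), old_map.stopped.end());
  new_map.stopped.erase(0);  // rank 0 is recreated by the reset filesystem
  return new_map;
}

int main() {
  MiniMDSMap old_map{{0, 1, 2}, {3}};
  MiniMDSMap fresh = reset_map(old_map);
  assert(fresh.stopped == (std::set<int>{1, 2, 3}));
  return 0;
}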
src/tools/cephfs/JournalTool.cc
@@ -720,6 +720,15 @@ int JournalTool::recover_dentries(
       read_keys.insert(key);
     }
 
+    list<EMetaBlob::nullbit> const &nb_list = lump.get_dnull();
+    for (auto& nb : nb_list) {
+      // Get a key like "foobar_head"
+      std::string key;
+      dentry_key_t dn_key(nb.dnlast, nb.dn.c_str());
+      dn_key.encode(key);
+      read_keys.insert(key);
+    }
+
     // Perform bulk read of existing dentries
     std::map<std::string, bufferlist> read_vals;
     r = input.omap_get_vals_by_keys(frag_oid.name, read_keys, &read_vals);
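A standalone sketch of the key-building step above, with simplified types of my own (not Ceph code): for each journalled null dentry, derive the omap key under which that dentry would be stored in the dirfrag object (e.g. "foobar_head" for a head dentry, as the code comment notes) and add it to the set of keys fetched in one bulk omap read. The exact encoding of non-head snapids is an assumption here.

#include <cstdint>
#include <iostream>
#include <set>
#include <string>
#include <utility>
#include <vector>

const uint64_t NOSNAP = ~0ull;  // stand-in for CEPH_NOSNAP

// Simplified dentry key encoding: "<name>_head" for head dentries,
// otherwise "<name>_<snapid>" (the real tool encodes the snapid differently).
std::string encode_dentry_key(const std::string& name, uint64_t snapid) {
  if (snapid == NOSNAP)
    return name + "_head";
  return name + "_" + std::to_string(snapid);
}

int main() {
  // Hypothetical null dentries seen while replaying a journal dirlump.
  std::vector<std::pair<std::string, uint64_t>> null_bits = {
      {"foobar", NOSNAP}, {"removed_file", NOSNAP}};

  std::set<std::string> read_keys;
  for (const auto& nb : null_bits)
    read_keys.insert(encode_dentry_key(nb.first, nb.second));

  for (const auto& k : read_keys)
    std::cout << k << "\n";  // foobar_head, removed_file_head
  return 0;
}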
@@ -866,6 +875,48 @@ int JournalTool::recover_dentries(
       }
     }
 
+    std::set<std::string> null_vals;
+    for (auto& nb : nb_list) {
+      std::string key;
+      dentry_key_t dn_key(nb.dnlast, nb.dn.c_str());
+      dn_key.encode(key);
+
+      dout(4) << "inspecting nullbit " << frag_oid.name << "/" << nb.dn
+        << dendl;
+
+      auto it = read_vals.find(key);
+      if (it != read_vals.end()) {
+        dout(4) << "dentry exists, will remove" << dendl;
+
+        bufferlist::iterator q = it->second.begin();
+        snapid_t dnfirst;
+        ::decode(dnfirst, q);
+        char dentry_type;
+        ::decode(dentry_type, q);
+
+        bool remove_dentry = false;
+        if (dentry_type == 'L') {
+          dout(10) << "Existing hardlink inode in slot to be (maybe) removed "
+            << "by null journal dn '" << nb.dn.c_str()
+            << "' with lump fnode version " << lump.fnode.version
+            << "vs existing fnode version " << old_fnode_version << dendl;
+          remove_dentry = old_fnode_version < lump.fnode.version;
+        } else if (dentry_type == 'I') {
+          dout(10) << "Existing full inode in slot to be (maybe) removed "
+            << "by null journal dn '" << nb.dn.c_str()
+            << "' with lump fnode version " << lump.fnode.version
+            << "vs existing fnode version " << old_fnode_version << dendl;
+          remove_dentry = old_fnode_version < lump.fnode.version;
+        } else {
+          dout(4) << "corrupt dentry in backing store, will remove" << dendl;
+          remove_dentry = true;
+        }
+
+        if (remove_dentry)
+          null_vals.insert(key);
+      }
+    }
+
     // Write back any new/changed dentries
     if (!write_vals.empty()) {
       r = output.omap_set(frag_oid.name, write_vals);
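A standalone sketch of the decision rule this hunk applies to each null dentry (my own simplified types, not Ceph code): keep the on-disk dentry if the backing dirfrag's fnode version is newer than the journalled lump's, remove it if the journal is newer, and always remove entries whose type byte is neither 'L' (hardlink) nor 'I' (full inode), since those are treated as corrupt.

#include <cassert>
#include <cstdint>

bool should_remove_dentry(char dentry_type,
                          uint64_t existing_fnode_version,
                          uint64_t journal_fnode_version) {
  if (dentry_type == 'L' || dentry_type == 'I') {
    // The null journal entry wins only if it is newer than the stored dirfrag.
    return existing_fnode_version < journal_fnode_version;
  }
  // Unknown type byte: corrupt dentry in the backing store, remove it.
  return true;
}

int main() {
  assert(should_remove_dentry('I', 5, 7));   // journal newer: remove
  assert(!should_remove_dentry('L', 9, 7));  // backing store newer: keep
  assert(should_remove_dentry('?', 9, 7));   // corrupt type: always remove
  return 0;
}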
@@ -875,6 +926,16 @@ int JournalTool::recover_dentries(
         return r;
       }
     }
+
+    // remove any null dentries
+    if (!null_vals.empty()) {
+      r = output.omap_rm_keys(frag_oid.name, null_vals);
+      if (r != 0) {
+        derr << "error removing dentries from " << frag_oid.name
+          << ": " << cpp_strerror(r) << dendl;
+        return r;
+      }
+    }
   }
 
   /* Now that we've looked at the dirlumps, we finally pay attention to
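For context, a hedged sketch of how that final removal looks through the public librados C++ API, outside the MDS tooling: omap_rm_keys deletes the selected dentry keys from the dirfrag object's omap. The pool name, object name, and key set below are hypothetical placeholders; the real tool operates on the dirfrag objects it resolved earlier. Compile with -lrados.

#include <rados/librados.hpp>
#include <iostream>
#include <set>
#include <string>

int main() {
  librados::Rados cluster;
  if (cluster.init("admin") < 0 ||            // connect as client.admin
      cluster.conf_read_file(nullptr) < 0 ||  // read the default ceph.conf
      cluster.connect() < 0) {
    std::cerr << "failed to connect to cluster" << std::endl;
    return 1;
  }

  librados::IoCtx io;
  if (cluster.ioctx_create("cephfs_metadata", io) < 0) {  // hypothetical pool
    std::cerr << "failed to open metadata pool" << std::endl;
    return 1;
  }

  // Hypothetical dirfrag object and dentry keys selected for removal.
  std::set<std::string> null_keys = {"removed_file_head"};
  int r = io.omap_rm_keys("10000000000.00000000", null_keys);
  if (r != 0)
    std::cerr << "omap_rm_keys failed: " << r << std::endl;
  return r == 0 ? 0 : 1;
}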