btrfs-progs: disk-io: Try to find a best copy when reading tree blocks
[BUG] If the first copy of a tree block has a bad key order, but the second copy is completely good, then "btrfs ins dump-tree -b <bytenr>" fails to print anything past the bad key: leaf 29786112 items 47 free space 983 generation 20 owner EXTENT_TREE leaf 29786112 flags 0x1(WRITTEN) backref revision 1 fs uuid 3381d111-94a3-4ac7-8f39-611bbbdab7e6 chunk uuid 9af1c3c7-2af5-488b-8553-530bd515f14c [snip] item 9 key (20975616 METADATA_ITEM 0) itemoff 3543 itemsize 33 refs 1 gen 16 flags TREE_BLOCK tree block skinny level 0 tree block backref root CHUNK_TREE item 10 key (29360128 BLOCK_GROUP_ITEM 33554432) itemoff 3519 itemsize 24 block group used 94208 chunk_objectid 256 flags METADATA|DUP ERROR: leaf 29786112 slot 11 pointer invalid, offset 1245184 size 0 leaf data limit 3995 ERROR: skip remaining slots While kernel can locate the good copy and acts just like nothing happened. [CAUSE] btrfs-progs uses read_tree_block() to try each copy. But it only uses less strict check_tree_block(), which has less sanity check than btrfs_check_node/leaf(). Some error like bad key order is ignored to allow btrfs check to fix it. This leads to above problem. [FIX] Introduce a new member, @candidate_mirror in read_tree_block(), which records the copy passes check_tree_block() but fails btrfs_check_leaf/node() as last chance. Only if no better copy found, then use @candidate_mirror. So btrfs-progs can act just like kernel to use best copy. Link: https://bugzilla.kernel.org/show_bug.cgi?id=202691 Reported-by: Yoon Jungyeon <jungyeon@gatech.edu> [Inspired by that image, not to fix any bug of that bugzilla] Signed-off-by: Qu Wenruo <wqu@suse.com>
This commit is contained in:
parent
f76136a8d0
commit
66d610010b
30
disk-io.c
30
disk-io.c
|
@ -327,6 +327,7 @@ struct extent_buffer* read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
|
|||
u32 sectorsize = fs_info->sectorsize;
|
||||
int mirror_num = 1;
|
||||
int good_mirror = 0;
|
||||
int candidate_mirror = 0;
|
||||
int num_copies;
|
||||
int ignore = 0;
|
||||
|
||||
|
@ -362,10 +363,30 @@ struct extent_buffer* read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
|
|||
&fs_info->recow_ebs);
|
||||
eb->refs++;
|
||||
}
|
||||
btrfs_set_buffer_uptodate(eb);
|
||||
return eb;
|
||||
|
||||
/*
|
||||
* check_tree_block() is less strict to allow btrfs
|
||||
* check to get raw eb with bad key order and fix it.
|
||||
* But we still need to try to get a good copy if
|
||||
* possible, or bad key order can go into tools like
|
||||
* btrfs ins dump-tree.
|
||||
*/
|
||||
if (btrfs_header_level(eb))
|
||||
ret = btrfs_check_node(fs_info, NULL, eb);
|
||||
else
|
||||
ret = btrfs_check_leaf(fs_info, NULL, eb);
|
||||
if (!ret || candidate_mirror == mirror_num) {
|
||||
btrfs_set_buffer_uptodate(eb);
|
||||
return eb;
|
||||
}
|
||||
if (candidate_mirror <= 0)
|
||||
candidate_mirror = mirror_num;
|
||||
}
|
||||
if (ignore) {
|
||||
if (candidate_mirror > 0) {
|
||||
mirror_num = candidate_mirror;
|
||||
continue;
|
||||
}
|
||||
if (check_tree_block(fs_info, eb)) {
|
||||
if (!fs_info->suppress_check_block_errors)
|
||||
print_tree_block_error(fs_info, eb,
|
||||
|
@ -387,7 +408,10 @@ struct extent_buffer* read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
|
|||
}
|
||||
mirror_num++;
|
||||
if (mirror_num > num_copies) {
|
||||
mirror_num = good_mirror;
|
||||
if (candidate_mirror > 0)
|
||||
mirror_num = candidate_mirror;
|
||||
else
|
||||
mirror_num = good_mirror;
|
||||
ignore = 1;
|
||||
continue;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue