btrfs-progs/check/mode-lowmem.c

5670 lines
145 KiB
C

/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include "kerncompat.h"
#include <sys/stat.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include "kernel-lib/rbtree.h"
#include "kernel-shared/accessors.h"
#include "kernel-shared/extent-io-tree.h"
#include "kernel-shared/uapi/btrfs_tree.h"
#include "kernel-shared/extent_io.h"
#include "kernel-shared/ulist.h"
#include "kernel-shared/ctree.h"
#include "kernel-shared/transaction.h"
#include "kernel-shared/disk-io.h"
#include "kernel-shared/backref.h"
#include "kernel-shared/compression.h"
#include "kernel-shared/volumes.h"
#include "kernel-shared/file-item.h"
#include "kernel-shared/tree-checker.h"
#include "common/messages.h"
#include "common/internal.h"
#include "common/utils.h"
#include "common/device-utils.h"
#include "check/repair.h"
#include "check/mode-common.h"
#include "check/mode-lowmem.h"
static u64 last_allocated_chunk;
static u64 total_used = 0;
static int calc_extent_flag(struct btrfs_root *root, struct extent_buffer *eb,
u64 *flags_ret)
{
struct btrfs_root *extent_root;
struct btrfs_root_item *ri = &root->root_item;
struct btrfs_extent_inline_ref *iref;
struct btrfs_extent_item *ei;
struct btrfs_key key;
struct btrfs_path *path = NULL;
unsigned long ptr;
unsigned long end;
u64 flags;
u64 owner = 0;
u64 offset;
int slot;
int type;
int ret = 0;
/*
* Except file/reloc tree, we can not have FULL BACKREF MODE
*/
if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
goto normal;
/* root node */
if (eb->start == btrfs_root_bytenr(ri))
goto normal;
if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
goto full_backref;
owner = btrfs_header_owner(eb);
if (owner == root->objectid)
goto normal;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
key.objectid = btrfs_header_bytenr(eb);
key.type = (u8)-1;
key.offset = (u64)-1;
extent_root = btrfs_extent_root(gfs_info, key.objectid);
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret <= 0) {
ret = -EIO;
goto out;
}
if (ret > 0) {
ret = btrfs_previous_extent_item(extent_root, path,
key.objectid);
if (ret)
goto full_backref;
}
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
eb = path->nodes[0];
slot = path->slots[0];
ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
flags = btrfs_extent_flags(eb, ei);
if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
goto full_backref;
ptr = (unsigned long)(ei + 1);
end = (unsigned long)ei + btrfs_item_size(eb, slot);
if (key.type == BTRFS_EXTENT_ITEM_KEY)
ptr += sizeof(struct btrfs_tree_block_info);
next:
/* Reached extent item ends normally */
if (ptr == end)
goto full_backref;
/* Beyond extent item end, wrong item size */
if (ptr > end) {
error("extent item at bytenr %llu slot %d has wrong size",
eb->start, slot);
goto full_backref;
}
iref = (struct btrfs_extent_inline_ref *)ptr;
offset = btrfs_extent_inline_ref_offset(eb, iref);
type = btrfs_extent_inline_ref_type(eb, iref);
if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
goto normal;
ptr += btrfs_extent_inline_ref_size(type);
goto next;
normal:
*flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
goto out;
full_backref:
*flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
out:
btrfs_free_path(path);
return ret;
}
/*
* for a tree node or leaf, if it's shared, indeed we don't need to iterate it
* in every fs or file tree check. Here we find its all root ids, and only check
* it in the fs or file tree which has the smallest root id.
*/
static bool need_check(struct btrfs_root *root, struct ulist *roots)
{
struct rb_node *node;
struct ulist_node *u;
/*
* @roots can be empty if it belongs to tree reloc tree
* In that case, we should always check the leaf, as we can't use
* the tree owner to ensure some other root will check it.
*/
if (roots->nnodes == 1 || roots->nnodes == 0)
return true;
node = rb_first(&roots->root);
u = rb_entry(node, struct ulist_node, rb_node);
/*
* current root id is not smallest, we skip it and let it be checked
* in the fs or file tree who hash the smallest root id.
*/
if (root->objectid != u->val)
return false;
return true;
}
/*
* for a tree node or leaf, we record its reference count, so later if we still
* process this node or leaf, don't need to compute its reference count again.
*
* @bytenr if @bytenr == (u64)-1, only update nrefs->full_backref[level]
*/
static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
struct extent_buffer *eb, struct node_refs *nrefs,
u64 level, int check_all)
{
struct ulist *roots;
u64 refs = 0;
u64 flags = 0;
int root_level = btrfs_header_level(root->node);
int check;
int ret;
if (nrefs->bytenr[level] == bytenr)
return 0;
if (bytenr != (u64)-1) {
/* the return value of this function seems a mistake */
ret = btrfs_lookup_extent_info(NULL, gfs_info, bytenr,
level, 1, &refs, &flags);
/* temporary fix */
if (ret < 0 && !check_all)
return ret;
nrefs->bytenr[level] = bytenr;
nrefs->refs[level] = refs;
nrefs->full_backref[level] = 0;
nrefs->checked[level] = 0;
if (refs > 1) {
ret = btrfs_find_all_roots(NULL, gfs_info, bytenr,
0, &roots);
if (ret)
return -EIO;
check = need_check(root, roots);
ulist_free(roots);
nrefs->need_check[level] = check;
} else {
if (!check_all) {
nrefs->need_check[level] = 1;
} else {
if (level == root_level) {
nrefs->need_check[level] = 1;
} else {
/*
* The node refs may have not been
* updated if upper needs checking (the
* lowest root_objectid) the node can
* be checked.
*/
nrefs->need_check[level] =
nrefs->need_check[level + 1];
}
}
}
}
if (check_all && eb) {
calc_extent_flag(root, eb, &flags);
if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
nrefs->full_backref[level] = 1;
}
return 0;
}
/*
* Mark all extents unfree in the block group. And set @block_group->cached
* according to @cache.
*/
static int modify_block_group_cache(struct btrfs_block_group *block_group, int cache)
{
struct extent_io_tree *free_space_cache = &gfs_info->free_space_cache;
u64 start = block_group->start;
u64 end = start + block_group->length;
if (cache && !block_group->cached) {
block_group->cached = 1;
clear_extent_dirty(free_space_cache, start, end - 1, NULL);
}
if (!cache && block_group->cached) {
block_group->cached = 0;
clear_extent_dirty(free_space_cache, start, end - 1, NULL);
}
return 0;
}
/*
* Modify block groups which have @flags unfree in free space cache.
*
* @cache: if 0, clear block groups cache state;
* not 0, mark blocks groups cached.
*/
static int modify_block_groups_cache(u64 flags, int cache)
{
struct btrfs_root *root = btrfs_block_group_root(gfs_info);
struct btrfs_key key;
struct btrfs_path path = { 0 };
struct btrfs_block_group *bg_cache;
struct btrfs_block_group_item *bi;
struct btrfs_block_group_item bg_item;
struct extent_buffer *eb;
int slot;
int ret;
key.objectid = 0;
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0) {
errno = -ret;
error("fail to search block groups due to %m");
goto out;
}
while (1) {
eb = path.nodes[0];
slot = path.slots[0];
btrfs_item_key_to_cpu(eb, &key, slot);
bg_cache = btrfs_lookup_block_group(gfs_info, key.objectid);
if (!bg_cache) {
ret = -ENOENT;
goto out;
}
bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
read_extent_buffer(eb, &bg_item, (unsigned long)bi,
sizeof(bg_item));
if (btrfs_stack_block_group_flags(&bg_item) & flags)
modify_block_group_cache(bg_cache, cache);
ret = btrfs_next_item(root, &path);
if (ret > 0) {
ret = 0;
goto out;
}
if (ret < 0)
goto out;
}
out:
btrfs_release_path(&path);
return ret;
}
static int mark_block_groups_full(u64 flags)
{
return modify_block_groups_cache(flags, 1);
}
static int clear_block_groups_full(u64 flags)
{
return modify_block_groups_cache(flags, 0);
}
static int create_chunk_and_block_group(u64 flags, u64 *start, u64 *nbytes)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *root = btrfs_block_group_root(gfs_info);
int ret;
if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0)
return -EINVAL;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error_msg(ERROR_MSG_START_TRANS, "%m");
return ret;
}
ret = btrfs_alloc_chunk(trans, gfs_info, start, nbytes, flags);
if (ret) {
errno = -ret;
error("fail to allocate new chunk %m");
goto out;
}
ret = btrfs_make_block_group(trans, gfs_info, 0, flags, *start,
*nbytes);
if (ret) {
errno = -ret;
error("fail to make block group for chunk %llu %llu %m",
*start, *nbytes);
goto out;
}
out:
btrfs_commit_transaction(trans, root);
return ret;
}
static int force_cow_in_new_chunk(u64 *start_ret)
{
struct btrfs_block_group *bg;
u64 start;
u64 nbytes;
u64 alloc_profile;
u64 flags;
int ret;
alloc_profile = (gfs_info->avail_metadata_alloc_bits &
gfs_info->metadata_alloc_profile);
flags = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
if (btrfs_fs_incompat(gfs_info, MIXED_GROUPS))
flags |= BTRFS_BLOCK_GROUP_DATA;
ret = create_chunk_and_block_group(flags, &start, &nbytes);
if (ret)
goto err;
printf("Created new chunk [%llu %llu]\n", start, nbytes);
flags = BTRFS_BLOCK_GROUP_METADATA;
/* Mark all metadata block groups cached and full in free space*/
ret = mark_block_groups_full(flags);
if (ret)
goto clear_bgs_full;
bg = btrfs_lookup_block_group(gfs_info, start);
if (!bg) {
ret = -ENOENT;
error("fail to look up block group %llu %llu", start, nbytes);
goto clear_bgs_full;
}
/* Clear block group cache just allocated */
ret = modify_block_group_cache(bg, 0);
if (ret)
goto clear_bgs_full;
if (start_ret)
*start_ret = start;
return 0;
clear_bgs_full:
clear_block_groups_full(flags);
err:
return ret;
}
/*
* Returns 0 means not almost full.
* Returns >0 means almost full.
* Returns <0 means fatal error.
*/
static int is_chunk_almost_full(u64 start)
{
struct btrfs_path path = { 0 };
struct btrfs_key key;
struct btrfs_root *root = btrfs_block_group_root(gfs_info);
struct btrfs_block_group_item *bi;
struct btrfs_block_group_item bg_item;
struct extent_buffer *eb;
u64 used;
u64 total;
u64 min_free;
int ret;
int slot;
key.objectid = start;
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
key.offset = (u64)-1;
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (!ret)
ret = -EIO;
if (ret < 0)
goto out;
ret = btrfs_previous_item(root, &path, start,
BTRFS_BLOCK_GROUP_ITEM_KEY);
if (ret) {
error("failed to find block group %llu", start);
ret = -ENOENT;
goto out;
}
eb = path.nodes[0];
slot = path.slots[0];
btrfs_item_key_to_cpu(eb, &key, slot);
if (key.objectid != start) {
ret = -ENOENT;
goto out;
}
total = key.offset;
bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
used = btrfs_stack_block_group_used(&bg_item);
/*
* if the free space in the chunk is less than %10 of total,
* or not not enough for CoW once, we think the chunk is almost full.
*/
min_free = max_t(u64, (BTRFS_MAX_LEVEL + 1) * gfs_info->nodesize,
div_factor(total, 1));
if ((total - used) > min_free)
ret = 0;
else
ret = 1;
out:
btrfs_release_path(&path);
return ret;
}
/*
* Returns <0 for error.
* Returns 0 for success.
*/
static int try_to_force_cow_in_new_chunk(u64 old_start, u64 *new_start)
{
int ret;
if (old_start) {
ret = is_chunk_almost_full(old_start);
if (ret <= 0)
return ret;
}
ret = force_cow_in_new_chunk(new_start);
return ret;
}
static int avoid_extents_overwrite(void)
{
int ret;
int mixed = btrfs_fs_incompat(gfs_info, MIXED_GROUPS);
if (gfs_info->excluded_extents)
return 0;
if (last_allocated_chunk != (u64)-1) {
ret = try_to_force_cow_in_new_chunk(last_allocated_chunk,
&last_allocated_chunk);
if (!ret)
goto out;
/*
* If failed, do not try to allocate chunk again in
* next call.
* If there is no space left to allocate, try to exclude all
* metadata blocks. Mixed filesystem is unsupported.
*/
last_allocated_chunk = (u64)-1;
if (ret != -ENOSPC || mixed)
goto out;
}
printf(
"Try to exclude all metadata blocks and extents, it may be slow\n");
ret = exclude_metadata_blocks();
out:
if (ret) {
errno = -ret;
error("failed to avoid extents overwrite %m");
}
return ret;
}
static int end_avoid_extents_overwrite(void)
{
int ret = 0;
cleanup_excluded_extents();
if (last_allocated_chunk)
ret = clear_block_groups_full(BTRFS_BLOCK_GROUP_METADATA);
return ret;
}
/*
* Delete the item @path point to. A wrapper of btrfs_del_item().
*
* If deleted successfully, @path will point to the previous item of the
* deleted item.
*/
static int delete_item(struct btrfs_root *root, struct btrfs_path *path)
{
struct btrfs_key key;
struct btrfs_trans_handle *trans;
int ret = 0;
ret = avoid_extents_overwrite();
if (ret)
return ret;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error_msg(ERROR_MSG_START_TRANS, "%m");
goto out;
}
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret) {
ret = -ENOENT;
goto out;
}
ret = btrfs_del_item(trans, root, path);
if (ret)
goto out;
if (path->slots[0] == 0)
btrfs_prev_leaf(root, path);
else
path->slots[0]--;
out:
btrfs_commit_transaction(trans, root);
if (ret)
error("failed to delete root %llu item[%llu, %u, %llu]",
root->objectid, key.objectid, key.type, key.offset);
else
printf("Deleted root %llu item[%llu, %u, %llu]\n",
root->objectid, key.objectid, key.type, key.offset);
return ret;
}
/*
* Wrapper function for btrfs_fix_block_accounting().
*
* Returns 0 on success.
* Returns != 0 on error.
*/
static int repair_block_accounting(void)
{
struct btrfs_trans_handle *trans = NULL;
int ret;
trans = btrfs_start_transaction(gfs_info->tree_root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error_msg(ERROR_MSG_START_TRANS, "%m");
return ret;
}
ret = btrfs_fix_block_accounting(trans);
btrfs_commit_transaction(trans, gfs_info->tree_root);
return ret;
}
/*
* This function only handles BACKREF_MISSING,
* If corresponding extent item exists, increase the ref, else insert an extent
* item and backref.
*
* Returns error bits after repair.
*/
static int repair_tree_block_ref(struct btrfs_root *root,
struct extent_buffer *node,
struct node_refs *nrefs, int level, int err)
{
struct btrfs_trans_handle *trans = NULL;
struct btrfs_root *extent_root;
struct btrfs_path path = { 0 };
struct btrfs_extent_item *ei;
struct btrfs_tree_block_info *bi;
struct btrfs_key key;
struct extent_buffer *eb;
u32 size = sizeof(*ei);
u32 node_size = gfs_info->nodesize;
int insert_extent = 0;
int skinny_metadata = btrfs_fs_incompat(gfs_info, SKINNY_METADATA);
int root_level = btrfs_header_level(root->node);
int generation;
int ret;
u64 owner;
u64 bytenr;
u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
u64 parent = 0;
if ((err & BACKREF_MISSING) == 0)
return err;
WARN_ON(level > BTRFS_MAX_LEVEL);
WARN_ON(level < 0);
bytenr = btrfs_header_bytenr(node);
owner = btrfs_header_owner(node);
generation = btrfs_header_generation(node);
key.objectid = bytenr;
key.type = (u8)-1;
key.offset = (u64)-1;
/* Search for the extent item */
extent_root = btrfs_extent_root(gfs_info, bytenr);
ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
if (ret <= 0) {
ret = -EIO;
goto out;
}
ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
if (ret)
insert_extent = 1;
/* calculate if the extent item flag is full backref or not */
if (nrefs->full_backref[level] != 0)
flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
ret = avoid_extents_overwrite();
if (ret)
goto out;
trans = btrfs_start_transaction(extent_root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error_msg(ERROR_MSG_START_TRANS, "%m");
trans = NULL;
goto out;
}
/* insert an extent item */
if (insert_extent) {
struct btrfs_disk_key copy_key;
generation = btrfs_header_generation(node);
if (level < root_level && nrefs->full_backref[level + 1] &&
owner != root->objectid) {
flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
}
key.objectid = bytenr;
if (!skinny_metadata) {
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = node_size;
size += sizeof(*bi);
} else {
key.type = BTRFS_METADATA_ITEM_KEY;
key.offset = level;
}
btrfs_release_path(&path);
ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
size);
if (ret)
goto out;
eb = path.nodes[0];
ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
btrfs_set_extent_refs(eb, ei, 0);
btrfs_set_extent_generation(eb, ei, generation);
btrfs_set_extent_flags(eb, ei, flags);
if (!skinny_metadata) {
bi = (struct btrfs_tree_block_info *)(ei + 1);
memset_extent_buffer(eb, 0, (unsigned long)bi,
sizeof(*bi));
btrfs_set_disk_key_objectid(&copy_key, root->objectid);
btrfs_set_disk_key_type(&copy_key, 0);
btrfs_set_disk_key_offset(&copy_key, 0);
btrfs_set_tree_block_level(eb, bi, level);
btrfs_set_tree_block_key(eb, bi, &copy_key);
}
btrfs_mark_buffer_dirty(eb);
printf("Added an extent item [%llu %u]\n", bytenr, node_size);
btrfs_update_block_group(trans, bytenr, node_size, 1, 0);
nrefs->refs[level] = 0;
nrefs->full_backref[level] =
flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
btrfs_release_path(&path);
}
if (level < root_level && nrefs->full_backref[level + 1] &&
owner != root->objectid)
parent = nrefs->bytenr[level + 1];
/* increase the ref */
ret = btrfs_inc_extent_ref(trans, bytenr, node_size, parent,
root->objectid, level, 0);
nrefs->refs[level]++;
out:
if (trans)
btrfs_commit_transaction(trans, extent_root);
btrfs_release_path(&path);
if (ret) {
errno = -ret;
error(
"failed to repair tree block ref start %llu root %llu due to %m",
bytenr, root->objectid);
} else {
printf("Added one tree block ref start %llu %s %llu\n",
bytenr, parent ? "parent" : "root",
parent ? parent : root->objectid);
err &= ~BACKREF_MISSING;
}
return err;
}
/*
* Update global fs information.
*/
static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
int level)
{
u32 free_nrs;
struct extent_buffer *eb = path->nodes[level];
total_btree_bytes += eb->len;
if (fs_root_objectid(root->objectid))
total_fs_tree_bytes += eb->len;
if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
total_extent_tree_bytes += eb->len;
if (level == 0) {
btree_space_waste += btrfs_leaf_free_space(eb);
} else {
free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(gfs_info) -
btrfs_header_nritems(eb));
btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
}
}
/*
* Find the @index according by @ino and name.
* Notice:time efficiency is O(N)
*
* @root: the root of the fs/file tree
* @index_ret: the index as return value
* @namebuf: the name to match
* @name_len: the length of name to match
* @file_type: the file_type of INODE_ITEM to match
*
* Returns 0 if found and *@index_ret will be modified with right value
* Returns< 0 not found and *@index_ret will be (u64)-1
*/
static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
u64 *index_ret, char *namebuf, u32 name_len,
u8 file_type)
{
struct btrfs_path path = { 0 };
struct extent_buffer *node;
struct btrfs_dir_item *di;
struct btrfs_key key;
struct btrfs_key location;
char name[BTRFS_NAME_LEN] = {0};
u32 total;
u32 cur = 0;
u32 len;
u32 data_len;
u8 filetype;
int slot;
int ret;
UASSERT(index_ret);
/* search from the last index */
key.objectid = dirid;
key.offset = (u64)-1;
key.type = BTRFS_DIR_INDEX_KEY;
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0)
return ret;
loop:
ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
if (ret) {
ret = -ENOENT;
*index_ret = (64)-1;
goto out;
}
/* Check whether inode_id/filetype/name match */
node = path.nodes[0];
slot = path.slots[0];
di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
total = btrfs_item_size(node, slot);
while (cur < total) {
ret = -ENOENT;
len = btrfs_dir_name_len(node, di);
data_len = btrfs_dir_data_len(node, di);
btrfs_dir_item_key_to_cpu(node, di, &location);
if (location.objectid != location_id ||
location.type != BTRFS_INODE_ITEM_KEY ||
location.offset != 0)
goto next;
filetype = btrfs_dir_ftype(node, di);
if (file_type != filetype)
goto next;
if (len > BTRFS_NAME_LEN)
len = BTRFS_NAME_LEN;
read_extent_buffer(node, name, (unsigned long)(di + 1), len);
if (len != name_len || strncmp(namebuf, name, len))
goto next;
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
*index_ret = key.offset;
ret = 0;
goto out;
next:
len += sizeof(*di) + data_len;
di = (struct btrfs_dir_item *)((char *)di + len);
cur += len;
}
goto loop;
out:
btrfs_release_path(&path);
return ret;
}
/*
* Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
* INODE_REF/INODE_EXTREF match.
*
* @root: the root of the fs/file tree
* @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
* value while find index
* @location_key: location key of the struct btrfs_dir_item to match
* @name: the name to match
* @namelen: the length of name
* @file_type: the type of file to math
*
* Return 0 if no error occurred.
* Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
* DIR_ITEM/DIR_INDEX
* Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
* and DIR_ITEM/DIR_INDEX mismatch
*/
static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
struct btrfs_key *location_key, char *name,
u32 namelen, u8 file_type)
{
struct btrfs_path path = { 0 };
struct extent_buffer *node;
struct btrfs_dir_item *di;
struct btrfs_key location;
char namebuf[BTRFS_NAME_LEN] = {0};
u32 total;
u32 cur = 0;
u32 len;
u32 data_len;
u8 filetype;
int slot;
int ret;
/* get the index by traversing all index */
if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
ret = find_dir_index(root, key->objectid,
location_key->objectid, &key->offset,
name, namelen, file_type);
if (ret)
ret = DIR_INDEX_MISSING;
return ret;
}
ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
if (ret) {
ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
DIR_INDEX_MISSING;
goto out;
}
/* Check whether inode_id/filetype/name match */
node = path.nodes[0];
slot = path.slots[0];
di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
total = btrfs_item_size(node, slot);
while (cur < total) {
ret = key->type == BTRFS_DIR_ITEM_KEY ?
DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
len = btrfs_dir_name_len(node, di);
data_len = btrfs_dir_data_len(node, di);
btrfs_dir_item_key_to_cpu(node, di, &location);
if (location.objectid != location_key->objectid ||
location.type != location_key->type ||
location.offset != location_key->offset)
goto next;
filetype = btrfs_dir_ftype(node, di);
if (file_type != filetype)
goto next;
if (len > BTRFS_NAME_LEN) {
len = BTRFS_NAME_LEN;
warning("root %llu %s[%llu %llu] name too long %u, trimmed",
root->objectid,
key->type == BTRFS_DIR_ITEM_KEY ?
"DIR_ITEM" : "DIR_INDEX",
key->objectid, key->offset, len);
}
read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
len);
if (len != namelen || strncmp(namebuf, name, len))
goto next;
ret = 0;
goto out;
next:
len += sizeof(*di) + data_len;
di = (struct btrfs_dir_item *)((char *)di + len);
cur += len;
}
out:
btrfs_release_path(&path);
return ret;
}
/*
* The ternary means dir item, dir index and relative inode ref.
* The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
* DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
* strategy:
* If two of three is missing or mismatched, delete the existing one.
* If one of three is missing or mismatched, add the missing one.
*
* returns 0 means success.
* returns not 0 means on error;
*/
static int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
u64 index, char *name, int name_len, u8 filetype,
int err)
{
struct btrfs_trans_handle *trans;
int stage = 0;
int ret = 0;
/*
* stage shall be one of following valild values:
* 0: Fine, nothing to do.
* 1: One of three is wrong, so add missing one.
* 2: Two of three is wrong, so delete existed one.
*/
if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
stage++;
if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
stage++;
if (err & (INODE_REF_MISSING))
stage++;
/* stage must be smllarer than 3 */
UASSERT(stage < 3);
trans = btrfs_start_transaction(root, 1);
if (stage == 2) {
ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
name_len, 0);
goto out;
}
if (stage == 1) {
ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
name_len, 0);
if (ret)
goto out;
ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
filetype, &index, 1, 1);
goto out;
}
out:
btrfs_commit_transaction(trans, root);
if (ret)
error("fail to repair inode %llu name %s filetype %u",
ino, name, filetype);
else
printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
stage == 2 ? "Delete" : "Add",
ino, name, filetype);
return ret;
}
/*
* Prints inode ref error message
*/
static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
u64 index, const char *namebuf, int name_len,
u8 filetype, int err)
{
if (!err)
return;
/* root dir error */
if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
error(
"root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
root->objectid, key->objectid, key->offset, namebuf);
return;
}
/* normal error */
if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
root->objectid, key->offset,
btrfs_name_hash(namebuf, name_len),
err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
namebuf, filetype);
if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
root->objectid, key->offset, index,
err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
namebuf, filetype);
}
/*
* Traverse the given INODE_REF and call find_dir_item() to find related
* DIR_ITEM/DIR_INDEX.
*
* @root: the root of the fs/file tree
* @ref_key: the key of the INODE_REF
* @path the path provides node and slot
* @refs: the count of INODE_REF
* @mode: the st_mode of INODE_ITEM
* @name_ret: returns with the first ref's name
* @name_len_ret: len of the name_ret
*
* Return 0 if no error occurred.
*/
static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
struct btrfs_path *path, char *name_ret,
u32 *namelen_ret, u64 *refs_ret, int mode)
{
struct btrfs_key key;
struct btrfs_key location;
struct btrfs_inode_ref *ref;
struct extent_buffer *node;
char namebuf[BTRFS_NAME_LEN] = {0};
u32 total;
u32 cur = 0;
u32 len;
u32 name_len;
u64 index;
int ret;
int err = 0;
int tmp_err;
int slot;
int need_research = 0;
u64 refs;
begin:
err = 0;
cur = 0;
refs = *refs_ret;
/* since after repair, path and the dir item may be changed */
if (need_research) {
need_research = 0;
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
/*
* The item was deleted, let the path point to the last checked
* item.
*/
if (ret > 0) {
if (path->slots[0] == 0)
btrfs_prev_leaf(root, path);
else
path->slots[0]--;
}
if (ret)
goto out;
}
location.objectid = ref_key->objectid;
location.type = BTRFS_INODE_ITEM_KEY;
location.offset = 0;
node = path->nodes[0];
slot = path->slots[0];
memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
total = btrfs_item_size(node, slot);
next:
/* Update inode ref count */
refs++;
tmp_err = 0;
index = btrfs_inode_ref_index(node, ref);
name_len = btrfs_inode_ref_name_len(node, ref);
if (name_len <= BTRFS_NAME_LEN) {
len = name_len;
} else {
len = BTRFS_NAME_LEN;
warning("root %llu INODE_REF[%llu %llu] name too long",
root->objectid, ref_key->objectid, ref_key->offset);
}
read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
/* copy the first name found to name_ret */
if (refs == 1 && name_ret) {
memcpy(name_ret, namebuf, len);
*namelen_ret = len;
}
/* Check root dir ref */
if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
if (index != 0 || len != strlen("..") ||
strncmp("..", namebuf, len) ||
ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
/* set err bits then repair will delete the ref */
err |= DIR_INDEX_MISSING;
err |= DIR_ITEM_MISSING;
}
goto end;
}
/* Find related DIR_INDEX */
key.objectid = ref_key->offset;
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = index;
tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
imode_to_type(mode));
/* Find related dir_item */
key.objectid = ref_key->offset;
key.type = BTRFS_DIR_ITEM_KEY;
key.offset = btrfs_name_hash(namebuf, len);
tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
imode_to_type(mode));
end:
if (tmp_err && opt_check_repair) {
ret = repair_ternary_lowmem(root, ref_key->offset,
ref_key->objectid, index, namebuf,
name_len, imode_to_type(mode),
tmp_err);
if (!ret) {
need_research = 1;
goto begin;
}
}
print_inode_ref_err(root, ref_key, index, namebuf, name_len,
imode_to_type(mode), tmp_err);
err |= tmp_err;
len = sizeof(*ref) + name_len;
ref = (struct btrfs_inode_ref *)((char *)ref + len);
cur += len;
if (cur < total)
goto next;
out:
*refs_ret = refs;
return err;
}
/*
* Traverse the given INODE_EXTREF and call find_dir_item() to find related
* DIR_ITEM/DIR_INDEX.
*
* @root: the root of the fs/file tree
* @ref_key: the key of the INODE_EXTREF
* @refs: the count of INODE_EXTREF
* @mode: the st_mode of INODE_ITEM
*
* Return 0 if no error occurred.
*/
static int check_inode_extref(struct btrfs_root *root,
struct btrfs_key *ref_key,
struct extent_buffer *node, int slot, u64 *refs,
int mode)
{
struct btrfs_key key;
struct btrfs_key location;
struct btrfs_inode_extref *extref;
char namebuf[BTRFS_NAME_LEN] = {0};
u32 total;
u32 cur = 0;
u32 len;
u32 name_len;
u64 index;
u64 parent;
int ret;
int err = 0;
location.objectid = ref_key->objectid;
location.type = BTRFS_INODE_ITEM_KEY;
location.offset = 0;
extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
total = btrfs_item_size(node, slot);
next:
/* update inode ref count */
(*refs)++;
name_len = btrfs_inode_extref_name_len(node, extref);
index = btrfs_inode_extref_index(node, extref);
parent = btrfs_inode_extref_parent(node, extref);
if (name_len <= BTRFS_NAME_LEN) {
len = name_len;
} else {
len = BTRFS_NAME_LEN;
warning("root %llu INODE_EXTREF[%llu %llu] name too long",
root->objectid, ref_key->objectid, ref_key->offset);
}
read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
/* Check root dir ref name */
if (index == 0 && strncmp(namebuf, "..", name_len)) {
error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
root->objectid, ref_key->objectid, ref_key->offset,
namebuf);
err |= ROOT_DIR_ERROR;
}
/* find related dir_index */
key.objectid = parent;
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = index;
ret = find_dir_item(root, &key, &location, namebuf, len, mode);
err |= ret;
/* find related dir_item */
key.objectid = parent;
key.type = BTRFS_DIR_ITEM_KEY;
key.offset = btrfs_name_hash(namebuf, len);
ret = find_dir_item(root, &key, &location, namebuf, len, mode);
err |= ret;
len = sizeof(*extref) + name_len;
extref = (struct btrfs_inode_extref *)((char *)extref + len);
cur += len;
if (cur < total)
goto next;
return err;
}
/*
* Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
* DIR_ITEM/DIR_INDEX match.
* Return with @index_ret.
*
* @root: the root of the fs/file tree
* @key: the key of the INODE_REF/INODE_EXTREF
* @name: the name in the INODE_REF/INODE_EXTREF
* @namelen: the length of name in the INODE_REF/INODE_EXTREF
* @index_ret: the index in the INODE_REF/INODE_EXTREF,
* value (64)-1 means do not check index
*
* Return 0 if no error occurred.
* Return >0 for error bitmap
*/
static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
char *name, int namelen, u64 *index_ret)
{
struct btrfs_path path = { 0 };
struct btrfs_inode_ref *ref;
struct btrfs_inode_extref *extref;
struct extent_buffer *node;
char ref_namebuf[BTRFS_NAME_LEN] = {0};
u32 total;
u32 cur = 0;
u32 len;
u32 ref_namelen;
u64 ref_index;
u64 parent;
u64 dir_id;
int slot;
int ret;
UASSERT(index_ret);
ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
if (ret) {
ret = INODE_REF_MISSING;
goto extref;
}
node = path.nodes[0];
slot = path.slots[0];
ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
total = btrfs_item_size(node, slot);
/* Iterate all entry of INODE_REF */
while (cur < total) {
ret = INODE_REF_MISSING;
ref_namelen = btrfs_inode_ref_name_len(node, ref);
ref_index = btrfs_inode_ref_index(node, ref);
if (*index_ret != (u64)-1 && *index_ret != ref_index)
goto next_ref;
if (cur + sizeof(*ref) + ref_namelen > total ||
ref_namelen > BTRFS_NAME_LEN) {
warning("root %llu INODE %s[%llu %llu] name too long",
root->objectid,
key->type == BTRFS_INODE_REF_KEY ?
"REF" : "EXTREF",
key->objectid, key->offset);
if (cur + sizeof(*ref) > total)
break;
len = min_t(u32, total - cur - sizeof(*ref),
BTRFS_NAME_LEN);
} else {
len = ref_namelen;
}
read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
len);
if (len != namelen || strncmp(ref_namebuf, name, len))
goto next_ref;
*index_ret = ref_index;
ret = 0;
goto out;
next_ref:
len = sizeof(*ref) + ref_namelen;
ref = (struct btrfs_inode_ref *)((char *)ref + len);
cur += len;
}
extref:
/* Skip if not support EXTENDED_IREF feature */
if (!btrfs_fs_incompat(gfs_info, EXTENDED_IREF))
goto out;
btrfs_release_path(&path);
dir_id = key->offset;
key->type = BTRFS_INODE_EXTREF_KEY;
key->offset = btrfs_extref_hash(dir_id, name, namelen);
ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
if (ret) {
ret = INODE_REF_MISSING;
goto out;
}
node = path.nodes[0];
slot = path.slots[0];
extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
cur = 0;
total = btrfs_item_size(node, slot);
/* Iterate all entry of INODE_EXTREF */
while (cur < total) {
ret = INODE_REF_MISSING;
ref_namelen = btrfs_inode_extref_name_len(node, extref);
ref_index = btrfs_inode_extref_index(node, extref);
parent = btrfs_inode_extref_parent(node, extref);
if (*index_ret != (u64)-1 && *index_ret != ref_index)
goto next_extref;
if (parent != dir_id)
goto next_extref;
if (ref_namelen <= BTRFS_NAME_LEN) {
len = ref_namelen;
} else {
len = BTRFS_NAME_LEN;
warning("root %llu INODE %s[%llu %llu] name too long",
root->objectid,
key->type == BTRFS_INODE_REF_KEY ?
"REF" : "EXTREF",
key->objectid, key->offset);
}
read_extent_buffer(node, ref_namebuf,
(unsigned long)(extref + 1), len);
if (len != namelen || strncmp(ref_namebuf, name, len))
goto next_extref;
*index_ret = ref_index;
ret = 0;
goto out;
next_extref:
len = sizeof(*extref) + ref_namelen;
extref = (struct btrfs_inode_extref *)((char *)extref + len);
cur += len;
}
out:
btrfs_release_path(&path);
return ret;
}
static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 ino,
u8 filetype)
{
u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
return insert_inode_item(trans, root, ino, 0, 0, 0, mode);
}
/*
* Insert the missing inode item.
*
* Returns 0 means success.
* Returns <0 means error.
*/
static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
u8 filetype)
{
struct btrfs_key key;
struct btrfs_trans_handle *trans;
struct btrfs_path path = { 0 };
int ret;
key.objectid = ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = -EIO;
goto out;
}
ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
if (ret < 0 || !ret)
goto fail;
/* insert inode item */
create_inode_item_lowmem(trans, root, ino, filetype);
ret = 0;
fail:
btrfs_commit_transaction(trans, root);
out:
if (ret)
error("failed to repair root %llu INODE ITEM[%llu] missing",
root->objectid, ino);
btrfs_release_path(&path);
return ret;
}
/*
* A wrapper for delete_corrupted_dir_item(), with support part like
* start/commit transaction.
*/
static int lowmem_delete_corrupted_dir_item(struct btrfs_root *root,
struct btrfs_key *di_key,
char *namebuf, u32 name_len)
{
struct btrfs_trans_handle *trans;
int ret;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error_msg(ERROR_MSG_START_TRANS, "%m");
return ret;
}
ret = delete_corrupted_dir_item(trans, root, di_key, namebuf, name_len);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
} else {
ret = btrfs_commit_transaction(trans, root);
if (ret < 0) {
errno = -ret;
error_msg(ERROR_MSG_COMMIT_TRANS, "%m");
}
}
return ret;
}
static int try_repair_imode(struct btrfs_root *root, u64 ino)
{
struct btrfs_inode_item *iitem;
struct btrfs_path path = { 0 };
struct btrfs_key key;
int ret;
key.objectid = ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret > 0)
ret = -ENOENT;
if (ret < 0)
goto out;
iitem = btrfs_item_ptr(path.nodes[0], path.slots[0],
struct btrfs_inode_item);
if (!is_valid_imode(btrfs_inode_mode(path.nodes[0], iitem))) {
ret = repair_imode_common(root, &path);
} else {
ret = -ENOTTY;
}
out:
btrfs_release_path(&path);
return ret;
}
/*
* Call repair_inode_item_missing and repair_ternary_lowmem to repair
*
* Returns error after repair
*/
static int repair_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
u64 ino, u64 index, u8 filetype, char *namebuf,
u32 name_len, int err)
{
u64 dirid = di_key->objectid;
int ret;
if (err & (DIR_ITEM_HASH_MISMATCH)) {
ret = lowmem_delete_corrupted_dir_item(root, di_key, namebuf,
name_len);
if (!ret)
err &= ~(DIR_ITEM_HASH_MISMATCH);
}
if (err & INODE_ITEM_MISSING) {
ret = repair_inode_item_missing(root, ino, filetype);
if (!ret)
err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
}
if (err & INODE_ITEM_MISMATCH) {
/*
* INODE_ITEM mismatch can be caused by bad imode, so check if
* it's a bad imode, then repair if possible.
*/
ret = try_repair_imode(root, ino);
if (!ret)
err &= ~INODE_ITEM_MISMATCH;
}
if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
name_len, filetype, err);
if (!ret) {
err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
err &= ~(INODE_REF_MISSING);
}
}
return err;
}
static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
u64 ino, u64 index, const char *namebuf,
int name_len, u8 filetype, int err)
{
if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
root->objectid, key->objectid, key->offset, namebuf,
filetype,
err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
}
if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
root->objectid, key->objectid, index, namebuf, filetype,
err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
}
if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
error(
"root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
root->objectid, ino, index, namebuf, filetype,
err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
}
if (err & INODE_REF_MISSING)
error(
"root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
root->objectid, ino, key->objectid, namebuf, filetype);
}
/*
* Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
* call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
*
* @root: the root of the fs/file tree
* @key: the key of the INODE_REF/INODE_EXTREF
* @path: the path
* @size: the st_size of the INODE_ITEM
*
* Return 0 if no error occurred.
* Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
*/
static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
struct btrfs_path *path, u64 *size)
{
struct btrfs_dir_item *di;
struct btrfs_inode_item *ii;
struct btrfs_key key;
struct btrfs_key location;
struct extent_buffer *node;
int slot;
char namebuf[BTRFS_NAME_LEN] = {0};
u32 total;
u32 cur = 0;
u32 len;
u32 name_len;
u32 data_len;
u8 filetype;
u32 mode = 0;
u64 index;
int ret;
int err;
int tmp_err;
int need_research = 0;
begin:
err = 0;
cur = 0;
/* since after repair, path and the dir item may be changed */
if (need_research) {
need_research = 0;
err |= DIR_COUNT_AGAIN;
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
/* the item was deleted, let path point the last checked item */
if (ret > 0) {
if (path->slots[0] == 0)
btrfs_prev_leaf(root, path);
else
path->slots[0]--;
}
if (ret)
goto out;
}
node = path->nodes[0];
slot = path->slots[0];
di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
total = btrfs_item_size(node, slot);
memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
while (cur < total) {
/*
* For DIR_ITEM set index to (u64)-1, so that find_inode_ref
* ignore index check.
*/
if (di_key->type == BTRFS_DIR_INDEX_KEY)
index = di_key->offset;
else
index = (u64)-1;
data_len = btrfs_dir_data_len(node, di);
tmp_err = 0;
if (data_len)
error("root %llu %s[%llu %llu] data_len shouldn't be %u",
root->objectid,
di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
di_key->objectid, di_key->offset, data_len);
name_len = btrfs_dir_name_len(node, di);
if (name_len <= BTRFS_NAME_LEN) {
len = name_len;
} else {
len = BTRFS_NAME_LEN;
warning("root %llu %s[%llu %llu] name too long",
root->objectid,
di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
di_key->objectid, di_key->offset);
}
(*size) += name_len;
read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
len);
filetype = btrfs_dir_ftype(node, di);
if (di_key->type == BTRFS_DIR_ITEM_KEY &&
di_key->offset != btrfs_name_hash(namebuf, len)) {
error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
root->objectid, di_key->objectid, di_key->offset,
namebuf, len, filetype, di_key->offset,
btrfs_name_hash(namebuf, len));
tmp_err |= DIR_ITEM_HASH_MISMATCH;
goto next;
}
btrfs_dir_item_key_to_cpu(node, di, &location);
/* Ignore related ROOT_ITEM check */
if (location.type == BTRFS_ROOT_ITEM_KEY)
goto next;
btrfs_release_path(path);
/* Check relative INODE_ITEM(existence/filetype) */
ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
if (ret) {
tmp_err |= INODE_ITEM_MISSING;
goto next;
}
ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
mode = btrfs_inode_mode(path->nodes[0], ii);
if (imode_to_type(mode) != filetype) {
tmp_err |= INODE_ITEM_MISMATCH;
goto next;
}
/* Check relative INODE_REF/INODE_EXTREF */
key.objectid = location.objectid;
key.type = BTRFS_INODE_REF_KEY;
key.offset = di_key->objectid;
tmp_err |= find_inode_ref(root, &key, namebuf, len, &index);
/* check relative INDEX/ITEM */
key.objectid = di_key->objectid;
if (key.type == BTRFS_DIR_ITEM_KEY) {
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = index;
} else {
key.type = BTRFS_DIR_ITEM_KEY;
key.offset = btrfs_name_hash(namebuf, name_len);
}
tmp_err |= find_dir_item(root, &key, &location, namebuf,
name_len, filetype);
/* find_dir_item may find index */
if (key.type == BTRFS_DIR_INDEX_KEY)
index = key.offset;
next:
if (tmp_err && opt_check_repair) {
ret = repair_dir_item(root, di_key,
location.objectid, index,
imode_to_type(mode), namebuf,
name_len, tmp_err);
if (ret != tmp_err) {
need_research = 1;
goto begin;
}
}
btrfs_release_path(path);
print_dir_item_err(root, di_key, location.objectid, index,
namebuf, name_len, filetype, tmp_err);
err |= tmp_err;
len = sizeof(*di) + name_len + data_len;
di = (struct btrfs_dir_item *)((char *)di + len);
cur += len;
if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
root->objectid, di_key->objectid,
di_key->offset);
break;
}
}
out:
/* research path */
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
if (ret)
err |= ret > 0 ? -ENOENT : ret;
return err;
}
/*
* Wrapper function of btrfs_punch_hole.
*
* @path: The path holder, will point to the same key after hole punching.
*
* Returns 0 means success.
* Returns not 0 means error.
*/
static int punch_extent_hole(struct btrfs_root *root, struct btrfs_path *path,
u64 ino, u64 start, u64 len)
{
struct btrfs_trans_handle *trans;
struct btrfs_key key;
int ret;
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans))
return PTR_ERR(trans);
ret = btrfs_punch_hole(trans, root, ino, start, len);
if (ret) {
error("failed to add hole [%llu, %llu] in inode [%llu]",
start, len, ino);
btrfs_abort_transaction(trans, ret);
return ret;
}
printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len, ino);
btrfs_commit_transaction(trans, root);
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret > 0)
ret = -ENOENT;
return ret;
}
static int repair_inline_ram_bytes(struct btrfs_root *root,
struct btrfs_path *path, u64 *ram_bytes_ret)
{
struct btrfs_trans_handle *trans;
struct btrfs_key key;
struct btrfs_file_extent_item *fi;
u32 on_disk_data_len;
int ret;
int recover_ret;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
return ret;
}
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Not really possible */
if (ret > 0) {
ret = -ENOENT;
btrfs_release_path(path);
goto recover;
}
if (ret < 0)
goto recover;
on_disk_data_len = btrfs_file_extent_inline_item_len(path->nodes[0],
path->slots[0]);
fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_file_extent_item);
if (btrfs_file_extent_type(path->nodes[0], fi) !=
BTRFS_FILE_EXTENT_INLINE ||
btrfs_file_extent_compression(path->nodes[0], fi) !=
BTRFS_COMPRESS_NONE)
return -EINVAL;
btrfs_set_file_extent_ram_bytes(path->nodes[0], fi, on_disk_data_len);
btrfs_mark_buffer_dirty(path->nodes[0]);
ret = btrfs_commit_transaction(trans, root);
if (!ret) {
printf(
"Successfully repaired inline ram_bytes for root %llu ino %llu\n",
root->objectid, key.objectid);
*ram_bytes_ret = on_disk_data_len;
}
return ret;
recover:
/*
* COW search failed, mostly due to the extra COW work (extent
* allocation, etc). Since we have a good path from before, readonly
* search should still work, or later checks will fail due to empty
* path.
*/
recover_ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* This really shouldn't happen, or we have a big problem */
UASSERT(recover_ret == 0);
return ret;
}
static int check_file_extent_inline(struct btrfs_root *root,
struct btrfs_path *path, u64 *size,
u64 *end)
{
u32 max_inline_extent_size = min_t(u32, gfs_info->sectorsize - 1,
BTRFS_MAX_INLINE_DATA_SIZE(gfs_info));
struct extent_buffer *node = path->nodes[0];
struct btrfs_file_extent_item *fi;
struct btrfs_key fkey;
u64 extent_num_bytes;
u32 item_inline_len;
int ret;
int compressed = 0;
int err = 0;
fi = btrfs_item_ptr(node, path->slots[0], struct btrfs_file_extent_item);
item_inline_len = btrfs_file_extent_inline_item_len(node, path->slots[0]);
extent_num_bytes = btrfs_file_extent_ram_bytes(node, fi);
compressed = btrfs_file_extent_compression(node, fi);
btrfs_item_key_to_cpu(node, &fkey, path->slots[0]);
if (extent_num_bytes == 0) {
error(
"root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
root->objectid, fkey.objectid, fkey.offset);
err |= FILE_EXTENT_ERROR;
}
if (compressed) {
if (extent_num_bytes > gfs_info->sectorsize) {
error(
"root %llu EXTENT_DATA[%llu %llu] too large inline extent ram size, have %llu, max: %u",
root->objectid, fkey.objectid, fkey.offset,
extent_num_bytes, gfs_info->sectorsize - 1);
err |= FILE_EXTENT_ERROR;
}
if (item_inline_len > max_inline_extent_size) {
error(
"root %llu EXTENT_DATA[%llu %llu] too large inline extent on-disk size, have %u, max: %u",
root->objectid, fkey.objectid, fkey.offset,
item_inline_len, max_inline_extent_size);
err |= FILE_EXTENT_ERROR;
}
} else {
if (extent_num_bytes > max_inline_extent_size) {
error(
"root %llu EXTENT_DATA[%llu %llu] too large inline extent size, have %llu, max: %u",
root->objectid, fkey.objectid, fkey.offset,
extent_num_bytes, max_inline_extent_size);
err |= FILE_EXTENT_ERROR;
}
if (extent_num_bytes != item_inline_len) {
error(
"root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
root->objectid, fkey.objectid, fkey.offset,
extent_num_bytes, item_inline_len);
if (opt_check_repair) {
ret = repair_inline_ram_bytes(root, path,
&extent_num_bytes);
if (ret)
err |= FILE_EXTENT_ERROR;
} else {
err |= FILE_EXTENT_ERROR;
}
}
}
*end += extent_num_bytes;
*size += extent_num_bytes;
return err;
}
/*
* Check file extent datasum/hole, update the size of the file extents,
* check and update the last offset of the file extent.
*
* @root: the root of fs/file tree.
* @nodatasum: INODE_NODATASUM feature.
* @size: the sum of all EXTENT_DATA items size for this inode.
* @end: the offset of the last extent.
*
* Return 0 if no error occurred.
*/
static int check_file_extent(struct btrfs_root *root, struct btrfs_path *path,
unsigned int nodatasum, u64 isize, u64 *size,
u64 *end)
{
struct btrfs_file_extent_item *fi;
struct btrfs_key fkey;
struct extent_buffer *node = path->nodes[0];
u64 disk_bytenr;
u64 disk_num_bytes;
u64 extent_num_bytes;
u64 extent_offset;
u64 csum_found; /* In byte size, sectorsize aligned */
u64 search_start; /* Logical range start we search for csum */
u64 search_len; /* Logical range len we search for csum */
u64 gen;
u64 super_gen;
unsigned int extent_type;
unsigned int is_hole;
int slot = path->slots[0];
int compressed = 0;
int ret;
int err = 0;
btrfs_item_key_to_cpu(node, &fkey, slot);
fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
extent_type = btrfs_file_extent_type(node, fi);
/* Check extent type */
if (extent_type != BTRFS_FILE_EXTENT_REG &&
extent_type != BTRFS_FILE_EXTENT_PREALLOC &&
extent_type != BTRFS_FILE_EXTENT_INLINE) {
err |= FILE_EXTENT_ERROR;
error("root %llu EXTENT_DATA[%llu %llu] type bad",
root->objectid, fkey.objectid, fkey.offset);
return err;
}
/* Check inline extent */
if (extent_type == BTRFS_FILE_EXTENT_INLINE)
return check_file_extent_inline(root, path, size, end);
/* Check REG_EXTENT/PREALLOC_EXTENT */
gen = btrfs_file_extent_generation(node, fi);
disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
extent_offset = btrfs_file_extent_offset(node, fi);
compressed = btrfs_file_extent_compression(node, fi);
is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
super_gen = btrfs_super_generation(gfs_info->super_copy);
if (gen > super_gen + 1) {
error(
"invalid file extent generation, have %llu expect (0, %llu]",
gen, super_gen + 1);
err |= INVALID_GENERATION;
}
/*
* Check EXTENT_DATA csum
*
* For plain (uncompressed) extent, we should only check the range
* we're referring to, as it's possible that part of prealloc extent
* has been written, and has csum:
*
* |<--- Original large preallocated extent A ---->|
* |<- Prealloc File Extent ->|<- Regular Extent ->|
* No csum Has csum
*
* For compressed extent, we should check the whole range.
*/
if (!compressed) {
search_start = disk_bytenr + extent_offset;
search_len = extent_num_bytes;
} else {
search_start = disk_bytenr;
search_len = disk_num_bytes;
}
ret = count_csum_range(search_start, search_len, &csum_found);
if (csum_found > 0 && nodatasum) {
err |= ODD_CSUM_ITEM;
error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
root->objectid, fkey.objectid, fkey.offset);
} else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
!is_hole && (ret < 0 || csum_found < search_len)) {
err |= CSUM_ITEM_MISSING;
error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
root->objectid, fkey.objectid, fkey.offset,
csum_found, search_len);
} else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
csum_found > 0) {
ret = check_prealloc_extent_written(disk_bytenr, disk_num_bytes);
if (ret < 0)
return ret;
if (ret == 0) {
err |= ODD_CSUM_ITEM;
error(
"root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
root->objectid, fkey.objectid, fkey.offset,
csum_found);
}
}
/*
* Extra check for compressed extents:
* Btrfs doesn't allow NODATASUM and compressed extent co-exist, thus
* all compressed extents should have a checksum.
*/
if (compressed && csum_found < search_len) {
error(
"root %llu EXTENT_DATA[%llu %llu] compressed extent must have csum, but only %llu bytes have, expect %llu",
root->objectid, fkey.objectid, fkey.offset, csum_found,
search_len);
err |= CSUM_ITEM_MISSING;
}
if (compressed && nodatasum) {
error(
"root %llu EXTENT_DATA[%llu %llu] is compressed, but inode flag doesn't allow it",
root->objectid, fkey.objectid, fkey.offset);
err |= FILE_EXTENT_ERROR;
}
/* Check EXTENT_DATA hole */
if (!no_holes && (fkey.offset < isize) && (*end != fkey.offset)) {
if (opt_check_repair)
ret = punch_extent_hole(root, path, fkey.objectid,
*end, fkey.offset - *end);
if (!opt_check_repair || ret) {
err |= FILE_EXTENT_ERROR;
error(
"root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
root->objectid, fkey.objectid, fkey.offset,
fkey.objectid, *end);
}
}
/*
* Don't update extent end beyond rounded up isize. As holes
* after isize is not considered as missing holes.
*/
*end = min(round_up(isize, gfs_info->sectorsize),
fkey.offset + extent_num_bytes);
if (!is_hole)
*size += extent_num_bytes;
return err;
}
static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
u64 *size_ret)
{
struct btrfs_key key;
struct btrfs_path path = { 0 };
u32 len;
struct btrfs_dir_item *di;
int ret;
int cur = 0;
int total = 0;
UASSERT(size_ret);
*size_ret = 0;
key.objectid = ino;
key.type = type;
key.offset = (u64)-1;
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0) {
ret = -EIO;
goto out;
}
/* If found, go to special case. */
if (ret == 0)
goto special_case;
loop:
ret = btrfs_previous_item(root, &path, ino, type);
if (ret) {
ret = 0;
goto out;
}
special_case:
di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
cur = 0;
total = btrfs_item_size(path.nodes[0], path.slots[0]);
while (cur < total) {
len = btrfs_dir_name_len(path.nodes[0], di);
if (len > BTRFS_NAME_LEN)
len = BTRFS_NAME_LEN;
*size_ret += len;
len += btrfs_dir_data_len(path.nodes[0], di);
len += sizeof(*di);
di = (struct btrfs_dir_item *)((char *)di + len);
cur += len;
}
goto loop;
out:
btrfs_release_path(&path);
return ret;
}
static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
{
u64 item_size;
u64 index_size;
int ret;
UASSERT(size);
ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
if (ret)
goto out;
ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
if (ret)
goto out;
*size = item_size + index_size;
out:
if (ret)
error("failed to count root %llu INODE[%llu] root size",
root->objectid, ino);
return ret;
}
/*
* Set inode item nbytes to @nbytes
*
* Returns 0 on success
* Returns != 0 on error
*/
static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
struct btrfs_path *path,
u64 ino, u64 nbytes)
{
struct btrfs_trans_handle *trans;
struct btrfs_inode_item *ii;
struct btrfs_key key;
struct btrfs_key research_key;
int err = 0;
int ret;
btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
key.objectid = ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
err |= ret;
goto out;
}
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret > 0)
ret = -ENOENT;
if (ret) {
err |= ret;
goto fail;
}
ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
btrfs_mark_buffer_dirty(path->nodes[0]);
fail:
btrfs_commit_transaction(trans, root);
out:
if (ret)
error("failed to set nbytes in inode %llu root %llu",
ino, root->root_key.objectid);
else
printf("Set nbytes in inode item %llu root %llu to %llu\n", ino,
root->root_key.objectid, nbytes);
/* research path */
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
err |= ret;
return err;
}
/*
* Set directory inode isize to @isize.
*
* Returns 0 on success.
* Returns != 0 on error.
*/
static int repair_dir_isize_lowmem(struct btrfs_root *root,
struct btrfs_path *path,
u64 ino, u64 isize)
{
struct btrfs_trans_handle *trans;
struct btrfs_inode_item *ii;
struct btrfs_key key;
struct btrfs_key research_key;
int ret;
int err = 0;
btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
key.objectid = ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
err |= ret;
goto out;
}
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret > 0)
ret = -ENOENT;
if (ret) {
err |= ret;
goto fail;
}
ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
btrfs_set_inode_size(path->nodes[0], ii, isize);
btrfs_mark_buffer_dirty(path->nodes[0]);
fail:
btrfs_commit_transaction(trans, root);
out:
if (ret)
error("failed to set isize in inode %llu root %llu",
ino, root->root_key.objectid);
else
printf("Set isize in inode %llu root %llu to %llu\n",
ino, root->root_key.objectid, isize);
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
err |= ret;
return err;
}
/*
* Wrapper function for btrfs_add_orphan_item().
*
* Returns 0 on success.
* Returns != 0 on error.
*/
static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
struct btrfs_path *path, u64 ino)
{
struct btrfs_trans_handle *trans;
struct btrfs_key research_key;
int ret;
int err = 0;
btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
err |= ret;
goto out;
}
btrfs_release_path(path);
ret = btrfs_add_orphan_item(trans, root, path, ino);
err |= ret;
btrfs_commit_transaction(trans, root);
out:
if (ret)
error("failed to add inode %llu as orphan item root %llu",
ino, root->root_key.objectid);
else
printf("Added inode %llu as orphan item root %llu\n",
ino, root->root_key.objectid);
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
err |= ret;
return err;
}
/* Set inode_item nlink to @ref_count.
* If @ref_count == 0, move it to "lost+found" and increase @ref_count.
*
* Returns 0 on success
*/
static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
struct btrfs_path *path, u64 ino,
const char *name, u32 namelen,
u64 ref_count, u8 filetype, u64 *nlink)
{
struct btrfs_trans_handle *trans;
struct btrfs_inode_item *ii;
struct btrfs_key key;
struct btrfs_key old_key;
char namebuf[BTRFS_NAME_LEN] = {0};
int name_len;
int ret;
int ret2;
/* save the key */
btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
if (name && namelen) {
UASSERT(namelen <= BTRFS_NAME_LEN);
memcpy(namebuf, name, namelen);
name_len = namelen;
} else {
sprintf(namebuf, "%llu", ino);
name_len = count_digits(ino);
printf("Can't find file name for inode %llu, use %s instead\n",
ino, namebuf);
}
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
btrfs_release_path(path);
/* if refs is 0, put it into lostfound */
if (ref_count == 0) {
ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
name_len, filetype, &ref_count);
if (ret)
goto fail;
}
/* reset inode_item's nlink to ref_count */
key.objectid = ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret > 0)
ret = -ENOENT;
if (ret)
goto fail;
ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
btrfs_mark_buffer_dirty(path->nodes[0]);
if (nlink)
*nlink = ref_count;
fail:
btrfs_commit_transaction(trans, root);
out:
if (ret)
error(
"fail to repair nlink of inode %llu root %llu name %s filetype %u",
root->objectid, ino, namebuf, filetype);
else
printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
root->objectid, ino, namebuf, filetype);
/* research */
btrfs_release_path(path);
ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
if (ret2 < 0)
return ret |= ret2;
return ret;
}
static bool has_orphan_item(struct btrfs_root *root, u64 ino)
{
struct btrfs_path path = { 0 };
struct btrfs_key key;
int ret;
key.objectid = BTRFS_ORPHAN_OBJECTID;
key.type = BTRFS_ORPHAN_ITEM_KEY;
key.offset = ino;
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
btrfs_release_path(&path);
if (ret == 0)
return true;
return false;
}
static int repair_inode_gen_lowmem(struct btrfs_root *root,
struct btrfs_path *path)
{
struct btrfs_trans_handle *trans;
struct btrfs_inode_item *ii;
struct btrfs_key key;
u64 transid;
int ret;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error_msg(ERROR_MSG_START_TRANS, "inode gen repair: %m");
return ret;
}
transid = trans->transid;
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
UASSERT(key.type == BTRFS_INODE_ITEM_KEY);
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret > 0) {
ret = -ENOENT;
error("no inode item found for ino %llu", key.objectid);
goto error;
}
if (ret < 0) {
errno = -ret;
error("failed to find inode item for ino %llu: %m", key.objectid);
goto error;
}
ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
btrfs_set_inode_generation(path->nodes[0], ii, trans->transid);
btrfs_set_inode_transid(path->nodes[0], ii, trans->transid);
btrfs_mark_buffer_dirty(path->nodes[0]);
ret = btrfs_commit_transaction(trans, root);
if (ret < 0) {
errno = -ret;
error_msg(ERROR_MSG_COMMIT_TRANS, "%m");
goto error;
}
printf("resetting inode generation/transid to %llu for ino %llu\n",
transid, key.objectid);
return ret;
error:
btrfs_abort_transaction(trans, ret);
return ret;
}
/*
* Check INODE_ITEM and related ITEMs (the same inode number)
* 1. check link count
* 2. check inode ref/extref
* 3. check dir item/index
*
* Return 0 if no error occurred.
* Return >0 for error or hit the traversal is done(by error bitmap)
*/
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path)
{
struct extent_buffer *node;
struct btrfs_inode_item *ii;
struct btrfs_key key;
struct btrfs_key last_key;
struct btrfs_super_block *super = gfs_info->super_copy;
u64 inode_id;
u32 mode;
u64 flags;
u64 nlink;
u64 nbytes;
u64 isize;
u64 size = 0;
u64 refs = 0;
u64 extent_end = 0;
u64 extent_size = 0;
u64 generation;
u64 transid;
u64 gen_uplimit;
unsigned int dir;
unsigned int nodatasum;
bool is_orphan = false;
int slot;
int ret;
int err = 0;
char namebuf[BTRFS_NAME_LEN] = {0};
u32 name_len = 0;
node = path->nodes[0];
slot = path->slots[0];
btrfs_item_key_to_cpu(node, &key, slot);
inode_id = key.objectid;
if (inode_id == BTRFS_ORPHAN_OBJECTID) {
ret = btrfs_next_item(root, path);
if (ret > 0)
err |= LAST_ITEM;
return err;
}
is_orphan = has_orphan_item(root, inode_id);
ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
isize = btrfs_inode_size(node, ii);
nbytes = btrfs_inode_nbytes(node, ii);
mode = btrfs_inode_mode(node, ii);
flags = btrfs_inode_flags(node, ii);
dir = imode_to_type(mode) == BTRFS_FT_DIR;
nlink = btrfs_inode_nlink(node, ii);
generation = btrfs_inode_generation(node, ii);
transid = btrfs_inode_transid(node, ii);
nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
if (!is_valid_imode(mode)) {
error("invalid imode mode bits: 0%o", mode);
if (opt_check_repair) {
ret = repair_imode_common(root, path);
if (ret < 0)
err |= INODE_MODE_ERROR;
} else {
err |= INODE_MODE_ERROR;
}
}
if (btrfs_super_log_root(super) != 0 &&
root->objectid == BTRFS_TREE_LOG_OBJECTID)
gen_uplimit = btrfs_super_generation(super) + 1;
else
gen_uplimit = btrfs_super_generation(super);
if (generation > gen_uplimit || transid > gen_uplimit) {
error(
"invalid inode generation %llu or transid %llu for ino %llu, expect [0, %llu)",
generation, transid, inode_id, gen_uplimit);
if (opt_check_repair) {
ret = repair_inode_gen_lowmem(root, path);
if (ret < 0)
err |= INVALID_GENERATION;
} else {
err |= INVALID_GENERATION;
}
}
if (S_ISLNK(mode) &&
flags & (BTRFS_INODE_IMMUTABLE | BTRFS_INODE_APPEND)) {
err |= INODE_FLAGS_ERROR;
error(
"symlinks must never have immutable/append flags set, root %llu inode item %llu flags %llu may be corrupted",
root->objectid, inode_id, flags);
}
while (1) {
btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
ret = btrfs_next_item(root, path);
/*
* New leaf, we need to check it and see if it's valid, if not
* we need to bail otherwise we could end up stuck.
*/
if (path->slots[0] == 0 &&
btrfs_check_block_for_repair(path->nodes[0], NULL))
ret = -EIO;
if (ret < 0) {
err |= FATAL_ERROR;
goto out;
} else if (ret > 0) {
err |= LAST_ITEM;
goto out;
}
node = path->nodes[0];
slot = path->slots[0];
btrfs_item_key_to_cpu(node, &key, slot);
if (key.objectid != inode_id)
goto out;
switch (key.type) {
case BTRFS_INODE_REF_KEY:
ret = check_inode_ref(root, &key, path, namebuf,
&name_len, &refs, mode);
err |= ret;
break;
case BTRFS_INODE_EXTREF_KEY:
{
bool ext_ref = btrfs_fs_incompat(gfs_info,
EXTENDED_IREF);
if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
warning("root %llu EXTREF[%llu %llu] isn't supported",
root->objectid, key.objectid,
key.offset);
ret = check_inode_extref(root, &key, node, slot, &refs,
mode);
err |= ret;
break;
}
case BTRFS_DIR_ITEM_KEY:
case BTRFS_DIR_INDEX_KEY:
if (!dir) {
warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
root->objectid, inode_id,
imode_to_type(mode), key.objectid,
key.offset);
}
if (is_orphan && key.type == BTRFS_DIR_INDEX_KEY)
break;
ret = check_dir_item(root, &key, path, &size);
err |= ret;
break;
case BTRFS_EXTENT_DATA_KEY:
if (dir) {
warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
root->objectid, inode_id, key.objectid,
key.offset);
}
ret = check_file_extent(root, path, nodatasum, isize,
&extent_size, &extent_end);
err |= ret;
break;
case BTRFS_XATTR_ITEM_KEY:
break;
default:
error("ITEM[%llu %u %llu] UNKNOWN TYPE",
key.objectid, key.type, key.offset);
}
}
out:
if (err & LAST_ITEM) {
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
if (ret)
return err;
}
/* verify INODE_ITEM nlink/isize/nbytes */
if (dir) {
if (opt_check_repair && (err & DIR_COUNT_AGAIN)) {
err &= ~DIR_COUNT_AGAIN;
count_dir_isize(root, inode_id, &size);
}
if ((nlink != 1 || refs != 1) && opt_check_repair) {
ret = repair_inode_nlinks_lowmem(root, path, inode_id,
namebuf, name_len, refs, imode_to_type(mode),
&nlink);
}
if (nlink > 1) {
err |= LINK_COUNT_ERROR;
error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
root->objectid, inode_id, nlink);
}
/*
* Just a warning, as dir inode nbytes is just an
* instructive value.
*/
if (!IS_ALIGNED(nbytes, gfs_info->nodesize)) {
warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
root->objectid, inode_id,
gfs_info->nodesize);
}
if (isize != size && !is_orphan) {
if (opt_check_repair)
ret = repair_dir_isize_lowmem(root, path,
inode_id, size);
if (!opt_check_repair || ret) {
err |= ISIZE_ERROR;
error(
"root %llu DIR INODE [%llu] size %llu not equal to %llu",
root->objectid, inode_id, isize, size);
}
}
} else {
if (nlink != refs) {
if (opt_check_repair)
ret = repair_inode_nlinks_lowmem(root, path,
inode_id, namebuf, name_len, refs,
imode_to_type(mode), &nlink);
if (!opt_check_repair || ret) {
err |= LINK_COUNT_ERROR;
error(
"root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
root->objectid, inode_id, nlink, refs);
}
} else if (!nlink && !is_orphan) {
if (opt_check_repair)
ret = repair_inode_orphan_item_lowmem(root,
path, inode_id);
if (!opt_check_repair || ret) {
err |= ORPHAN_ITEM;
error("root %llu INODE[%llu] is orphan item",
root->objectid, inode_id);
}
}
/*
* For orphan inode, updating nbytes/size is just a waste of
* time, so skip such repair and don't report them as error.
*/
if (nbytes != extent_size && !is_orphan) {
if (opt_check_repair) {
ret = repair_inode_nbytes_lowmem(root, path,
inode_id, extent_size);
if (!ret)
nbytes = extent_size;
}
if (!opt_check_repair || ret) {
err |= NBYTES_ERROR;
error(
"root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
root->objectid, inode_id, nbytes,
extent_size);
}
}
if (!nbytes && !no_holes && extent_end < isize) {
if (opt_check_repair)
ret = punch_extent_hole(root, path, inode_id,
extent_end, isize - extent_end);
if (!opt_check_repair || ret) {
err |= NBYTES_ERROR;
error(
"root %llu INODE[%llu] size %llu should have a file extent hole",
root->objectid, inode_id, isize);
}
}
}
if (err & LAST_ITEM)
btrfs_next_item(root, path);
return err;
}
/*
* Returns >0 Found error, not fatal, should continue
* Returns <0 Fatal error, must exit the whole check
* Returns 0 No errors found
*/
static int process_one_leaf(struct btrfs_root *root, struct btrfs_path *path,
struct node_refs *nrefs, int *level)
{
struct extent_buffer *cur = path->nodes[0];
struct btrfs_key key;
u64 cur_bytenr;
u32 nritems;
u64 first_ino = 0;
int root_level = btrfs_header_level(root->node);
int i;
int ret = 0; /* Final return value */
int err = 0; /* Positive error bitmap */
cur_bytenr = cur->start;
/* skip to first inode item or the first inode number change */
nritems = btrfs_header_nritems(cur);
for (i = 0; i < nritems; i++) {
btrfs_item_key_to_cpu(cur, &key, i);
if (i == 0)
first_ino = key.objectid;
if (key.type == BTRFS_INODE_ITEM_KEY ||
(first_ino && first_ino != key.objectid))
break;
}
if (i == nritems) {
path->slots[0] = nritems;
return 0;
}
path->slots[0] = i;
again:
err |= check_inode_item(root, path);
/* modify cur since check_inode_item may change path */
cur = path->nodes[0];
if (err & LAST_ITEM || err & FATAL_ERROR)
goto out;
/* still have inode items in this leaf */
if (cur->start == cur_bytenr)
goto again;
/*
* we have switched to another leaf, above nodes may
* have changed, here walk down the path, if a node
* or leaf is shared, check whether we can skip this
* node or leaf.
*/
for (i = root_level; i >= 0; i--) {
if (path->nodes[i]->start == nrefs->bytenr[i])
continue;
ret = update_nodes_refs(root, path->nodes[i]->start,
path->nodes[i], nrefs, i, 0);
if (ret)
goto out;
if (!nrefs->need_check[i]) {
*level += 1;
break;
}
}
for (i = 0; i < *level; i++) {
free_extent_buffer(path->nodes[i]);
path->nodes[i] = NULL;
}
out:
err &= ~LAST_ITEM;
if (err && !ret)
ret = err;
return ret;
}
/*
* @level if @level == -1 means extent data item
* else normal treeblock.
*/
static bool should_check_extent_strictly(struct btrfs_root *root,
struct node_refs *nrefs, int level)
{
int root_level = btrfs_header_level(root->node);
if (level > root_level || level < -1)
return true;
if (level == root_level)
return true;
/*
* if the upper node is marked full backref, it should contain shared
* backref of the parent (except owner == root->objectid).
*/
while (++level <= root_level)
if (nrefs->refs[level] > 1)
return false;
return true;
}
static int check_extent_inline_ref(struct extent_buffer *eb,
struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
{
int ret;
u8 type = btrfs_extent_inline_ref_type(eb, iref);
switch (type) {
case BTRFS_TREE_BLOCK_REF_KEY:
case BTRFS_EXTENT_DATA_REF_KEY:
case BTRFS_SHARED_BLOCK_REF_KEY:
case BTRFS_SHARED_DATA_REF_KEY:
ret = 0;
break;
default:
error("extent[%llu %u %llu] has unknown ref type: %d",
key->objectid, key->type, key->offset, type);
ret = UNKNOWN_TYPE;
break;
}
return ret;
}
/*
* Check backrefs of a tree block given by @bytenr or @eb.
*
* @root: the root containing the @bytenr or @eb
* @eb: tree block extent buffer, can be NULL
* @bytenr: bytenr of the tree block to search
* @level: tree level of the tree block
* @owner: owner of the tree block
*
* Return >0 for any error found and output error message
* Return 0 for no error found
*/
static int check_tree_block_ref(struct btrfs_root *root,
struct extent_buffer *eb, u64 bytenr,
int level, u64 owner, struct node_refs *nrefs)
{
struct btrfs_key key;
struct btrfs_root *extent_root;
struct btrfs_path path = { 0 };
struct btrfs_extent_item *ei;
struct btrfs_extent_inline_ref *iref;
struct extent_buffer *leaf;
unsigned long end;
unsigned long ptr;
int slot;
int skinny_level;
int root_level = btrfs_header_level(root->node);
int type;
u32 nodesize = gfs_info->nodesize;
u32 item_size;
u64 offset;
int found_ref = 0;
int err = 0;
int ret;
int strict = 1;
int parent = 0;
key.objectid = bytenr;
if (btrfs_fs_incompat(gfs_info, SKINNY_METADATA))
key.type = BTRFS_METADATA_ITEM_KEY;
else
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = (u64)-1;
/* Search for the backref in extent tree */
extent_root = btrfs_extent_root(gfs_info, bytenr);
ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
if (ret < 0) {
err |= BACKREF_MISSING;
goto out;
}
ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
if (ret) {
err |= BACKREF_MISSING;
goto out;
}
leaf = path.nodes[0];
slot = path.slots[0];
btrfs_item_key_to_cpu(leaf, &key, slot);
ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
if (key.type == BTRFS_METADATA_ITEM_KEY) {
skinny_level = (int)key.offset;
iref = (struct btrfs_extent_inline_ref *)(ei + 1);
} else {
struct btrfs_tree_block_info *info;
info = (struct btrfs_tree_block_info *)(ei + 1);
skinny_level = btrfs_tree_block_level(leaf, info);
iref = (struct btrfs_extent_inline_ref *)(info + 1);
}
if (eb) {
u64 header_gen;
u64 extent_gen;
/*
* Due to the feature of shared tree blocks, if the upper node
* is a fs root or shared node, the extent of checked node may
* not be updated until the next CoW.
*/
if (nrefs)
strict = should_check_extent_strictly(root, nrefs,
level);
if (!(btrfs_extent_flags(leaf, ei) &
BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
error(
"extent[%llu %u] backref type mismatch, missing bit: %llx",
key.objectid, nodesize,
BTRFS_EXTENT_FLAG_TREE_BLOCK);
err = BACKREF_MISMATCH;
}
header_gen = btrfs_header_generation(eb);
extent_gen = btrfs_extent_generation(leaf, ei);
if (header_gen != extent_gen) {
error(
"extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
key.objectid, nodesize, header_gen,
extent_gen);
err = BACKREF_MISMATCH;
}
if (level != skinny_level) {
error(
"extent[%llu %u] level mismatch, wanted: %u, have: %u",
key.objectid, nodesize, level, skinny_level);
err = BACKREF_MISMATCH;
}
if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
error(
"extent[%llu %u] is referred by other roots than %llu",
key.objectid, nodesize, root->objectid);
err = BACKREF_MISMATCH;
}
}
/*
* Iterate the extent/metadata item to find the exact backref
*/
item_size = btrfs_item_size(leaf, slot);
ptr = (unsigned long)iref;
end = (unsigned long)ei + item_size;
while (ptr < end) {
iref = (struct btrfs_extent_inline_ref *)ptr;
type = btrfs_extent_inline_ref_type(leaf, iref);
offset = btrfs_extent_inline_ref_offset(leaf, iref);
ret = check_extent_inline_ref(leaf, &key, iref);
if (ret) {
err |= ret;
break;
}
if (type == BTRFS_TREE_BLOCK_REF_KEY) {
if (offset == root->objectid)
found_ref = 1;
if (!strict && owner == offset)
found_ref = 1;
} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
/*
* Backref of tree reloc root points to itself, no need
* to check backref any more.
*
* This may be an error of loop backref, but extent tree
* checker should have already handled it.
* Here we only need to avoid infinite iteration.
*/
if (offset == bytenr) {
found_ref = 1;
} else {
/*
* Check if the backref points to valid
* referencer
*/
found_ref = !check_tree_block_ref(root, NULL,
offset, level + 1, owner, NULL);
}
}
if (found_ref)
break;
ptr += btrfs_extent_inline_ref_size(type);
}
/*
* Inlined extent item doesn't have what we need, check
* TREE_BLOCK_REF_KEY
*/
if (!found_ref) {
btrfs_release_path(&path);
key.objectid = bytenr;
key.type = BTRFS_TREE_BLOCK_REF_KEY;
key.offset = root->objectid;
ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
if (!ret)
found_ref = 1;
}
/*
* Finally check SHARED BLOCK REF, any found will be good
* Here we're not doing comprehensive extent backref checking,
* only need to ensure there is some extent referring to this
* tree block.
*/
if (!found_ref) {
btrfs_release_path(&path);
key.objectid = bytenr;
key.type = BTRFS_SHARED_BLOCK_REF_KEY;
key.offset = (u64)-1;
ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
if (ret < 0) {
err |= BACKREF_MISSING;
goto out;
}
ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
if (ret) {
err |= BACKREF_MISSING;
goto out;
}
found_ref = 1;
}
if (!found_ref)
err |= BACKREF_MISSING;
out:
btrfs_release_path(&path);
if (nrefs && strict &&
level < root_level && nrefs->full_backref[level + 1])
parent = nrefs->bytenr[level + 1];
if (eb && (err & BACKREF_MISSING))
error(
"extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
bytenr, nodesize, owner, level,
parent ? "parent" : "root",
parent ? parent : root->objectid);
return err;
}
/*
* If @err contains BYTES_UNALIGNED then delete the extent data item.
* If @err contains BACKREF_MISSING then add extent of the
* file_extent_data_item.
*
* Returns error bits after reapir.
*/
static int repair_extent_data_item(struct btrfs_root *root,
struct btrfs_path *pathp,
struct node_refs *nrefs,
int err)
{
struct btrfs_trans_handle *trans = NULL;
struct btrfs_file_extent_item *fi;
struct btrfs_key fi_key;
struct btrfs_key key;
struct btrfs_extent_item *ei;
struct btrfs_path path = { 0 };
struct btrfs_root *extent_root;
struct extent_buffer *eb;
u64 size;
u64 disk_bytenr;
u64 num_bytes;
u64 parent;
u64 offset;
u64 extent_offset;
u64 file_offset;
int generation;
int slot;
int need_insert = 0;
int ret = 0;
eb = pathp->nodes[0];
slot = pathp->slots[0];
btrfs_item_key_to_cpu(eb, &fi_key, slot);
fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
btrfs_file_extent_disk_bytenr(eb, fi) == 0)
return err;
file_offset = fi_key.offset;
generation = btrfs_file_extent_generation(eb, fi);
disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
extent_offset = btrfs_file_extent_offset(eb, fi);
offset = file_offset - extent_offset;
if (nrefs->full_backref[0])
parent = btrfs_header_bytenr(eb);
else
parent = 0;
if (err & BYTES_UNALIGNED) {
ret = delete_item(root, pathp);
if (!ret)
err = 0;
goto out_no_release;
}
/* now repair only adds backref */
if ((err & BACKREF_MISSING) == 0)
return err;
/* search extent item */
key.objectid = disk_bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = num_bytes;
extent_root = btrfs_extent_root(gfs_info, key.objectid);
ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
if (ret < 0) {
ret = -EIO;
goto out;
}
need_insert = ret;
ret = avoid_extents_overwrite();
if (ret)
goto out;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error_msg(ERROR_MSG_START_TRANS, "%m");
trans = NULL;
goto out;
}
/* insert an extent item */
if (need_insert) {
key.objectid = disk_bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = num_bytes;
size = sizeof(*ei);
btrfs_release_path(&path);
ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
size);
if (ret)
goto out;
eb = path.nodes[0];
ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
btrfs_set_extent_refs(eb, ei, 0);
btrfs_set_extent_generation(eb, ei, generation);
btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
btrfs_mark_buffer_dirty(eb);
ret = btrfs_update_block_group(trans, disk_bytenr, num_bytes,
1, 0);
btrfs_release_path(&path);
}
ret = btrfs_inc_extent_ref(trans, disk_bytenr, num_bytes, parent,
root->objectid,
parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
offset);
if (ret) {
error(
"failed to increase extent data backref[%llu %llu] root %llu",
disk_bytenr, num_bytes, root->objectid);
goto out;
} else {
printf("Add one extent data backref [%llu %llu]\n",
disk_bytenr, num_bytes);
}
err &= ~BACKREF_MISSING;
out:
if (trans)
btrfs_commit_transaction(trans, root);
btrfs_release_path(&path);
out_no_release:
if (ret)
error("can't repair root %llu extent data item[%llu %llu]",
root->objectid, disk_bytenr, num_bytes);
return err;
}
/*
* Check EXTENT_DATA item, mainly for its dbackref in extent tree
*
* Return >0 any error found and output error message
* Return 0 for no error found
*/
static int check_extent_data_item(struct btrfs_root *root,
struct btrfs_path *pathp,
struct node_refs *nrefs, int account_bytes)
{
struct btrfs_file_extent_item *fi;
struct extent_buffer *eb = pathp->nodes[0];
struct btrfs_path path = { 0 };
struct btrfs_root *extent_root;
struct btrfs_key fi_key;
struct btrfs_key dbref_key;
struct extent_buffer *leaf;
struct btrfs_extent_item *ei;
struct btrfs_extent_inline_ref *iref;
struct btrfs_extent_data_ref *dref;
u64 owner;
u64 disk_bytenr;
u64 disk_num_bytes;
u64 extent_num_bytes;
u64 extent_flags;
u64 offset;
u32 item_size;
unsigned long end;
unsigned long ptr;
int type;
int found_dbackref = 0;
int slot = pathp->slots[0];
int err = 0;
int ret;
int strict;
btrfs_item_key_to_cpu(eb, &fi_key, slot);
fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
/* Nothing to check for hole and inline data extents */
if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
btrfs_file_extent_disk_bytenr(eb, fi) == 0)
return 0;
disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
offset = btrfs_file_extent_offset(eb, fi);
/* Check unaligned disk_bytenr, disk_num_bytes and num_bytes */
if (!IS_ALIGNED(disk_bytenr, gfs_info->sectorsize)) {
error(
"file extent [%llu, %llu] has unaligned disk bytenr: %llu, should be aligned to %u",
fi_key.objectid, fi_key.offset, disk_bytenr,
gfs_info->sectorsize);
err |= BYTES_UNALIGNED;
}
if (!IS_ALIGNED(disk_num_bytes, gfs_info->sectorsize)) {
error(
"file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
fi_key.objectid, fi_key.offset, disk_num_bytes,
gfs_info->sectorsize);
err |= BYTES_UNALIGNED;
} else if (account_bytes) {
data_bytes_allocated += disk_num_bytes;
}
if (!IS_ALIGNED(extent_num_bytes, gfs_info->sectorsize)) {
error(
"file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
fi_key.objectid, fi_key.offset, extent_num_bytes,
gfs_info->sectorsize);
err |= BYTES_UNALIGNED;
} else if (account_bytes) {
data_bytes_referenced += extent_num_bytes;
}
owner = btrfs_header_owner(eb);
/* Check the extent item of the file extent in extent tree */
dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
extent_root = btrfs_extent_root(gfs_info, dbref_key.objectid);
ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
if (ret)
goto out;
leaf = path.nodes[0];
slot = path.slots[0];
ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
extent_flags = btrfs_extent_flags(leaf, ei);
if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
error(
"file extent[%llu %llu] root %llu owner %llu backref type mismatch, wanted bit: %llx",
fi_key.objectid, fi_key.offset, root->objectid, owner,
BTRFS_EXTENT_FLAG_DATA);
err |= BACKREF_MISMATCH;
}
/* Check data backref inside that extent item */
item_size = btrfs_item_size(leaf, path.slots[0]);
iref = (struct btrfs_extent_inline_ref *)(ei + 1);
ptr = (unsigned long)iref;
end = (unsigned long)ei + item_size;
strict = should_check_extent_strictly(root, nrefs, -1);
while (ptr < end) {
u64 ref_root;
u64 ref_objectid;
u64 ref_offset;
bool match = false;
iref = (struct btrfs_extent_inline_ref *)ptr;
type = btrfs_extent_inline_ref_type(leaf, iref);
dref = (struct btrfs_extent_data_ref *)(&iref->offset);
ret = check_extent_inline_ref(leaf, &dbref_key, iref);
if (ret) {
err |= ret;
break;
}
if (type == BTRFS_EXTENT_DATA_REF_KEY) {
ref_root = btrfs_extent_data_ref_root(leaf, dref);
ref_objectid = btrfs_extent_data_ref_objectid(leaf,
dref);
ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
if (ref_objectid == fi_key.objectid &&
ref_offset == fi_key.offset - offset)
match = true;
if (ref_root == root->objectid && match)
found_dbackref = 1;
else if (!strict && owner == ref_root && match)
found_dbackref = 1;
} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
found_dbackref = !check_tree_block_ref(root, NULL,
btrfs_extent_inline_ref_offset(leaf, iref),
0, owner, NULL);
}
if (found_dbackref)
break;
ptr += btrfs_extent_inline_ref_size(type);
}
if (!found_dbackref) {
btrfs_release_path(&path);
/* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
dbref_key.offset = hash_extent_data_ref(owner, fi_key.objectid,
fi_key.offset - offset);
extent_root = btrfs_extent_root(gfs_info, dbref_key.objectid);
ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0,
0);
if (!ret) {
found_dbackref = 1;
goto out;
}
btrfs_release_path(&path);
/*
* Neither inlined nor EXTENT_DATA_REF found, try
* SHARED_DATA_REF as last chance.
*/
dbref_key.objectid = disk_bytenr;
dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
dbref_key.offset = eb->start;
ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0,
0);
if (!ret) {
found_dbackref = 1;
goto out;
}
}
out:
if (!found_dbackref)
err |= BACKREF_MISSING;
btrfs_release_path(&path);
if (err & BACKREF_MISSING) {
error(
"file extent[%llu %llu] root %llu owner %llu backref lost",
fi_key.objectid, fi_key.offset, root->objectid, owner);
}
return err;
}
/*
* Check a block group item with its referener (chunk) and its used space
* with extent/metadata item
*/
static int check_block_group_item(struct extent_buffer *eb, int slot)
{
struct btrfs_root *extent_root;
struct btrfs_root *chunk_root = gfs_info->chunk_root;
struct btrfs_block_group_item *bi;
struct btrfs_block_group_item bg_item;
struct btrfs_path path = { 0 };
struct btrfs_key bg_key;
struct btrfs_key chunk_key;
struct btrfs_key extent_key;
struct btrfs_chunk *chunk;
struct extent_buffer *leaf;
struct btrfs_extent_item *ei;
u32 nodesize = btrfs_super_nodesize(gfs_info->super_copy);
u64 flags;
u64 bg_flags;
u64 used;
u64 total = 0;
int ret;
int err = 0;
btrfs_item_key_to_cpu(eb, &bg_key, slot);
bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
used = btrfs_stack_block_group_used(&bg_item);
bg_flags = btrfs_stack_block_group_flags(&bg_item);
chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
chunk_key.offset = bg_key.objectid;
/* Search for the referencer chunk */
ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
if (ret) {
error(
"block group[%llu %llu] did not find the related chunk item",
bg_key.objectid, bg_key.offset);
err |= REFERENCER_MISSING;
} else {
chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
struct btrfs_chunk);
if (btrfs_chunk_length(path.nodes[0], chunk) !=
bg_key.offset) {
error(
"block group[%llu %llu] related chunk item length does not match",
bg_key.objectid, bg_key.offset);
err |= REFERENCER_MISMATCH;
}
}
btrfs_release_path(&path);
/* Search from the block group bytenr */
extent_key.objectid = bg_key.objectid;
extent_key.type = 0;
extent_key.offset = 0;
extent_root = btrfs_extent_root(gfs_info, extent_key.objectid);
ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
if (ret < 0)
goto out;
/* Iterate extent tree to account used space */
while (1) {
leaf = path.nodes[0];
/* Search slot can point to the last item beyond leaf nritems */
if (path.slots[0] >= btrfs_header_nritems(leaf))
goto next;
btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
break;
if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
extent_key.type != BTRFS_EXTENT_ITEM_KEY)
goto next;
if (extent_key.objectid < bg_key.objectid)
goto next;
if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
total += nodesize;
else
total += extent_key.offset;
ei = btrfs_item_ptr(leaf, path.slots[0],
struct btrfs_extent_item);
flags = btrfs_extent_flags(leaf, ei);
if (flags & BTRFS_EXTENT_FLAG_DATA) {
if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
error(
"bad extent[%llu, %llu) type mismatch with chunk",
extent_key.objectid,
extent_key.objectid + extent_key.offset);
err |= CHUNK_TYPE_MISMATCH;
}
} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
BTRFS_BLOCK_GROUP_METADATA))) {
error(
"bad extent[%llu, %llu) type mismatch with chunk",
extent_key.objectid,
extent_key.objectid + nodesize);
err |= CHUNK_TYPE_MISMATCH;
}
}
next:
ret = btrfs_next_item(extent_root, &path);
if (ret)
break;
}
out:
btrfs_release_path(&path);
total_used += used;
if (total != used) {
error(
"block group[%llu %llu] used %llu but extent items used %llu",
bg_key.objectid, bg_key.offset, used, total);
err |= BG_ACCOUNTING_ERROR;
}
return err;
}
/*
* Get real tree block level for the case like shared block
* Return >= 0 as tree level
* Return <0 for error
*/
static int query_tree_block_level(u64 bytenr)
{
struct btrfs_root *extent_root;
struct extent_buffer *eb;
struct btrfs_path path = { 0 };
struct btrfs_key key;
struct btrfs_extent_item *ei;
struct btrfs_tree_parent_check check = { 0 };
u64 flags;
u64 transid;
u8 backref_level;
u8 header_level;
int ret;
/* Search extent tree for extent generation and level */
key.objectid = bytenr;
key.type = BTRFS_METADATA_ITEM_KEY;
key.offset = (u64)-1;
extent_root = btrfs_extent_root(gfs_info, bytenr);
ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
if (ret < 0)
goto release_out;
ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
if (ret < 0)
goto release_out;
if (ret > 0) {
ret = -ENOENT;
goto release_out;
}
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
struct btrfs_extent_item);
flags = btrfs_extent_flags(path.nodes[0], ei);
if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
ret = -ENOENT;
goto release_out;
}
/* Get transid for later read_tree_block() check */
transid = btrfs_extent_generation(path.nodes[0], ei);
/* Get backref level as one source */
if (key.type == BTRFS_METADATA_ITEM_KEY) {
backref_level = key.offset;
} else {
struct btrfs_tree_block_info *info;
info = (struct btrfs_tree_block_info *)(ei + 1);
backref_level = btrfs_tree_block_level(path.nodes[0], info);
}
btrfs_release_path(&path);
/* Get level from tree block as an alternative source */
check.transid = transid;
eb = read_tree_block(gfs_info, bytenr, &check);
if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
return -EIO;
}
header_level = btrfs_header_level(eb);
free_extent_buffer(eb);
if (header_level != backref_level)
return -EIO;
return header_level;
release_out:
btrfs_release_path(&path);
return ret;
}
/*
* Check if a tree block backref is valid (points to a valid tree block)
* if level == -1, level will be resolved
* Return >0 for any error found and print error message
*/
static int check_tree_block_backref(u64 root_id, u64 bytenr, int level)
{
struct btrfs_root *root;
struct btrfs_key key;
struct btrfs_path path = { 0 };
struct extent_buffer *eb;
struct extent_buffer *node;
struct btrfs_tree_parent_check check = {
.owner_root = root_id,
};
u32 nodesize = btrfs_super_nodesize(gfs_info->super_copy);
int err = 0;
int ret;
/* Query level for level == -1 special case */
if (level == -1)
level = query_tree_block_level(bytenr);
if (level < 0) {
err |= REFERENCER_MISSING;
goto out;
}
key.objectid = root_id;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
root = btrfs_read_fs_root(gfs_info, &key);
if (IS_ERR(root)) {
err |= REFERENCER_MISSING;
goto out;
}
/* Read out the tree block to get item/node key */
eb = read_tree_block(gfs_info, bytenr, &check);
if (!extent_buffer_uptodate(eb)) {
err |= REFERENCER_MISSING;
free_extent_buffer(eb);
goto out;
}
/* Empty tree, no need to check key */
if (!btrfs_header_nritems(eb) && !level) {
free_extent_buffer(eb);
goto out;
}
if (level)
btrfs_node_key_to_cpu(eb, &key, 0);
else
btrfs_item_key_to_cpu(eb, &key, 0);
free_extent_buffer(eb);
path.lowest_level = level;
/* Search with the first key, to ensure we can reach it */
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0) {
err |= REFERENCER_MISSING;
goto release_out;
}
node = path.nodes[level];
if (btrfs_header_bytenr(node) != bytenr) {
error(
"extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
bytenr, nodesize, bytenr,
btrfs_header_bytenr(node));
err |= REFERENCER_MISMATCH;
}
if (btrfs_header_level(node) != level) {
error(
"extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
bytenr, nodesize, level,
btrfs_header_level(node));
err |= REFERENCER_MISMATCH;
}
release_out:
btrfs_release_path(&path);
out:
if (err & REFERENCER_MISSING) {
if (level < 0)
error("extent [%llu %d] lost referencer (owner: %llu)",
bytenr, nodesize, root_id);
else
error(
"extent [%llu %d] lost referencer (owner: %llu, level: %u)",
bytenr, nodesize, root_id, level);
}
return err;
}
/*
* Check if tree block @eb is tree reloc root.
* Return 0 if it's not or any problem happens
* Return 1 if it's a tree reloc root
*/
static int is_tree_reloc_root(struct extent_buffer *eb)
{
struct btrfs_root *tree_reloc_root;
struct btrfs_key key;
u64 bytenr = btrfs_header_bytenr(eb);
u64 owner = btrfs_header_owner(eb);
int ret = 0;
key.objectid = BTRFS_TREE_RELOC_OBJECTID;
key.offset = owner;
key.type = BTRFS_ROOT_ITEM_KEY;
tree_reloc_root = btrfs_read_fs_root_no_cache(gfs_info, &key);
if (IS_ERR(tree_reloc_root))
return 0;
if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
ret = 1;
btrfs_free_fs_root(tree_reloc_root);
return ret;
}
/*
* Check referencer for shared block backref
* If level == -1, this function will resolve the level.
*/
static int check_shared_block_backref(u64 parent, u64 bytenr, int level)
{
struct extent_buffer *eb;
struct btrfs_tree_parent_check check = { 0 };
u32 nr;
int found_parent = 0;
int i;
eb = read_tree_block(gfs_info, parent, &check);
if (!extent_buffer_uptodate(eb))
goto out;
if (level == -1)
level = query_tree_block_level(bytenr);
if (level < 0)
goto out;
/* It's possible it's a tree reloc root */
if (parent == bytenr) {
if (is_tree_reloc_root(eb))
found_parent = 1;
goto out;
}
if (level + 1 != btrfs_header_level(eb))
goto out;
nr = btrfs_header_nritems(eb);
for (i = 0; i < nr; i++) {
if (bytenr == btrfs_node_blockptr(eb, i)) {
found_parent = 1;
break;
}
}
out:
free_extent_buffer(eb);
if (!found_parent) {
error(
"shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
bytenr, gfs_info->nodesize, parent, level);
return REFERENCER_MISSING;
}
return 0;
}
/*
* Check referencer for normal (inlined) data ref
* If len == 0, it will be resolved by searching in extent tree
*/
static int check_extent_data_backref(u64 root_id, u64 objectid, u64 offset,
u64 bytenr, u64 len, u32 count)
{
struct btrfs_root *root;
struct btrfs_root *extent_root;
struct btrfs_key key;
struct btrfs_path path = { 0 };
struct extent_buffer *leaf;
struct btrfs_file_extent_item *fi;
u32 found_count = 0;
int slot;
int ret = 0;
if (!len) {
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = (u64)-1;
extent_root = btrfs_extent_root(gfs_info, bytenr);
ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
if (ret < 0)
goto out;
ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
if (ret)
goto out;
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
if (key.objectid != bytenr ||
key.type != BTRFS_EXTENT_ITEM_KEY)
goto out;
len = key.offset;
btrfs_release_path(&path);
}
key.objectid = root_id;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
root = btrfs_read_fs_root(gfs_info, &key);
if (IS_ERR(root))
goto out;
key.objectid = objectid;
key.type = BTRFS_EXTENT_DATA_KEY;
/*
* It can be nasty as data backref offset is
* file offset - file extent offset, which is smaller or
* equal to original backref offset. The only special case is
* overflow. So we need to special check and do further search.
*/
key.offset = offset & (1ULL << 63) ? 0 : offset;
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0)
goto out;
/*
* Search afterwards to get correct one
* NOTE: As we must do a comprehensive check on the data backref to
* make sure the dref count also matches, we must iterate all file
* extents for that inode.
*/
while (1) {
leaf = path.nodes[0];
slot = path.slots[0];
if (slot >= btrfs_header_nritems(leaf) ||
btrfs_header_owner(leaf) != root_id)
goto next;
/*
* For tree blocks have been relocated, data backref are
* shared instead of keyed. Do not account it.
*/
if (btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
/*
* skip the leaf to speed up.
*/
slot = btrfs_header_nritems(leaf);
goto next;
}
btrfs_item_key_to_cpu(leaf, &key, slot);
if (key.objectid != objectid ||
key.type != BTRFS_EXTENT_DATA_KEY)
break;
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
/*
* Except normal disk bytenr and disk num bytes, we still
* need to do extra check on dbackref offset as
* dbackref offset = file_offset - file_extent_offset
*
* Also, we must check the leaf owner.
* In case of shared tree blocks (snapshots) we can inherit
* leaves from source snapshot.
* In that case, reference from source snapshot should not
* count.
*/
if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
(u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
offset && btrfs_header_owner(leaf) == root_id)
found_count++;
next:
ret = btrfs_next_item(root, &path);
if (ret)
break;
}
out:
btrfs_release_path(&path);
if (found_count != count) {
error(
"extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
bytenr, len, root_id, objectid, offset, count,
found_count);
return REFERENCER_MISSING;
}
return 0;
}
/*
* Check if the referencer of a shared data backref exists
*/
static int check_shared_data_backref(u64 parent, u64 bytenr)
{
struct extent_buffer *eb;
struct btrfs_key key;
struct btrfs_file_extent_item *fi;
struct btrfs_tree_parent_check check = { 0 };
u32 nr;
int found_parent = 0;
int i;
eb = read_tree_block(gfs_info, parent, &check);
if (!extent_buffer_uptodate(eb))
goto out;
nr = btrfs_header_nritems(eb);
for (i = 0; i < nr; i++) {
btrfs_item_key_to_cpu(eb, &key, i);
if (key.type != BTRFS_EXTENT_DATA_KEY)
continue;
fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
continue;
if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
found_parent = 1;
break;
}
}
out:
free_extent_buffer(eb);
if (!found_parent) {
error("shared extent %llu referencer lost (parent: %llu)",
bytenr, parent);
return REFERENCER_MISSING;
}
return 0;
}
/*
* Only delete backref if REFERENCER_MISSING or REFERENCER_MISMATCH.
*
* Returns <0 error
* Returns >0 the backref was deleted but extent still exists
* Returns =0 the whole extent item was deleted
*/
static int repair_extent_item(struct btrfs_path *path, u64 bytenr, u64
num_bytes, u64 parent, u64 root_objectid, u64
owner, u64 offset)
{
struct btrfs_trans_handle *trans;
struct btrfs_key old_key;
struct btrfs_root *extent_root = btrfs_extent_root(gfs_info, bytenr);
int ret;
btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
ret = avoid_extents_overwrite();
if (ret)
return ret;
trans = btrfs_start_transaction(extent_root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error_msg(ERROR_MSG_START_TRANS, "%m");
goto out;
}
/* delete the backref */
ret = btrfs_free_extent(trans, bytenr, num_bytes, parent, root_objectid,
owner, offset);
if (!ret)
printf("Delete backref in extent [%llu %llu]\n",
bytenr, num_bytes);
else {
error("fail to delete backref in extent [%llu %llu]",
bytenr, num_bytes);
btrfs_abort_transaction(trans, ret);
goto out;
}
btrfs_commit_transaction(trans, extent_root);
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, extent_root, &old_key, path, 0, 0);
if (ret > 0) {
/* odd, there must be one block group before at least */
if (path->slots[0] == 0) {
ret = -EUCLEAN;
goto out;
}
/*
* btrfs_free_extent() has deleted the extent item,
* let path point to last checked item.
*/
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
path->slots[0] = btrfs_header_nritems(path->nodes[0]) - 1;
else
path->slots[0]--;
ret = 0;
} else if (ret == 0) {
ret = 1;
}
out:
return ret;
}
/*
* Reset generation for extent item specified by @path.
* Will try to grab the proper generation number from other sources, but if
* it fails, then use current transid as fallback.
*
* Returns < 0 for error.
* Return 0 if the generation is reset.
*/
static int repair_extent_item_generation(struct btrfs_path *path)
{
struct btrfs_trans_handle *trans;
struct btrfs_key key;
struct btrfs_extent_item *ei;
struct btrfs_root *extent_root;
u64 new_gen = 0;;
int ret;
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
UASSERT(key.type == BTRFS_METADATA_ITEM_KEY ||
key.type == BTRFS_EXTENT_ITEM_KEY);
get_extent_item_generation(key.objectid, &new_gen);
ret = avoid_extents_overwrite();
if (ret)
return ret;
btrfs_release_path(path);
extent_root = btrfs_extent_root(gfs_info, key.objectid);
trans = btrfs_start_transaction(extent_root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error_msg(ERROR_MSG_START_TRANS, "%m");
return ret;
}
ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1);
if (ret > 0)
ret = -ENOENT;
if (ret < 0) {
errno = -ret;
error("failed to locate extent item for %llu: %m", key.objectid);
btrfs_abort_transaction(trans, ret);
return ret;
}
if (!new_gen)
new_gen = trans->transid;
ei = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_extent_item);
btrfs_set_extent_generation(path->nodes[0], ei, new_gen);
ret = btrfs_commit_transaction(trans, extent_root);
if (ret < 0) {
errno = -ret;
error_msg(ERROR_MSG_COMMIT_TRANS, "%m");
btrfs_abort_transaction(trans, ret);
return ret;
}
printf("Reset extent item (%llu) generation to %llu\n",
key.objectid, new_gen);
return ret;
}
/*
* This function will check a given extent item, including its backref and
* itself (like crossing stripe boundary and type)
*
* Since we don't use extent_record anymore, introduce new error bit
*/
static int check_extent_item(struct btrfs_path *path)
{
struct btrfs_extent_item *ei;
struct btrfs_extent_inline_ref *iref;
struct btrfs_extent_data_ref *dref;
struct extent_buffer *eb = path->nodes[0];
unsigned long ptr;
int slot = path->slots[0];
int type;
int last_type = 0;
u32 nodesize = btrfs_super_nodesize(gfs_info->super_copy);
u32 item_size = btrfs_item_size(eb, slot);
u64 flags;
u64 offset;
u64 parent;
u64 num_bytes;
u64 root_objectid;
u64 gen;
u64 owner;
u64 owner_offset;
u64 super_gen;
u64 seq;
u64 last_seq = U64_MAX;
int metadata = 0;
/* To handle corrupted values in skinny backref */
u64 level;
struct btrfs_key key;
int ret;
int err = 0;
int tmp_err = 0;
u32 ptr_offset;
btrfs_item_key_to_cpu(eb, &key, slot);
if (key.type == BTRFS_EXTENT_ITEM_KEY) {
bytes_used += key.offset;
num_bytes = key.offset;
} else {
bytes_used += nodesize;
num_bytes = nodesize;
}
if (item_size < sizeof(*ei)) {
/*
* COMPAT_EXTENT_TREE_V0 case, but it's already a super
* old thing when on disk format is still un-determined.
* No need to care about it anymore
*/
error("unsupported COMPAT_EXTENT_TREE_V0 detected");
return -ENOTTY;
}
ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
flags = btrfs_extent_flags(eb, ei);
gen = btrfs_extent_generation(eb, ei);
super_gen = btrfs_super_generation(gfs_info->super_copy);
if (gen > super_gen + 1) {
error(
"invalid generation for extent %llu, have %llu expect (0, %llu]",
key.objectid, gen, super_gen + 1);
tmp_err |= INVALID_GENERATION;
}
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
metadata = 1;
if (metadata && check_crossing_stripes(gfs_info, key.objectid,
eb->len)) {
error("bad metadata [%llu, %llu) crossing stripe boundary",
key.objectid, key.objectid + nodesize);
err |= CROSSING_STRIPE_BOUNDARY;
}
if (metadata)
btrfs_check_subpage_eb_alignment(gfs_info, key.objectid, nodesize);
ptr = (unsigned long)(ei + 1);
if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
/* Old EXTENT_ITEM metadata */
struct btrfs_tree_block_info *info;
info = (struct btrfs_tree_block_info *)ptr;
level = btrfs_tree_block_level(eb, info);
ptr += sizeof(struct btrfs_tree_block_info);
} else {
/* New METADATA_ITEM */
level = key.offset;
}
if (metadata && level >= BTRFS_MAX_LEVEL) {
error(
"tree block %llu has bad backref level, has %llu expect [0, %u]",
key.objectid, level, BTRFS_MAX_LEVEL - 1);
err |= BACKREF_MISMATCH;
/* This is a critical error, exit right now */
goto out;
}
ptr_offset = ptr - (unsigned long)ei;
next:
/* Reached extent item end normally */
if (ptr_offset == item_size)
goto out;
/* Beyond extent item end, wrong item size */
if (ptr_offset > item_size) {
err |= ITEM_SIZE_MISMATCH;
error("extent item at bytenr %llu slot %d has wrong size",
eb->start, slot);
goto out;
}
ptr = (unsigned long)ei + ptr_offset;
parent = 0;
root_objectid = 0;
owner = 0;
owner_offset = 0;
/* Now check every backref in this extent item */
iref = (struct btrfs_extent_inline_ref *)ptr;
type = btrfs_extent_inline_ref_type(eb, iref);
offset = btrfs_extent_inline_ref_offset(eb, iref);
if (type == BTRFS_EXTENT_DATA_REF_KEY) {
dref = (struct btrfs_extent_data_ref *)(&iref->offset);
seq = hash_extent_data_ref(
btrfs_extent_data_ref_root(eb, dref),
btrfs_extent_data_ref_objectid(eb, dref),
btrfs_extent_data_ref_offset(eb, dref));
} else {
seq = offset;
}
/*
* The @type should be ascending, while inside the same type, the
* @seq should be descending.
*/
if (type < last_type)
tmp_err |= BACKREF_OUT_OF_ORDER;
else if (type > last_type)
last_seq = U64_MAX;
if (seq > last_seq)
tmp_err |= BACKREF_OUT_OF_ORDER;
if (tmp_err & BACKREF_OUT_OF_ORDER)
error(
"inline extent backref (type %u seq 0x%llx) of extent [%llu %u %llu] is out of order",
type, seq, key.objectid, key.type, key.offset);
switch (type) {
case BTRFS_TREE_BLOCK_REF_KEY:
root_objectid = offset;
owner = level;
tmp_err |= check_tree_block_backref(offset, key.objectid, level);
break;
case BTRFS_SHARED_BLOCK_REF_KEY:
parent = offset;
tmp_err |= check_shared_block_backref(offset, key.objectid, level);
break;
case BTRFS_EXTENT_DATA_REF_KEY:
dref = (struct btrfs_extent_data_ref *)(&iref->offset);
root_objectid = btrfs_extent_data_ref_root(eb, dref);
owner = btrfs_extent_data_ref_objectid(eb, dref);
owner_offset = btrfs_extent_data_ref_offset(eb, dref);
tmp_err |= check_extent_data_backref(root_objectid,
owner, owner_offset, key.objectid, key.offset,
btrfs_extent_data_ref_count(eb, dref));
break;
case BTRFS_SHARED_DATA_REF_KEY:
parent = offset;
tmp_err |= check_shared_data_backref(offset, key.objectid);
break;
default:
error("extent[%llu %d %llu] has unknown ref type: %d",
key.objectid, key.type, key.offset, type);
err |= UNKNOWN_TYPE;
goto out;
}
if ((tmp_err & (REFERENCER_MISSING | REFERENCER_MISMATCH))
&& opt_check_repair) {
ret = repair_extent_item(path, key.objectid, num_bytes, parent,
root_objectid, owner, owner_offset);
if (ret < 0) {
err |= tmp_err;
err |= FATAL_ERROR;
goto out;
} else if (ret == 0) {
err = 0;
goto out;
} else if (ret > 0) {
/*
* The error has been repaired which means the
* extent item is still existed with other backrefs,
* go to check next.
*/
tmp_err &= ~REFERENCER_MISSING;
tmp_err &= ~REFERENCER_MISMATCH;
err |= tmp_err;
eb = path->nodes[0];
slot = path->slots[0];
ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
item_size = btrfs_item_size(eb, slot);
goto next;
}
}
if ((tmp_err & INVALID_GENERATION) && opt_check_repair){
ret = repair_extent_item_generation(path);
if (ret < 0) {
err |= tmp_err;
err |= FATAL_ERROR;
goto out;
}
/* Error has been repaired */
tmp_err &= ~INVALID_GENERATION;
err |= tmp_err;
eb = path->nodes[0];
slot = path->slots[0];
ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
item_size = btrfs_item_size(eb, slot);
ptr_offset += btrfs_extent_inline_ref_size(type);
goto next;
}
err |= tmp_err;
ptr_offset += btrfs_extent_inline_ref_size(type);
last_type = type;
last_seq = seq;
goto next;
out:
return err;
}
/*
* Check if a dev extent item is referred correctly by its chunk
*/
static int check_dev_extent_item(struct extent_buffer *eb, int slot)
{
struct btrfs_root *chunk_root = gfs_info->chunk_root;
struct btrfs_dev_extent *ptr;
struct btrfs_path path = { 0 };
struct btrfs_key chunk_key;
struct btrfs_key devext_key;
struct btrfs_chunk *chunk;
struct extent_buffer *l;
int num_stripes;
u64 length;
int i;
int found_chunk = 0;
int ret;
btrfs_item_key_to_cpu(eb, &devext_key, slot);
ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
length = btrfs_dev_extent_length(eb, ptr);
chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
if (ret)
goto out;
l = path.nodes[0];
chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
ret = btrfs_check_chunk_valid(l, chunk, chunk_key.offset);
if (ret < 0)
goto out;
if (btrfs_stripe_length(gfs_info, l, chunk) != length)
goto out;
num_stripes = btrfs_chunk_num_stripes(l, chunk);
for (i = 0; i < num_stripes; i++) {
u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
if (devid == devext_key.objectid &&
offset == devext_key.offset) {
found_chunk = 1;
break;
}
}
out:
btrfs_release_path(&path);
if (!found_chunk) {
error(
"device extent[%llu, %llu, %llu] did not find the related chunk",
devext_key.objectid, devext_key.offset, length);
return REFERENCER_MISSING;
}
return 0;
}
/*
* Check if the used space is correct with the dev item
*/
static int check_dev_item(struct extent_buffer *eb, int slot,
u64 *bytes_used_expected)
{
struct btrfs_root *dev_root = gfs_info->dev_root;
struct btrfs_dev_item *dev_item;
struct btrfs_path path = { 0 };
struct btrfs_key key;
struct btrfs_dev_extent *ptr;
struct btrfs_device *dev;
struct stat st;
u64 block_dev_size;
u64 total_bytes;
u64 dev_id;
u64 used;
u64 total = 0;
u64 prev_devid = 0;
u64 prev_dev_ext_end = 0;
int ret;
dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
dev_id = btrfs_device_id(eb, dev_item);
used = btrfs_device_bytes_used(eb, dev_item);
total_bytes = btrfs_device_total_bytes(eb, dev_item);
if (used > total_bytes) {
error(
"device %llu has incorrect used bytes %llu > total bytes %llu",
dev_id, used, total_bytes);
return ACCOUNTING_MISMATCH;
}
key.objectid = dev_id;
key.type = BTRFS_DEV_EXTENT_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
if (ret < 0) {
btrfs_item_key_to_cpu(eb, &key, slot);
error("cannot find any related dev extent for dev[%llu, %u, %llu]",
key.objectid, key.type, key.offset);
btrfs_release_path(&path);
return REFERENCER_MISSING;
}
/*
* Iterate dev_extents to calculate the used space of a device
*
* Also make sure no dev extents overlap and end beyond device boundary
*/
while (1) {
u64 devid;
u64 physical_offset;
u64 physical_len;
if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
goto next;
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
if (key.objectid > dev_id)
break;
if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
goto next;
ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
struct btrfs_dev_extent);
devid = key.objectid;
physical_offset = key.offset;
physical_len = btrfs_dev_extent_length(path.nodes[0], ptr);
if (prev_devid == devid && physical_offset < prev_dev_ext_end) {
error(
"dev extent devid %llu offset %llu len %llu overlap with previous dev extent end %llu",
devid, physical_offset, physical_len,
prev_dev_ext_end);
btrfs_release_path(&path);
return ACCOUNTING_MISMATCH;
}
if (physical_offset + physical_len > total_bytes) {
error(
"dev extent devid %llu offset %llu len %llu is beyond device boundary %llu",
devid, physical_offset, physical_len,
total_bytes);
btrfs_release_path(&path);
return ACCOUNTING_MISMATCH;
}
prev_devid = devid;
prev_dev_ext_end = physical_offset + physical_len;
total += physical_len;
next:
ret = btrfs_next_item(dev_root, &path);
if (ret)
break;
}
btrfs_release_path(&path);
*bytes_used_expected = total;
if (used != total) {
btrfs_item_key_to_cpu(eb, &key, slot);
error(
"Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
total, used, BTRFS_ROOT_TREE_OBJECTID,
BTRFS_DEV_EXTENT_KEY, dev_id);
return ACCOUNTING_MISMATCH;
}
check_dev_size_alignment(dev_id, total_bytes, gfs_info->sectorsize);
dev = btrfs_find_device_by_devid(gfs_info->fs_devices, dev_id, 0);
if (!dev || dev->fd < 0)
return 0;
ret = fstat(dev->fd, &st);
if (ret < 0) {
warning(
"unable to open devid %llu, skipping its block device size check",
dev->devid);
return 0;
}
block_dev_size = device_get_partition_size_fd_stat(dev->fd, &st);
if (block_dev_size < total_bytes) {
error(
"block device size is smaller than total_bytes in device item, has %llu expect >= %llu",
block_dev_size, total_bytes);
return ACCOUNTING_MISMATCH;
}
return 0;
}
/*
* Find the block group item with @bytenr, @len and @type
*
* Return 0 if found.
* Return -ENOENT if not found.
* Return <0 for fatal error.
*/
static int find_block_group_item(struct btrfs_path *path, u64 bytenr, u64 len,
u64 type)
{
struct btrfs_root *root = btrfs_block_group_root(gfs_info);
struct btrfs_block_group_item bgi;
struct btrfs_key key;
int ret;
key.objectid = bytenr;
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
key.offset = len;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
return ret;
if (ret > 0) {
ret = -ENOENT;
error("chunk [%llu %llu) doesn't have related block group item",
bytenr, bytenr + len);
goto out;
}
read_extent_buffer(path->nodes[0], &bgi,
btrfs_item_ptr_offset(path->nodes[0], path->slots[0]),
sizeof(bgi));
if (btrfs_stack_block_group_flags(&bgi) != type) {
error(
"chunk [%llu %llu) type mismatch with block group, block group has 0x%llx chunk has %llx",
bytenr, bytenr + len, btrfs_stack_block_group_flags(&bgi),
type);
ret = -EUCLEAN;
}
out:
btrfs_release_path(path);
return ret;
}
/*
* Check a chunk item.
* Including checking all referred dev_extents and block group
*/
static int check_chunk_item(struct extent_buffer *eb, int slot)
{
struct btrfs_root *dev_root = gfs_info->dev_root;
struct btrfs_path path = { 0 };
struct btrfs_key chunk_key;
struct btrfs_key devext_key;
struct btrfs_chunk *chunk;
struct extent_buffer *leaf;
struct btrfs_dev_extent *ptr;
u64 length;
u64 chunk_end;
u64 stripe_len;
u64 type;
int num_stripes;
u64 offset;
u64 objectid;
int i;
int ret;
int err = 0;
btrfs_item_key_to_cpu(eb, &chunk_key, slot);
chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
length = btrfs_chunk_length(eb, chunk);
chunk_end = chunk_key.offset + length;
if (!IS_ALIGNED(chunk_key.offset, BTRFS_STRIPE_LEN) ||
!IS_ALIGNED(length, BTRFS_STRIPE_LEN)) {
if (get_env_bool("BTRFS_PROGS_DEBUG_STRICT_CHUNK_ALIGNMENT")) {
error("chunk[%llu %llu) is not fully aligned to BTRFS_STRIPE_LEN (%u)",
chunk_key.offset, length, BTRFS_STRIPE_LEN);
err |= BYTES_UNALIGNED;
goto out;
}
warning("chunk[%llu %llu) is not fully aligned to BTRFS_STRIPE_LEN (%u)",
chunk_key.offset, length, BTRFS_STRIPE_LEN);
}
ret = btrfs_check_chunk_valid(eb, chunk, chunk_key.offset);
if (ret < 0) {
error("chunk[%llu %llu) is invalid", chunk_key.offset,
chunk_end);
err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
goto out;
}
type = btrfs_chunk_type(eb, chunk);
ret = find_block_group_item(&path, chunk_key.offset, length, type);
if (ret < 0)
err |= REFERENCER_MISSING;
num_stripes = btrfs_chunk_num_stripes(eb, chunk);
stripe_len = btrfs_stripe_length(gfs_info, eb, chunk);
for (i = 0; i < num_stripes; i++) {
btrfs_release_path(&path);
devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
devext_key.type = BTRFS_DEV_EXTENT_KEY;
devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
0, 0);
if (ret)
goto not_match_dev;
leaf = path.nodes[0];
ptr = btrfs_item_ptr(leaf, path.slots[0],
struct btrfs_dev_extent);
objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
if (objectid != chunk_key.objectid ||
offset != chunk_key.offset ||
btrfs_dev_extent_length(leaf, ptr) != stripe_len)
goto not_match_dev;
continue;
not_match_dev:
err |= BACKREF_MISSING;
error(
"chunk[%llu %llu) stripe %d did not find the related dev extent",
chunk_key.objectid, chunk_end, i);
continue;
}
btrfs_release_path(&path);
out:
return err;
}
/*
* Add block group item to the extent tree if @err contains REFERENCER_MISSING.
* FIXME: We still need to repair error of dev_item.
*
* Returns error after repair.
*/
static int repair_chunk_item(struct btrfs_root *chunk_root,
struct btrfs_path *path, int err)
{
struct btrfs_chunk *chunk;
struct btrfs_key chunk_key;
struct extent_buffer *eb = path->nodes[0];
struct btrfs_root *extent_root;
struct btrfs_trans_handle *trans;
u64 length;
int slot = path->slots[0];
u64 type;
int ret = 0;
btrfs_item_key_to_cpu(eb, &chunk_key, slot);
if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
return err;
extent_root = btrfs_extent_root(gfs_info, chunk_key.offset);
chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
type = btrfs_chunk_type(path->nodes[0], chunk);
length = btrfs_chunk_length(eb, chunk);
/* now repair only adds block group */
if ((err & REFERENCER_MISSING) == 0)
return err;
ret = avoid_extents_overwrite();
if (ret)
return ret;
trans = btrfs_start_transaction(extent_root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error_msg(ERROR_MSG_START_TRANS, "%m");
return ret;
}
ret = btrfs_make_block_group(trans, gfs_info, 0, type,
chunk_key.offset, length);
if (ret) {
error("fail to add block group item [%llu %llu]",
chunk_key.offset, length);
} else {
err &= ~REFERENCER_MISSING;
printf("Added block group item[%llu %llu]\n", chunk_key.offset,
length);
}
btrfs_commit_transaction(trans, extent_root);
if (ret)
error("fail to repair item(s) related to chunk item [%llu %llu]",
chunk_key.objectid, chunk_key.offset);
return err;
}
/*
* Main entry function to check known items and update related accounting info
*/
static int check_leaf_items(struct btrfs_root *root, struct btrfs_path *path,
struct node_refs *nrefs, int account_bytes)
{
u64 bytes_used_expected = (u64)-1;
struct btrfs_key key;
struct extent_buffer *eb;
int slot;
int type;
struct btrfs_extent_data_ref *dref;
int ret = 0;
int err = 0;
again:
eb = path->nodes[0];
slot = path->slots[0];
if (slot >= btrfs_header_nritems(eb)) {
if (slot == 0) {
error("empty leaf [%llu %u] root %llu", eb->start,
gfs_info->nodesize, root->objectid);
err |= EIO;
}
goto out;
}
btrfs_item_key_to_cpu(eb, &key, slot);
type = key.type;
switch (type) {
case BTRFS_EXTENT_DATA_KEY:
ret = check_extent_data_item(root, path, nrefs, account_bytes);
if (opt_check_repair && ret)
ret = repair_extent_data_item(root, path, nrefs, ret);
err |= ret;
break;
case BTRFS_BLOCK_GROUP_ITEM_KEY:
ret = check_block_group_item(eb, slot);
if (opt_check_repair &&
ret & REFERENCER_MISSING)
ret = delete_item(root, path);
err |= ret;
break;
case BTRFS_DEV_ITEM_KEY:
ret = check_dev_item(eb, slot, &bytes_used_expected);
if (opt_check_repair && (ret & ACCOUNTING_MISMATCH) &&
bytes_used_expected != (u64)-1) {
ret = repair_dev_item_bytes_used(root->fs_info,
key.offset, bytes_used_expected);
if (ret < 0)
ret = ACCOUNTING_MISMATCH;
}
err |= ret;
break;
case BTRFS_CHUNK_ITEM_KEY:
ret = check_chunk_item(eb, slot);
if (opt_check_repair && ret)
ret = repair_chunk_item(root, path, ret);
err |= ret;
break;
case BTRFS_DEV_EXTENT_KEY:
ret = check_dev_extent_item(eb, slot);
err |= ret;
break;
case BTRFS_EXTENT_ITEM_KEY:
case BTRFS_METADATA_ITEM_KEY:
ret = check_extent_item(path);
err |= ret;
break;
case BTRFS_EXTENT_CSUM_KEY:
total_csum_bytes += btrfs_item_size(eb, slot);
err |= ret;
break;
case BTRFS_TREE_BLOCK_REF_KEY:
ret = check_tree_block_backref(key.offset, key.objectid, -1);
if (opt_check_repair &&
ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
ret = delete_item(root, path);
err |= ret;
break;
case BTRFS_EXTENT_DATA_REF_KEY:
dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
ret = check_extent_data_backref(
btrfs_extent_data_ref_root(eb, dref),
btrfs_extent_data_ref_objectid(eb, dref),
btrfs_extent_data_ref_offset(eb, dref),
key.objectid, 0,
btrfs_extent_data_ref_count(eb, dref));
if (opt_check_repair &&
ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
ret = delete_item(root, path);
err |= ret;
break;
case BTRFS_SHARED_BLOCK_REF_KEY:
ret = check_shared_block_backref(key.offset, key.objectid, -1);
if (opt_check_repair &&
ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
ret = delete_item(root, path);
err |= ret;
break;
case BTRFS_SHARED_DATA_REF_KEY:
ret = check_shared_data_backref(key.offset, key.objectid);
if (opt_check_repair &&
ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
ret = delete_item(root, path);
err |= ret;
break;
default:
break;
}
++path->slots[0];
goto again;
out:
return err;
}
/*
* @trans just for lowmem repair mode
* @check all if not 0 then check all tree block backrefs and items
* 0 then just check relationship of items in fs tree(s)
*
* Returns >0 Found error, should continue
* Returns <0 Fatal error, must exit the whole check
* Returns 0 No errors found
*/
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
int *level, struct node_refs *nrefs, int check_all)
{
enum btrfs_tree_block_status status;
u64 bytenr;
u64 ptr_gen;
struct extent_buffer *next;
struct extent_buffer *cur;
int ret;
int err = 0;
int check;
int account_file_data = 0;
WARN_ON(*level < 0);
WARN_ON(*level >= BTRFS_MAX_LEVEL);
ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
path->nodes[*level], nrefs, *level, check_all);
if (ret < 0)
return ret;
while (*level >= 0) {
WARN_ON(*level < 0);
WARN_ON(*level >= BTRFS_MAX_LEVEL);
cur = path->nodes[*level];
bytenr = btrfs_header_bytenr(cur);
check = nrefs->need_check[*level];
if (btrfs_header_level(cur) != *level)
WARN_ON(1);
/*
* Update bytes accounting and check tree block ref
* NOTE: Doing accounting and check before checking nritems
* is necessary because of empty node/leaf.
*/
if ((check_all && !nrefs->checked[*level]) ||
(!check_all && nrefs->need_check[*level])) {
ret = check_tree_block_ref(root, cur,
btrfs_header_bytenr(cur), btrfs_header_level(cur),
btrfs_header_owner(cur), nrefs);
if (opt_check_repair && ret)
ret = repair_tree_block_ref(root,
path->nodes[*level], nrefs, *level, ret);
err |= ret;
if (check_all && nrefs->need_check[*level] &&
nrefs->refs[*level]) {
account_bytes(root, path, *level);
account_file_data = 1;
}
nrefs->checked[*level] = 1;
}
if (path->slots[*level] >= btrfs_header_nritems(cur))
break;
/* Don't forgot to check leaf/node validation */
if (*level == 0) {
/* skip duplicate check */
if (check || !check_all) {
ret = btrfs_check_block_for_repair(cur, NULL);
if (ret != BTRFS_TREE_BLOCK_CLEAN) {
err |= -EIO;
break;
}
}
ret = 0;
if (!check_all)
ret = process_one_leaf(root, path, nrefs, level);
else
ret = check_leaf_items(root, path,
nrefs, account_file_data);
err |= ret;
break;
}
if (check || !check_all) {
ret = btrfs_check_block_for_repair(cur, NULL);
if (ret != BTRFS_TREE_BLOCK_CLEAN) {
err |= -EIO;
break;
}
}
bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
check_all);
if (ret < 0)
break;
/*
* check all trees in check_chunks_and_extent
* check shared node once in check_fs_roots
*/
if (!check_all && !nrefs->need_check[*level - 1]) {
path->slots[*level]++;
continue;
}
next = btrfs_find_tree_block(gfs_info, bytenr, gfs_info->nodesize);
if (!next || !btrfs_buffer_uptodate(next, ptr_gen, 0)) {
struct btrfs_tree_parent_check tree_check = {
.owner_root = btrfs_header_owner(cur),
.transid = ptr_gen,
.level = *level - 1,
};
free_extent_buffer(next);
reada_walk_down(root, cur, path->slots[*level]);
next = read_tree_block(gfs_info, bytenr, &tree_check);
if (!extent_buffer_uptodate(next)) {
struct btrfs_key node_key;
btrfs_node_key_to_cpu(path->nodes[*level],
&node_key,
path->slots[*level]);
btrfs_add_corrupt_extent_record(gfs_info,
&node_key, path->nodes[*level]->start,
gfs_info->nodesize, *level);
err |= -EIO;
break;
}
}
ret = check_child_node(cur, path->slots[*level], next);
err |= ret;
if (ret < 0)
break;
status = btrfs_check_block_for_repair(next, NULL);
if (status != BTRFS_TREE_BLOCK_CLEAN) {
free_extent_buffer(next);
err |= -EIO;
break;
}
*level = *level - 1;
free_extent_buffer(path->nodes[*level]);
path->nodes[*level] = next;
path->slots[*level] = 0;
account_file_data = 0;
update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
}
return err;
}
static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
int *level)
{
int i;
struct extent_buffer *leaf;
for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
leaf = path->nodes[i];
if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
path->slots[i]++;
*level = i;
return 0;
}
free_extent_buffer(path->nodes[*level]);
path->nodes[*level] = NULL;
*level = i + 1;
}
return 1;
}
/*
* Insert the missing inode item and inode ref.
*
* Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
* Root dir should be handled specially because root dir is the root of fs.
*
* returns err (>0 or 0) after repair
*/
static int repair_fs_first_inode(struct btrfs_root *root, int err)
{
struct btrfs_trans_handle *trans;
struct btrfs_key key;
struct btrfs_path path = { 0 };
int filetype = BTRFS_FT_DIR;
int ret = 0;
if (err & INODE_REF_MISSING) {
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.type = BTRFS_INODE_REF_KEY;
key.offset = BTRFS_FIRST_FREE_OBJECTID;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
btrfs_release_path(&path);
ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
if (ret)
goto trans_fail;
ret = btrfs_insert_inode_ref(trans, root, "..", 2,
BTRFS_FIRST_FREE_OBJECTID,
BTRFS_FIRST_FREE_OBJECTID, 0);
if (ret)
goto trans_fail;
printf("Add INODE_REF[%llu %llu] name %s\n",
BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
"..");
err &= ~INODE_REF_MISSING;
trans_fail:
if (ret)
error("fail to insert first inode's ref");
btrfs_commit_transaction(trans, root);
}
if (err & INODE_ITEM_MISSING) {
ret = repair_inode_item_missing(root,
BTRFS_FIRST_FREE_OBJECTID, filetype);
if (ret)
goto out;
err &= ~INODE_ITEM_MISSING;
}
out:
if (ret)
error("fail to repair first inode");
btrfs_release_path(&path);
return err;
}
/*
* check first root dir's inode_item and inode_ref
*
* returns 0 means no error
* returns >0 means error
* returns <0 means fatal error
*/
static int check_fs_first_inode(struct btrfs_root *root)
{
struct btrfs_path path = { 0 };
struct btrfs_key key;
struct btrfs_inode_item *ii;
u64 index;
u32 mode;
int err = 0;
int ret;
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
/* For root being dropped, we don't need to check first inode */
if (btrfs_root_refs(&root->root_item) == 0 &&
btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
BTRFS_FIRST_FREE_OBJECTID)
return 0;
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0)
goto out;
if (ret > 0) {
ret = 0;
err |= INODE_ITEM_MISSING;
} else {
ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
struct btrfs_inode_item);
mode = btrfs_inode_mode(path.nodes[0], ii);
if (imode_to_type(mode) != BTRFS_FT_DIR)
err |= INODE_ITEM_MISMATCH;
}
/* lookup first inode ref */
key.offset = BTRFS_FIRST_FREE_OBJECTID;
key.type = BTRFS_INODE_REF_KEY;
/* special index value */
index = 0;
ret = find_inode_ref(root, &key, "..", strlen(".."), &index);
if (ret < 0)
goto out;
err |= ret;
out:
btrfs_release_path(&path);
if (err && opt_check_repair)
err = repair_fs_first_inode(root, err);
if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
error("root dir INODE_ITEM is %s",
err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
if (err & INODE_REF_MISSING)
error("root dir INODE_REF is missing");
return ret < 0 ? ret : err;
}
/*
* This function calls walk_down_tree and walk_up_tree to check tree
* blocks and integrity of fs tree items.
*
* @root: the root of the tree to be checked.
* @account if NOT 0 means check the tree (including tree)'s treeblocks.
* otherwise means check fs tree(s) items relationship and
* @root MUST be a fs tree root.
* Returns 0 represents OK.
* Returns >0 represents error bits.
*/
static int check_btrfs_root(struct btrfs_root *root, int check_all)
{
struct btrfs_path path = { 0 };
struct node_refs nrefs;
struct btrfs_root_item *root_item = &root->root_item;
u64 super_generation = btrfs_super_generation(gfs_info->super_copy);
int ret;
int level;
int err = 0;
memset(&nrefs, 0, sizeof(nrefs));
if (!check_all) {
/*
* We need to manually check the first inode item (256)
* As the following traversal function will only start from
* the first inode item in the leaf, if inode item (256) is
* missing we will skip it forever.
*/
ret = check_fs_first_inode(root);
if (ret)
return FATAL_ERROR;
}
level = btrfs_header_level(root->node);
if (btrfs_root_generation(root_item) > super_generation + 1) {
error(
"invalid root generation for root %llu, have %llu expect (0, %llu)",
root->root_key.objectid, btrfs_root_generation(root_item),
super_generation + 1);
err |= INVALID_GENERATION;
if (opt_check_repair) {
root->node->flags |= EXTENT_BUFFER_BAD_TRANSID;
ret = recow_extent_buffer(root, root->node);
if (!ret) {
printf("Reset generation for root %llu\n",
root->root_key.objectid);
err &= ~INVALID_GENERATION;
}
}
}
if (btrfs_root_refs(root_item) > 0 ||
btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
path.nodes[level] = root->node;
path.slots[level] = 0;
extent_buffer_get(root->node);
} else {
struct btrfs_key key;
btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
level = root_item->drop_level;
path.lowest_level = level;
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0)
goto out;
ret = 0;
}
while (1) {
g_task_ctx.item_count++;
ret = walk_down_tree(root, &path, &level, &nrefs, check_all);
if (ret > 0)
err |= ret;
/* if ret is negative, walk shall stop */
if (ret < 0) {
ret = err | FATAL_ERROR;
break;
}
ret = walk_up_tree(root, &path, &level);
if (ret != 0) {
/* Normal exit, reset ret to err */
ret = err;
break;
}
}
out:
btrfs_release_path(&path);
return ret;
}
/*
* Iterate all items in the tree and call check_inode_item() to check.
*
* @root: the root of the tree to be checked.
*
* Return 0 if no error found.
* Return <0 for error.
*/
static int check_fs_root(struct btrfs_root *root)
{
reset_cached_block_groups();
return check_btrfs_root(root, 0);
}
/*
* Find the relative ref for root_ref and root_backref.
*
* @root: the root of the root tree.
* @ref_key: the key of the root ref.
*
* Return 0 if no error occurred.
*/
static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
struct extent_buffer *node, int slot)
{
struct btrfs_path path = { 0 };
struct btrfs_key key;
struct btrfs_root_ref *ref;
struct btrfs_root_ref *backref;
char ref_name[BTRFS_NAME_LEN] = {0};
char backref_name[BTRFS_NAME_LEN] = {0};
u64 ref_dirid;
u64 ref_seq;
u32 ref_namelen;
u64 backref_dirid;
u64 backref_seq;
u32 backref_namelen;
u32 len;
int ret;
int err = 0;
ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
ref_dirid = btrfs_root_ref_dirid(node, ref);
ref_seq = btrfs_root_ref_sequence(node, ref);
ref_namelen = btrfs_root_ref_name_len(node, ref);
if (ref_namelen <= BTRFS_NAME_LEN) {
len = ref_namelen;
} else {
len = BTRFS_NAME_LEN;
warning("%s[%llu %llu] ref_name too long",
ref_key->type == BTRFS_ROOT_REF_KEY ?
"ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
ref_key->offset);
}
read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
/* Find relative root_ref */
key.objectid = ref_key->offset;
key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
key.offset = ref_key->objectid;
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret) {
err |= ROOT_REF_MISSING;
error("%s[%llu %llu] couldn't find relative ref",
ref_key->type == BTRFS_ROOT_REF_KEY ?
"ROOT_REF" : "ROOT_BACKREF",
ref_key->objectid, ref_key->offset);
goto out;
}
backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
struct btrfs_root_ref);
backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
if (backref_namelen <= BTRFS_NAME_LEN) {
len = backref_namelen;
} else {
len = BTRFS_NAME_LEN;
warning("%s[%llu %llu] ref_name too long",
key.type == BTRFS_ROOT_REF_KEY ?
"ROOT_REF" : "ROOT_BACKREF",
key.objectid, key.offset);
}
read_extent_buffer(path.nodes[0], backref_name,
(unsigned long)(backref + 1), len);
if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
ref_namelen != backref_namelen ||
strncmp(ref_name, backref_name, len)) {
err |= ROOT_REF_MISMATCH;
error("%s[%llu %llu] mismatch relative ref",
ref_key->type == BTRFS_ROOT_REF_KEY ?
"ROOT_REF" : "ROOT_BACKREF",
ref_key->objectid, ref_key->offset);
}
out:
btrfs_release_path(&path);
return err;
}
/*
* Check all fs/file tree in low_memory mode.
*
* 1. for fs tree root item, call check_fs_root()
* 2. for fs tree root ref/backref, call check_root_ref()
*
* Return 0 if no error occurred.
*/
int check_fs_roots_lowmem(void)
{
struct btrfs_root *tree_root = gfs_info->tree_root;
struct btrfs_root *cur_root = NULL;
struct btrfs_path path = { 0 };
struct btrfs_key key;
struct extent_buffer *node;
int slot;
int ret;
int err = 0;
key.objectid = BTRFS_FS_TREE_OBJECTID;
key.offset = 0;
key.type = BTRFS_ROOT_ITEM_KEY;
ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
if (ret < 0) {
err = ret;
goto out;
} else if (ret > 0) {
err = -ENOENT;
goto out;
}
while (1) {
node = path.nodes[0];
slot = path.slots[0];
btrfs_item_key_to_cpu(node, &key, slot);
if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
goto out;
if (key.type == BTRFS_INODE_ITEM_KEY &&
is_fstree(key.objectid)) {
ret = check_repair_free_space_inode(&path);
/* Check if we still have a valid path to continue */
if (ret < 0 && path.nodes[0]) {
err |= ret;
goto next;
}
if (ret < 0 && !path.nodes[0])
goto out;
}
if (key.type == BTRFS_ROOT_ITEM_KEY &&
fs_root_objectid(key.objectid)) {
if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
cur_root = btrfs_read_fs_root_no_cache(gfs_info,
&key);
} else {
key.offset = (u64)-1;
cur_root = btrfs_read_fs_root(gfs_info, &key);
}
if (IS_ERR(cur_root)) {
error("Fail to read fs/subvol tree: %lld",
key.objectid);
err = -EIO;
goto next;
}
ret = check_fs_root(cur_root);
err |= ret;
if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
btrfs_free_fs_root(cur_root);
} else if (key.type == BTRFS_ROOT_REF_KEY ||
key.type == BTRFS_ROOT_BACKREF_KEY) {
ret = check_root_ref(tree_root, &key, node, slot);
err |= ret;
}
next:
/*
* In repair mode, our path is no longer reliable as CoW can
* happen. We need to reset our path.
*/
if (opt_check_repair) {
btrfs_release_path(&path);
ret = btrfs_search_slot(NULL, tree_root, &key, &path,
0, 0);
if (ret < 0) {
if (!err)
err = ret;
goto out;
}
if (ret > 0) {
/* Key not found, but already at next item */
if (path.slots[0] <
btrfs_header_nritems(path.nodes[0]))
continue;
/* falls through to next leaf */
}
}
ret = btrfs_next_item(tree_root, &path);
if (ret > 0)
goto out;
if (ret < 0) {
err = ret;
goto out;
}
}
out:
btrfs_release_path(&path);
return err;
}
/*
* Low memory usage version check_chunks_and_extents.
*/
int check_chunks_and_extents_lowmem(void)
{
struct btrfs_path path = { 0 };
struct btrfs_key old_key;
struct btrfs_key key;
struct btrfs_root *root;
struct btrfs_root *cur_root;
int err = 0;
int ret;
root = gfs_info->chunk_root;
ret = check_btrfs_root(root, 1);
err |= ret;
root = gfs_info->tree_root;
ret = check_btrfs_root(root, 1);
err |= ret;
key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
key.offset = 0;
key.type = BTRFS_ROOT_ITEM_KEY;
ret = btrfs_search_slot(NULL, gfs_info->tree_root, &key, &path, 0, 0);
if (ret) {
error("cannot find extent tree in tree_root");
goto out;
}
while (1) {
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
if (key.type != BTRFS_ROOT_ITEM_KEY)
goto next;
old_key = key;
key.offset = (u64)-1;
if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
cur_root = btrfs_read_fs_root_no_cache(gfs_info,
&key);
else
cur_root = btrfs_read_fs_root(gfs_info, &key);
if (IS_ERR(cur_root) || !cur_root) {
error("failed to read tree: %lld", key.objectid);
goto next;
}
ret = check_btrfs_root(cur_root, 1);
err |= ret;
if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
btrfs_free_fs_root(cur_root);
btrfs_release_path(&path);
ret = btrfs_search_slot(NULL, gfs_info->tree_root,
&old_key, &path, 0, 0);
if (ret)
goto out;
next:
ret = btrfs_next_item(gfs_info->tree_root, &path);
if (ret)
goto out;
}
out:
if (total_used != btrfs_super_bytes_used(gfs_info->super_copy)) {
fprintf(stderr,
"super bytes_used %llu mismatches actual used %llu\n",
btrfs_super_bytes_used(gfs_info->super_copy),
total_used);
err |= SUPER_BYTES_USED_ERROR;
}
if (opt_check_repair) {
ret = end_avoid_extents_overwrite();
if (ret < 0)
ret = FATAL_ERROR;
err |= ret;
reset_cached_block_groups();
/* update block accounting */
ret = repair_block_accounting();
if (ret)
err |= ret;
else
err &= ~(BG_ACCOUNTING_ERROR | SUPER_BYTES_USED_ERROR);
}
btrfs_release_path(&path);
return err;
}