/* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License v2 as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this program; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 021110-1307, USA. */ #include #include "kernel-shared/ctree.h" #include "common/internal.h" #include "common/messages.h" #include "kernel-shared/transaction.h" #include "common/utils.h" #include "kernel-shared/disk-io.h" #include "kernel-shared/volumes.h" #include "common/repair.h" #include "check/mode-common.h" /* * Check if the inode referenced by the given data reference uses the extent * at disk_bytenr as a non-prealloc extent. * * Returns 1 if true, 0 if false and < 0 on error. */ static int check_prealloc_data_ref(u64 disk_bytenr, struct btrfs_extent_data_ref *dref, struct extent_buffer *eb) { u64 rootid = btrfs_extent_data_ref_root(eb, dref); u64 objectid = btrfs_extent_data_ref_objectid(eb, dref); u64 offset = btrfs_extent_data_ref_offset(eb, dref); struct btrfs_root *root; struct btrfs_key key; struct btrfs_path path; int ret; btrfs_init_path(&path); key.objectid = rootid; key.type = BTRFS_ROOT_ITEM_KEY; key.offset = (u64)-1; root = btrfs_read_fs_root(gfs_info, &key); if (IS_ERR(root)) { ret = PTR_ERR(root); goto out; } key.objectid = objectid; key.type = BTRFS_EXTENT_DATA_KEY; key.offset = offset; ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); if (ret > 0) { fprintf(stderr, "Missing file extent item for inode %llu, root %llu, offset %llu", objectid, rootid, offset); ret = -ENOENT; } if (ret < 0) goto out; while (true) { struct btrfs_file_extent_item *fi; int extent_type; if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { ret = btrfs_next_leaf(root, &path); if (ret < 0) goto out; if (ret > 0) break; } btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY) break; fi = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_file_extent_item); extent_type = btrfs_file_extent_type(path.nodes[0], fi); if (extent_type != BTRFS_FILE_EXTENT_REG && extent_type != BTRFS_FILE_EXTENT_PREALLOC) goto next; if (btrfs_file_extent_disk_bytenr(path.nodes[0], fi) != disk_bytenr) break; if (extent_type == BTRFS_FILE_EXTENT_REG) { ret = 1; goto out; } next: path.slots[0]++; } ret = 0; out: btrfs_release_path(&path); return ret; } /* * Check if a shared data reference points to a node that has a file extent item * pointing to the extent at @disk_bytenr that is not of type prealloc. * * Returns 1 if true, 0 if false and < 0 on error. */ static int check_prealloc_shared_data_ref(u64 parent, u64 disk_bytenr) { struct extent_buffer *eb; u32 nr; int i; int ret = 0; eb = read_tree_block(gfs_info, parent, 0); if (!extent_buffer_uptodate(eb)) { ret = -EIO; goto out; } nr = btrfs_header_nritems(eb); for (i = 0; i < nr; i++) { struct btrfs_key key; struct btrfs_file_extent_item *fi; int extent_type; btrfs_item_key_to_cpu(eb, &key, i); if (key.type != BTRFS_EXTENT_DATA_KEY) continue; fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); extent_type = btrfs_file_extent_type(eb, fi); if (extent_type != BTRFS_FILE_EXTENT_REG && extent_type != BTRFS_FILE_EXTENT_PREALLOC) continue; if (btrfs_file_extent_disk_bytenr(eb, fi) == disk_bytenr && extent_type == BTRFS_FILE_EXTENT_REG) { ret = 1; break; } } out: free_extent_buffer(eb); return ret; } /* * Check if a prealloc extent is shared by multiple inodes and if any inode has * already written to that extent. This is to avoid emitting invalid warnings * about odd csum items (a inode has an extent entirely marked as prealloc * but another inode shares it and has already written to it). * * Note: right now it does not check if the number of checksum items in the * csum tree matches the number of bytes written into the ex-prealloc extent. * It's complex to deal with that because the prealloc extent might have been * partially written through multiple inodes and we would have to keep track of * ranges, merging them and notice ranges that fully or partially overlap, to * avoid false reports of csum items missing for areas of the prealloc extent * that were not written to - for example if we have a 1M prealloc extent, we * can have only the first half of it written, but 2 different inodes refer to * the its first half (through reflinks/cloning), so keeping a counter of bytes * covered by checksum items is not enough, as the correct value would be 512K * and not 1M (whence the need to track ranges). * * Returns 0 if the prealloc extent was not written yet by any inode, 1 if * at least one other inode has written to it, and < 0 on error. */ int check_prealloc_extent_written(u64 disk_bytenr, u64 num_bytes) { struct btrfs_path path; struct btrfs_key key; int ret; struct btrfs_extent_item *ei; u32 item_size; unsigned long ptr; unsigned long end; key.objectid = disk_bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = num_bytes; btrfs_init_path(&path); ret = btrfs_search_slot(NULL, gfs_info->extent_root, &key, &path, 0, 0); if (ret > 0) { fprintf(stderr, "Missing extent item in extent tree for disk_bytenr %llu, num_bytes %llu\n", disk_bytenr, num_bytes); ret = -ENOENT; } if (ret < 0) goto out; /* First check all inline refs. */ ei = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_extent_item); item_size = btrfs_item_size_nr(path.nodes[0], path.slots[0]); ptr = (unsigned long)(ei + 1); end = (unsigned long)ei + item_size; while (ptr < end) { struct btrfs_extent_inline_ref *iref; int type; iref = (struct btrfs_extent_inline_ref *)ptr; type = btrfs_extent_inline_ref_type(path.nodes[0], iref); ASSERT(type == BTRFS_EXTENT_DATA_REF_KEY || type == BTRFS_SHARED_DATA_REF_KEY); if (type == BTRFS_EXTENT_DATA_REF_KEY) { struct btrfs_extent_data_ref *dref; dref = (struct btrfs_extent_data_ref *)(&iref->offset); ret = check_prealloc_data_ref(disk_bytenr, dref, path.nodes[0]); if (ret != 0) goto out; } else if (type == BTRFS_SHARED_DATA_REF_KEY) { u64 parent; parent = btrfs_extent_inline_ref_offset(path.nodes[0], iref); ret = check_prealloc_shared_data_ref(parent, disk_bytenr); if (ret != 0) goto out; } ptr += btrfs_extent_inline_ref_size(type); } /* Now check if there are any non-inlined refs. */ path.slots[0]++; while (true) { if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { ret = btrfs_next_leaf(gfs_info->extent_root, &path); if (ret < 0) goto out; if (ret > 0) { ret = 0; break; } } btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); if (key.objectid != disk_bytenr) break; if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { struct btrfs_extent_data_ref *dref; dref = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_extent_data_ref); ret = check_prealloc_data_ref(disk_bytenr, dref, path.nodes[0]); if (ret != 0) goto out; } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { ret = check_prealloc_shared_data_ref(key.offset, disk_bytenr); if (ret != 0) goto out; } path.slots[0]++; } out: btrfs_release_path(&path); return ret; } /* * Search in csum tree to find how many bytes of range [@start, @start + @len) * has the corresponding csum item. * * @start: range start * @len: range length * @found: return value of found csum bytes * unit is BYTE. */ int count_csum_range(u64 start, u64 len, u64 *found) { struct btrfs_key key; struct btrfs_path path; struct extent_buffer *leaf; int ret; size_t size; *found = 0; u64 csum_end; u16 csum_size = btrfs_super_csum_size(gfs_info->super_copy); btrfs_init_path(&path); key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; key.offset = start; key.type = BTRFS_EXTENT_CSUM_KEY; ret = btrfs_search_slot(NULL, gfs_info->csum_root, &key, &path, 0, 0); if (ret < 0) goto out; if (ret > 0 && path.slots[0] > 0) { leaf = path.nodes[0]; btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1); if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID && key.type == BTRFS_EXTENT_CSUM_KEY) path.slots[0]--; } while (len > 0) { leaf = path.nodes[0]; if (path.slots[0] >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(gfs_info->csum_root, &path); if (ret > 0) break; else if (ret < 0) goto out; leaf = path.nodes[0]; } btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || key.type != BTRFS_EXTENT_CSUM_KEY) break; btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); if (key.offset >= start + len) break; if (key.offset > start) start = key.offset; size = btrfs_item_size_nr(leaf, path.slots[0]); csum_end = key.offset + (size / csum_size) * gfs_info->sectorsize; if (csum_end > start) { size = min(csum_end - start, len); len -= size; start += size; *found += size; } path.slots[0]++; } out: btrfs_release_path(&path); if (ret < 0) return ret; return 0; } /* * Wrapper to insert one inode item into given @root * Timestamp will be set to current time. * * @root: the root to insert inode item into * @ino: inode number * @size: inode size * @nbytes: nbytes (real used size, without hole) * @nlink: number of links * @mode: file mode, including S_IF* bits */ int insert_inode_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 ino, u64 size, u64 nbytes, u64 nlink, u32 mode) { struct btrfs_inode_item ii; time_t now = time(NULL); int ret; memset(&ii, 0, sizeof(ii)); btrfs_set_stack_inode_size(&ii, size); btrfs_set_stack_inode_nbytes(&ii, nbytes); btrfs_set_stack_inode_nlink(&ii, nlink); btrfs_set_stack_inode_mode(&ii, mode); btrfs_set_stack_inode_generation(&ii, trans->transid); btrfs_set_stack_timespec_sec(&ii.ctime, now); btrfs_set_stack_timespec_sec(&ii.mtime, now); ret = btrfs_insert_inode(trans, root, ino, &ii); ASSERT(!ret); warning("root %llu inode %llu recreating inode item, this may " "be incomplete, please check permissions and content after " "the fsck completes.\n", (unsigned long long)root->objectid, (unsigned long long)ino); return 0; } static int get_highest_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 *highest_ino) { struct btrfs_key key, found_key; int ret; btrfs_init_path(path); key.objectid = BTRFS_LAST_FREE_OBJECTID; key.offset = -1; key.type = BTRFS_INODE_ITEM_KEY; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret == 1) { btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0] - 1); *highest_ino = found_key.objectid; ret = 0; } if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID) ret = -EOVERFLOW; btrfs_release_path(path); return ret; } /* * Link inode to dir 'lost+found'. Increase @ref_count. * * Returns 0 means success. * Returns <0 means failure. */ int link_inode_to_lostfound(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 ino, char *namebuf, u32 name_len, u8 filetype, u64 *ref_count) { char *dir_name = "lost+found"; u64 lost_found_ino; int ret; u32 mode = 0700; btrfs_release_path(path); ret = get_highest_inode(trans, root, path, &lost_found_ino); if (ret < 0) goto out; lost_found_ino++; ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name), BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino, mode); if (ret < 0) { errno = -ret; error("failed to create '%s' dir: %m", dir_name); goto out; } ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf, name_len, filetype, NULL, 1, 0); /* * Add ".INO" suffix several times to handle case where * "FILENAME.INO" is already taken by another file. */ while (ret == -EEXIST) { /* * Conflicting file name, add ".INO" as suffix * +1 for '.' */ if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) { ret = -EFBIG; goto out; } snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len, ".%llu", ino); name_len += count_digits(ino) + 1; ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf, name_len, filetype, NULL, 1, 0); } if (ret < 0) { errno = -ret; error("failed to link the inode %llu to %s dir: %m", ino, dir_name); goto out; } ++*ref_count; printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n", name_len, namebuf, dir_name); out: btrfs_release_path(path); if (ret) error("failed to move file '%.*s' to '%s' dir", name_len, namebuf, dir_name); return ret; } /* * Extra (optional) check for dev_item size to report possible problem on a new * kernel. */ void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize) { if (!IS_ALIGNED(total_bytes, sectorsize)) { warning( "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u", devid, total_bytes, sectorsize); warning( "this is OK for older kernel, but may cause kernel warning for newer kernels"); warning("this can be fixed by 'btrfs rescue fix-device-size'"); } } void reada_walk_down(struct btrfs_root *root, struct extent_buffer *node, int slot) { u64 bytenr; u64 ptr_gen; u32 nritems; int i; int level; level = btrfs_header_level(node); if (level != 1) return; nritems = btrfs_header_nritems(node); for (i = slot; i < nritems; i++) { bytenr = btrfs_node_blockptr(node, i); ptr_gen = btrfs_node_ptr_generation(node, i); readahead_tree_block(gfs_info, bytenr, ptr_gen); } } /* * Check the child node/leaf by the following condition: * 1. the first item key of the node/leaf should be the same with the one * in parent. * 2. block in parent node should match the child node/leaf. * 3. generation of parent node and child's header should be consistent. * * Or the child node/leaf pointed by the key in parent is not valid. * * We hope to check leaf owner too, but since subvol may share leaves, * which makes leaf owner check not so strong, key check should be * sufficient enough for that case. */ int check_child_node(struct extent_buffer *parent, int slot, struct extent_buffer *child) { struct btrfs_key parent_key; struct btrfs_key child_key; int ret = 0; btrfs_node_key_to_cpu(parent, &parent_key, slot); if (btrfs_header_level(child) == 0) btrfs_item_key_to_cpu(child, &child_key, 0); else btrfs_node_key_to_cpu(child, &child_key, 0); if (memcmp(&parent_key, &child_key, sizeof(parent_key))) { ret = -EINVAL; fprintf(stderr, "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n", parent_key.objectid, parent_key.type, parent_key.offset, child_key.objectid, child_key.type, child_key.offset); } if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) { ret = -EINVAL; fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n", btrfs_node_blockptr(parent, slot), btrfs_header_bytenr(child)); } if (btrfs_node_ptr_generation(parent, slot) != btrfs_header_generation(child)) { ret = -EINVAL; fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n", btrfs_header_generation(child), btrfs_node_ptr_generation(parent, slot)); } return ret; } void reset_cached_block_groups() { struct btrfs_block_group *cache; u64 start, end; int ret; while (1) { ret = find_first_extent_bit(&gfs_info->free_space_cache, 0, &start, &end, EXTENT_DIRTY); if (ret) break; clear_extent_dirty(&gfs_info->free_space_cache, start, end); } start = 0; while (1) { cache = btrfs_lookup_first_block_group(gfs_info, start); if (!cache) break; if (cache->cached) cache->cached = 0; start = cache->start + cache->length; } } static int traverse_tree_blocks(struct extent_buffer *eb, int tree_root, int pin) { struct extent_buffer *tmp; struct btrfs_root_item *ri; struct btrfs_key key; struct extent_io_tree *tree; u64 bytenr; int level = btrfs_header_level(eb); int nritems; int ret; int i; u64 end = eb->start + eb->len; if (pin) tree = &gfs_info->pinned_extents; else tree = gfs_info->excluded_extents; /* * If we have pinned/excluded this block before, don't do it again. * This can not only avoid forever loop with broken filesystem * but also give us some speedups. */ if (test_range_bit(tree, eb->start, end - 1, EXTENT_DIRTY, 0)) return 0; if (pin) btrfs_pin_extent(gfs_info, eb->start, eb->len); else set_extent_dirty(tree, eb->start, end - 1); nritems = btrfs_header_nritems(eb); for (i = 0; i < nritems; i++) { if (level == 0) { bool is_extent_root; btrfs_item_key_to_cpu(eb, &key, i); if (key.type != BTRFS_ROOT_ITEM_KEY) continue; /* Skip the extent root and reloc roots */ if (key.objectid == BTRFS_TREE_RELOC_OBJECTID || key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) continue; is_extent_root = key.objectid == BTRFS_EXTENT_TREE_OBJECTID; /* If pin, skip the extent root */ if (pin && is_extent_root) continue; ri = btrfs_item_ptr(eb, i, struct btrfs_root_item); bytenr = btrfs_disk_root_bytenr(eb, ri); /* * If at any point we start needing the real root we * will have to build a stump root for the root we are * in, but for now this doesn't actually use the root so * just pass in extent_root. */ tmp = read_tree_block(gfs_info, bytenr, 0); if (!extent_buffer_uptodate(tmp)) { fprintf(stderr, "Error reading root block\n"); return -EIO; } ret = traverse_tree_blocks(tmp, 0, pin); free_extent_buffer(tmp); if (ret) return ret; } else { bytenr = btrfs_node_blockptr(eb, i); /* If we aren't the tree root don't read the block */ if (level == 1 && !tree_root) { if (pin) btrfs_pin_extent(gfs_info, bytenr, gfs_info->nodesize); else set_extent_dirty(tree, bytenr, gfs_info->nodesize); continue; } tmp = read_tree_block(gfs_info, bytenr, 0); if (!extent_buffer_uptodate(tmp)) { fprintf(stderr, "Error reading tree block\n"); return -EIO; } ret = traverse_tree_blocks(tmp, tree_root, pin); free_extent_buffer(tmp); if (ret) return ret; } } return 0; } static int pin_down_tree_blocks(struct extent_buffer *eb, int tree_root) { return traverse_tree_blocks(eb, tree_root, 1); } int pin_metadata_blocks(void) { int ret; ret = pin_down_tree_blocks(gfs_info->chunk_root->node, 0); if (ret) return ret; return pin_down_tree_blocks(gfs_info->tree_root->node, 1); } static int exclude_tree_blocks(struct extent_buffer *eb, int tree_root) { return traverse_tree_blocks(eb, tree_root, 0); } int exclude_metadata_blocks(void) { int ret; struct extent_io_tree *excluded_extents; excluded_extents = malloc(sizeof(*excluded_extents)); if (!excluded_extents) return -ENOMEM; extent_io_tree_init(excluded_extents); gfs_info->excluded_extents = excluded_extents; ret = exclude_tree_blocks(gfs_info->chunk_root->node, 0); if (ret) return ret; return exclude_tree_blocks(gfs_info->tree_root->node, 1); } void cleanup_excluded_extents(void) { if (gfs_info->excluded_extents) { extent_io_tree_cleanup(gfs_info->excluded_extents); free(gfs_info->excluded_extents); } gfs_info->excluded_extents = NULL; } /* * Delete one corrupted dir item whose hash doesn't match its name. * * Since its hash is incorrect, we can't use btrfs_name_hash() to calculate * the search key, but rely on @di_key parameter to do the search. */ int delete_corrupted_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *di_key, char *namebuf, u32 namelen) { struct btrfs_dir_item *di_item; struct btrfs_path path; int ret; btrfs_init_path(&path); ret = btrfs_search_slot(trans, root, di_key, &path, 0, 1); if (ret > 0) { error("key (%llu %u %llu) doesn't exist in root %llu", di_key->objectid, di_key->type, di_key->offset, root->root_key.objectid); ret = -ENOENT; goto out; } if (ret < 0) { error("failed to search root %llu: %d", root->root_key.objectid, ret); goto out; } di_item = btrfs_match_dir_item_name(root, &path, namebuf, namelen); if (!di_item) { /* * This is possible if the dir_item has incorrect namelen. * But in that case, we shouldn't reach repair path here. */ error("no dir item named '%s' found with key (%llu %u %llu)", namebuf, di_key->objectid, di_key->type, di_key->offset); ret = -ENOENT; goto out; } ret = btrfs_delete_one_dir_name(trans, root, &path, di_item); if (ret < 0) error("failed to delete one dir name: %d", ret); out: btrfs_release_path(&path); return ret; } /* * Reset the mode of inode (specified by @root and @ino) to @mode. * * Caller should ensure @path is not populated, the @path is mainly for caller * to grab the correct new path of the inode. * * Return 0 if repair is done, @path will point to the correct inode item. * Return <0 for errors. */ int reset_imode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 ino, u32 mode) { struct btrfs_inode_item *iitem; struct extent_buffer *leaf; struct btrfs_key key; int slot; int ret; key.objectid = ino; key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; ret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (ret > 0) ret = -ENOENT; if (ret < 0) { errno = -ret; error("failed to search tree %llu: %m", root->root_key.objectid); return ret; } leaf = path->nodes[0]; slot = path->slots[0]; iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item); btrfs_set_inode_mode(leaf, iitem, mode); btrfs_mark_buffer_dirty(leaf); return ret; } static int find_file_type_dir_index(struct btrfs_root *root, u64 ino, u64 dirid, u64 index, const char *name, u32 name_len, u32 *imode_ret) { struct btrfs_path path; struct btrfs_key key; struct btrfs_key location; struct btrfs_dir_item *di; char namebuf[BTRFS_NAME_LEN] = {0}; bool found = false; u8 filetype; u32 len; int ret; btrfs_init_path(&path); key.objectid = dirid; key.offset = index; key.type = BTRFS_DIR_INDEX_KEY; ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); if (ret > 0) { ret = -ENOENT; goto out; } if (ret < 0) goto out; di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item); btrfs_dir_item_key_to_cpu(path.nodes[0], di, &location); /* Various basic check */ if (location.objectid != ino || location.type != BTRFS_INODE_ITEM_KEY || location.offset != 0) goto out; filetype = btrfs_dir_type(path.nodes[0], di); if (filetype >= BTRFS_FT_MAX || filetype == BTRFS_FT_UNKNOWN) goto out; len = min_t(u32, BTRFS_NAME_LEN, btrfs_item_size_nr(path.nodes[0], path.slots[0]) - sizeof(*di)); len = min_t(u32, len, btrfs_dir_name_len(path.nodes[0], di)); read_extent_buffer(path.nodes[0], namebuf, (unsigned long)(di + 1), len); if (name_len != len || memcmp(namebuf, name, len)) goto out; found = true; *imode_ret = btrfs_type_to_imode(filetype); out: btrfs_release_path(&path); if (!found && !ret) ret = -ENOENT; return ret; } static int find_file_type_dir_item(struct btrfs_root *root, u64 ino, u64 dirid, const char *name, u32 name_len, u32 *imode_ret) { struct btrfs_path path; struct btrfs_key key; struct btrfs_key location; struct btrfs_dir_item *di; char namebuf[BTRFS_NAME_LEN] = {0}; bool found = false; unsigned long cur; unsigned long end; u8 filetype; u32 len; int ret; btrfs_init_path(&path); key.objectid = dirid; key.offset = btrfs_name_hash(name, name_len); key.type = BTRFS_DIR_INDEX_KEY; ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); if (ret > 0) { ret = -ENOENT; goto out; } if (ret < 0) goto out; cur = btrfs_item_ptr_offset(path.nodes[0], path.slots[0]); end = cur + btrfs_item_size_nr(path.nodes[0], path.slots[0]); while (cur < end) { di = (struct btrfs_dir_item *)cur; cur += btrfs_dir_name_len(path.nodes[0], di) + sizeof(*di); btrfs_dir_item_key_to_cpu(path.nodes[0], di, &location); /* Various basic check */ if (location.objectid != ino || location.type != BTRFS_INODE_ITEM_KEY || location.offset != 0) continue; filetype = btrfs_dir_type(path.nodes[0], di); if (filetype >= BTRFS_FT_MAX || filetype == BTRFS_FT_UNKNOWN) continue; len = min_t(u32, BTRFS_NAME_LEN, btrfs_item_size_nr(path.nodes[0], path.slots[0]) - sizeof(*di)); len = min_t(u32, len, btrfs_dir_name_len(path.nodes[0], di)); read_extent_buffer(path.nodes[0], namebuf, (unsigned long)(di + 1), len); if (name_len != len || memcmp(namebuf, name, len)) continue; *imode_ret = btrfs_type_to_imode(filetype); found = true; goto out; } out: btrfs_release_path(&path); if (!found && !ret) ret = -ENOENT; return ret; } static int find_file_type(struct btrfs_root *root, u64 ino, u64 dirid, u64 index, const char *name, u32 name_len, u32 *imode_ret) { int ret; ret = find_file_type_dir_index(root, ino, dirid, index, name, name_len, imode_ret); if (ret == 0) return ret; return find_file_type_dir_item(root, ino, dirid, name, name_len, imode_ret); } int detect_imode(struct btrfs_root *root, struct btrfs_path *path, u32 *imode_ret) { struct btrfs_key key; struct btrfs_inode_item iitem; bool found = false; u64 ino; u32 imode = 0; int ret = 0; btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); ino = key.objectid; read_extent_buffer(path->nodes[0], &iitem, btrfs_item_ptr_offset(path->nodes[0], path->slots[0]), sizeof(iitem)); /* root inode */ if (ino == BTRFS_FIRST_FREE_OBJECTID) { imode = S_IFDIR; found = true; goto out; } while (1) { struct btrfs_inode_ref *iref; struct extent_buffer *leaf; unsigned long cur; unsigned long end; char namebuf[BTRFS_NAME_LEN] = {0}; u64 index; u32 namelen; int slot; ret = btrfs_next_item(root, path); if (ret > 0) { /* falls back to rdev check */ ret = 0; goto out; } if (ret < 0) goto out; leaf = path->nodes[0]; slot = path->slots[0]; btrfs_item_key_to_cpu(leaf, &key, slot); if (key.objectid != ino) goto out; /* * We ignore some types to make life easier: * - XATTR * Both REG and DIR can have xattr, so not useful */ switch (key.type) { case BTRFS_INODE_REF_KEY: /* The most accurate way to determine filetype */ cur = btrfs_item_ptr_offset(leaf, slot); end = cur + btrfs_item_size_nr(leaf, slot); while (cur < end) { iref = (struct btrfs_inode_ref *)cur; namelen = min_t(u32, end - cur - sizeof(&iref), btrfs_inode_ref_name_len(leaf, iref)); index = btrfs_inode_ref_index(leaf, iref); read_extent_buffer(leaf, namebuf, (unsigned long)(iref + 1), namelen); ret = find_file_type(root, ino, key.offset, index, namebuf, namelen, &imode); if (ret == 0) { found = true; goto out; } cur += sizeof(*iref) + namelen; } break; case BTRFS_DIR_ITEM_KEY: case BTRFS_DIR_INDEX_KEY: imode = S_IFDIR; found = true; goto out; case BTRFS_EXTENT_DATA_KEY: /* * Both REG and LINK could have EXTENT_DATA. * We just fall back to REG as user can inspect the * content. */ imode = S_IFREG; found = true; goto out; } } out: /* * Both CHR and BLK uses rdev, no way to distinguish them, so fall back * to BLK. But either way it doesn't really matter, as CHR/BLK on btrfs * should be pretty rare, and no real data will be lost. */ if (!found && btrfs_stack_inode_rdev(&iitem) != 0) { imode = S_IFBLK; found = true; } if (found) { ret = 0; *imode_ret = (imode | 0700); } else { ret = -ENOENT; } return ret; } /* * Reset the inode mode of the inode specified by @path. * * Caller should ensure the @path is pointing to an INODE_ITEM and root is tree * root. Repair imode for other trees is not supported yet. * * Return 0 if repair is successful. * Return <0 if error happens. */ int repair_imode_common(struct btrfs_root *root, struct btrfs_path *path) { struct btrfs_trans_handle *trans; struct btrfs_key key; u32 imode; int ret; btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); ASSERT(key.type == BTRFS_INODE_ITEM_KEY); if (root->objectid == BTRFS_ROOT_TREE_OBJECTID) { /* In root tree we only have two possible imode */ if (key.objectid == BTRFS_ROOT_TREE_OBJECTID) imode = S_IFDIR | 0755; else imode = S_IFREG | 0600; } else { ret = detect_imode(root, path, &imode); if (ret < 0) return ret; } trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); errno = -ret; error("failed to start transaction: %m"); return ret; } btrfs_release_path(path); ret = reset_imode(trans, root, path, key.objectid, imode); if (ret < 0) goto abort; ret = btrfs_commit_transaction(trans, root); if (!ret) printf("reset mode for inode %llu root %llu\n", key.objectid, root->root_key.objectid); return ret; abort: btrfs_abort_transaction(trans, ret); return ret; } /* * For free space inodes, we can't call check_inode_item() as free space * cache inode doesn't have INODE_REF. * We just check its inode mode. */ int check_repair_free_space_inode(struct btrfs_path *path) { struct btrfs_inode_item *iitem; struct btrfs_key key; u32 mode; int ret = 0; btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); ASSERT(key.type == BTRFS_INODE_ITEM_KEY && is_fstree(key.objectid)); iitem = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_item); mode = btrfs_inode_mode(path->nodes[0], iitem); if (mode != FREE_SPACE_CACHE_INODE_MODE) { error( "free space cache inode %llu has invalid mode: has 0%o expect 0%o", key.objectid, mode, FREE_SPACE_CACHE_INODE_MODE); ret = -EUCLEAN; if (repair) { ret = repair_imode_common(gfs_info->tree_root, path); if (ret < 0) return ret; return ret; } } return ret; } int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb) { struct btrfs_path path; struct btrfs_trans_handle *trans; struct btrfs_key key; int ret; printf("Recowing metadata block %llu\n", eb->start); key.objectid = btrfs_header_owner(eb); key.type = BTRFS_ROOT_ITEM_KEY; key.offset = (u64)-1; root = btrfs_read_fs_root(gfs_info, &key); if (IS_ERR(root)) { fprintf(stderr, "Couldn't find owner root %llu\n", key.objectid); return PTR_ERR(root); } trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) return PTR_ERR(trans); btrfs_init_path(&path); path.lowest_level = btrfs_header_level(eb); if (path.lowest_level) btrfs_node_key_to_cpu(eb, &key, 0); else btrfs_item_key_to_cpu(eb, &key, 0); ret = btrfs_search_slot(trans, root, &key, &path, 0, 1); btrfs_commit_transaction(trans, root); btrfs_release_path(&path); return ret; } /* * Try to get correct extent item generation. * * Return 0 if we get a correct generation. * Return <0 if we failed to get one. */ int get_extent_item_generation(u64 bytenr, u64 *gen_ret) { struct btrfs_root *root = gfs_info->extent_root; struct btrfs_extent_item *ei; struct btrfs_path path; struct btrfs_key key; int ret; key.objectid = bytenr; key.type = BTRFS_METADATA_ITEM_KEY; key.offset = (u64)-1; btrfs_init_path(&path); ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); /* Not possible */ if (ret == 0) ret = -EUCLEAN; if (ret < 0) goto out; ret = btrfs_previous_extent_item(root, &path, bytenr); if (ret > 0) ret = -ENOENT; if (ret < 0) goto out; ei = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_extent_item); if (btrfs_extent_flags(path.nodes[0], ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK) { struct extent_buffer *eb; eb = read_tree_block(gfs_info, bytenr, 0); if (extent_buffer_uptodate(eb)) { *gen_ret = btrfs_header_generation(eb); ret = 0; } else { ret = -EIO; } free_extent_buffer(eb); } else { /* * TODO: Grab proper data generation for data extents. * But this is not an urgent objective, as we can still * use transaction id as fall back */ ret = -ENOTSUP; } out: btrfs_release_path(&path); return ret; } int repair_dev_item_bytes_used(struct btrfs_fs_info *fs_info, u64 devid, u64 bytes_used_expected) { struct btrfs_trans_handle *trans; struct btrfs_device *device; int ret; device = btrfs_find_device_by_devid(fs_info->fs_devices, devid, 0); if (!device) { error("failed to find device with devid %llu", devid); return -ENOENT; } /* Bytes_used matches, not what we can repair */ if (device->bytes_used == bytes_used_expected) return -ENOTSUP; /* * We have to set the device bytes_used right now, before starting a * new transaction, as it may allocate a new chunk and modify * device->bytes_used. */ device->bytes_used = bytes_used_expected; trans = btrfs_start_transaction(fs_info->chunk_root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); errno = -ret; error("failed to start transaction: %m"); return ret; } /* Manually update the device item in chunk tree */ ret = btrfs_update_device(trans, device); if (ret < 0) { errno = -ret; error("failed to update device item for devid %llu: %m", devid); goto error; } /* * Commit transaction not only to save the above change but also update * the device item in super block. */ ret = btrfs_commit_transaction(trans, fs_info->chunk_root); if (ret < 0) { errno = -ret; error("failed to commit transaction: %m"); } else { printf("reset devid %llu bytes_used to %llu\n", devid, device->bytes_used); } return ret; error: btrfs_abort_transaction(trans, ret); return ret; }