2017-01-27 14:47:17 +00:00
|
|
|
/*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public
|
|
|
|
* License v2 as published by the Free Software Foundation.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public
|
|
|
|
* License along with this program; if not, write to the
|
|
|
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
|
|
* Boston, MA 02111-1307, USA.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "kerncompat.h"
|
2022-09-15 11:53:43 +00:00
|
|
|
#include <errno.h>
|
|
|
|
#include <string.h>
|
2017-01-27 14:47:17 +00:00
|
|
|
#include <unistd.h>
|
2022-09-15 11:53:43 +00:00
|
|
|
#include "kernel-lib/sizes.h"
|
2023-08-28 20:12:13 +00:00
|
|
|
#include "kernel-shared/accessors.h"
|
|
|
|
#include "kernel-shared/ctree.h"
|
2020-08-18 13:56:04 +00:00
|
|
|
#include "kernel-shared/disk-io.h"
|
2020-08-18 13:56:04 +00:00
|
|
|
#include "kernel-shared/volumes.h"
|
btrfs-progs: convert: insert a dummy inode item before inode ref for ext2/4
[BUG]
There is a report about failed btrfs-convert, which shows the following
error:
Create btrfs metadata
corrupt leaf: root=5 block=5001931145216 slot=1 ino=89911763, invalid previous key objectid, have 89911762 expect 89911763
leaf 5001931145216 items 336 free space 7 generation 90 owner FS_TREE
leaf 5001931145216 flags 0x1(WRITTEN) backref revision 1
fs uuid 8b69f018-37c3-4b30-b859-42ccfcbe2449
chunk uuid 448ce78c-ea41-49f6-99dc-46ad80b93da9
item 0 key (89911762 INODE_REF 3858733) itemoff 16222 itemsize 61
index 171 namelen 51 name: [FILENAME1]
item 1 key (89911763 INODE_REF 3858733) itemoff 16161 itemsize 61
index 103 namelen 51 name: [FILENAME2]
[CAUSE]
When iterating a directory, btrfs-convert would insert the DIR_ITEMs,
along with the INODE_REF of that inode.
This leads to above stray INODE_REFs, and trigger the tree-checker.
This can only happen for large fs, as for most cases we have all these
modified tree blocks cached, thus tree-checker won't be triggered.
But when the tree block cache is not hit, and we have to read from disk,
then such behavior can lead to above tree-checker error.
[FIX]
Insert a dummy INODE_ITEM for the INODE_REF first, the inode items would
be updated when iterating the child inode of the directory.
Issue: #731
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2024-01-13 08:37:06 +00:00
|
|
|
#include "kernel-shared/transaction.h"
|
|
|
|
#include "common/utils.h"
|
2022-09-14 15:06:52 +00:00
|
|
|
#include "common/internal.h"
|
2022-09-30 12:44:38 +00:00
|
|
|
#include "common/messages.h"
|
2022-09-15 11:53:43 +00:00
|
|
|
#include "common/extent-cache.h"
|
2023-08-23 14:32:41 +00:00
|
|
|
#include "common/extent-tree-utils.h"
|
2017-01-27 14:47:17 +00:00
|
|
|
#include "convert/common.h"
|
|
|
|
#include "convert/source-fs.h"
|
|
|
|
|
2017-03-16 03:18:31 +00:00
|
|
|
const struct simple_range btrfs_reserved_ranges[3] = {
|
2017-02-22 08:46:18 +00:00
|
|
|
{ 0, SZ_1M },
|
|
|
|
{ BTRFS_SB_MIRROR_OFFSET(1), SZ_64K },
|
|
|
|
{ BTRFS_SB_MIRROR_OFFSET(2), SZ_64K }
|
|
|
|
};
|
|
|
|
|
2017-08-22 18:43:49 +00:00
|
|
|
/*
 * Unpack a 32-bit encoded device number into a dev_t.
 *
 * Bit layout consumed here:
 *   bits  8-19  major number
 *   bits  0-7   low byte of the minor number
 *   bits 20-31  remaining minor bits (end up in bits 8-19 of the minor)
 */
dev_t decode_dev(u32 dev)
{
	unsigned dev_major = (dev >> 8) & 0xfff;
	unsigned dev_minor = (dev >> 12) & 0xfff00;

	dev_minor |= dev & 0xff;

	return MKDEV(dev_major, dev_minor);
}
|
|
|
|
|
2017-08-22 18:34:36 +00:00
|
|
|
int ext2_acl_count(size_t size)
|
|
|
|
{
|
|
|
|
ssize_t s;
|
|
|
|
|
|
|
|
size -= sizeof(ext2_acl_header);
|
|
|
|
s = size - 4 * sizeof(ext2_acl_entry_short);
|
|
|
|
if (s < 0) {
|
|
|
|
if (size % sizeof(ext2_acl_entry_short))
|
|
|
|
return -1;
|
|
|
|
return size / sizeof(ext2_acl_entry_short);
|
|
|
|
} else {
|
|
|
|
if (s % sizeof(ext2_acl_entry))
|
|
|
|
return -1;
|
|
|
|
return s / sizeof(ext2_acl_entry) + 4;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-06-27 00:23:00 +00:00
|
|
|
/*
 * Find a reserved range overlapping [bytenr, bytenr + num_bytes).
 *
 * Walks btrfs_reserved_ranges in order and returns the first entry that
 * overlaps the given span, or NULL if none does.
 */
const struct simple_range *intersect_with_reserved(u64 bytenr, u64 num_bytes)
{
	u64 span_end = bytenr + num_bytes;
	int idx;

	for (idx = 0; idx < ARRAY_SIZE(btrfs_reserved_ranges); idx++) {
		const struct simple_range *candidate = &btrfs_reserved_ranges[idx];

		/* Reserved range ends at or before our start: no overlap */
		if (range_end(candidate) <= bytenr)
			continue;
		/* Our span ends at or before the reserved range starts */
		if (span_end <= candidate->start)
			continue;
		return candidate;
	}

	return NULL;
}
|
|
|
|
|
|
|
|
void init_convert_context(struct btrfs_convert_context *cctx)
|
|
|
|
{
|
2017-01-30 12:41:36 +00:00
|
|
|
memset(cctx, 0, sizeof(*cctx));
|
|
|
|
|
2017-02-01 14:06:04 +00:00
|
|
|
cache_tree_init(&cctx->used_space);
|
2017-01-27 14:47:17 +00:00
|
|
|
cache_tree_init(&cctx->data_chunks);
|
2017-02-01 14:06:04 +00:00
|
|
|
cache_tree_init(&cctx->free_space);
|
2020-07-29 08:40:38 +00:00
|
|
|
cache_tree_init(&cctx->free_space_initial);
|
2017-01-27 14:47:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void clean_convert_context(struct btrfs_convert_context *cctx)
|
|
|
|
{
|
2017-02-01 14:06:04 +00:00
|
|
|
free_extent_cache_tree(&cctx->used_space);
|
2017-01-27 14:47:17 +00:00
|
|
|
free_extent_cache_tree(&cctx->data_chunks);
|
2017-02-01 14:06:04 +00:00
|
|
|
free_extent_cache_tree(&cctx->free_space);
|
2020-07-29 08:40:38 +00:00
|
|
|
free_extent_cache_tree(&cctx->free_space_initial);
|
2017-01-27 14:47:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Feed one (disk_block, file_block) mapping into the run tracked by @idata.
 *
 * Consecutive blocks are accumulated in @idata (first_block, disk_block,
 * num_blocks).  The accumulated run is written out with record_file_blocks()
 * and a new run is started when:
 *   - a run exists and the block hits a reserved range or reaches
 *     idata->boundary,
 *   - the file block lies beyond the end of the current run, or
 *   - the disk block does not directly follow the current run.
 * A gap between the end of the old run and @file_block is recorded with
 * disk block 0.
 *
 * Returns 0 on success, the error from record_file_blocks(), or -EUCLEAN if
 * no block group covers the block.
 */
int block_iterate_proc(u64 disk_block, u64 file_block,
		       struct blk_iterate_data *idata)
{
	struct btrfs_root *root = idata->root;
	struct btrfs_block_group *cache;
	const struct simple_range *reserved;
	u32 sectorsize = root->fs_info->sectorsize;
	u64 bytenr = disk_block * sectorsize;
	int at_barrier;
	int ret;

	reserved = intersect_with_reserved(bytenr, sectorsize);
	at_barrier = reserved || disk_block >= idata->boundary;

	/* Fast path: the block simply extends the current run */
	if (!((idata->num_blocks > 0 && at_barrier) ||
	      (file_block > idata->first_block + idata->num_blocks) ||
	      (disk_block != idata->disk_block + idata->num_blocks))) {
		idata->num_blocks++;
		return 0;
	}

	/* Write out whatever has been accumulated so far */
	if (idata->num_blocks > 0) {
		ret = record_file_blocks(idata, idata->first_block,
					 idata->disk_block,
					 idata->num_blocks);
		if (ret)
			return ret;
		idata->first_block += idata->num_blocks;
		idata->num_blocks = 0;
	}

	/* Gap in the file before this block: record it with disk block 0 */
	if (file_block > idata->first_block) {
		ret = record_file_blocks(idata, idata->first_block,
					 0, file_block - idata->first_block);
		if (ret)
			return ret;
	}

	/* The next boundary is the end of the reserved range or block group */
	if (reserved) {
		bytenr = range_end(reserved);
	} else {
		cache = btrfs_lookup_block_group(root->fs_info, bytenr);
		if (!cache) {
			error("block group %llu not found", bytenr);
			return -EUCLEAN;
		}
		bytenr = cache->start + cache->length;
	}

	/* Start a new run containing just this block */
	idata->first_block = file_block;
	idata->disk_block = disk_block;
	idata->boundary = bytenr / sectorsize;
	idata->num_blocks++;

	return 0;
}
|
|
|
|
|
|
|
|
void init_blk_iterate_data(struct blk_iterate_data *data,
|
|
|
|
struct btrfs_trans_handle *trans,
|
|
|
|
struct btrfs_root *root,
|
|
|
|
struct btrfs_inode_item *inode,
|
|
|
|
u64 objectid, int checksum)
|
|
|
|
{
|
|
|
|
struct btrfs_key key;
|
|
|
|
|
|
|
|
data->trans = trans;
|
|
|
|
data->root = root;
|
|
|
|
data->inode = inode;
|
|
|
|
data->objectid = objectid;
|
|
|
|
data->first_block = 0;
|
|
|
|
data->disk_block = 0;
|
|
|
|
data->num_blocks = 0;
|
|
|
|
data->boundary = (u64)-1;
|
|
|
|
data->checksum = checksum;
|
|
|
|
data->errcode = 0;
|
|
|
|
|
|
|
|
key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
|
|
|
|
key.type = BTRFS_ROOT_ITEM_KEY;
|
|
|
|
key.offset = (u64)-1;
|
|
|
|
data->convert_root = btrfs_read_fs_root(root->fs_info, &key);
|
|
|
|
/* Impossible as we just opened it before */
|
|
|
|
BUG_ON(!data->convert_root || IS_ERR(data->convert_root));
|
|
|
|
data->convert_ino = BTRFS_FIRST_FREE_OBJECTID + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
int convert_insert_dirent(struct btrfs_trans_handle *trans,
|
|
|
|
struct btrfs_root *root,
|
|
|
|
const char *name, size_t name_len,
|
|
|
|
u64 dir, u64 objectid,
|
|
|
|
u8 file_type, u64 index_cnt,
|
|
|
|
struct btrfs_inode_item *inode)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
u64 inode_size;
|
btrfs-progs: convert: insert a dummy inode item before inode ref for ext2/4
[BUG]
There is a report about failed btrfs-convert, which shows the following
error:
Create btrfs metadata
corrupt leaf: root=5 block=5001931145216 slot=1 ino=89911763, invalid previous key objectid, have 89911762 expect 89911763
leaf 5001931145216 items 336 free space 7 generation 90 owner FS_TREE
leaf 5001931145216 flags 0x1(WRITTEN) backref revision 1
fs uuid 8b69f018-37c3-4b30-b859-42ccfcbe2449
chunk uuid 448ce78c-ea41-49f6-99dc-46ad80b93da9
item 0 key (89911762 INODE_REF 3858733) itemoff 16222 itemsize 61
index 171 namelen 51 name: [FILENAME1]
item 1 key (89911763 INODE_REF 3858733) itemoff 16161 itemsize 61
index 103 namelen 51 name: [FILENAME2]
[CAUSE]
When iterating a directory, btrfs-convert would insert the DIR_ITEMs,
along with the INODE_REF of that inode.
This leads to above stray INODE_REFs, and trigger the tree-checker.
This can only happen for large fs, as for most cases we have all these
modified tree blocks cached, thus tree-checker won't be triggered.
But when the tree block cache is not hit, and we have to read from disk,
then such behavior can lead to above tree-checker error.
[FIX]
Insert a dummy INODE_ITEM for the INODE_REF first, the inode items would
be updated when iterating the child inode of the directory.
Issue: #731
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2024-01-13 08:37:06 +00:00
|
|
|
struct btrfs_inode_item dummy_iitem = { 0 };
|
2017-01-27 14:47:17 +00:00
|
|
|
struct btrfs_key location = {
|
|
|
|
.objectid = objectid,
|
|
|
|
.type = BTRFS_INODE_ITEM_KEY,
|
2024-04-19 23:27:41 +00:00
|
|
|
.offset = 0,
|
2017-01-27 14:47:17 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
ret = btrfs_insert_dir_item(trans, root, name, name_len,
|
|
|
|
dir, &location, file_type, index_cnt);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
btrfs-progs: convert: insert a dummy inode item before inode ref for ext2/4
[BUG]
There is a report about failed btrfs-convert, which shows the following
error:
Create btrfs metadata
corrupt leaf: root=5 block=5001931145216 slot=1 ino=89911763, invalid previous key objectid, have 89911762 expect 89911763
leaf 5001931145216 items 336 free space 7 generation 90 owner FS_TREE
leaf 5001931145216 flags 0x1(WRITTEN) backref revision 1
fs uuid 8b69f018-37c3-4b30-b859-42ccfcbe2449
chunk uuid 448ce78c-ea41-49f6-99dc-46ad80b93da9
item 0 key (89911762 INODE_REF 3858733) itemoff 16222 itemsize 61
index 171 namelen 51 name: [FILENAME1]
item 1 key (89911763 INODE_REF 3858733) itemoff 16161 itemsize 61
index 103 namelen 51 name: [FILENAME2]
[CAUSE]
When iterating a directory, btrfs-convert would insert the DIR_ITEMs,
along with the INODE_REF of that inode.
This leads to above stray INODE_REFs, and trigger the tree-checker.
This can only happen for large fs, as for most cases we have all these
modified tree blocks cached, thus tree-checker won't be triggered.
But when the tree block cache is not hit, and we have to read from disk,
then such behavior can lead to above tree-checker error.
[FIX]
Insert a dummy INODE_ITEM for the INODE_REF first, the inode items would
be updated when iterating the child inode of the directory.
Issue: #731
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2024-01-13 08:37:06 +00:00
|
|
|
|
|
|
|
btrfs_set_stack_inode_mode(&dummy_iitem, btrfs_type_to_imode(file_type));
|
|
|
|
btrfs_set_stack_inode_generation(&dummy_iitem, trans->transid);
|
|
|
|
btrfs_set_stack_inode_transid(&dummy_iitem, trans->transid);
|
|
|
|
/*
|
|
|
|
* We must have an INOTE_ITEM before INODE_REF, or tree-checker won't
|
|
|
|
* be happy.
|
|
|
|
* The content of the INODE_ITEM would be properly updated when iterating
|
|
|
|
* that child inode, but we should still try to make it as valid as
|
|
|
|
* possible, or we may still trigger some tree checker.
|
|
|
|
*/
|
|
|
|
ret = btrfs_insert_inode(trans, root, objectid, &dummy_iitem);
|
|
|
|
/* The inode item is already there, just skip it. */
|
|
|
|
if (ret == -EEXIST)
|
|
|
|
ret = 0;
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
2017-01-27 14:47:17 +00:00
|
|
|
ret = btrfs_insert_inode_ref(trans, root, name, name_len,
|
|
|
|
objectid, dir, index_cnt);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
inode_size = btrfs_stack_inode_size(inode) + name_len * 2;
|
|
|
|
btrfs_set_stack_inode_size(inode, inode_size);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int read_disk_extent(struct btrfs_root *root, u64 bytenr,
|
|
|
|
u32 num_bytes, char *buffer)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
struct btrfs_fs_devices *fs_devs = root->fs_info->fs_devices;
|
|
|
|
|
|
|
|
ret = pread(fs_devs->latest_bdev, buffer, num_bytes, bytenr);
|
|
|
|
if (ret != num_bytes)
|
|
|
|
goto fail;
|
|
|
|
ret = 0;
|
|
|
|
fail:
|
|
|
|
if (ret > 0)
|
2018-09-14 07:25:04 +00:00
|
|
|
ret = -EIO;
|
2017-01-27 14:47:17 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Record a file extent in original filesystem into btrfs one.
|
|
|
|
* The special point is, old disk_block can point to a reserved range.
|
|
|
|
* So here, we don't use disk_block directly but search convert_root
|
|
|
|
* to get the real disk_bytenr.
|
|
|
|
*/
|
|
|
|
int record_file_blocks(struct blk_iterate_data *data,
|
|
|
|
u64 file_block, u64 disk_block, u64 num_blocks)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
struct btrfs_root *root = data->root;
|
|
|
|
struct btrfs_root *convert_root = data->convert_root;
|
2023-08-23 14:32:39 +00:00
|
|
|
struct btrfs_path path = { 0 };
|
2017-05-18 02:11:46 +00:00
|
|
|
u32 sectorsize = root->fs_info->sectorsize;
|
|
|
|
u64 file_pos = file_block * sectorsize;
|
|
|
|
u64 old_disk_bytenr = disk_block * sectorsize;
|
|
|
|
u64 num_bytes = num_blocks * sectorsize;
|
2017-01-27 14:47:17 +00:00
|
|
|
u64 cur_off = old_disk_bytenr;
|
|
|
|
|
|
|
|
/* Hole, pass it to record_file_extent directly */
|
|
|
|
if (old_disk_bytenr == 0)
|
|
|
|
return btrfs_record_file_extent(data->trans, root,
|
|
|
|
data->objectid, data->inode, file_pos, 0,
|
|
|
|
num_bytes);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Search real disk bytenr from convert root
|
|
|
|
*/
|
|
|
|
while (cur_off < old_disk_bytenr + num_bytes) {
|
|
|
|
struct btrfs_key key;
|
|
|
|
struct btrfs_file_extent_item *fi;
|
|
|
|
struct extent_buffer *node;
|
|
|
|
int slot;
|
|
|
|
u64 extent_disk_bytenr;
|
|
|
|
u64 extent_num_bytes;
|
|
|
|
u64 real_disk_bytenr;
|
|
|
|
u64 cur_len;
|
|
|
|
|
|
|
|
key.objectid = data->convert_ino;
|
|
|
|
key.type = BTRFS_EXTENT_DATA_KEY;
|
|
|
|
key.offset = cur_off;
|
|
|
|
|
|
|
|
ret = btrfs_search_slot(NULL, convert_root, &key, &path, 0, 0);
|
|
|
|
if (ret < 0)
|
|
|
|
break;
|
|
|
|
if (ret > 0) {
|
|
|
|
ret = btrfs_previous_item(convert_root, &path,
|
|
|
|
data->convert_ino,
|
|
|
|
BTRFS_EXTENT_DATA_KEY);
|
|
|
|
if (ret < 0)
|
|
|
|
break;
|
|
|
|
if (ret > 0) {
|
|
|
|
ret = -ENOENT;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
node = path.nodes[0];
|
|
|
|
slot = path.slots[0];
|
|
|
|
btrfs_item_key_to_cpu(node, &key, slot);
|
|
|
|
BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY ||
|
|
|
|
key.objectid != data->convert_ino ||
|
|
|
|
key.offset > cur_off);
|
|
|
|
fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
|
|
|
|
extent_disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
|
|
|
|
extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
|
|
|
|
BUG_ON(cur_off - key.offset >= extent_num_bytes);
|
|
|
|
btrfs_release_path(&path);
|
|
|
|
|
|
|
|
if (extent_disk_bytenr)
|
|
|
|
real_disk_bytenr = cur_off - key.offset +
|
|
|
|
extent_disk_bytenr;
|
|
|
|
else
|
|
|
|
real_disk_bytenr = 0;
|
|
|
|
cur_len = min(key.offset + extent_num_bytes,
|
|
|
|
old_disk_bytenr + num_bytes) - cur_off;
|
|
|
|
ret = btrfs_record_file_extent(data->trans, data->root,
|
|
|
|
data->objectid, data->inode, file_pos,
|
|
|
|
real_disk_bytenr, cur_len);
|
|
|
|
if (ret < 0)
|
|
|
|
break;
|
|
|
|
cur_off += cur_len;
|
|
|
|
file_pos += cur_len;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* No need to care about csum
|
|
|
|
* As every byte of old fs image is calculated for csum, no
|
|
|
|
* need to waste CPU cycles now.
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
btrfs_release_path(&path);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|