From 9fcb0efd664a52e613fbf7fa0fbbc4487eb01ed1 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Sun, 7 Feb 2021 19:23:44 +0100 Subject: [PATCH] btrfs-progs: btrfstune: experimental, new option to switch csums This is still work in progress but can survive some stress testing. There are still some sanity checks missing, do not use this on valuable data. To enable this, configure must be run with the experimental features enabled. $ mkfs.btrfs --csum crc32c /dev/sdx $ $ btrfstune --csum sha256 Will change the checksum to sha256. Implementation: - set bit on superblock when the checksums are being changed (similar to the uuid rewrite) - metadata checksums are overwritten in place - data checksums: - the checksum tree is completely deleted and no checksums are verified - data blocks are enumerated and all checksums generated (same as check --init-csum-tree) To make it usable, it should be restartable and track the current progress somehow. Also the previous data checksums should be verified any time they're available. 
Signed-off-by: David Sterba --- btrfstune.c | 335 ++++++++++++++++++++++++++++++++++++- kernel-shared/ctree.h | 4 + kernel-shared/disk-io.c | 18 +- kernel-shared/disk-io.h | 7 + kernel-shared/file-item.c | 15 ++ kernel-shared/print-tree.c | 17 ++ 6 files changed, 393 insertions(+), 3 deletions(-) diff --git a/btrfstune.c b/btrfstune.c index 4f77cfc0..33c83bf1 100644 --- a/btrfstune.c +++ b/btrfstune.c @@ -33,6 +33,7 @@ #include "common/utils.h" #include "kernel-shared/volumes.h" #include "common/open-utils.h" +#include "common/parse-utils.h" #include "common/device-scan.h" #include "common/help.h" #include "common/box.h" @@ -186,6 +187,313 @@ static int set_metadata_uuid(struct btrfs_root *root, const char *uuid_string) return btrfs_commit_transaction(trans, root); } +static int delete_csum_items(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) +{ + struct btrfs_root *root = btrfs_csum_root(fs_info, 0); + struct btrfs_path path; + struct btrfs_key key; + int nr; + int ret; + + btrfs_init_path(&path); + + key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; + key.type = BTRFS_EXTENT_CSUM_KEY; + key.offset = 0; + + while (1) { + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); + if (ret < 0) + goto out; + + nr = btrfs_header_nritems(path.nodes[0]); + if (!nr) + break; + + path.slots[0] = 0; + ret = btrfs_del_items(trans, root, &path, 0, nr); + if (ret) + goto out; + + btrfs_release_path(&path); + } + + ret = 0; +out: + btrfs_release_path(&path); + return ret; +} + +static int change_extents_csum(struct btrfs_fs_info *fs_info, int csum_type) +{ + struct btrfs_root *root = btrfs_extent_root(fs_info, 0); + struct btrfs_path path; + struct btrfs_key key = {0, 0, 0}; + int ret = 0; + + btrfs_init_path(&path); + /* + * Here we don't use transaction as it will takes a lot of reserve + * space, and that will make a near-full btrfs unable to change csums + */ + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) + goto out; + + while (1) { 
+ struct btrfs_extent_item *ei; + struct extent_buffer *eb; + u64 flags; + u64 bytenr; + + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + if (key.type != BTRFS_EXTENT_ITEM_KEY && + key.type != BTRFS_METADATA_ITEM_KEY) + goto next; + ei = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_extent_item); + flags = btrfs_extent_flags(path.nodes[0], ei); + if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) + goto next; + + bytenr = key.objectid; + eb = read_tree_block(fs_info, bytenr, 0); + if (IS_ERR(eb)) { + error("failed to read tree block: %llu", bytenr); + ret = PTR_ERR(eb); + goto out; + } + /* Only rewrite block */ + /* printf("CSUM: start %llu\n", eb->start); */ + ret = write_tree_block(NULL, fs_info, eb); + free_extent_buffer(eb); + if (ret < 0) { + error("failed to change csum of tree block: %llu", bytenr); + goto out; + } +next: + ret = btrfs_next_item(root, &path); + if (ret < 0) + goto out; + if (ret > 0) { + ret = 0; + goto out; + } + } + +out: + btrfs_release_path(&path); + return ret; +} + +static int change_devices_csum(struct btrfs_root *root, int csum_type) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_path path; + struct btrfs_key key = {0, 0, 0}; + int ret = 0; + + btrfs_init_path(&path); + /* No transaction again */ + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) + goto out; + + while (1) { + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + if (key.type != BTRFS_DEV_ITEM_KEY || + key.objectid != BTRFS_DEV_ITEMS_OBJECTID) + goto next; + /* Only rewrite block */ + ret = write_tree_block(NULL, fs_info, path.nodes[0]); + if (ret < 0) + goto out; +next: + ret = btrfs_next_item(root, &path); + if (ret < 0) + goto out; + if (ret > 0) { + ret = 0; + goto out; + } + } +out: + btrfs_release_path(&path); + return ret; +} + +static int populate_csum(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, char *buf, u64 start, + u64 len) +{ + u64 offset = 0; + u64 sectorsize; 
+ int ret = 0; + + while (offset < len) { + sectorsize = fs_info->sectorsize; + ret = read_extent_data(fs_info, buf, start + offset, &sectorsize, 0); + if (ret) + break; + ret = btrfs_csum_file_block(trans, start + len, start + offset, + buf, sectorsize); + if (ret) + break; + offset += sectorsize; + } + return ret; +} + +static int fill_csum_tree_from_extent(struct btrfs_fs_info *fs_info) +{ + struct btrfs_root *extent_root = btrfs_extent_root(fs_info, 0); + struct btrfs_trans_handle *trans; + struct btrfs_path path; + struct btrfs_extent_item *ei; + struct extent_buffer *leaf; + char *buf; + struct btrfs_key key; + int ret; + + trans = btrfs_start_transaction(extent_root, 1); + if (trans == NULL) { + /* fixme */ + printf("cannot start transaction\n"); + return -EINVAL; + } + + btrfs_init_path(&path); + key.objectid = 0; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = 0; + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); + if (ret < 0) { + btrfs_release_path(&path); + return ret; + } + + buf = malloc(fs_info->sectorsize); + if (!buf) { + btrfs_release_path(&path); + return -ENOMEM; + } + + ret = delete_csum_items(trans, fs_info); + if (ret) { + error("unable to delete all checksum items: %d", ret); + return -EIO; + } + + while (1) { + if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { + ret = btrfs_next_leaf(extent_root, &path); + if (ret < 0) + break; + if (ret) { + ret = 0; + break; + } + } + leaf = path.nodes[0]; + + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); + if (key.type != BTRFS_EXTENT_ITEM_KEY) { + path.slots[0]++; + continue; + } + + ei = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_extent_item); + if (!(btrfs_extent_flags(leaf, ei) & BTRFS_EXTENT_FLAG_DATA)) { + path.slots[0]++; + continue; + } + + ret = populate_csum(trans, fs_info, buf, key.objectid, key.offset); + if (ret) + break; + path.slots[0]++; + } + + btrfs_release_path(&path); + free(buf); + + /* don't commit if there's an error */ + ret = 
btrfs_commit_transaction(trans, extent_root); + + return ret; +} + +static int rewrite_checksums(struct btrfs_root *root, int csum_type) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_super_block *disk_super; + struct btrfs_trans_handle *trans; + u64 super_flags; + int ret; + + disk_super = root->fs_info->super_copy; + super_flags = btrfs_super_flags(disk_super); + + /* FIXME: Sanity checks */ + if (0) { + fprintf(stderr, + "UUID rewrite in progress, cannot change fsid\n"); + return 1; + } + + fs_info->force_csum_type = csum_type; + + /* Step 1 sets the in progress flag, no other change to the sb */ + printf("Set superblock flag CHANGING_CSUM\n"); + trans = btrfs_start_transaction(root, 1); + super_flags |= BTRFS_SUPER_FLAG_CHANGING_CSUM; + btrfs_set_super_flags(disk_super, super_flags); + ret = btrfs_commit_transaction(trans, root); + if (ret < 0) + return ret; + + /* Change extents first */ + printf("Change fsid in extents\n"); + ret = change_extents_csum(fs_info, csum_type); + if (ret < 0) { + error("failed to change csum of metadata: %d", ret); + goto out; + } + + /* Then devices */ + printf("Change csum in chunk tree\n"); + ret = change_devices_csum(fs_info->chunk_root, csum_type); + if (ret < 0) { + error("failed to change UUID of devices: %d", ret); + goto out; + } + + /* DATA */ + printf("Change csum of data blocks\n"); + ret = fill_csum_tree_from_extent(fs_info); + if (ret < 0) + goto out; + + /* Last, change fsid in super */ + ret = write_all_supers(fs_info); + if (ret < 0) + goto out; + + /* All checksums done, drop the flag, super block csum will get updated */ + printf("Clear superblock flag CHANGING_CSUM\n"); + super_flags = btrfs_super_flags(fs_info->super_copy); + super_flags &= ~BTRFS_SUPER_FLAG_CHANGING_CSUM; + btrfs_set_super_flags(fs_info->super_copy, super_flags); + btrfs_set_super_csum_type(disk_super, csum_type); + ret = write_all_supers(fs_info); + printf("Checksum change finished\n"); +out: + /* check errors */ + + 
return ret; +} + static int set_super_incompat_flags(struct btrfs_root *root, u64 flags) { struct btrfs_trans_handle *trans; @@ -481,6 +789,11 @@ static void print_usage(void) printf(" general:\n"); printf("\t-f allow dangerous operations, make sure that you are aware of the dangers\n"); printf("\t--help print this help\n"); +#ifdef EXPERIMENTAL + printf("\nEXPERIMENTAL FEATURES:\n"); + printf(" checksum changes:\n"); + printf("\t--csum CSUM switch checksum for data and metadata to CSUM\n"); +#endif } int BOX_MAIN(btrfstune)(int argc, char *argv[]) @@ -493,14 +806,19 @@ int BOX_MAIN(btrfstune)(int argc, char *argv[]) u64 seeding_value = 0; int random_fsid = 0; int change_metadata_uuid = 0; + int csum_type = -1; char *new_fsid_str = NULL; int ret; u64 super_flags = 0; int fd = -1; while(1) { + enum { GETOPT_VAL_CSUM = 256 }; static const struct option long_options[] = { { "help", no_argument, NULL, GETOPT_VAL_HELP}, +#ifdef EXPERIMENTAL + { "csum", required_argument, NULL, GETOPT_VAL_CSUM }, +#endif { NULL, 0, NULL, 0 } }; int c = getopt_long(argc, argv, "S:rxfuU:nmM:", long_options, NULL); @@ -541,6 +859,15 @@ int BOX_MAIN(btrfstune)(int argc, char *argv[]) ctree_flags |= OPEN_CTREE_IGNORE_FSID_MISMATCH; change_metadata_uuid = 1; break; +#ifdef EXPERIMENTAL + case GETOPT_VAL_CSUM: + ctree_flags |= OPEN_CTREE_SKIP_CSUM_CHECK; + csum_type = parse_csum_type(optarg); + warning("Switching checksums is experimental, do not use for valuable data!"); + printf("Switch csum to %s\n", + btrfs_super_csum_name(csum_type)); + break; +#endif case GETOPT_VAL_HELP: default: print_usage(); @@ -558,7 +885,7 @@ int BOX_MAIN(btrfstune)(int argc, char *argv[]) return 1; } if (!super_flags && !seeding_flag && !(random_fsid || new_fsid_str) && - !change_metadata_uuid) { + !change_metadata_uuid && csum_type == -1) { error("at least one option should be specified"); print_usage(); return 1; @@ -635,6 +962,12 @@ int BOX_MAIN(btrfstune)(int argc, char *argv[]) total++; } + if (csum_type != 
-1) { + /* TODO: check conflicting flags */ + printf("Proceed to switch checksums\n"); + ret = rewrite_checksums(root, csum_type); + } + if (change_metadata_uuid) { if (seeding_flag) { fprintf(stderr, diff --git a/kernel-shared/ctree.h b/kernel-shared/ctree.h index 6ca49c09..ab2aaed6 100644 --- a/kernel-shared/ctree.h +++ b/kernel-shared/ctree.h @@ -330,6 +330,7 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) #define BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34) #define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35) #define BTRFS_SUPER_FLAG_CHANGING_FSID_V2 (1ULL << 36) +#define BTRFS_SUPER_FLAG_CHANGING_CSUM (1ULL << 37) #define BTRFS_BACKREF_REV_MAX 256 #define BTRFS_BACKREF_REV_SHIFT 56 @@ -1247,6 +1248,8 @@ struct btrfs_fs_info { unsigned int quota_enabled:1; unsigned int suppress_check_block_errors:1; unsigned int ignore_fsid_mismatch:1; + /* Don't verify checksums at all */ + unsigned int skip_csum_check:1; unsigned int ignore_chunk_tree_error:1; unsigned int avoid_meta_chunk_alloc:1; unsigned int avoid_sys_chunk_alloc:1; @@ -1255,6 +1258,7 @@ struct btrfs_fs_info { unsigned int allow_transid_mismatch:1; int transaction_aborted; + int force_csum_type; int (*free_extent_hook)(u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 owner, u64 offset, diff --git a/kernel-shared/disk-io.c b/kernel-shared/disk-io.c index 364a0bd8..e9d945ec 100644 --- a/kernel-shared/disk-io.c +++ b/kernel-shared/disk-io.c @@ -174,7 +174,9 @@ static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, result, len); if (verify) { - if (memcmp_extent_buffer(buf, result, 0, csum_size)) { + if (buf->fs_info->skip_csum_check) { + /* printf("skip csum check for block %llu\n", buf->start); */ + } else if (memcmp_extent_buffer(buf, result, 0, csum_size)) { if (!silent) { char found[BTRFS_CSUM_STRING_LEN]; char wanted[BTRFS_CSUM_STRING_LEN]; @@ -186,7 +188,6 @@ static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, (unsigned long 
long)buf->start, wanted, found); } - return 1; } } else { write_extent_buffer(buf, result, 0, csum_size); @@ -212,6 +213,12 @@ int csum_tree_block(struct btrfs_fs_info *fs_info, u16 csum_size = fs_info->csum_size; u16 csum_type = fs_info->csum_type; + if (fs_info->force_csum_type != -1) { + /* printf("CSUM TREE: offset %llu\n", buf->start); */ + csum_type = fs_info->force_csum_type; + csum_size = btrfs_csum_type_size(csum_type); + } + if (verify && fs_info->suppress_check_block_errors) return verify_tree_block_csum_silent(buf, csum_size, csum_type); return csum_tree_block_size(buf, csum_size, verify, csum_type); @@ -933,6 +940,9 @@ struct btrfs_fs_info *btrfs_new_fs_info(int writable, u64 sb_bytenr) fs_info->data_alloc_profile = (u64)-1; fs_info->metadata_alloc_profile = (u64)-1; fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; + + fs_info->force_csum_type = -1; + return fs_info; free_all: btrfs_free_fs_info(fs_info); @@ -1418,6 +1428,8 @@ static struct btrfs_fs_info *__open_ctree_fd(int fp, struct open_ctree_flags *oc fs_info->suppress_check_block_errors = 1; if (flags & OPEN_CTREE_IGNORE_FSID_MISMATCH) fs_info->ignore_fsid_mismatch = 1; + if (flags & OPEN_CTREE_SKIP_CSUM_CHECK) + fs_info->skip_csum_check = 1; if (flags & OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR) fs_info->ignore_chunk_tree_error = 1; if (flags & OPEN_CTREE_HIDE_NAMES) @@ -1474,6 +1486,8 @@ static struct btrfs_fs_info *__open_ctree_fd(int fp, struct open_ctree_flags *oc goto out_devices; } + /* CHECK: ignore_csum_mismatch */ + ASSERT(!memcmp(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE)); if (btrfs_fs_incompat(fs_info, METADATA_UUID)) ASSERT(!memcmp(disk_super->metadata_uuid, diff --git a/kernel-shared/disk-io.h b/kernel-shared/disk-io.h index b315e16e..d55ced1e 100644 --- a/kernel-shared/disk-io.h +++ b/kernel-shared/disk-io.h @@ -91,6 +91,13 @@ enum btrfs_open_ctree_flags { * mismatch. 
*/ OPEN_CTREE_ALLOW_TRANSID_MISMATCH = (1U << 15), + + /* + * Do not check checksums at all for data and metadata, eg. when the + * superblock type of checksum does not match the actual checksum items + * stored in the csum tree during conversion. + */ + OPEN_CTREE_SKIP_CSUM_CHECK = (1U << 16), }; /* diff --git a/kernel-shared/file-item.c b/kernel-shared/file-item.c index 56a4b3b0..0d68ed52 100644 --- a/kernel-shared/file-item.c +++ b/kernel-shared/file-item.c @@ -142,6 +142,7 @@ btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_csum_item *item; struct extent_buffer *leaf; u64 csum_offset = 0; + u16 csum_type = root->fs_info->csum_type; u16 csum_size = root->fs_info->csum_size; int csums_in_item; @@ -152,6 +153,12 @@ btrfs_lookup_csum(struct btrfs_trans_handle *trans, if (ret < 0) goto fail; leaf = path->nodes[0]; + + if (leaf->fs_info->force_csum_type != -1) { + csum_type = root->fs_info->force_csum_type; + csum_size = btrfs_csum_type_size(csum_type); + } + if (ret > 0) { ret = 1; if (path->slots[0] == 0) @@ -201,6 +208,12 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, u16 csum_size = root->fs_info->csum_size; u16 csum_type = root->fs_info->csum_type; + if (root->fs_info->force_csum_type != -1) { + /* printf("CSUM DATA: offset %llu (%d -> %d)\n", bytenr, csum_type, root->fs_info->force_csum_type); */ + csum_type = root->fs_info->force_csum_type; + csum_size = btrfs_csum_type_size(csum_type); + } + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -218,6 +231,8 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, ret = PTR_ERR(item); if (ret == -EFBIG) { u32 item_size; + + /* printf("item not big enough for bytenr %llu\n", bytenr); */ /* we found one, but it isn't big enough yet */ leaf = path->nodes[0]; item_size = btrfs_item_size_nr(leaf, path->slots[0]); diff --git a/kernel-shared/print-tree.c b/kernel-shared/print-tree.c index 73f969c3..bd75ae51 100644 --- a/kernel-shared/print-tree.c +++ 
b/kernel-shared/print-tree.c @@ -1217,6 +1217,7 @@ static void print_header_info(struct extent_buffer *eb, unsigned int mode) { struct btrfs_fs_info *fs_info = eb->fs_info; char flags_str[128]; + u8 csum[BTRFS_CSUM_SIZE]; u64 flags; u32 nr; u8 backref_rev; @@ -1263,6 +1264,22 @@ static void print_header_info(struct extent_buffer *eb, unsigned int mode) btrfs_header_level(eb) ? "node" : "leaf", btrfs_header_bytenr(eb), flags, flags_str, backref_rev, csum_str); + +#ifdef EXPERIMENTAL + printf("checksum stored "); + for (i = 0; i < BTRFS_CSUM_SIZE; i++) + printf("%02hhx", (int)(eb->data[i])); + printf("\n"); + memset(csum, 0, sizeof(csum)); + btrfs_csum_data(fs_info, btrfs_super_csum_type(fs_info->super_copy), + (u8 *)eb->data + BTRFS_CSUM_SIZE, + csum, fs_info->nodesize - BTRFS_CSUM_SIZE); + printf("checksum calced "); + for (i = 0; i < BTRFS_CSUM_SIZE; i++) + printf("%02hhx", (int)(csum[i])); + printf("\n"); +#endif + print_uuids(eb); fflush(stdout); }