btrfs-progs: btrfstune: experimental, new option to switch csums

This is still work in progress but can survive some stress testing.
There are still some sanity checks missing, do not use this on valuable
data. To enable this, configure must be run with the experimental
features enabled.

  $ mkfs.btrfs --csum crc32c /dev/sdx
  $ <mount, fill with data, unmount>

  $ btrfstune --csum sha256 /dev/sdx

Will change the checksum to sha256.

Implementation:

- set bit on superblock when the checksums are being changed (similar to
  the uuid rewrite)
- metadata checksums are overwritten in place
- data checksums:
  - the checksum tree is completely deleted and no checksums are
    verified
  - data blocks are enumerated and all checksums generated (same as
    check --init-csum-tree)

To make it usable, it should be restartable and track the current
progress somehow. Also the previous data checksums should be verified
any time they're available.

Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
David Sterba 2021-02-07 19:23:44 +01:00
parent d425a9cc0a
commit 9fcb0efd66
6 changed files with 393 additions and 3 deletions

View File

@ -33,6 +33,7 @@
#include "common/utils.h"
#include "kernel-shared/volumes.h"
#include "common/open-utils.h"
#include "common/parse-utils.h"
#include "common/device-scan.h"
#include "common/help.h"
#include "common/box.h"
@ -186,6 +187,313 @@ static int set_metadata_uuid(struct btrfs_root *root, const char *uuid_string)
return btrfs_commit_transaction(trans, root);
}
static int delete_csum_items(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
struct btrfs_root *root = btrfs_csum_root(fs_info, 0);
struct btrfs_path path;
struct btrfs_key key;
int nr;
int ret;
btrfs_init_path(&path);
key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
key.type = BTRFS_EXTENT_CSUM_KEY;
key.offset = 0;
while (1) {
ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
if (ret < 0)
goto out;
nr = btrfs_header_nritems(path.nodes[0]);
if (!nr)
break;
path.slots[0] = 0;
ret = btrfs_del_items(trans, root, &path, 0, nr);
if (ret)
goto out;
btrfs_release_path(&path);
}
ret = 0;
out:
btrfs_release_path(&path);
return ret;
}
static int change_extents_csum(struct btrfs_fs_info *fs_info, int csum_type)
{
struct btrfs_root *root = btrfs_extent_root(fs_info, 0);
struct btrfs_path path;
struct btrfs_key key = {0, 0, 0};
int ret = 0;
btrfs_init_path(&path);
/*
* Here we don't use transaction as it will takes a lot of reserve
* space, and that will make a near-full btrfs unable to change csums
*/
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0)
goto out;
while (1) {
struct btrfs_extent_item *ei;
struct extent_buffer *eb;
u64 flags;
u64 bytenr;
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
if (key.type != BTRFS_EXTENT_ITEM_KEY &&
key.type != BTRFS_METADATA_ITEM_KEY)
goto next;
ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
struct btrfs_extent_item);
flags = btrfs_extent_flags(path.nodes[0], ei);
if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
goto next;
bytenr = key.objectid;
eb = read_tree_block(fs_info, bytenr, 0);
if (IS_ERR(eb)) {
error("failed to read tree block: %llu", bytenr);
ret = PTR_ERR(eb);
goto out;
}
/* Only rewrite block */
/* printf("CSUM: start %llu\n", eb->start); */
ret = write_tree_block(NULL, fs_info, eb);
free_extent_buffer(eb);
if (ret < 0) {
error("failed to change csum of tree block: %llu", bytenr);
goto out;
}
next:
ret = btrfs_next_item(root, &path);
if (ret < 0)
goto out;
if (ret > 0) {
ret = 0;
goto out;
}
}
out:
btrfs_release_path(&path);
return ret;
}
/*
 * Rewrite the leaves of @root that hold device items.
 *
 * Iterates over all items of @root (the caller passes the chunk root) and,
 * for every DEV_ITEM found, writes the leaf containing it back without
 * changing its content.
 *
 * @csum_type is currently unused here.
 *
 * Returns 0 when the whole tree was walked, negative value on error.
 */
static int change_devices_csum(struct btrfs_root *root, int csum_type)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path path;
	struct btrfs_key key = {0, 0, 0};
	int ret = 0;

	btrfs_init_path(&path);

	/* No transaction again */
	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
	if (ret < 0)
		goto out;

	do {
		struct extent_buffer *leaf = path.nodes[0];

		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);

		if (key.type == BTRFS_DEV_ITEM_KEY &&
		    key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
			/* Content is untouched, only the block is written again */
			ret = write_tree_block(NULL, fs_info, leaf);
			if (ret < 0)
				goto out;
		}

		ret = btrfs_next_item(root, &path);
	} while (ret == 0);

	/* A positive value only signals the end of the tree */
	if (ret > 0)
		ret = 0;
out:
	btrfs_release_path(&path);
	return ret;
}
/*
 * Generate checksum items for the data range [@start, @start + @len).
 *
 * The range is processed in steps of fs_info->sectorsize: each step reads
 * the data into @buf and hands it to btrfs_csum_file_block(). The step
 * length is passed to read_extent_data() by pointer and whatever value it
 * holds afterwards is used for the checksum call and to advance.
 *
 * @buf is provided by the caller, which allocates fs_info->sectorsize bytes.
 *
 * Returns 0 on success or the first non-zero result of the read or checksum
 * call.
 */
static int populate_csum(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info, char *buf, u64 start,
			 u64 len)
{
	u64 done;
	u64 step;
	int ret;

	for (done = 0; done < len; done += step) {
		step = fs_info->sectorsize;

		ret = read_extent_data(fs_info, buf, start + done, &step, 0);
		if (ret)
			return ret;

		ret = btrfs_csum_file_block(trans, start + len, start + done,
					    buf, step);
		if (ret)
			return ret;
	}

	return 0;
}
static int fill_csum_tree_from_extent(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *extent_root = btrfs_extent_root(fs_info, 0);
struct btrfs_trans_handle *trans;
struct btrfs_path path;
struct btrfs_extent_item *ei;
struct extent_buffer *leaf;
char *buf;
struct btrfs_key key;
int ret;
trans = btrfs_start_transaction(extent_root, 1);
if (trans == NULL) {
/* fixme */
printf("cannot start transaction\n");
return -EINVAL;
}
btrfs_init_path(&path);
key.objectid = 0;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
if (ret < 0) {
btrfs_release_path(&path);
return ret;
}
buf = malloc(fs_info->sectorsize);
if (!buf) {
btrfs_release_path(&path);
return -ENOMEM;
}
ret = delete_csum_items(trans, fs_info);
if (ret) {
error("unable to delete all checksum items: %d", ret);
return -EIO;
}
while (1) {
if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
ret = btrfs_next_leaf(extent_root, &path);
if (ret < 0)
break;
if (ret) {
ret = 0;
break;
}
}
leaf = path.nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
if (key.type != BTRFS_EXTENT_ITEM_KEY) {
path.slots[0]++;
continue;
}
ei = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_extent_item);
if (!(btrfs_extent_flags(leaf, ei) & BTRFS_EXTENT_FLAG_DATA)) {
path.slots[0]++;
continue;
}
ret = populate_csum(trans, fs_info, buf, key.objectid, key.offset);
if (ret)
break;
path.slots[0]++;
}
btrfs_release_path(&path);
free(buf);
/* dont' commit if thre's error */
ret = btrfs_commit_transaction(trans, extent_root);
return ret;
}
static int rewrite_checksums(struct btrfs_root *root, int csum_type)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_super_block *disk_super;
struct btrfs_trans_handle *trans;
u64 super_flags;
int ret;
disk_super = root->fs_info->super_copy;
super_flags = btrfs_super_flags(disk_super);
/* FIXME: Sanity checks */
if (0) {
fprintf(stderr,
"UUID rewrite in progress, cannot change fsid\n");
return 1;
}
fs_info->force_csum_type = csum_type;
/* Step 1 sets the in progress flag, no other change to the sb */
printf("Set superblock flag CHANGING_CSUM\n");
trans = btrfs_start_transaction(root, 1);
super_flags |= BTRFS_SUPER_FLAG_CHANGING_CSUM;
btrfs_set_super_flags(disk_super, super_flags);
ret = btrfs_commit_transaction(trans, root);
if (ret < 0)
return ret;
/* Change extents first */
printf("Change fsid in extents\n");
ret = change_extents_csum(fs_info, csum_type);
if (ret < 0) {
error("failed to change csum of metadata: %d", ret);
goto out;
}
/* Then devices */
printf("Change csum in chunk tree\n");
ret = change_devices_csum(fs_info->chunk_root, csum_type);
if (ret < 0) {
error("failed to change UUID of devices: %d", ret);
goto out;
}
/* DATA */
printf("Change csum of data blocks\n");
ret = fill_csum_tree_from_extent(fs_info);
if (ret < 0)
goto out;
/* Last, change fsid in super */
ret = write_all_supers(fs_info);
if (ret < 0)
goto out;
/* All checksums done, drop the flag, super block csum will get updated */
printf("Clear superblock flag CHANGING_CSUM\n");
super_flags = btrfs_super_flags(fs_info->super_copy);
super_flags &= ~BTRFS_SUPER_FLAG_CHANGING_CSUM;
btrfs_set_super_flags(fs_info->super_copy, super_flags);
btrfs_set_super_csum_type(disk_super, csum_type);
ret = write_all_supers(fs_info);
printf("Checksum change finished\n");
out:
/* check errors */
return ret;
}
static int set_super_incompat_flags(struct btrfs_root *root, u64 flags)
{
struct btrfs_trans_handle *trans;
@ -481,6 +789,11 @@ static void print_usage(void)
printf(" general:\n");
printf("\t-f allow dangerous operations, make sure that you are aware of the dangers\n");
printf("\t--help print this help\n");
#ifdef EXPERIMENTAL
printf("\nEXPERIMENTAL FEATURES:\n");
printf(" checksum changes:\n");
printf("\t--csum CSUM switch checksum for data and metadata to CSUM\n");
#endif
}
int BOX_MAIN(btrfstune)(int argc, char *argv[])
@ -493,14 +806,19 @@ int BOX_MAIN(btrfstune)(int argc, char *argv[])
u64 seeding_value = 0;
int random_fsid = 0;
int change_metadata_uuid = 0;
int csum_type = -1;
char *new_fsid_str = NULL;
int ret;
u64 super_flags = 0;
int fd = -1;
while(1) {
enum { GETOPT_VAL_CSUM = 256 };
static const struct option long_options[] = {
{ "help", no_argument, NULL, GETOPT_VAL_HELP},
#ifdef EXPERIMENTAL
{ "csum", required_argument, NULL, GETOPT_VAL_CSUM },
#endif
{ NULL, 0, NULL, 0 }
};
int c = getopt_long(argc, argv, "S:rxfuU:nmM:", long_options, NULL);
@ -541,6 +859,15 @@ int BOX_MAIN(btrfstune)(int argc, char *argv[])
ctree_flags |= OPEN_CTREE_IGNORE_FSID_MISMATCH;
change_metadata_uuid = 1;
break;
#ifdef EXPERIMENTAL
case GETOPT_VAL_CSUM:
ctree_flags |= OPEN_CTREE_SKIP_CSUM_CHECK;
csum_type = parse_csum_type(optarg);
warning("Switching checksums is experimental, do not use for valuable data!");
printf("Switch csum to %s\n",
btrfs_super_csum_name(csum_type));
break;
#endif
case GETOPT_VAL_HELP:
default:
print_usage();
@ -558,7 +885,7 @@ int BOX_MAIN(btrfstune)(int argc, char *argv[])
return 1;
}
if (!super_flags && !seeding_flag && !(random_fsid || new_fsid_str) &&
!change_metadata_uuid) {
!change_metadata_uuid && csum_type == -1) {
error("at least one option should be specified");
print_usage();
return 1;
@ -635,6 +962,12 @@ int BOX_MAIN(btrfstune)(int argc, char *argv[])
total++;
}
if (csum_type != -1) {
/* TODO: check conflicting flags */
printf("Proceed to switch checksums\n");
ret = rewrite_checksums(root, csum_type);
}
if (change_metadata_uuid) {
if (seeding_flag) {
fprintf(stderr,

View File

@ -330,6 +330,7 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
#define BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34)
#define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35)
#define BTRFS_SUPER_FLAG_CHANGING_FSID_V2 (1ULL << 36)
#define BTRFS_SUPER_FLAG_CHANGING_CSUM (1ULL << 37)
#define BTRFS_BACKREF_REV_MAX 256
#define BTRFS_BACKREF_REV_SHIFT 56
@ -1247,6 +1248,8 @@ struct btrfs_fs_info {
unsigned int quota_enabled:1;
unsigned int suppress_check_block_errors:1;
unsigned int ignore_fsid_mismatch:1;
/* Don't verify checksums at all */
unsigned int skip_csum_check:1;
unsigned int ignore_chunk_tree_error:1;
unsigned int avoid_meta_chunk_alloc:1;
unsigned int avoid_sys_chunk_alloc:1;
@ -1255,6 +1258,7 @@ struct btrfs_fs_info {
unsigned int allow_transid_mismatch:1;
int transaction_aborted;
int force_csum_type;
int (*free_extent_hook)(u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset,

View File

@ -174,7 +174,9 @@ static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size,
result, len);
if (verify) {
if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
if (buf->fs_info->skip_csum_check) {
/* printf("skip csum check for block %llu\n", buf->start); */
} else if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
if (!silent) {
char found[BTRFS_CSUM_STRING_LEN];
char wanted[BTRFS_CSUM_STRING_LEN];
@ -186,7 +188,6 @@ static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size,
(unsigned long long)buf->start,
wanted, found);
}
return 1;
}
} else {
write_extent_buffer(buf, result, 0, csum_size);
@ -212,6 +213,12 @@ int csum_tree_block(struct btrfs_fs_info *fs_info,
u16 csum_size = fs_info->csum_size;
u16 csum_type = fs_info->csum_type;
if (fs_info->force_csum_type != -1) {
/* printf("CSUM TREE: offset %llu\n", buf->start); */
csum_type = fs_info->force_csum_type;
csum_size = btrfs_csum_type_size(csum_type);
}
if (verify && fs_info->suppress_check_block_errors)
return verify_tree_block_csum_silent(buf, csum_size, csum_type);
return csum_tree_block_size(buf, csum_size, verify, csum_type);
@ -933,6 +940,9 @@ struct btrfs_fs_info *btrfs_new_fs_info(int writable, u64 sb_bytenr)
fs_info->data_alloc_profile = (u64)-1;
fs_info->metadata_alloc_profile = (u64)-1;
fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
fs_info->force_csum_type = -1;
return fs_info;
free_all:
btrfs_free_fs_info(fs_info);
@ -1418,6 +1428,8 @@ static struct btrfs_fs_info *__open_ctree_fd(int fp, struct open_ctree_flags *oc
fs_info->suppress_check_block_errors = 1;
if (flags & OPEN_CTREE_IGNORE_FSID_MISMATCH)
fs_info->ignore_fsid_mismatch = 1;
if (flags & OPEN_CTREE_SKIP_CSUM_CHECK)
fs_info->skip_csum_check = 1;
if (flags & OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR)
fs_info->ignore_chunk_tree_error = 1;
if (flags & OPEN_CTREE_HIDE_NAMES)
@ -1474,6 +1486,8 @@ static struct btrfs_fs_info *__open_ctree_fd(int fp, struct open_ctree_flags *oc
goto out_devices;
}
/* CHECK: ignore_csum_mismatch */
ASSERT(!memcmp(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE));
if (btrfs_fs_incompat(fs_info, METADATA_UUID))
ASSERT(!memcmp(disk_super->metadata_uuid,

View File

@ -91,6 +91,13 @@ enum btrfs_open_ctree_flags {
* mismatch.
*/
OPEN_CTREE_ALLOW_TRANSID_MISMATCH = (1U << 15),
/*
* Do not check checksums at all for data and metadata, eg. when the
* superblock type of checksum does not match the actual checksum items
* stored in the csum tree during conversion.
*/
OPEN_CTREE_SKIP_CSUM_CHECK = (1U << 16),
};
/*

View File

@ -142,6 +142,7 @@ btrfs_lookup_csum(struct btrfs_trans_handle *trans,
struct btrfs_csum_item *item;
struct extent_buffer *leaf;
u64 csum_offset = 0;
u16 csum_type = root->fs_info->csum_type;
u16 csum_size = root->fs_info->csum_size;
int csums_in_item;
@ -152,6 +153,12 @@ btrfs_lookup_csum(struct btrfs_trans_handle *trans,
if (ret < 0)
goto fail;
leaf = path->nodes[0];
if (leaf->fs_info->force_csum_type != -1) {
csum_type = root->fs_info->force_csum_type;
csum_size = btrfs_csum_type_size(csum_type);
}
if (ret > 0) {
ret = 1;
if (path->slots[0] == 0)
@ -201,6 +208,12 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans,
u16 csum_size = root->fs_info->csum_size;
u16 csum_type = root->fs_info->csum_type;
if (root->fs_info->force_csum_type != -1) {
/* printf("CSUM DATA: offset %llu (%d -> %d)\n", bytenr, csum_type, root->fs_info->force_csum_type); */
csum_type = root->fs_info->force_csum_type;
csum_size = btrfs_csum_type_size(csum_type);
}
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@ -218,6 +231,8 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans,
ret = PTR_ERR(item);
if (ret == -EFBIG) {
u32 item_size;
/* printf("item not big enough for bytenr %llu\n", bytenr); */
/* we found one, but it isn't big enough yet */
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, path->slots[0]);

View File

@ -1217,6 +1217,7 @@ static void print_header_info(struct extent_buffer *eb, unsigned int mode)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
char flags_str[128];
u8 csum[BTRFS_CSUM_SIZE];
u64 flags;
u32 nr;
u8 backref_rev;
@ -1263,6 +1264,22 @@ static void print_header_info(struct extent_buffer *eb, unsigned int mode)
btrfs_header_level(eb) ? "node" : "leaf",
btrfs_header_bytenr(eb), flags, flags_str, backref_rev,
csum_str);
#ifdef EXPERIMENTAL
printf("checksum stored ");
for (i = 0; i < BTRFS_CSUM_SIZE; i++)
printf("%02hhx", (int)(eb->data[i]));
printf("\n");
memset(csum, 0, sizeof(csum));
btrfs_csum_data(fs_info, btrfs_super_csum_type(fs_info->super_copy),
(u8 *)eb->data + BTRFS_CSUM_SIZE,
csum, fs_info->nodesize - BTRFS_CSUM_SIZE);
printf("checksum calced ");
for (i = 0; i < BTRFS_CSUM_SIZE; i++)
printf("%02hhx", (int)(csum[i]));
printf("\n");
#endif
print_uuids(eb);
fflush(stdout);
}