mirror of
https://github.com/kdave/btrfs-progs
synced 2025-01-12 08:49:33 +00:00
880651b969
We use the UASSERT() wrapper instead of the plain assert() as this can be tuned to print the stack trace too if supported. Signed-off-by: David Sterba <dsterba@suse.com>
1142 lines
31 KiB
C
1142 lines
31 KiB
C
/*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public
|
|
* License v2 as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public
|
|
* License along with this program; if not, write to the
|
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
* Boston, MA 02111-1307, USA.
|
|
*/
|
|
|
|
#include "kerncompat.h"
|
|
#include <stdbool.h>
|
|
#include <string.h>
|
|
#include <errno.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include "kernel-lib/sizes.h"
|
|
#include "kernel-shared/accessors.h"
|
|
#include "kernel-shared/uapi/btrfs_tree.h"
|
|
#include "kernel-shared/ctree.h"
|
|
#include "kernel-shared/disk-io.h"
|
|
#include "kernel-shared/volumes.h"
|
|
#include "kernel-shared/file-item.h"
|
|
#include "kernel-shared/extent_io.h"
|
|
#include "kernel-shared/transaction.h"
|
|
#include "kernel-shared/tree-checker.h"
|
|
#include "common/messages.h"
|
|
#include "common/utils.h"
|
|
#include "common/inject-error.h"
|
|
#include "common/extent-tree-utils.h"
|
|
#include "tune/tune.h"
|
|
|
|
/*
 * Check whether the filesystem is in a state that allows a csum type change.
 *
 * The change is refused when any of the following is found, checked in this
 * order:
 * - the super block has a non-zero log root
 * - the EXTENT_TREE_V2 incompat feature is set
 * - a balance item exists in the tree root
 * - the dev-replace item in the dev root is in STARTED or SUSPENDED state
 * - @new_csum_type equals the csum type already recorded in @fs_info
 *
 * Return 0 if the change may proceed, <0 otherwise (tree search errors are
 * returned unchanged).
 */
static int check_csum_change_requreiment(struct btrfs_fs_info *fs_info, u16 new_csum_type)
{
	struct btrfs_path path = { 0 };
	struct btrfs_key key;
	bool replace_active = false;
	int ret;

	if (btrfs_super_log_root(fs_info->super_copy) != 0) {
		error("dirty log tree detected, please replay the log or zero it.");
		return -EINVAL;
	}
	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
		error("no csum change support for extent-tree-v2 feature yet.");
		return -EOPNOTSUPP;
	}

	/* An exact match on the balance item means a balance is recorded. */
	key.objectid = BTRFS_BALANCE_OBJECTID;
	key.type = BTRFS_TEMPORARY_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
	btrfs_release_path(&path);
	if (ret == 0) {
		error("running balance detected, please finish or cancel it.");
		return -EINVAL;
	}
	if (ret < 0) {
		errno = -ret;
		error("failed to check the balance status: %m");
		return ret;
	}

	/* Read the dev-replace state (if the item exists) before dropping the path. */
	key.objectid = 0;
	key.type = BTRFS_DEV_REPLACE_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, fs_info->dev_root, &key, &path, 0, 0);
	if (ret == 0) {
		struct btrfs_dev_replace_item *item;
		u64 replace_state;

		item = btrfs_item_ptr(path.nodes[0], path.slots[0],
				      struct btrfs_dev_replace_item);
		replace_state = btrfs_dev_replace_replace_state(path.nodes[0], item);
		switch (replace_state) {
		case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
		case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
			replace_active = true;
			break;
		default:
			break;
		}
	}
	btrfs_release_path(&path);
	if (ret < 0) {
		errno = -ret;
		error("failed to check the dev-replace status: %m");
		return ret;
	}
	if (replace_active) {
		error(
	"running/suspended dev-replace detected, please finish or cancel it");
		return -EINVAL;
	}

	if (new_csum_type == fs_info->csum_type) {
		error("the fs is already using csum type %s (%u)",
		      btrfs_super_csum_name(new_csum_type), new_csum_type);
		return -EINVAL;
	}
	return 0;
}
|
|
|
|
/*
 * Find the end bytenr covered by the last EXTENT_CSUM item (objectid
 * BTRFS_EXTENT_CSUM_OBJECTID) of the csum tree.
 *
 * On success @result is set to the key offset of the last such item plus the
 * number of bytes its csums cover (item size / fs_info->csum_size sectors),
 * or to 0 if there is no such item.
 *
 * Return 0 on success, <0 on tree search errors.  The path is released on
 * every exit.
 */
static int get_last_csum_bytenr(struct btrfs_fs_info *fs_info, u64 *result)
{
	struct btrfs_root *csum_root = btrfs_csum_root(fs_info, 0);
	struct btrfs_path path = { 0 };
	struct btrfs_key key;
	int ret;

	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	key.type = BTRFS_EXTENT_CSUM_KEY;
	key.offset = (u64)-1;

	ret = btrfs_search_slot(NULL, csum_root, &key, &path, 0, 0);
	if (ret < 0)
		goto out;
	/* An item with offset (u64)-1 is not expected to exist. */
	UASSERT(ret > 0);
	ret = btrfs_previous_item(csum_root, &path, BTRFS_EXTENT_CSUM_OBJECTID,
				  BTRFS_EXTENT_CSUM_KEY);
	if (ret < 0)
		goto out;
	/*
	 * Empty csum tree, set last csum byte to 0 so we can skip new data
	 * csum generation.
	 */
	if (ret > 0) {
		*result = 0;
		ret = 0;
		goto out;
	}
	btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
	*result = key.offset + btrfs_item_size(path.nodes[0], path.slots[0]) /
			       fs_info->csum_size * fs_info->sectorsize;
out:
	btrfs_release_path(&path);
	return ret;
}
|
|
|
|
static int read_verify_one_data_sector(struct btrfs_fs_info *fs_info,
|
|
u64 logical, void *data_buf,
|
|
const void *old_csums, u16 old_csum_type,
|
|
bool output_error)
|
|
{
|
|
const u32 sectorsize = fs_info->sectorsize;
|
|
int num_copies = btrfs_num_copies(fs_info, logical, sectorsize);
|
|
bool found_good = false;
|
|
|
|
for (int mirror = 1; mirror <= num_copies; mirror++) {
|
|
u8 csum_has[BTRFS_CSUM_SIZE];
|
|
u64 readlen = sectorsize;
|
|
int ret;
|
|
|
|
ret = read_data_from_disk(fs_info, data_buf, logical, &readlen,
|
|
mirror);
|
|
if (ret < 0) {
|
|
errno = -ret;
|
|
error("failed to read logical %llu: %m", logical);
|
|
continue;
|
|
}
|
|
btrfs_csum_data(fs_info, fs_info->csum_type, data_buf, csum_has,
|
|
sectorsize);
|
|
if (memcmp(csum_has, old_csums, fs_info->csum_size) == 0) {
|
|
found_good = true;
|
|
break;
|
|
} else if (output_error){
|
|
char found[BTRFS_CSUM_STRING_LEN];
|
|
char want[BTRFS_CSUM_STRING_LEN];
|
|
|
|
btrfs_format_csum(fs_info->csum_type, old_csums, want);
|
|
btrfs_format_csum(fs_info->csum_type, csum_has, found);
|
|
error("csum mismatch for logical %llu mirror %u, has %s expected %s",
|
|
logical, mirror, found, want);
|
|
}
|
|
}
|
|
if (!found_good)
|
|
return -EIO;
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Generate new checksums for the data range [@logical, @logical + @length).
 *
 * Every sector is read and verified against its old checksum first, then the
 * checksum of type @new_csum_type is inserted into the csum tree under the
 * BTRFS_CSUM_CHANGE_OBJECTID objectid.
 *
 * @old_csums: old checksums of the range, one entry of fs_info->csum_size
 *             bytes per sector, in sector order
 *
 * Return 0 on success, -ENOMEM if the sector buffer cannot be allocated, or
 * the error of the failed verification/insertion.
 */
static int generate_new_csum_range(struct btrfs_trans_handle *trans,
				   u64 logical, u64 length, u16 new_csum_type,
				   const void *old_csums)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	const u32 sectorsize = fs_info->sectorsize;
	const u8 *old_csum_bytes = old_csums;
	int ret = 0;
	void *buf;

	buf = malloc(sectorsize);
	if (!buf)
		return -ENOMEM;

	for (u64 cur = logical; cur < logical + length; cur += sectorsize) {
		/* Old csum of the sector at @cur inside the csum array. */
		const u8 *sector_csum = old_csum_bytes +
			(cur - logical) / sectorsize * fs_info->csum_size;

		ret = read_verify_one_data_sector(fs_info, cur, buf, sector_csum,
						  fs_info->csum_type, true);
		if (ret < 0) {
			/* Report the failing sector, not the start of the range. */
			error("failed to recover a good copy for data at logical %llu",
			      cur);
			goto out;
		}
		/* Calculate new csum and insert it into the csum tree. */
		ret = btrfs_csum_file_block(trans, cur,
				BTRFS_CSUM_CHANGE_OBJECTID, new_csum_type, buf);
		if (ret < 0) {
			errno = -ret;
			error("failed to insert new csum for data at logical %llu: %m",
			      cur);
			goto out;
		}
	}
out:
	free(buf);
	return ret;
}
|
|
|
|
/*
|
|
* After reading this many bytes of data, commit the current transaction.
|
|
*
|
|
* Only a soft cap, we can exceed the threshold if hitting a large enough csum
|
|
* item.
|
|
*/
|
|
#define CSUM_CHANGE_BYTES_THRESHOLD (SZ_2M)
|
|
|
|
static unsigned int calc_csum_change_nr_items(struct btrfs_fs_info *fs_info,
|
|
u16 new_csum_type)
|
|
{
|
|
const u32 new_csum_size = btrfs_csum_type_size(new_csum_type);
|
|
const u32 csum_item_size = CSUM_CHANGE_BYTES_THRESHOLD /
|
|
fs_info->sectorsize * new_csum_size;
|
|
|
|
return round_up(csum_item_size, fs_info->nodesize) / fs_info->nodesize * 2;
|
|
}
|
|
|
|
static int generate_new_data_csums_range(struct btrfs_fs_info *fs_info, u64 start,
|
|
u16 new_csum_type)
|
|
{
|
|
const unsigned int nr_items = calc_csum_change_nr_items(fs_info, new_csum_type);
|
|
struct btrfs_root *csum_root = btrfs_csum_root(fs_info, 0);
|
|
struct btrfs_trans_handle *trans;
|
|
struct btrfs_path path = { 0 };
|
|
struct btrfs_key key;
|
|
void *csum_buffer;
|
|
u64 converted_bytes = 0;
|
|
u64 last_csum;
|
|
u64 cur = start;
|
|
int ret;
|
|
|
|
ret = get_last_csum_bytenr(fs_info, &last_csum);
|
|
if (ret < 0) {
|
|
errno = -ret;
|
|
error("failed to get the last csum item: %m");
|
|
return ret;
|
|
}
|
|
csum_buffer = malloc(fs_info->nodesize);
|
|
if (!csum_buffer)
|
|
return -ENOMEM;
|
|
|
|
trans = btrfs_start_transaction(csum_root, nr_items);
|
|
if (IS_ERR(trans)) {
|
|
ret = PTR_ERR(trans);
|
|
errno = -ret;
|
|
error("failed to start transaction: %m");
|
|
return ret;
|
|
}
|
|
|
|
while (cur < last_csum) {
|
|
u64 csum_start;
|
|
u64 len;
|
|
u32 item_size;
|
|
|
|
key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
|
|
key.type = BTRFS_EXTENT_CSUM_KEY;
|
|
key.offset = cur;
|
|
|
|
ret = btrfs_search_slot(NULL, csum_root, &key, &path, 0, 0);
|
|
if (ret < 0)
|
|
goto out;
|
|
if (ret > 0 && path.slots[0] >=
|
|
btrfs_header_nritems(path.nodes[0])) {
|
|
ret = btrfs_next_leaf(csum_root, &path);
|
|
if (ret > 0) {
|
|
ret = 0;
|
|
btrfs_release_path(&path);
|
|
break;
|
|
}
|
|
if (ret < 0) {
|
|
btrfs_release_path(&path);
|
|
goto out;
|
|
}
|
|
}
|
|
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
|
|
UASSERT(key.offset >= cur);
|
|
item_size = btrfs_item_size(path.nodes[0], path.slots[0]);
|
|
|
|
csum_start = key.offset;
|
|
len = item_size / fs_info->csum_size * fs_info->sectorsize;
|
|
read_extent_buffer(path.nodes[0], csum_buffer,
|
|
btrfs_item_ptr_offset(path.nodes[0], path.slots[0]),
|
|
item_size);
|
|
btrfs_release_path(&path);
|
|
|
|
ret = generate_new_csum_range(trans, csum_start, len, new_csum_type,
|
|
csum_buffer);
|
|
if (ret < 0)
|
|
goto out;
|
|
converted_bytes += len;
|
|
if (converted_bytes >= CSUM_CHANGE_BYTES_THRESHOLD) {
|
|
converted_bytes = 0;
|
|
ret = btrfs_commit_transaction(trans, csum_root);
|
|
if (inject_error(0xfc35ae54))
|
|
return -EUCLEAN;
|
|
if (ret < 0)
|
|
goto out;
|
|
trans = btrfs_start_transaction(csum_root, nr_items);
|
|
if (IS_ERR(trans)) {
|
|
ret = PTR_ERR(trans);
|
|
goto out;
|
|
}
|
|
}
|
|
cur = csum_start + len;
|
|
}
|
|
ret = btrfs_commit_transaction(trans, csum_root);
|
|
if (inject_error(0x4de02239))
|
|
return -EUCLEAN;
|
|
out:
|
|
free(csum_buffer);
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Start the data csum change from scratch.
 *
 * In a first transaction a temporary item (BTRFS_CSUM_CHANGE_OBJECTID,
 * BTRFS_TEMPORARY_ITEM_KEY, @new_csum_type) is inserted into the tree root
 * and the CHANGING_DATA_CSUM super flag is set.  After that transaction is
 * committed, new data csums are generated starting from bytenr 0.
 *
 * Return 0 on success, <0 on error.
 */
static int generate_new_data_csums(struct btrfs_fs_info *fs_info, u16 new_csum_type)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_path path = { 0 };
	struct btrfs_key change_key = {
		.objectid = BTRFS_CSUM_CHANGE_OBJECTID,
		.type = BTRFS_TEMPORARY_ITEM_KEY,
		.offset = new_csum_type,
	};
	struct btrfs_trans_handle *trans;
	u64 flags;
	int ret;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		errno = -ret;
		error("failed to start transaction: %m");
		return ret;
	}

	/* The item carries no payload, the target csum type is the key offset. */
	ret = btrfs_insert_empty_item(trans, root, &path, &change_key, 0);
	btrfs_release_path(&path);
	if (ret < 0) {
		errno = -ret;
		error("failed to insert csum change item: %m");
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	flags = btrfs_super_flags(fs_info->super_copy);
	flags |= BTRFS_SUPER_FLAG_CHANGING_DATA_CSUM;
	btrfs_set_super_flags(fs_info->super_copy, flags);

	ret = btrfs_commit_transaction(trans, root);
	if (inject_error(0x3964edd9))
		return -EUCLEAN;
	if (ret < 0) {
		errno = -ret;
		error("failed to commit the initial transaction: %m");
		return ret;
	}

	return generate_new_data_csums_range(fs_info, 0, new_csum_type);
}
|
|
|
|
/* After deleting/modifying this many leaves, commit a transaction. */
|
|
#define CSUM_CHANGE_LEAVES_THRESHOLD 32
|
|
|
|
static int delete_old_data_csums(struct btrfs_fs_info *fs_info)
|
|
{
|
|
struct btrfs_root *csum_root = btrfs_csum_root(fs_info, 0);
|
|
struct btrfs_trans_handle *trans = 0;
|
|
struct btrfs_path path = { 0 };
|
|
struct btrfs_key last_key;
|
|
unsigned int deleted_leaves = 0;
|
|
int ret;
|
|
|
|
last_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
|
|
last_key.type = BTRFS_EXTENT_CSUM_KEY;
|
|
last_key.offset = (u64)-1;
|
|
|
|
while (true) {
|
|
int start_slot;
|
|
int nr;
|
|
|
|
if (deleted_leaves >= CSUM_CHANGE_LEAVES_THRESHOLD) {
|
|
UASSERT(trans);
|
|
ret = btrfs_commit_transaction(trans, csum_root);
|
|
if (ret < 0) {
|
|
errno = -ret;
|
|
error_msg(ERROR_MSG_COMMIT_TRANS,
|
|
"deleting old data csums: %m");
|
|
return ret;
|
|
}
|
|
trans = NULL;
|
|
deleted_leaves = 0;
|
|
}
|
|
if (!trans) {
|
|
trans = btrfs_start_transaction(csum_root, 1);
|
|
if (IS_ERR(trans)) {
|
|
ret = PTR_ERR(trans);
|
|
errno = -ret;
|
|
error_msg(ERROR_MSG_START_TRANS,
|
|
"deleting old data csums: %m");
|
|
return ret;
|
|
}
|
|
}
|
|
ret = btrfs_search_slot(trans, csum_root, &last_key, &path, -1, 1);
|
|
if (ret < 0) {
|
|
errno = -ret;
|
|
error("failed to search the last old csum item: %m");
|
|
btrfs_abort_transaction(trans, ret);
|
|
return ret;
|
|
}
|
|
UASSERT(ret > 0);
|
|
|
|
nr = btrfs_header_nritems(path.nodes[0]);
|
|
/* No item left (empty csum tree), exit. */
|
|
if (!nr)
|
|
break;
|
|
for (start_slot = 0; start_slot < nr; start_slot++) {
|
|
struct btrfs_key found_key;
|
|
|
|
btrfs_item_key_to_cpu(path.nodes[0], &found_key, start_slot);
|
|
/* Break from the for loop, we found the first old csum. */
|
|
if (found_key.objectid == BTRFS_EXTENT_CSUM_OBJECTID)
|
|
break;
|
|
}
|
|
/* No more old csum item detected, exit. */
|
|
if (start_slot == nr)
|
|
break;
|
|
|
|
/* Delete items starting from @start_slot to the end. */
|
|
ret = btrfs_del_items(trans, csum_root, &path, start_slot,
|
|
nr - start_slot);
|
|
if (ret < 0) {
|
|
errno = -ret;
|
|
error("failed to delete items: %m");
|
|
break;
|
|
}
|
|
btrfs_release_path(&path);
|
|
deleted_leaves++;
|
|
}
|
|
btrfs_release_path(&path);
|
|
if (ret < 0)
|
|
btrfs_abort_transaction(trans, ret);
|
|
ret = btrfs_commit_transaction(trans, csum_root);
|
|
if (inject_error(0x55fb4d13))
|
|
return -EUCLEAN;
|
|
if (ret < 0) {
|
|
errno = -ret;
|
|
error("failed to commit transaction after deleting the old data csums: %m");
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
static int change_csum_objectids(struct btrfs_fs_info *fs_info)
|
|
{
|
|
struct btrfs_root *csum_root = btrfs_csum_root(fs_info, 0);
|
|
struct btrfs_trans_handle *trans = NULL;
|
|
struct btrfs_path path = { 0 };
|
|
struct btrfs_key last_key;
|
|
unsigned int changed_leaves = 0;
|
|
u64 super_flags;
|
|
int ret = 0;
|
|
|
|
last_key.objectid = BTRFS_CSUM_CHANGE_OBJECTID;
|
|
last_key.type = BTRFS_EXTENT_CSUM_KEY;
|
|
last_key.offset = (u64)-1;
|
|
|
|
while (true) {
|
|
struct btrfs_key found_key;
|
|
int nr;
|
|
|
|
if (changed_leaves >= CSUM_CHANGE_LEAVES_THRESHOLD) {
|
|
UASSERT(trans);
|
|
ret = btrfs_commit_transaction(trans, csum_root);
|
|
if (ret < 0) {
|
|
errno = -ret;
|
|
error_msg(ERROR_MSG_COMMIT_TRANS,
|
|
"changing data csum objectid: %m");
|
|
return ret;
|
|
}
|
|
trans = NULL;
|
|
changed_leaves = 0;
|
|
}
|
|
if (!trans) {
|
|
trans = btrfs_start_transaction(csum_root, 1);
|
|
if (IS_ERR(trans)) {
|
|
ret = PTR_ERR(trans);
|
|
errno = -ret;
|
|
error_msg(ERROR_MSG_START_TRANS,
|
|
"changing data csum objectid: %m");
|
|
return ret;
|
|
}
|
|
}
|
|
ret = btrfs_search_slot(trans, csum_root, &last_key, &path, 0, 1);
|
|
if (ret < 0)
|
|
goto out;
|
|
UASSERT(ret > 0);
|
|
|
|
nr = btrfs_header_nritems(path.nodes[0]);
|
|
/* No item left (empty csum tree), exit. */
|
|
if (!nr)
|
|
goto out;
|
|
/* No more temporary csum items, all converted, exit. */
|
|
if (path.slots[0] == 0)
|
|
goto out;
|
|
|
|
/* All csum items should be new csums. */
|
|
btrfs_item_key_to_cpu(path.nodes[0], &found_key, 0);
|
|
UASSERT(found_key.objectid == BTRFS_CSUM_CHANGE_OBJECTID);
|
|
|
|
/*
|
|
* Start changing the objectids, since EXTENT_CSUM (-10) is
|
|
* larger than CSUM_CHANGE (-13), we always change from the tail.
|
|
*/
|
|
for (int i = nr - 1; i >= 0; i--) {
|
|
btrfs_item_key_to_cpu(path.nodes[0], &found_key, i);
|
|
found_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
|
|
path.slots[0] = i;
|
|
btrfs_set_item_key_safe(fs_info, &path, &found_key);
|
|
}
|
|
btrfs_release_path(&path);
|
|
}
|
|
out:
|
|
btrfs_release_path(&path);
|
|
if (ret < 0) {
|
|
btrfs_abort_transaction(trans, ret);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* All data csum items has been changed to the new type, we can clear
|
|
* the superblock flag for data csum change, and go to the metadata csum
|
|
* change phase.
|
|
*/
|
|
super_flags = btrfs_super_flags(fs_info->super_copy);
|
|
super_flags &= ~BTRFS_SUPER_FLAG_CHANGING_DATA_CSUM;
|
|
super_flags |= BTRFS_SUPER_FLAG_CHANGING_META_CSUM;
|
|
btrfs_set_super_flags(fs_info->super_copy, super_flags);
|
|
ret = btrfs_commit_transaction(trans, csum_root);
|
|
if (inject_error(0x2628b3fe))
|
|
return -EUCLEAN;
|
|
if (ret < 0) {
|
|
errno = -ret;
|
|
error("failed to commit transaction after changing data csum objectids: %m");
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
static int rewrite_tree_block_csum(struct btrfs_fs_info *fs_info, u64 logical,
|
|
u16 new_csum_type)
|
|
{
|
|
struct extent_buffer *eb;
|
|
struct btrfs_tree_parent_check check = { 0 };
|
|
u8 result_old[BTRFS_CSUM_SIZE];
|
|
u8 result_new[BTRFS_CSUM_SIZE];
|
|
int ret;
|
|
|
|
eb = alloc_dummy_extent_buffer(fs_info, logical, fs_info->nodesize);
|
|
if (!eb)
|
|
return -ENOMEM;
|
|
|
|
ret = btrfs_read_extent_buffer(eb, &check);
|
|
if (ret < 0) {
|
|
errno = -ret;
|
|
error("failed to read tree block at logical %llu: %m", logical);
|
|
goto out;
|
|
}
|
|
|
|
/* Verify the csum first. */
|
|
btrfs_csum_data(fs_info, fs_info->csum_type, (u8 *)eb->data + BTRFS_CSUM_SIZE,
|
|
result_old, fs_info->nodesize - BTRFS_CSUM_SIZE);
|
|
btrfs_csum_data(fs_info, new_csum_type, (u8 *)eb->data + BTRFS_CSUM_SIZE,
|
|
result_new, fs_info->nodesize - BTRFS_CSUM_SIZE);
|
|
|
|
/* Matches old csum, rewrite. */
|
|
if (memcmp_extent_buffer(eb, result_old, 0, fs_info->csum_size) == 0) {
|
|
write_extent_buffer(eb, result_new, 0,
|
|
btrfs_csum_type_size(new_csum_type));
|
|
ret = write_data_to_disk(fs_info, eb->data, eb->start,
|
|
fs_info->nodesize);
|
|
if (ret < 0) {
|
|
errno = -ret;
|
|
error("failed to write tree block at logical %llu: %m",
|
|
logical);
|
|
}
|
|
goto out;
|
|
}
|
|
|
|
/* Already new csum. */
|
|
if (memcmp_extent_buffer(eb, result_new, 0, fs_info->csum_size) == 0)
|
|
goto out;
|
|
|
|
/* Csum doesn't match either old or new csum type, bad tree block. */
|
|
ret = -EIO;
|
|
error("tree block csum mismatch at logical %llu", logical);
|
|
out:
|
|
free_extent_buffer(eb);
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Change the checksums of all tree blocks to @new_csum_type in place.
 *
 * Steps:
 * 1. Set the CHANGING_META_CSUM super flag (clearing CHANGING_DATA_CSUM) and
 *    write all super blocks.
 * 2. Disable the metadata csum check in @fs_info and walk the extent tree,
 *    rewriting the csum of every tree block found there.
 * 3. On success clear both csum change flags, switch the super block and
 *    @fs_info to the new csum type, and delete the csum change item from the
 *    tree root in a new transaction.
 *
 * Return 0 on success, <0 on error.
 */
static int change_meta_csums(struct btrfs_fs_info *fs_info, u16 new_csum_type)
{
	struct btrfs_root *extent_root = btrfs_extent_root(fs_info, 0);
	struct btrfs_path path = { 0 };
	struct btrfs_key key;
	u64 super_flags;
	int ret;

	/* Re-set the super flags, this is for resume cases. */
	super_flags = btrfs_super_flags(fs_info->super_copy);
	super_flags &= ~BTRFS_SUPER_FLAG_CHANGING_DATA_CSUM;
	super_flags |= BTRFS_SUPER_FLAG_CHANGING_META_CSUM;
	btrfs_set_super_flags(fs_info->super_copy, super_flags);
	ret = write_all_supers(fs_info);
	if (ret < 0) {
		errno = -ret;
		error("failed to update super flags: %m");
		/* Do not touch any tree block if the flag did not reach disk. */
		return ret;
	}

	/*
	 * Disable metadata csum checks first, as we may hit tree blocks with
	 * either old or new csums.
	 * We will manually check the meta csums here.
	 */
	fs_info->skip_csum_check = true;

	key.objectid = 0;
	key.type = 0;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
	if (ret < 0) {
		errno = -ret;
		error("failed to get the first tree block of extent tree: %m");
		btrfs_release_path(&path);
		return ret;
	}
	/* An all-zero key is not expected to exist. */
	UASSERT(ret > 0);
	while (true) {
		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
		if (key.type != BTRFS_EXTENT_ITEM_KEY &&
		    key.type != BTRFS_METADATA_ITEM_KEY)
			goto next;

		/* Skip EXTENT_ITEMs that describe data extents. */
		if (key.type == BTRFS_EXTENT_ITEM_KEY) {
			struct btrfs_extent_item *ei;

			ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
					    struct btrfs_extent_item);
			if (btrfs_extent_flags(path.nodes[0], ei) &
			    BTRFS_EXTENT_FLAG_DATA)
				goto next;
		}
		/* The key objectid is passed as the logical address of the block. */
		ret = rewrite_tree_block_csum(fs_info, key.objectid, new_csum_type);
		if (ret < 0) {
			errno = -ret;
			error("failed to rewrite csum for tree block %llu: %m",
			      key.objectid);
			goto out;
		}
next:
		ret = btrfs_next_extent_item(extent_root, &path, U64_MAX);
		if (ret < 0) {
			errno = -ret;
			error("failed to get next extent item: %m");
			goto out;
		}
		if (ret > 0) {
			ret = 0;
			goto out;
		}
	}
out:
	btrfs_release_path(&path);

	/*
	 * Finish the change by clearing the csum change flag, update the superblock
	 * csum type, and delete the csum change item in the fs with new csum type.
	 */
	if (ret == 0) {
		struct btrfs_root *tree_root = fs_info->tree_root;
		struct btrfs_trans_handle *trans;

		super_flags = btrfs_super_flags(fs_info->super_copy);

		btrfs_set_super_csum_type(fs_info->super_copy, new_csum_type);
		super_flags &= ~(BTRFS_SUPER_FLAG_CHANGING_DATA_CSUM |
				 BTRFS_SUPER_FLAG_CHANGING_META_CSUM);
		btrfs_set_super_flags(fs_info->super_copy, super_flags);

		fs_info->csum_type = new_csum_type;
		fs_info->csum_size = btrfs_csum_type_size(new_csum_type);
		fs_info->skip_csum_check = 0;

		trans = btrfs_start_transaction(tree_root, 1);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			errno = -ret;
			error("failed to start new transaction with new csum type: %m");
			return ret;
		}
		key.objectid = BTRFS_CSUM_CHANGE_OBJECTID;
		key.type = BTRFS_TEMPORARY_ITEM_KEY;
		key.offset = new_csum_type;

		ret = btrfs_search_slot(trans, tree_root, &key, &path, -1, 1);
		if (ret > 0)
			ret = -ENOENT;
		if (ret < 0) {
			errno = -ret;
			error("failed to locate the csum change item: %m");
			btrfs_release_path(&path);
			btrfs_abort_transaction(trans, ret);
			return ret;
		}
		ret = btrfs_del_item(trans, tree_root, &path);
		if (ret < 0) {
			errno = -ret;
			error("failed to delete the csum change item: %m");
			btrfs_release_path(&path);
			btrfs_abort_transaction(trans, ret);
			return ret;
		}
		btrfs_release_path(&path);
		ret = btrfs_commit_transaction(trans, tree_root);
		if (inject_error(0x9f0ca518))
			return -EUCLEAN;
		if (ret < 0) {
			errno = -ret;
			error("failed to finalize the csum change: %m");
		}
	}
	return ret;
}
|
|
|
|
/*
|
|
* Get the first and last csum items which has @objectid as their objectid.
|
|
*
|
|
* This would be called to handle data csum resume, which may have both old
|
|
* and new csums co-exist in the same csum tree.
|
|
*
|
|
* Return >0 if there is no such EXTENT_CSUM with given @objectid.
|
|
* Return 0 if there is such EXTENT_CSUM and populate @first_ret and @last_ret.
|
|
* Return <0 for errors.
|
|
*/
|
|
static int get_csum_items_range(struct btrfs_fs_info *fs_info,
|
|
u64 objectid, u64 *first_ret, u64 *last_ret,
|
|
u32 *last_item_size)
|
|
{
|
|
struct btrfs_root *csum_root = btrfs_csum_root(fs_info, 0);
|
|
struct btrfs_path path = { 0 };
|
|
struct btrfs_key key;
|
|
int ret;
|
|
|
|
key.objectid = objectid;
|
|
key.type = BTRFS_EXTENT_CSUM_KEY;
|
|
key.offset = 0;
|
|
|
|
ret = btrfs_search_slot(NULL, csum_root, &key, &path, 0, 0);
|
|
if (ret < 0) {
|
|
errno = -ret;
|
|
error("failed to search csum tree: %m");
|
|
return ret;
|
|
}
|
|
if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
|
|
ret = btrfs_next_leaf(csum_root, &path);
|
|
if (ret < 0) {
|
|
errno = -ret;
|
|
error("failed to search csum tree: %m");
|
|
btrfs_release_path(&path);
|
|
return ret;
|
|
}
|
|
/*
|
|
* There is no next leaf, meaning we didn't find any csum item
|
|
* with given objectid.
|
|
*/
|
|
if (ret > 0) {
|
|
btrfs_release_path(&path);
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
|
|
btrfs_release_path(&path);
|
|
if (key.objectid != objectid)
|
|
return 1;
|
|
*first_ret = key.offset;
|
|
|
|
key.objectid = objectid;
|
|
key.type = BTRFS_EXTENT_CSUM_KEY;
|
|
key.offset = (u64)-1;
|
|
|
|
ret = btrfs_search_slot(NULL, csum_root, &key, &path, 0, 0);
|
|
if (ret < 0) {
|
|
errno = -ret;
|
|
error("failed to search csum tree: %m");
|
|
return ret;
|
|
}
|
|
UASSERT(ret > 0);
|
|
ret = btrfs_previous_item(csum_root, &path, objectid,
|
|
BTRFS_EXTENT_CSUM_KEY);
|
|
if (ret < 0) {
|
|
errno = -ret;
|
|
error("failed to search csum tree: %m");
|
|
btrfs_release_path(&path);
|
|
return ret;
|
|
}
|
|
if (ret > 0) {
|
|
btrfs_release_path(&path);
|
|
return 1;
|
|
}
|
|
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
|
|
*last_item_size = btrfs_item_size(path.nodes[0], path.slots[0]);
|
|
btrfs_release_path(&path);
|
|
*last_ret = key.offset;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Verify one data sector to determine which csum type matches the csum.
|
|
*
|
|
* Return >0 if the current csum type doesn't pass the check (including csum
|
|
* item too small compared to csum type).
|
|
* Return 0 if the current csum type passes the check.
|
|
* Return <0 for other errors.
|
|
*/
|
|
static int determine_csum_type(struct btrfs_fs_info *fs_info, u64 logical,
|
|
u16 csum_type)
|
|
{
|
|
struct btrfs_root *csum_root = btrfs_csum_root(fs_info, logical);
|
|
struct btrfs_path path = { 0 };
|
|
struct btrfs_key key;
|
|
u16 csum_size = btrfs_csum_type_size(csum_type);
|
|
u8 csum_expected[BTRFS_CSUM_SIZE];
|
|
void *buf;
|
|
int ret;
|
|
|
|
key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
|
|
key.type = BTRFS_EXTENT_CSUM_KEY;
|
|
key.offset = logical;
|
|
|
|
ret = btrfs_search_slot(NULL, csum_root, &key, &path, 0, 0);
|
|
if (ret > 0)
|
|
ret = -ENOENT;
|
|
if (ret < 0) {
|
|
errno = -ret;
|
|
error("failed to search csum tree: %m");
|
|
btrfs_release_path(&path);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* The csum item size is smaller than expected csum size, no
|
|
* more need to check.
|
|
*/
|
|
if (btrfs_item_size(path.nodes[0], path.slots[0]) < csum_size) {
|
|
btrfs_release_path(&path);
|
|
return 1;
|
|
}
|
|
read_extent_buffer(path.nodes[0], csum_expected,
|
|
btrfs_item_ptr_offset(path.nodes[0], path.slots[0]),
|
|
csum_size);
|
|
btrfs_release_path(&path);
|
|
|
|
buf = malloc(fs_info->sectorsize);
|
|
if (!buf)
|
|
return -ENOMEM;
|
|
ret = read_verify_one_data_sector(fs_info, logical, buf, csum_expected,
|
|
csum_type, false);
|
|
if (ret < 0)
|
|
ret = 1;
|
|
free(buf);
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Resume an interrupted data csum change.
 *
 * The ranges of the old (BTRFS_EXTENT_CSUM_OBJECTID) and new
 * (BTRFS_CSUM_CHANGE_OBJECTID) csum items are compared to find out in which
 * phase the change was interrupted.  Execution continues from that phase and
 * falls through all the following ones: generating new csums, deleting old
 * csums, changing the objectids and finally changing the metadata csums.
 *
 * Return 0 on success, -EUCLEAN if the phase cannot be determined, or the
 * error of the failing phase.
 */
static int resume_data_csum_change(struct btrfs_fs_info *fs_info, u16 new_csum_type)
{
	u64 old_csum_first;
	u64 old_csum_last;
	u64 new_csum_first;
	u64 new_csum_last;
	bool old_csum_found = false;
	bool new_csum_found = false;
	u32 old_last_size;
	u32 new_last_size;
	u64 resume_start = 0;
	int ret;

	ret = get_csum_items_range(fs_info, BTRFS_EXTENT_CSUM_OBJECTID,
				   &old_csum_first, &old_csum_last,
				   &old_last_size);
	if (ret < 0)
		return ret;
	if (ret == 0)
		old_csum_found = true;
	ret = get_csum_items_range(fs_info, BTRFS_CSUM_CHANGE_OBJECTID,
				   &new_csum_first, &new_csum_last,
				   &new_last_size);
	if (ret < 0)
		return ret;
	if (ret == 0)
		new_csum_found = true;

	/*
	 * No csum item found at all, this fs has empty csum tree.
	 * Just go metadata change.
	 */
	if (!old_csum_found && !new_csum_found)
		goto new_meta_csum;

	/*
	 * Only old csums exists. This can be one of the two cases:
	 * - Only the csum change item inserted, no new csum generated.
	 * - All data csum is converted to the new type.
	 *
	 * Here we need to check if the csum item is in old or new type.
	 */
	if (old_csum_found && !new_csum_found) {
		ret = determine_csum_type(fs_info, old_csum_first, fs_info->csum_type);
		if (ret == 0) {
			/* All old data csums, restart generation. */
			resume_start = 0;
			goto new_data_csums;
		}
		ret = determine_csum_type(fs_info, old_csum_first, new_csum_type);
		if (ret == 0) {
			/*
			 * All new data csums, just go metadata csum change, which
			 * would drop the CHANGING_DATA_CSUM flag for us.
			 */
			goto new_meta_csum;
		}
		error("The data checksum for logical %llu doesn't match either old or new csum type, unable to resume",
			old_csum_first);
		return -EUCLEAN;
	}

	/*
	 * Both old and new csum exist, and new csum is only a subset of the
	 * old ones.
	 *
	 * This means we're still generating new data csums.
	 */
	if (old_csum_found && new_csum_found && old_csum_first <= new_csum_first &&
	    old_csum_last >= new_csum_last) {
		/* Continue right after the data covered by the last new csum item. */
		resume_start = new_csum_last + new_last_size /
				btrfs_csum_type_size(new_csum_type) *
				fs_info->sectorsize;
		goto new_data_csums;
	}

	/*
	 * Both old and new csum exist, and old csum is a subset of the new ones.
	 *
	 * This means we're deleting the old csums.
	 */
	if (old_csum_found && new_csum_found && new_csum_first <= old_csum_first &&
	    new_csum_last >= old_csum_last)
		goto delete_old;

	/*
	 * Both csums exist but not covering each other, or only new csum exists.
	 *
	 * This means we have already deleted all the old csums, is going to or
	 * have already started objectid change.
	 *
	 * The two cases are alternatives, so they must be combined with a
	 * logical OR.
	 */
	if ((old_csum_found && new_csum_found && old_csum_last <= new_csum_first) ||
	    (!old_csum_found && new_csum_found))
		goto change;

	/* The remaining cases should not be possible. */
	error("unexpected resume condition:");
	error("old csum found=%d start=%llu last=%llu new csum found=%d start=%llu last=%llu",
		old_csum_found,
		old_csum_found ? old_csum_first : 0,
		old_csum_found ? old_csum_last : 0,
		new_csum_found,
		new_csum_found ? new_csum_first : 0,
		new_csum_found ? new_csum_last : 0);
	return -EUCLEAN;

new_data_csums:
	ret = generate_new_data_csums_range(fs_info, resume_start, new_csum_type);
	if (ret < 0) {
		errno = -ret;
		error("failed to generate new data csums: %m");
		return ret;
	}
delete_old:
	ret = delete_old_data_csums(fs_info);
	if (ret < 0)
		return ret;
change:
	ret = change_csum_objectids(fs_info);
	if (ret < 0)
		return ret;
new_meta_csum:
	ret = change_meta_csums(fs_info, new_csum_type);
	return ret;
}
|
|
|
|
/*
 * Resume an interrupted csum change.
 *
 * The csum change item in the tree root records the target csum type as its
 * key offset; it must match @new_csum_type.  Depending on which super flag
 * is set, either the data csum change or the metadata csum change is resumed.
 *
 * Return 0 on success.
 * Return -EUCLEAN if both CHANGING_DATA_CSUM and CHANGING_META_CSUM are set.
 * Return -ENOENT if the csum change item is missing.
 * Return -EINVAL if @new_csum_type differs from the recorded target type.
 * Return other <0 values for errors of the resumed phase.
 */
static int resume_csum_change(struct btrfs_fs_info *fs_info, u16 new_csum_type)
{
	const u64 change_mask = BTRFS_SUPER_FLAG_CHANGING_DATA_CSUM |
				BTRFS_SUPER_FLAG_CHANGING_META_CSUM;
	const u64 super_flags = btrfs_super_flags(fs_info->super_copy);
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_path path = { 0 };
	struct btrfs_key key;
	u64 recorded_type;
	int ret;

	if ((super_flags & change_mask) == change_mask) {
		error(
"invalid super flags, only one bit of CHANGING_DATA_CSUM or CHANGING_META_CSUM can be set");
		return -EUCLEAN;
	}

	/* Search past the last possible change item, then step back to it. */
	key.objectid = BTRFS_CSUM_CHANGE_OBJECTID;
	key.type = BTRFS_TEMPORARY_ITEM_KEY;
	key.offset = (u64)-1;
	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
	if (ret < 0) {
		errno = -ret;
		error("failed to locate the csum change item: %m");
		return ret;
	}
	UASSERT(ret > 0);
	ret = btrfs_previous_item(root, &path, BTRFS_CSUM_CHANGE_OBJECTID,
				  BTRFS_TEMPORARY_ITEM_KEY);
	if (ret > 0)
		ret = -ENOENT;
	if (ret < 0) {
		errno = -ret;
		error("failed to locate the csum change item: %m");
		btrfs_release_path(&path);
		return ret;
	}
	btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
	btrfs_release_path(&path);

	recorded_type = key.offset;
	if (recorded_type != new_csum_type) {
		error(
"target csum type mismatch with interrupted csum type, has %s (%u) expect %s (%llu)",
		      btrfs_super_csum_name(new_csum_type), new_csum_type,
		      btrfs_super_csum_name(recorded_type), recorded_type);
		return -EINVAL;
	}

	if (super_flags & BTRFS_SUPER_FLAG_CHANGING_DATA_CSUM) {
		ret = resume_data_csum_change(fs_info, new_csum_type);
		if (ret < 0) {
			errno = -ret;
			error("failed to resume data checksum change: %m");
		}
	} else {
		/*
		 * For metadata resume, just call the same change_meta_csums(),
		 * as there is no record of previously converted metadata, thus
		 * all metadata has to be gone through anyway.
		 */
		ret = change_meta_csums(fs_info, new_csum_type);
		if (ret < 0) {
			errno = -ret;
			error("failed to resume metadata csum change: %m");
		}
	}
	return ret;
}
|
|
|
|
/* Print the message announcing a finished csum type conversion. */
static void report_csum_type_converted(u16 old_csum_type, u16 new_csum_type)
{
	printf("converted csum type from %s (%u) to %s (%u)\n",
	       btrfs_super_csum_name(old_csum_type), old_csum_type,
	       btrfs_super_csum_name(new_csum_type), new_csum_type);
}

/*
 * Change the checksum type of the filesystem to @new_csum_type.
 *
 * If the super block shows an unfinished csum change, that change is resumed.
 * Otherwise the change runs through these phases:
 * 0. check for conflicting features/states
 * 1. generate new data csums under a temporary objectid
 * 2. delete the old data csums
 * 3. change the objectid of the new data csums
 * 4. change the metadata csums in place
 *
 * Return 0 on success, <0 on error.
 */
int btrfs_change_csum_type(struct btrfs_fs_info *fs_info, u16 new_csum_type)
{
	/* Remember the current type now, for the final message. */
	const u16 old_csum_type = fs_info->csum_type;
	const u64 change_mask = BTRFS_SUPER_FLAG_CHANGING_DATA_CSUM |
				BTRFS_SUPER_FLAG_CHANGING_META_CSUM;
	int ret;

	/* Phase 0, check conflicting features. */
	ret = check_csum_change_requreiment(fs_info, new_csum_type);
	if (ret < 0)
		return ret;

	/* An earlier run was interrupted, continue it instead of starting over. */
	if (btrfs_super_flags(fs_info->super_copy) & change_mask) {
		ret = resume_csum_change(fs_info, new_csum_type);
		if (ret < 0) {
			errno = -ret;
			error("failed to resume unfinished csum change: %m");
			return ret;
		}
		report_csum_type_converted(old_csum_type, new_csum_type);
		return ret;
	}

	/*
	 * Phase 1, generate new data csums.
	 *
	 * The new data csums get a different key objectid, and a temporary
	 * item in the root tree records the new checksum algorithm.
	 */
	ret = generate_new_data_csums(fs_info, new_csum_type);
	if (ret < 0) {
		errno = -ret;
		error("failed to generate new data csums: %m");
		return ret;
	}

	/* Phase 2, delete the old data csums. */
	ret = delete_old_data_csums(fs_info);
	if (ret < 0)
		return ret;

	/* Phase 3, change the new csum key objectid. */
	ret = change_csum_objectids(fs_info);
	if (ret < 0)
		return ret;

	/*
	 * Phase 4, change the csums for metadata.
	 *
	 * This is done in-place, as progs has no good method like relocation.
	 * Thus reading a tree block with either csum has to be supported.
	 */
	ret = change_meta_csums(fs_info, new_csum_type);
	if (ret != 0)
		return ret;

	report_csum_type_converted(old_csum_type, new_csum_type);
	return 0;
}
|