2007-06-12 13:07:11 +00:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2007 Oracle. All rights reserved.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public
|
|
|
|
* License v2 as published by the Free Software Foundation.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public
|
|
|
|
* License along with this program; if not, write to the
|
|
|
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
|
|
* Boston, MA 02111-1307, USA.
|
|
|
|
*/
|
|
|
|
|
2007-02-02 14:18:22 +00:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <unistd.h>
|
2015-01-29 08:32:39 +00:00
|
|
|
#include <uuid/uuid.h>
|
2007-02-02 14:18:22 +00:00
|
|
|
#include "kerncompat.h"
|
2019-06-20 13:30:57 +00:00
|
|
|
#include "kernel-lib/radix-tree.h"
|
2020-08-18 13:56:04 +00:00
|
|
|
#include "kernel-shared/ctree.h"
|
2020-08-18 13:56:04 +00:00
|
|
|
#include "kernel-shared/disk-io.h"
|
2020-08-18 13:56:04 +00:00
|
|
|
#include "kernel-shared/volumes.h"
|
2020-08-18 13:56:04 +00:00
|
|
|
#include "kernel-shared/transaction.h"
|
2021-04-26 06:27:21 +00:00
|
|
|
#include "zoned.h"
|
2019-09-25 13:37:27 +00:00
|
|
|
#include "crypto/crc32c.h"
|
2019-06-19 23:46:21 +00:00
|
|
|
#include "common/utils.h"
|
2020-08-18 13:56:04 +00:00
|
|
|
#include "kernel-shared/print-tree.h"
|
2019-06-19 23:46:21 +00:00
|
|
|
#include "common/rbtree-utils.h"
|
2019-07-01 22:42:23 +00:00
|
|
|
#include "common/device-scan.h"
|
2019-09-25 13:37:26 +00:00
|
|
|
#include "crypto/hash.h"
|
2008-03-24 19:03:18 +00:00
|
|
|
|
2015-01-29 08:32:39 +00:00
|
|
|
/*
 * Result codes produced by check_tree_block() when a tree block header
 * fails validation; 0 means the header passed every check.
 */
#define BTRFS_BAD_BYTENR		(-1)	/* header bytenr != buffer start */
#define BTRFS_BAD_FSID			(-2)	/* fsid matches no fs_devices in the chain */
#define BTRFS_BAD_LEVEL			(-3)	/* level >= BTRFS_MAX_LEVEL */
#define BTRFS_BAD_NRITEMS		(-4)	/* too many items, or an empty non-leaf */
|
|
|
|
|
|
|
|
/* Calculate max possible nritems for a leaf/node */
|
|
|
|
static u32 max_nritems(u8 level, u32 nodesize)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (level == 0)
|
|
|
|
return ((nodesize - sizeof(struct btrfs_header)) /
|
|
|
|
sizeof(struct btrfs_item));
|
|
|
|
return ((nodesize - sizeof(struct btrfs_header)) /
|
|
|
|
sizeof(struct btrfs_key_ptr));
|
|
|
|
}
|
2015-01-29 08:32:39 +00:00
|
|
|
|
2016-02-22 06:59:55 +00:00
|
|
|
static int check_tree_block(struct btrfs_fs_info *fs_info,
|
|
|
|
struct extent_buffer *buf)
|
2007-02-02 14:18:22 +00:00
|
|
|
{
|
2008-01-04 15:38:22 +00:00
|
|
|
|
2018-10-11 15:03:59 +00:00
|
|
|
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
|
2017-05-17 09:17:56 +00:00
|
|
|
u32 nodesize = fs_info->nodesize;
|
2018-10-11 15:03:59 +00:00
|
|
|
bool fsid_match = false;
|
2015-01-29 08:32:39 +00:00
|
|
|
int ret = BTRFS_BAD_FSID;
|
2008-11-18 15:40:06 +00:00
|
|
|
|
2015-01-29 08:32:39 +00:00
|
|
|
if (buf->start != btrfs_header_bytenr(buf))
|
|
|
|
return BTRFS_BAD_BYTENR;
|
2015-05-22 01:01:23 +00:00
|
|
|
if (btrfs_header_level(buf) >= BTRFS_MAX_LEVEL)
|
|
|
|
return BTRFS_BAD_LEVEL;
|
|
|
|
if (btrfs_header_nritems(buf) > max_nritems(btrfs_header_level(buf),
|
2016-04-01 10:57:11 +00:00
|
|
|
nodesize))
|
2015-05-22 01:01:23 +00:00
|
|
|
return BTRFS_BAD_NRITEMS;
|
2008-11-18 15:40:06 +00:00
|
|
|
|
2016-05-19 08:44:35 +00:00
|
|
|
/* Only leaf can be empty */
|
|
|
|
if (btrfs_header_nritems(buf) == 0 &&
|
|
|
|
btrfs_header_level(buf) != 0)
|
|
|
|
return BTRFS_BAD_NRITEMS;
|
|
|
|
|
2008-11-18 15:40:06 +00:00
|
|
|
while (fs_devices) {
|
2018-10-11 15:03:59 +00:00
|
|
|
/*
|
|
|
|
* Checking the incompat flag is only valid for the current
|
|
|
|
* fs. For seed devices it's forbidden to have their uuid
|
|
|
|
* changed so reading ->fsid in this case is fine
|
|
|
|
*/
|
|
|
|
if (fs_devices == fs_info->fs_devices &&
|
|
|
|
btrfs_fs_incompat(fs_info, METADATA_UUID))
|
|
|
|
fsid_match = !memcmp_extent_buffer(buf,
|
|
|
|
fs_devices->metadata_uuid,
|
|
|
|
btrfs_header_fsid(),
|
|
|
|
BTRFS_FSID_SIZE);
|
|
|
|
else
|
|
|
|
fsid_match = !memcmp_extent_buffer(buf,
|
|
|
|
fs_devices->fsid,
|
|
|
|
btrfs_header_fsid(),
|
|
|
|
BTRFS_FSID_SIZE);
|
|
|
|
|
|
|
|
|
|
|
|
if (fs_info->ignore_fsid_mismatch || fsid_match) {
|
2008-11-18 15:40:06 +00:00
|
|
|
ret = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
fs_devices = fs_devices->seed;
|
|
|
|
}
|
|
|
|
return ret;
|
2007-02-02 14:18:22 +00:00
|
|
|
}
|
|
|
|
|
2016-02-22 06:59:55 +00:00
|
|
|
static void print_tree_block_error(struct btrfs_fs_info *fs_info,
|
2015-01-29 08:32:39 +00:00
|
|
|
struct extent_buffer *eb,
|
|
|
|
int err)
|
|
|
|
{
|
|
|
|
char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
|
|
|
|
char found_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
|
|
|
|
u8 buf[BTRFS_UUID_SIZE];
|
|
|
|
|
2018-06-14 02:40:39 +00:00
|
|
|
if (!err)
|
|
|
|
return;
|
|
|
|
|
|
|
|
fprintf(stderr, "bad tree block %llu, ", eb->start);
|
2015-01-29 08:32:39 +00:00
|
|
|
switch (err) {
|
|
|
|
case BTRFS_BAD_FSID:
|
|
|
|
read_extent_buffer(eb, buf, btrfs_header_fsid(),
|
|
|
|
BTRFS_UUID_SIZE);
|
|
|
|
uuid_unparse(buf, found_uuid);
|
2018-10-11 15:04:02 +00:00
|
|
|
uuid_unparse(fs_info->fs_devices->metadata_uuid, fs_uuid);
|
2015-01-29 08:32:39 +00:00
|
|
|
fprintf(stderr, "fsid mismatch, want=%s, have=%s\n",
|
|
|
|
fs_uuid, found_uuid);
|
|
|
|
break;
|
|
|
|
case BTRFS_BAD_BYTENR:
|
|
|
|
fprintf(stderr, "bytenr mismatch, want=%llu, have=%llu\n",
|
|
|
|
eb->start, btrfs_header_bytenr(eb));
|
|
|
|
break;
|
2015-05-22 01:01:23 +00:00
|
|
|
case BTRFS_BAD_LEVEL:
|
2019-08-11 22:46:49 +00:00
|
|
|
fprintf(stderr, "bad level, %u > %d\n",
|
2015-05-22 01:01:23 +00:00
|
|
|
btrfs_header_level(eb), BTRFS_MAX_LEVEL);
|
|
|
|
break;
|
|
|
|
case BTRFS_BAD_NRITEMS:
|
|
|
|
fprintf(stderr, "invalid nr_items: %u\n",
|
|
|
|
btrfs_header_nritems(eb));
|
|
|
|
break;
|
2015-01-29 08:32:39 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-02-18 00:38:08 +00:00
|
|
|
/*
 * Checksum @len bytes at @data with the algorithm selected by @csum_type.
 *
 * @out is first cleared for BTRFS_CSUM_SIZE bytes and then handed to the
 * matching hash_*() helper, whose return value is passed back to the caller.
 * An unknown @csum_type is reported on stderr, trips ASSERT(0) and, if
 * execution continues past the assertion, yields -1.
 * @fs_info is not used by this function.
 */
int btrfs_csum_data(struct btrfs_fs_info *fs_info, u16 csum_type, const u8 *data,
		    u8 *out, size_t len)
{
	int ret = -1;

	memset(out, 0, BTRFS_CSUM_SIZE);

	if (csum_type == BTRFS_CSUM_TYPE_CRC32) {
		ret = hash_crc32c(data, len, out);
	} else if (csum_type == BTRFS_CSUM_TYPE_XXHASH) {
		ret = hash_xxhash(data, len, out);
	} else if (csum_type == BTRFS_CSUM_TYPE_SHA256) {
		ret = hash_sha256(data, len, out);
	} else if (csum_type == BTRFS_CSUM_TYPE_BLAKE2) {
		ret = hash_blake2b(data, len, out);
	} else {
		fprintf(stderr, "ERROR: unknown csum type: %d\n", csum_type);
		ASSERT(0);
	}

	return ret;
}
|
|
|
|
|
2013-07-03 13:25:17 +00:00
|
|
|
/*
 * Compute the checksum of a tree block and either verify or store it.
 *
 * The checksum covers everything after the first BTRFS_CSUM_SIZE bytes of
 * the buffer. Only the first @csum_size bytes of the result are compared
 * with, or written to, offset 0 of the buffer.
 *
 * @verify: non-zero to compare against the stored checksum, zero to
 *          overwrite the stored checksum with the computed one
 * @silent: when verifying, non-zero suppresses the mismatch message
 *
 * Returns 1 on a verification mismatch, 0 otherwise.
 */
static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size,
				  int verify, int silent, u16 csum_type)
{
	u8 computed[BTRFS_CSUM_SIZE];
	u8 *payload = (u8 *)buf->data + BTRFS_CSUM_SIZE;
	u32 payload_len = buf->len - BTRFS_CSUM_SIZE;

	btrfs_csum_data(buf->fs_info, csum_type, payload, computed,
			payload_len);

	/* Write mode: stamp the fresh checksum into the header and finish */
	if (!verify) {
		write_extent_buffer(buf, computed, 0, csum_size);
		return 0;
	}

	if (!memcmp_extent_buffer(buf, computed, 0, csum_size))
		return 0;

	if (!silent) {
		char found[BTRFS_CSUM_STRING_LEN];
		char wanted[BTRFS_CSUM_STRING_LEN];

		btrfs_format_csum(csum_type, computed, found);
		btrfs_format_csum(csum_type, (u8 *)buf->data, wanted);
		printk(
	"checksum verify failed on %llu wanted %s found %s\n",
		       (unsigned long long)buf->start,
		       wanted, found);
	}
	return 1;
}
|
|
|
|
|
2019-09-03 15:00:37 +00:00
|
|
|
/*
 * Verify (@verify != 0) or update (@verify == 0) the checksum of @buf.
 * A verification mismatch is always reported; returns what
 * __csum_tree_block_size() returns.
 */
int csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, int verify,
			 u16 csum_type)
{
	const int silent = 0;

	return __csum_tree_block_size(buf, csum_size, verify, silent,
				      csum_type);
}
|
|
|
|
|
2019-09-03 15:00:37 +00:00
|
|
|
/*
 * Verify the checksum of @buf without printing anything on a mismatch.
 * Returns what __csum_tree_block_size() returns in verify mode.
 */
int verify_tree_block_csum_silent(struct extent_buffer *buf, u16 csum_size,
				  u16 csum_type)
{
	const int verify = 1;
	const int silent = 1;

	return __csum_tree_block_size(buf, csum_size, verify, silent,
				      csum_type);
}
|
|
|
|
|
2017-06-13 09:19:20 +00:00
|
|
|
int csum_tree_block(struct btrfs_fs_info *fs_info,
|
|
|
|
struct extent_buffer *buf, int verify)
|
2008-12-02 14:58:23 +00:00
|
|
|
{
|
2019-09-03 15:00:37 +00:00
|
|
|
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
|
|
|
|
u16 csum_type = btrfs_super_csum_type(fs_info->super_copy);
|
|
|
|
|
2016-02-22 06:59:55 +00:00
|
|
|
if (verify && fs_info->suppress_check_block_errors)
|
2019-09-03 15:00:37 +00:00
|
|
|
return verify_tree_block_csum_silent(buf, csum_size, csum_type);
|
|
|
|
return csum_tree_block_size(buf, csum_size, verify, csum_type);
|
2008-12-02 14:58:23 +00:00
|
|
|
}
|
|
|
|
|
2017-06-13 09:19:25 +00:00
|
|
|
struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
|
2008-01-04 15:38:22 +00:00
|
|
|
u64 bytenr, u32 blocksize)
|
2007-03-01 23:59:40 +00:00
|
|
|
{
|
2017-06-13 09:19:25 +00:00
|
|
|
return find_extent_buffer(&fs_info->extent_cache,
|
2008-01-04 15:38:22 +00:00
|
|
|
bytenr, blocksize);
|
2007-03-01 23:59:40 +00:00
|
|
|
}
|
|
|
|
|
2016-02-22 06:59:55 +00:00
|
|
|
/*
 * Obtain a nodesize-sized extent buffer for @bytenr through
 * alloc_extent_buffer(); its return value is passed straight back.
 */
struct extent_buffer* btrfs_find_create_tree_block(
		struct btrfs_fs_info *fs_info, u64 bytenr)
{
	const u32 blocksize = fs_info->nodesize;

	return alloc_extent_buffer(fs_info, bytenr, blocksize);
}
|
|
|
|
|
2017-06-13 09:19:26 +00:00
|
|
|
/*
 * Hint that the tree block at @bytenr will be read soon.
 *
 * Nothing is issued when the block is already cached and up to date for
 * @parent_transid, or when the logical address cannot be mapped. Otherwise
 * readahead() is called on the first stripe of the mapping for nodesize
 * bytes; its result is not checked.
 */
void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
			  u64 parent_transid)
{
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_device *device;
	struct extent_buffer *eb;
	u64 length;

	eb = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
	if (eb && btrfs_buffer_uptodate(eb, parent_transid))
		goto out;

	if (btrfs_map_block(fs_info, READ, bytenr, &length, &multi, 0, NULL))
		goto out;

	device = multi->stripes[0].dev;
	device->total_ios++;
	readahead(device->fd, multi->stripes[0].physical, fs_info->nodesize);

out:
	free_extent_buffer(eb);
	kfree(multi);
}
|
|
|
|
|
2008-05-13 17:48:58 +00:00
|
|
|
/*
 * Check that the generation in the header of @eb is the one its parent
 * expects.
 *
 * @io_tree:        unused
 * @eb:             tree block to check
 * @parent_transid: expected generation; 0 disables the check
 * @ignore:         non-zero to tolerate a mismatch
 *
 * Returns 0 when the generation matches, when @parent_transid is 0, or when
 * a mismatch is tolerated; in the tolerated case the buffer is flagged
 * EXTENT_BAD_TRANSID. Returns 1 on a mismatch that is not tolerated, after
 * clearing the buffer's uptodate state.
 */
static int verify_parent_transid(struct extent_io_tree *io_tree,
				 struct extent_buffer *eb, u64 parent_transid,
				 int ignore)
{
	if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
		return 0;

	/*
	 * From here on the generation is known to differ, so the former
	 * "uptodate and generation matches" shortcut could never trigger
	 * and has been dropped.
	 */
	printk("parent transid verify failed on %llu wanted %llu found %llu\n",
	       (unsigned long long)eb->start,
	       (unsigned long long)parent_transid,
	       (unsigned long long)btrfs_header_generation(eb));
	if (ignore) {
		eb->flags |= EXTENT_BAD_TRANSID;
		printk("Ignoring transid failure\n");
		return 0;
	}

	clear_extent_buffer_uptodate(eb);
	return 1;
}
|
|
|
|
|
|
|
|
|
Btrfs-progs: enhance btrfs-image to restore image onto multiple disks
This adds a 'btrfs-image -m' option, which let us restore an image that
is built from a btrfs of multiple disks onto several disks altogether.
This aims to address the following case,
$ mkfs.btrfs -m raid0 sda sdb
$ btrfs-image sda image.file
$ btrfs-image -r image.file sdc
---------
so we can only restore metadata onto sdc, and another thing is we can
only mount sdc with degraded mode as we don't provide informations of
another disk. And, it's built as RAID0 and we have only one disk,
so after mount sdc we'll get into readonly mode.
This is just annoying for people(like me) who're trying to restore image
but turn to find they cannot make it work.
So this'll make your life easier, just tap
$ btrfs-image -m image.file sdc sdd
---------
then you get everything about metadata done, the same offset with that of
the originals(of course, you need offer enough disk size, at least the disk
size of the original disks).
Besides, this also works with raid5 and raid6 metadata image.
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
2013-06-22 05:32:45 +00:00
|
|
|
int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror)
|
2009-07-11 17:12:37 +00:00
|
|
|
{
|
|
|
|
unsigned long offset = 0;
|
|
|
|
struct btrfs_multi_bio *multi = NULL;
|
|
|
|
struct btrfs_device *device;
|
|
|
|
int ret = 0;
|
|
|
|
u64 read_len;
|
|
|
|
unsigned long bytes_left = eb->len;
|
|
|
|
|
|
|
|
while (bytes_left) {
|
|
|
|
read_len = bytes_left;
|
Btrfs-progs: enhance btrfs-image to restore image onto multiple disks
This adds a 'btrfs-image -m' option, which let us restore an image that
is built from a btrfs of multiple disks onto several disks altogether.
This aims to address the following case,
$ mkfs.btrfs -m raid0 sda sdb
$ btrfs-image sda image.file
$ btrfs-image -r image.file sdc
---------
so we can only restore metadata onto sdc, and another thing is we can
only mount sdc with degraded mode as we don't provide informations of
another disk. And, it's built as RAID0 and we have only one disk,
so after mount sdc we'll get into readonly mode.
This is just annoying for people(like me) who're trying to restore image
but turn to find they cannot make it work.
So this'll make your life easier, just tap
$ btrfs-image -m image.file sdc sdd
---------
then you get everything about metadata done, the same offset with that of
the originals(of course, you need offer enough disk size, at least the disk
size of the original disks).
Besides, this also works with raid5 and raid6 metadata image.
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
2013-06-22 05:32:45 +00:00
|
|
|
device = NULL;
|
|
|
|
|
2014-09-26 13:02:49 +00:00
|
|
|
if (!info->on_restoring &&
|
|
|
|
eb->start != BTRFS_SUPER_INFO_OFFSET) {
|
2017-06-13 09:19:17 +00:00
|
|
|
ret = btrfs_map_block(info, READ, eb->start + offset,
|
|
|
|
&read_len, &multi, mirror, NULL);
|
Btrfs-progs: enhance btrfs-image to restore image onto multiple disks
This adds a 'btrfs-image -m' option, which let us restore an image that
is built from a btrfs of multiple disks onto several disks altogether.
This aims to address the following case,
$ mkfs.btrfs -m raid0 sda sdb
$ btrfs-image sda image.file
$ btrfs-image -r image.file sdc
---------
so we can only restore metadata onto sdc, and another thing is we can
only mount sdc with degraded mode as we don't provide informations of
another disk. And, it's built as RAID0 and we have only one disk,
so after mount sdc we'll get into readonly mode.
This is just annoying for people(like me) who're trying to restore image
but turn to find they cannot make it work.
So this'll make your life easier, just tap
$ btrfs-image -m image.file sdc sdd
---------
then you get everything about metadata done, the same offset with that of
the originals(of course, you need offer enough disk size, at least the disk
size of the original disks).
Besides, this also works with raid5 and raid6 metadata image.
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
2013-06-22 05:32:45 +00:00
|
|
|
if (ret) {
|
2021-06-11 21:07:50 +00:00
|
|
|
printk("Couldn't map the block %llu\n", eb->start + offset);
|
Btrfs-progs: enhance btrfs-image to restore image onto multiple disks
This adds a 'btrfs-image -m' option, which let us restore an image that
is built from a btrfs of multiple disks onto several disks altogether.
This aims to address the following case,
$ mkfs.btrfs -m raid0 sda sdb
$ btrfs-image sda image.file
$ btrfs-image -r image.file sdc
---------
so we can only restore metadata onto sdc, and another thing is we can
only mount sdc with degraded mode as we don't provide informations of
another disk. And, it's built as RAID0 and we have only one disk,
so after mount sdc we'll get into readonly mode.
This is just annoying for people(like me) who're trying to restore image
but turn to find they cannot make it work.
So this'll make your life easier, just tap
$ btrfs-image -m image.file sdc sdd
---------
then you get everything about metadata done, the same offset with that of
the originals(of course, you need offer enough disk size, at least the disk
size of the original disks).
Besides, this also works with raid5 and raid6 metadata image.
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
2013-06-22 05:32:45 +00:00
|
|
|
kfree(multi);
|
|
|
|
return -EIO;
|
|
|
|
}
|
|
|
|
device = multi->stripes[0].dev;
|
2009-07-11 17:12:37 +00:00
|
|
|
|
2015-08-21 03:21:26 +00:00
|
|
|
if (device->fd <= 0) {
|
Btrfs-progs: enhance btrfs-image to restore image onto multiple disks
This adds a 'btrfs-image -m' option, which let us restore an image that
is built from a btrfs of multiple disks onto several disks altogether.
This aims to address the following case,
$ mkfs.btrfs -m raid0 sda sdb
$ btrfs-image sda image.file
$ btrfs-image -r image.file sdc
---------
so we can only restore metadata onto sdc, and another thing is we can
only mount sdc with degraded mode as we don't provide informations of
another disk. And, it's built as RAID0 and we have only one disk,
so after mount sdc we'll get into readonly mode.
This is just annoying for people(like me) who're trying to restore image
but turn to find they cannot make it work.
So this'll make your life easier, just tap
$ btrfs-image -m image.file sdc sdd
---------
then you get everything about metadata done, the same offset with that of
the originals(of course, you need offer enough disk size, at least the disk
size of the original disks).
Besides, this also works with raid5 and raid6 metadata image.
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
2013-06-22 05:32:45 +00:00
|
|
|
kfree(multi);
|
|
|
|
return -EIO;
|
|
|
|
}
|
|
|
|
|
|
|
|
eb->fd = device->fd;
|
|
|
|
device->total_ios++;
|
|
|
|
eb->dev_bytenr = multi->stripes[0].physical;
|
2013-02-25 22:54:42 +00:00
|
|
|
kfree(multi);
|
Btrfs-progs: enhance btrfs-image to restore image onto multiple disks
This adds a 'btrfs-image -m' option, which let us restore an image that
is built from a btrfs of multiple disks onto several disks altogether.
This aims to address the following case,
$ mkfs.btrfs -m raid0 sda sdb
$ btrfs-image sda image.file
$ btrfs-image -r image.file sdc
---------
so we can only restore metadata onto sdc, and another thing is we can
only mount sdc with degraded mode as we don't provide informations of
another disk. And, it's built as RAID0 and we have only one disk,
so after mount sdc we'll get into readonly mode.
This is just annoying for people(like me) who're trying to restore image
but turn to find they cannot make it work.
So this'll make your life easier, just tap
$ btrfs-image -m image.file sdc sdd
---------
then you get everything about metadata done, the same offset with that of
the originals(of course, you need offer enough disk size, at least the disk
size of the original disks).
Besides, this also works with raid5 and raid6 metadata image.
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
2013-06-22 05:32:45 +00:00
|
|
|
multi = NULL;
|
|
|
|
} else {
|
|
|
|
/* special case for restore metadump */
|
|
|
|
list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
|
|
|
|
if (device->devid == 1)
|
|
|
|
break;
|
|
|
|
}
|
2009-07-11 17:12:37 +00:00
|
|
|
|
Btrfs-progs: enhance btrfs-image to restore image onto multiple disks
This adds a 'btrfs-image -m' option, which let us restore an image that
is built from a btrfs of multiple disks onto several disks altogether.
This aims to address the following case,
$ mkfs.btrfs -m raid0 sda sdb
$ btrfs-image sda image.file
$ btrfs-image -r image.file sdc
---------
so we can only restore metadata onto sdc, and another thing is we can
only mount sdc with degraded mode as we don't provide informations of
another disk. And, it's built as RAID0 and we have only one disk,
so after mount sdc we'll get into readonly mode.
This is just annoying for people(like me) who're trying to restore image
but turn to find they cannot make it work.
So this'll make your life easier, just tap
$ btrfs-image -m image.file sdc sdd
---------
then you get everything about metadata done, the same offset with that of
the originals(of course, you need offer enough disk size, at least the disk
size of the original disks).
Besides, this also works with raid5 and raid6 metadata image.
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
2013-06-22 05:32:45 +00:00
|
|
|
eb->fd = device->fd;
|
|
|
|
eb->dev_bytenr = eb->start;
|
|
|
|
device->total_ios++;
|
|
|
|
}
|
2009-07-11 17:12:37 +00:00
|
|
|
|
|
|
|
if (read_len > bytes_left)
|
|
|
|
read_len = bytes_left;
|
|
|
|
|
|
|
|
ret = read_extent_from_disk(eb, offset, read_len);
|
|
|
|
if (ret)
|
|
|
|
return -EIO;
|
|
|
|
offset += read_len;
|
|
|
|
bytes_left -= read_len;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-08-25 14:54:16 +00:00
|
|
|
/*
 * Read the tree block at @bytenr, trying every mirror until one passes
 * validation.
 *
 * @fs_info:        filesystem to read from
 * @bytenr:         logical address of the block; must be at least one
 *                  sector in and sector aligned
 * @parent_transid: generation the parent expects, passed on to
 *                  verify_parent_transid()
 *
 * A copy is accepted outright when it reads successfully, passes the
 * checksum, check_tree_block(), the transid check and the node/leaf check.
 * The first mirror that passes everything but the node/leaf check is
 * remembered as a candidate and returned if no mirror passes completely.
 * Once all mirrors are exhausted a final attempt is made with transid
 * mismatches ignored, on the candidate or else on the mirror with the
 * highest generation seen.
 *
 * Returns the extent buffer on success, ERR_PTR(-EIO) on an invalid @bytenr
 * or when no usable copy is found, and ERR_PTR(-ENOMEM) when no buffer can
 * be obtained.
 */
struct extent_buffer* read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
		u64 parent_transid)
{
	int ret;
	struct extent_buffer *eb;
	u64 best_transid = 0;
	u32 sectorsize = fs_info->sectorsize;
	/* Mirror numbers start at 1; 0 is not a distinct copy */
	int mirror_num = 1;
	int good_mirror = 0;
	int candidate_mirror = 0;
	int num_copies;
	int ignore = 0;

	/*
	 * Don't even try to create tree block for unaligned tree block
	 * bytenr.
	 * Such unaligned tree block will free overlapping extent buffer,
	 * causing use-after-free bugs for fuzzed images.
	 */
	if (bytenr < sectorsize || !IS_ALIGNED(bytenr, sectorsize)) {
		error("tree block bytenr %llu is not aligned to sectorsize %u",
		      bytenr, sectorsize);
		return ERR_PTR(-EIO);
	}

	eb = btrfs_find_create_tree_block(fs_info, bytenr);
	if (!eb)
		return ERR_PTR(-ENOMEM);

	if (btrfs_buffer_uptodate(eb, parent_transid))
		return eb;

	num_copies = btrfs_num_copies(fs_info, eb->start, eb->len);
	while (1) {
		ret = read_whole_eb(fs_info, eb, mirror_num);
		if (ret == 0 && csum_tree_block(fs_info, eb, 1) == 0 &&
		    check_tree_block(fs_info, eb) == 0 &&
		    verify_parent_transid(&fs_info->extent_cache, eb,
					  parent_transid, ignore) == 0) {
			if (eb->flags & EXTENT_BAD_TRANSID &&
			    list_empty(&eb->recow)) {
				list_add_tail(&eb->recow,
					      &fs_info->recow_ebs);
				eb->refs++;
			}

			/*
			 * check_tree_block() is less strict to allow btrfs
			 * check to get raw eb with bad key order and fix it.
			 * But we still need to try to get a good copy if
			 * possible, or bad key order can go into tools like
			 * btrfs ins dump-tree.
			 */
			if (btrfs_header_level(eb))
				ret = btrfs_check_node(fs_info, NULL, eb);
			else
				ret = btrfs_check_leaf(fs_info, NULL, eb);
			if (!ret || candidate_mirror == mirror_num) {
				btrfs_set_buffer_uptodate(eb);
				return eb;
			}
			if (candidate_mirror <= 0)
				candidate_mirror = mirror_num;
		}
		if (ignore) {
			if (candidate_mirror > 0) {
				mirror_num = candidate_mirror;
				continue;
			}
			if (!fs_info->suppress_check_block_errors) {
				/* Evaluate the header check once and reuse it */
				int err = check_tree_block(fs_info, eb);

				if (err)
					print_tree_block_error(fs_info, eb, err);
				else
					fprintf(stderr, "Csum didn't match\n");
			}
			ret = -EIO;
			break;
		}
		if (num_copies == 1) {
			ignore = 1;
			continue;
		}
		/* Track the mirror carrying the newest generation seen so far */
		if (btrfs_header_generation(eb) > best_transid) {
			best_transid = btrfs_header_generation(eb);
			good_mirror = mirror_num;
		}
		mirror_num++;
		if (mirror_num > num_copies) {
			if (candidate_mirror > 0)
				mirror_num = candidate_mirror;
			else
				mirror_num = good_mirror;
			ignore = 1;
			continue;
		}
	}
	/*
	 * We failed to read this tree block, so it should be deleted right
	 * now to keep a stale buffer from populating the cache.
	 */
	free_extent_buffer_nocache(eb);
	return ERR_PTR(ret);
}
|
|
|
|
|
2017-06-13 09:19:24 +00:00
|
|
|
/*
 * Read the data stored at logical address @logical from copy @mirror into
 * @data, using the first stripe of the mapping.
 *
 * @len is in/out: on entry it holds the number of bytes wanted.  The pointer
 * is handed to btrfs_map_block(), and afterwards the value is capped so it
 * never exceeds the size originally requested.
 *
 * Returns 0 on success, the btrfs_map_block() result if mapping failed, or
 * -EIO when the device has no usable fd or the read did not return exactly
 * *len bytes.
 */
int read_extent_data(struct btrfs_fs_info *fs_info, char *data, u64 logical,
		     u64 *len, int mirror)
{
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_device *device;
	const u64 wanted = *len;
	int ret;

	ret = btrfs_map_block(fs_info, READ, logical, len, &multi, mirror,
			      NULL);
	if (ret) {
		fprintf(stderr, "Couldn't map the block %llu\n", logical);
		goto err;
	}

	/* Never read more than the caller asked for. */
	if (*len > wanted)
		*len = wanted;

	device = multi->stripes[0].dev;
	if (device->fd < 0) {
		ret = -EIO;
		goto err;
	}

	ret = pread64(device->fd, data, *len, multi->stripes[0].physical);
	/* Anything other than a full read counts as an I/O error. */
	ret = (ret != *len) ? -EIO : 0;
err:
	kfree(multi);
	return ret;
}
|
|
|
|
|
2017-06-13 09:19:21 +00:00
|
|
|
int write_and_map_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
|
2007-02-02 14:18:22 +00:00
|
|
|
{
|
2008-04-03 20:35:48 +00:00
|
|
|
int ret;
|
|
|
|
int dev_nr;
|
|
|
|
u64 length;
|
2009-07-11 17:12:37 +00:00
|
|
|
u64 *raid_map = NULL;
|
2008-04-09 20:28:12 +00:00
|
|
|
struct btrfs_multi_bio *multi = NULL;
|
2008-04-03 20:35:48 +00:00
|
|
|
|
|
|
|
dev_nr = 0;
|
2008-04-09 20:28:12 +00:00
|
|
|
length = eb->len;
|
2017-06-13 09:19:21 +00:00
|
|
|
ret = btrfs_map_block(fs_info, WRITE, eb->start, &length,
|
2017-06-13 09:19:17 +00:00
|
|
|
&multi, 0, &raid_map);
|
2020-03-02 04:55:09 +00:00
|
|
|
if (ret < 0) {
|
|
|
|
errno = -ret;
|
|
|
|
error("failed to map bytenr %llu length %u: %m",
|
|
|
|
eb->start, eb->len);
|
|
|
|
goto out;
|
|
|
|
}
|
2008-04-10 20:22:00 +00:00
|
|
|
|
2009-07-11 17:12:37 +00:00
|
|
|
if (raid_map) {
|
2017-06-13 09:19:21 +00:00
|
|
|
ret = write_raid56_with_parity(fs_info, eb, multi,
|
2009-07-11 17:12:37 +00:00
|
|
|
length, raid_map);
|
2020-03-02 04:55:09 +00:00
|
|
|
if (ret < 0) {
|
|
|
|
errno = -ret;
|
|
|
|
error(
|
|
|
|
"failed to write raid56 stripe for bytenr %llu length %llu: %m",
|
|
|
|
eb->start, length);
|
|
|
|
goto out;
|
|
|
|
}
|
2009-07-11 17:12:37 +00:00
|
|
|
} else while (dev_nr < multi->num_stripes) {
|
2008-04-09 20:28:12 +00:00
|
|
|
eb->fd = multi->stripes[dev_nr].dev->fd;
|
|
|
|
eb->dev_bytenr = multi->stripes[dev_nr].physical;
|
|
|
|
multi->stripes[dev_nr].dev->total_ios++;
|
2008-04-03 20:35:48 +00:00
|
|
|
dev_nr++;
|
|
|
|
ret = write_extent_to_disk(eb);
|
2020-03-02 04:55:09 +00:00
|
|
|
if (ret < 0) {
|
|
|
|
errno = -ret;
|
|
|
|
error(
|
|
|
|
"failed to write bytenr %llu length %u devid %llu dev_bytenr %llu: %m",
|
|
|
|
eb->start, eb->len,
|
|
|
|
multi->stripes[dev_nr].dev->devid,
|
|
|
|
eb->dev_bytenr);
|
|
|
|
goto out;
|
|
|
|
}
|
2008-04-03 20:35:48 +00:00
|
|
|
}
|
2020-03-02 04:55:09 +00:00
|
|
|
out:
|
2015-08-25 06:11:07 +00:00
|
|
|
kfree(raid_map);
|
2008-04-09 20:28:12 +00:00
|
|
|
kfree(multi);
|
2008-04-03 20:35:48 +00:00
|
|
|
return 0;
|
2007-02-02 14:18:22 +00:00
|
|
|
}
|
|
|
|
|
2015-05-11 08:08:46 +00:00
|
|
|
int write_tree_block(struct btrfs_trans_handle *trans,
|
2017-06-13 09:19:22 +00:00
|
|
|
struct btrfs_fs_info *fs_info,
|
2013-10-16 14:36:55 +00:00
|
|
|
struct extent_buffer *eb)
|
|
|
|
{
|
2017-06-13 09:19:22 +00:00
|
|
|
if (check_tree_block(fs_info, eb)) {
|
|
|
|
print_tree_block_error(fs_info, eb,
|
|
|
|
check_tree_block(fs_info, eb));
|
2013-10-16 14:36:55 +00:00
|
|
|
BUG();
|
2015-01-29 08:32:39 +00:00
|
|
|
}
|
2013-10-16 14:36:55 +00:00
|
|
|
|
2015-05-11 08:08:46 +00:00
|
|
|
if (trans && !btrfs_buffer_uptodate(eb, trans->transid))
|
2013-10-16 14:36:55 +00:00
|
|
|
BUG();
|
|
|
|
|
|
|
|
btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
|
2017-06-13 09:19:22 +00:00
|
|
|
csum_tree_block(fs_info, eb, 0);
|
2013-10-16 14:36:55 +00:00
|
|
|
|
2017-06-13 09:19:22 +00:00
|
|
|
return write_and_map_eb(fs_info, eb);
|
2013-10-16 14:36:55 +00:00
|
|
|
}
|
|
|
|
|
2017-05-17 09:17:56 +00:00
|
|
|
void btrfs_setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
|
|
|
|
u64 objectid)
|
2007-03-01 23:59:40 +00:00
|
|
|
{
|
2008-01-04 15:38:22 +00:00
|
|
|
root->node = NULL;
|
|
|
|
root->commit_root = NULL;
|
|
|
|
root->ref_cows = 0;
|
2008-03-24 19:03:18 +00:00
|
|
|
root->track_dirty = 0;
|
|
|
|
|
2008-01-04 15:38:22 +00:00
|
|
|
root->fs_info = fs_info;
|
|
|
|
root->objectid = objectid;
|
|
|
|
root->last_trans = 0;
|
|
|
|
root->last_inode_alloc = 0;
|
2008-03-24 19:03:18 +00:00
|
|
|
|
|
|
|
INIT_LIST_HEAD(&root->dirty_list);
|
2019-02-27 06:05:49 +00:00
|
|
|
INIT_LIST_HEAD(&root->unaligned_extent_recs);
|
2008-01-04 15:38:22 +00:00
|
|
|
memset(&root->root_key, 0, sizeof(root->root_key));
|
|
|
|
memset(&root->root_item, 0, sizeof(root->root_item));
|
|
|
|
root->root_key.objectid = objectid;
|
2007-03-01 23:59:40 +00:00
|
|
|
}
|
|
|
|
|
2008-01-04 15:38:22 +00:00
|
|
|
static int find_and_setup_root(struct btrfs_root *tree_root,
|
2007-03-20 18:38:32 +00:00
|
|
|
struct btrfs_fs_info *fs_info,
|
2008-01-04 15:38:22 +00:00
|
|
|
u64 objectid, struct btrfs_root *root)
|
2007-03-13 20:47:54 +00:00
|
|
|
{
|
|
|
|
int ret;
|
2008-10-29 18:07:47 +00:00
|
|
|
u64 generation;
|
2007-03-13 20:47:54 +00:00
|
|
|
|
2017-05-17 09:17:56 +00:00
|
|
|
btrfs_setup_root(root, fs_info, objectid);
|
2007-03-13 20:47:54 +00:00
|
|
|
ret = btrfs_find_last_root(tree_root, objectid,
|
|
|
|
&root->root_item, &root->root_key);
|
2013-02-01 20:21:04 +00:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
2008-01-04 15:38:22 +00:00
|
|
|
|
2008-10-29 18:07:47 +00:00
|
|
|
generation = btrfs_root_generation(&root->root_item);
|
2017-05-18 02:51:08 +00:00
|
|
|
root->node = read_tree_block(fs_info,
|
2017-08-25 14:54:16 +00:00
|
|
|
btrfs_root_bytenr(&root->root_item), generation);
|
2012-02-05 21:11:48 +00:00
|
|
|
if (!extent_buffer_uptodate(root->node))
|
|
|
|
return -EIO;
|
|
|
|
|
2007-02-20 21:40:44 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-04-15 18:30:14 +00:00
|
|
|
static int find_and_setup_log_root(struct btrfs_root *tree_root,
|
|
|
|
struct btrfs_fs_info *fs_info,
|
|
|
|
struct btrfs_super_block *disk_super)
|
|
|
|
{
|
|
|
|
u64 blocknr = btrfs_super_log_root(disk_super);
|
|
|
|
struct btrfs_root *log_root = malloc(sizeof(struct btrfs_root));
|
|
|
|
|
2012-11-15 04:47:51 +00:00
|
|
|
if (!log_root)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2012-11-15 04:47:50 +00:00
|
|
|
if (blocknr == 0) {
|
|
|
|
free(log_root);
|
2009-04-15 18:30:14 +00:00
|
|
|
return 0;
|
2012-11-15 04:47:50 +00:00
|
|
|
}
|
2009-04-15 18:30:14 +00:00
|
|
|
|
2017-05-17 09:17:56 +00:00
|
|
|
btrfs_setup_root(log_root, fs_info,
|
2017-05-17 08:42:50 +00:00
|
|
|
BTRFS_TREE_LOG_OBJECTID);
|
2009-04-15 18:30:14 +00:00
|
|
|
|
2017-05-18 02:51:08 +00:00
|
|
|
log_root->node = read_tree_block(fs_info, blocknr,
|
2009-04-15 18:30:14 +00:00
|
|
|
btrfs_super_generation(disk_super) + 1);
|
|
|
|
|
|
|
|
fs_info->log_root_tree = log_root;
|
2012-02-05 21:11:48 +00:00
|
|
|
|
2012-11-15 04:47:50 +00:00
|
|
|
if (!extent_buffer_uptodate(log_root->node)) {
|
2013-03-15 21:13:08 +00:00
|
|
|
free_extent_buffer(log_root->node);
|
2012-11-15 04:47:50 +00:00
|
|
|
free(log_root);
|
2013-03-15 21:13:08 +00:00
|
|
|
fs_info->log_root_tree = NULL;
|
2012-02-05 21:11:48 +00:00
|
|
|
return -EIO;
|
2012-11-15 04:47:50 +00:00
|
|
|
}
|
|
|
|
|
2009-04-15 18:30:14 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-07-03 13:25:13 +00:00
|
|
|
int btrfs_free_fs_root(struct btrfs_root *root)
|
2008-01-04 15:38:22 +00:00
|
|
|
{
|
|
|
|
if (root->node)
|
|
|
|
free_extent_buffer(root->node);
|
|
|
|
if (root->commit_root)
|
|
|
|
free_extent_buffer(root->commit_root);
|
2009-05-29 20:35:30 +00:00
|
|
|
kfree(root);
|
|
|
|
return 0;
|
|
|
|
}
|
2008-01-04 15:38:22 +00:00
|
|
|
|
2013-07-03 13:25:14 +00:00
|
|
|
static void __free_fs_root(struct rb_node *node)
|
2009-05-29 20:35:30 +00:00
|
|
|
{
|
|
|
|
struct btrfs_root *root;
|
|
|
|
|
2013-07-03 13:25:14 +00:00
|
|
|
root = container_of(node, struct btrfs_root, rb_node);
|
2013-07-03 13:25:13 +00:00
|
|
|
btrfs_free_fs_root(root);
|
2008-01-04 15:38:22 +00:00
|
|
|
}
|
|
|
|
|
2013-07-03 13:25:14 +00:00
|
|
|
FREE_RB_BASED_TREE(fs_roots, __free_fs_root);
|
2013-07-03 13:25:13 +00:00
|
|
|
|
2009-05-29 20:35:30 +00:00
|
|
|
struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info,
|
|
|
|
struct btrfs_key *location)
|
2008-01-04 15:38:22 +00:00
|
|
|
{
|
|
|
|
struct btrfs_root *root;
|
|
|
|
struct btrfs_root *tree_root = fs_info->tree_root;
|
|
|
|
struct btrfs_path *path;
|
|
|
|
struct extent_buffer *l;
|
2008-10-29 18:07:47 +00:00
|
|
|
u64 generation;
|
2008-01-04 15:38:22 +00:00
|
|
|
int ret = 0;
|
|
|
|
|
2015-09-29 17:10:36 +00:00
|
|
|
root = calloc(1, sizeof(*root));
|
2008-01-04 15:38:22 +00:00
|
|
|
if (!root)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
if (location->offset == (u64)-1) {
|
|
|
|
ret = find_and_setup_root(tree_root, fs_info,
|
|
|
|
location->objectid, root);
|
|
|
|
if (ret) {
|
|
|
|
free(root);
|
|
|
|
return ERR_PTR(ret);
|
|
|
|
}
|
|
|
|
goto insert;
|
|
|
|
}
|
|
|
|
|
2017-05-17 09:17:56 +00:00
|
|
|
btrfs_setup_root(root, fs_info,
|
2017-05-17 08:42:50 +00:00
|
|
|
location->objectid);
|
2008-01-04 15:38:22 +00:00
|
|
|
|
|
|
|
path = btrfs_alloc_path();
|
2016-08-31 18:38:46 +00:00
|
|
|
if (!path) {
|
|
|
|
free(root);
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
}
|
|
|
|
|
2008-01-04 15:38:22 +00:00
|
|
|
ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
|
|
|
|
if (ret != 0) {
|
|
|
|
if (ret > 0)
|
|
|
|
ret = -ENOENT;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
l = path->nodes[0];
|
|
|
|
read_extent_buffer(l, &root->root_item,
|
|
|
|
btrfs_item_ptr_offset(l, path->slots[0]),
|
|
|
|
sizeof(root->root_item));
|
|
|
|
memcpy(&root->root_key, location, sizeof(*location));
|
|
|
|
ret = 0;
|
|
|
|
out:
|
|
|
|
btrfs_free_path(path);
|
|
|
|
if (ret) {
|
|
|
|
free(root);
|
|
|
|
return ERR_PTR(ret);
|
|
|
|
}
|
2008-10-29 18:07:47 +00:00
|
|
|
generation = btrfs_root_generation(&root->root_item);
|
2017-05-18 02:51:08 +00:00
|
|
|
root->node = read_tree_block(fs_info,
|
2017-08-25 14:54:16 +00:00
|
|
|
btrfs_root_bytenr(&root->root_item), generation);
|
2015-01-28 02:12:55 +00:00
|
|
|
if (!extent_buffer_uptodate(root->node)) {
|
2014-01-10 14:50:02 +00:00
|
|
|
free(root);
|
|
|
|
return ERR_PTR(-EIO);
|
|
|
|
}
|
2008-01-04 15:38:22 +00:00
|
|
|
insert:
|
2021-08-23 20:14:51 +00:00
|
|
|
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
|
|
|
|
root->track_dirty = 1;
|
|
|
|
if (is_fstree(root->root_key.objectid))
|
|
|
|
root->ref_cows = 1;
|
2008-01-04 15:38:22 +00:00
|
|
|
return root;
|
|
|
|
}
|
|
|
|
|
2013-07-03 13:25:14 +00:00
|
|
|
static int btrfs_fs_roots_compare_objectids(struct rb_node *node,
|
|
|
|
void *data)
|
|
|
|
{
|
|
|
|
u64 objectid = *((u64 *)data);
|
|
|
|
struct btrfs_root *root;
|
|
|
|
|
|
|
|
root = rb_entry(node, struct btrfs_root, rb_node);
|
|
|
|
if (objectid > root->objectid)
|
|
|
|
return 1;
|
|
|
|
else if (objectid < root->objectid)
|
|
|
|
return -1;
|
|
|
|
else
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-01-03 07:32:18 +00:00
|
|
|
int btrfs_fs_roots_compare_roots(struct rb_node *node1, struct rb_node *node2)
|
2013-07-03 13:25:14 +00:00
|
|
|
{
|
|
|
|
struct btrfs_root *root;
|
|
|
|
|
|
|
|
root = rb_entry(node2, struct btrfs_root, rb_node);
|
|
|
|
return btrfs_fs_roots_compare_objectids(node1, (void *)&root->objectid);
|
|
|
|
}
|
|
|
|
|
2009-05-29 20:35:30 +00:00
|
|
|
struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
|
|
|
|
struct btrfs_key *location)
|
|
|
|
{
|
|
|
|
struct btrfs_root *root;
|
2013-07-03 13:25:14 +00:00
|
|
|
struct rb_node *node;
|
2009-05-29 20:35:30 +00:00
|
|
|
int ret;
|
2014-01-17 14:00:43 +00:00
|
|
|
u64 objectid = location->objectid;
|
2009-05-29 20:35:30 +00:00
|
|
|
|
|
|
|
if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
|
|
|
|
return fs_info->tree_root;
|
|
|
|
if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID)
|
|
|
|
return fs_info->extent_root;
|
|
|
|
if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID)
|
|
|
|
return fs_info->chunk_root;
|
|
|
|
if (location->objectid == BTRFS_DEV_TREE_OBJECTID)
|
|
|
|
return fs_info->dev_root;
|
|
|
|
if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
|
|
|
|
return fs_info->csum_root;
|
2019-01-03 07:32:20 +00:00
|
|
|
if (location->objectid == BTRFS_UUID_TREE_OBJECTID)
|
|
|
|
return fs_info->uuid_root ? fs_info->uuid_root : ERR_PTR(-ENOENT);
|
2014-05-07 20:07:17 +00:00
|
|
|
if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID)
|
2017-05-02 07:36:09 +00:00
|
|
|
return fs_info->quota_enabled ? fs_info->quota_root :
|
|
|
|
ERR_PTR(-ENOENT);
|
2018-10-01 14:46:12 +00:00
|
|
|
if (location->objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
|
|
|
|
return fs_info->free_space_root ? fs_info->free_space_root :
|
|
|
|
ERR_PTR(-ENOENT);
|
2012-02-05 21:11:48 +00:00
|
|
|
|
2021-02-09 17:34:06 +00:00
|
|
|
BUG_ON(location->objectid == BTRFS_TREE_RELOC_OBJECTID);
|
2009-05-29 20:35:30 +00:00
|
|
|
|
2014-01-17 14:00:43 +00:00
|
|
|
node = rb_search(&fs_info->fs_root_tree, (void *)&objectid,
|
2013-07-03 13:25:14 +00:00
|
|
|
btrfs_fs_roots_compare_objectids, NULL);
|
|
|
|
if (node)
|
|
|
|
return container_of(node, struct btrfs_root, rb_node);
|
2009-05-29 20:35:30 +00:00
|
|
|
|
|
|
|
root = btrfs_read_fs_root_no_cache(fs_info, location);
|
|
|
|
if (IS_ERR(root))
|
|
|
|
return root;
|
|
|
|
|
2013-07-03 13:25:14 +00:00
|
|
|
ret = rb_insert(&fs_info->fs_root_tree, &root->rb_node,
|
|
|
|
btrfs_fs_roots_compare_roots);
|
2009-05-29 20:35:30 +00:00
|
|
|
BUG_ON(ret);
|
|
|
|
return root;
|
|
|
|
}
|
|
|
|
|
2013-07-03 13:25:12 +00:00
|
|
|
void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
|
2007-02-02 14:18:22 +00:00
|
|
|
{
|
2017-05-02 07:36:09 +00:00
|
|
|
if (fs_info->quota_root)
|
|
|
|
free(fs_info->quota_root);
|
|
|
|
|
2013-07-03 13:25:12 +00:00
|
|
|
free(fs_info->tree_root);
|
|
|
|
free(fs_info->extent_root);
|
|
|
|
free(fs_info->chunk_root);
|
|
|
|
free(fs_info->dev_root);
|
|
|
|
free(fs_info->csum_root);
|
2015-09-30 03:51:44 +00:00
|
|
|
free(fs_info->free_space_root);
|
2019-01-03 07:32:20 +00:00
|
|
|
free(fs_info->uuid_root);
|
2013-07-03 13:25:12 +00:00
|
|
|
free(fs_info->super_copy);
|
|
|
|
free(fs_info->log_root_tree);
|
|
|
|
free(fs_info);
|
|
|
|
}
|
2007-02-02 14:18:22 +00:00
|
|
|
|
2013-07-03 13:25:12 +00:00
|
|
|
/*
 * Allocate a zeroed btrfs_fs_info along with its global root structures and
 * a buffer for the superblock copy, then initialize the caches, trees and
 * lists embedded in it.
 *
 * @writable:  when zero the fs_info is marked readonly.
 * @sb_bytenr: stored as the superblock location (super_bytenr).
 *
 * Returns the new fs_info, or NULL if any allocation failed (everything
 * allocated so far is released).
 */
struct btrfs_fs_info *btrfs_new_fs_info(int writable, u64 sb_bytenr)
{
	const size_t root_size = sizeof(struct btrfs_root);
	struct btrfs_fs_info *info;

	info = calloc(1, sizeof(struct btrfs_fs_info));
	if (!info)
		return NULL;

	info->tree_root = calloc(1, root_size);
	info->extent_root = calloc(1, root_size);
	info->chunk_root = calloc(1, root_size);
	info->dev_root = calloc(1, root_size);
	info->csum_root = calloc(1, root_size);
	info->quota_root = calloc(1, root_size);
	info->free_space_root = calloc(1, root_size);
	info->uuid_root = calloc(1, root_size);
	info->super_copy = calloc(1, BTRFS_SUPER_INFO_SIZE);

	/* All-or-nothing: a single failed allocation aborts the setup. */
	if (!info->tree_root || !info->extent_root ||
	    !info->chunk_root || !info->dev_root ||
	    !info->csum_root || !info->quota_root ||
	    !info->free_space_root || !info->uuid_root ||
	    !info->super_copy) {
		btrfs_free_fs_info(info);
		return NULL;
	}

	extent_io_tree_init(&info->extent_cache);
	extent_io_tree_init(&info->free_space_cache);
	extent_io_tree_init(&info->pinned_extents);
	extent_io_tree_init(&info->extent_ins);

	info->block_group_cache_tree = RB_ROOT;
	info->excluded_extents = NULL;

	info->fs_root_tree = RB_ROOT;
	cache_tree_init(&info->mapping_tree.cache_tree);

	INIT_LIST_HEAD(&info->dirty_cowonly_roots);
	INIT_LIST_HEAD(&info->space_info);
	INIT_LIST_HEAD(&info->recow_ebs);

	if (!writable)
		info->readonly = 1;

	info->super_bytenr = sb_bytenr;
	info->data_alloc_profile = (u64)-1;
	info->metadata_alloc_profile = (u64)-1;
	info->system_alloc_profile = info->metadata_alloc_profile;
	return info;
}
|
2008-12-17 21:10:07 +00:00
|
|
|
|
2016-11-14 18:43:20 +00:00
|
|
|
int btrfs_check_fs_compatibility(struct btrfs_super_block *sb,
|
|
|
|
unsigned int flags)
|
2013-07-03 13:25:12 +00:00
|
|
|
{
|
|
|
|
u64 features;
|
2009-05-29 20:35:30 +00:00
|
|
|
|
2013-07-03 13:25:12 +00:00
|
|
|
features = btrfs_super_incompat_flags(sb) &
|
2009-05-29 20:35:30 +00:00
|
|
|
~BTRFS_FEATURE_INCOMPAT_SUPP;
|
|
|
|
if (features) {
|
|
|
|
printk("couldn't open because of unsupported "
|
2019-08-11 22:46:49 +00:00
|
|
|
"option features (%llx).\n",
|
2011-04-07 11:02:04 +00:00
|
|
|
(unsigned long long)features);
|
2013-07-03 13:25:12 +00:00
|
|
|
return -ENOTSUP;
|
2009-05-29 20:35:30 +00:00
|
|
|
}
|
|
|
|
|
2013-07-03 13:25:12 +00:00
|
|
|
features = btrfs_super_incompat_flags(sb);
|
2009-05-29 20:35:30 +00:00
|
|
|
if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
|
|
|
|
features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
|
2013-07-03 13:25:12 +00:00
|
|
|
btrfs_set_super_incompat_flags(sb, features);
|
2009-05-29 20:35:30 +00:00
|
|
|
}
|
|
|
|
|
2016-11-14 18:43:20 +00:00
|
|
|
features = btrfs_super_compat_ro_flags(sb);
|
|
|
|
if (flags & OPEN_CTREE_WRITES) {
|
|
|
|
if (flags & OPEN_CTREE_INVALIDATE_FST) {
|
|
|
|
/* Clear the FREE_SPACE_TREE_VALID bit on disk... */
|
|
|
|
features &= ~BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID;
|
|
|
|
btrfs_set_super_compat_ro_flags(sb, features);
|
|
|
|
/* ... and ignore the free space tree bit. */
|
|
|
|
features &= ~BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE;
|
|
|
|
}
|
|
|
|
if (features & ~BTRFS_FEATURE_COMPAT_RO_SUPP) {
|
|
|
|
printk("couldn't open RDWR because of unsupported "
|
2019-08-11 22:46:49 +00:00
|
|
|
"option features (0x%llx)\n",
|
2016-11-14 18:43:20 +00:00
|
|
|
(unsigned long long)features);
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
2009-05-29 20:35:30 +00:00
|
|
|
}
|
2013-07-03 13:25:12 +00:00
|
|
|
return 0;
|
|
|
|
}
|
2009-05-29 20:35:30 +00:00
|
|
|
|
2013-10-23 20:24:03 +00:00
|
|
|
static int find_best_backup_root(struct btrfs_super_block *super)
|
|
|
|
{
|
|
|
|
struct btrfs_root_backup *backup;
|
|
|
|
u64 orig_gen = btrfs_super_generation(super);
|
|
|
|
u64 gen = 0;
|
|
|
|
int best_index = 0;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
|
|
|
|
backup = super->super_roots + i;
|
|
|
|
if (btrfs_backup_tree_root_gen(backup) != orig_gen &&
|
|
|
|
btrfs_backup_tree_root_gen(backup) > gen) {
|
|
|
|
best_index = i;
|
|
|
|
gen = btrfs_backup_tree_root_gen(backup);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return best_index;
|
|
|
|
}
|
|
|
|
|
2014-10-15 23:14:21 +00:00
|
|
|
static int setup_root_or_create_block(struct btrfs_fs_info *fs_info,
|
2016-08-19 14:20:36 +00:00
|
|
|
unsigned flags,
|
2014-10-15 23:14:21 +00:00
|
|
|
struct btrfs_root *info_root,
|
|
|
|
u64 objectid, char *str)
|
|
|
|
{
|
|
|
|
struct btrfs_root *root = fs_info->tree_root;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = find_and_setup_root(root, fs_info, objectid, info_root);
|
|
|
|
if (ret) {
|
btrfs-progs: Reduce error level from error to warning for OPEN_CTREE_PARTIAL
Even if we're using OPEN_CTREE_PARTIAL, like "rescue zero log", the
error message still looks too serious even we skipped that tree:
bad tree block 2172747776, bytenr mismatch, want=2172747776, have=0
Couldn't setup extent tree
^^^^^^^^^^^^^^^^^^^^^^^^^^
This patch will change the error message to:
- Use error() if we're not using OPEN_CTREE_PARTIAL
- Use warning() and explicitly show we're skipping that tree
So the result would be something like:
For non-OPEN_CTREE_PARTIAL case:
bad tree block 2172747776, bytenr mismatch, want=2172747776, have=0
ERROR: could not setup extent tree
For OPEN_CTREE_PARTIAL case
bad tree block 2172747776, bytenr mismatch, want=2172747776, have=0
WARNING: could not setup extent tree, skipping it
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2019-11-11 07:50:58 +00:00
|
|
|
if (!(flags & OPEN_CTREE_PARTIAL)) {
|
|
|
|
error("could not setup %s tree", str);
|
2014-10-15 23:14:21 +00:00
|
|
|
return -EIO;
|
btrfs-progs: Reduce error level from error to warning for OPEN_CTREE_PARTIAL
Even if we're using OPEN_CTREE_PARTIAL, like "rescue zero log", the
error message still looks too serious even we skipped that tree:
bad tree block 2172747776, bytenr mismatch, want=2172747776, have=0
Couldn't setup extent tree
^^^^^^^^^^^^^^^^^^^^^^^^^^
This patch will change the error message to:
- Use error() if we're not using OPEN_CTREE_PARTIAL
- Use warning() and explicitly show we're skipping that tree
So the result would be something like:
For non-OPEN_CTREE_PARTIAL case:
bad tree block 2172747776, bytenr mismatch, want=2172747776, have=0
ERROR: could not setup extent tree
For OPEN_CTREE_PARTIAL case
bad tree block 2172747776, bytenr mismatch, want=2172747776, have=0
WARNING: could not setup extent tree, skipping it
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2019-11-11 07:50:58 +00:00
|
|
|
}
|
|
|
|
warning("could not setup %s tree, skipping it", str);
|
2014-10-15 23:14:21 +00:00
|
|
|
/*
|
|
|
|
* Need a blank node here just so we don't screw up in the
|
|
|
|
* million of places that assume a root has a valid ->node
|
|
|
|
*/
|
|
|
|
info_root->node =
|
2017-08-25 15:44:22 +00:00
|
|
|
btrfs_find_create_tree_block(fs_info, 0);
|
2014-10-15 23:14:21 +00:00
|
|
|
if (!info_root->node)
|
|
|
|
return -ENOMEM;
|
2016-09-28 08:30:04 +00:00
|
|
|
clear_extent_buffer_uptodate(info_root->node);
|
2014-10-15 23:14:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-10-28 18:28:43 +00:00
|
|
|
/*
 * Read the tree root and set up every global root hanging off @fs_info.
 *
 * @root_tree_bytenr: location of the tree root; when 0 the value from the
 *                    superblock is used.  With OPEN_CTREE_BACKUP_ROOT in
 *                    @flags the best backup root from the superblock is
 *                    used instead, together with its generation.
 * @flags:            OPEN_CTREE_* flags.  OPEN_CTREE_PARTIAL tolerates a
 *                    broken extent, csum or log tree;
 *                    OPEN_CTREE_NO_BLOCK_GROUPS skips reading block groups.
 *
 * The uuid and quota roots are optional: if they cannot be set up their
 * structures are freed and the pointers set to NULL.  The free space root
 * is only set up when the FREE_SPACE_TREE compat_ro feature is present.
 *
 * Returns 0 on success or a negative errno.
 */
int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info, u64 root_tree_bytenr,
			  unsigned flags)
{
	struct btrfs_super_block *sb = fs_info->super_copy;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_key key;
	u64 generation;
	int ret;

	btrfs_setup_root(tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID);
	generation = btrfs_super_generation(sb);

	if (flags & OPEN_CTREE_BACKUP_ROOT) {
		struct btrfs_root_backup *backup;
		int index = find_best_backup_root(sb);

		if (index >= BTRFS_NUM_BACKUP_ROOTS) {
			fprintf(stderr, "Invalid backup root number\n");
			return -EIO;
		}
		backup = fs_info->super_copy->super_roots + index;
		root_tree_bytenr = btrfs_backup_tree_root(backup);
		generation = btrfs_backup_tree_root_gen(backup);
	} else if (!root_tree_bytenr) {
		root_tree_bytenr = btrfs_super_root(sb);
	}

	tree_root->node = read_tree_block(fs_info, root_tree_bytenr,
					  generation);
	if (!extent_buffer_uptodate(tree_root->node)) {
		fprintf(stderr, "Couldn't read tree root\n");
		return -EIO;
	}

	/* Extent tree (may be skipped with OPEN_CTREE_PARTIAL). */
	ret = setup_root_or_create_block(fs_info, flags, fs_info->extent_root,
					 BTRFS_EXTENT_TREE_OBJECTID, "extent");
	if (ret)
		return ret;
	fs_info->extent_root->track_dirty = 1;

	/* Device tree is mandatory. */
	ret = find_and_setup_root(tree_root, fs_info, BTRFS_DEV_TREE_OBJECTID,
				  fs_info->dev_root);
	if (ret) {
		printk("Couldn't setup device tree\n");
		return -EIO;
	}
	fs_info->dev_root->track_dirty = 1;

	/* Csum tree (may be skipped with OPEN_CTREE_PARTIAL). */
	ret = setup_root_or_create_block(fs_info, flags, fs_info->csum_root,
					 BTRFS_CSUM_TREE_OBJECTID, "csum");
	if (ret)
		return ret;
	fs_info->csum_root->track_dirty = 1;

	/* Optional uuid tree. */
	ret = find_and_setup_root(tree_root, fs_info, BTRFS_UUID_TREE_OBJECTID,
				  fs_info->uuid_root);
	if (!ret) {
		fs_info->uuid_root->track_dirty = 1;
	} else {
		free(fs_info->uuid_root);
		fs_info->uuid_root = NULL;
	}

	/* Optional quota tree. */
	ret = find_and_setup_root(tree_root, fs_info, BTRFS_QUOTA_TREE_OBJECTID,
				  fs_info->quota_root);
	if (!ret) {
		fs_info->quota_enabled = 1;
	} else {
		free(fs_info->quota_root);
		fs_info->quota_root = NULL;
	}

	/* Free space tree: required if and only if the feature is set. */
	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
		free(fs_info->free_space_root);
		fs_info->free_space_root = NULL;
	} else {
		ret = find_and_setup_root(tree_root, fs_info,
					  BTRFS_FREE_SPACE_TREE_OBJECTID,
					  fs_info->free_space_root);
		if (ret) {
			free(fs_info->free_space_root);
			fs_info->free_space_root = NULL;
			printk("Couldn't read free space tree\n");
			return -EIO;
		}
		fs_info->free_space_root->track_dirty = 1;
	}

	ret = find_and_setup_log_root(tree_root, fs_info, sb);
	if (ret) {
		printk("Couldn't setup log root tree\n");
		if (!(flags & OPEN_CTREE_PARTIAL))
			return -EIO;
	}

	fs_info->generation = generation;
	fs_info->last_trans_committed = generation;

	if (extent_buffer_uptodate(fs_info->extent_root->node) &&
	    !(flags & OPEN_CTREE_NO_BLOCK_GROUPS)) {
		ret = btrfs_read_block_groups(fs_info);
		/*
		 * If we don't find any blockgroups (ENOENT) we're either
		 * restoring or creating the filesystem, where it's expected,
		 * anything else is error
		 */
		if (ret < 0 && ret != -ENOENT) {
			errno = -ret;
			error("failed to read block groups: %m");
			return ret;
		}
	}

	key.objectid = BTRFS_FS_TREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	fs_info->fs_root = btrfs_read_fs_root(fs_info, &key);

	/*
	 * btrfs_read_fs_root() reports failure through ERR_PTR(), not NULL,
	 * so the result must be tested with IS_ERR().
	 */
	if (IS_ERR(fs_info->fs_root))
		return -EIO;
	return 0;
}
|
2012-02-05 21:11:48 +00:00
|
|
|
|
2013-07-03 13:25:12 +00:00
|
|
|
void btrfs_release_all_roots(struct btrfs_fs_info *fs_info)
|
|
|
|
{
|
2015-09-30 03:51:44 +00:00
|
|
|
if (fs_info->free_space_root)
|
|
|
|
free_extent_buffer(fs_info->free_space_root->node);
|
2014-05-07 20:07:17 +00:00
|
|
|
if (fs_info->quota_root)
|
|
|
|
free_extent_buffer(fs_info->quota_root->node);
|
2012-02-05 21:11:48 +00:00
|
|
|
if (fs_info->csum_root)
|
|
|
|
free_extent_buffer(fs_info->csum_root->node);
|
|
|
|
if (fs_info->dev_root)
|
|
|
|
free_extent_buffer(fs_info->dev_root->node);
|
|
|
|
if (fs_info->extent_root)
|
|
|
|
free_extent_buffer(fs_info->extent_root->node);
|
|
|
|
if (fs_info->tree_root)
|
|
|
|
free_extent_buffer(fs_info->tree_root->node);
|
2013-07-03 13:25:12 +00:00
|
|
|
if (fs_info->log_root_tree)
|
|
|
|
free_extent_buffer(fs_info->log_root_tree->node);
|
2012-02-05 21:11:48 +00:00
|
|
|
if (fs_info->chunk_root)
|
|
|
|
free_extent_buffer(fs_info->chunk_root->node);
|
2019-01-03 07:32:20 +00:00
|
|
|
if (fs_info->uuid_root)
|
|
|
|
free_extent_buffer(fs_info->uuid_root->node);
|
2013-07-03 13:25:12 +00:00
|
|
|
}
|
|
|
|
|
2013-07-03 13:25:13 +00:00
|
|
|
static void free_map_lookup(struct cache_extent *ce)
|
2013-07-03 13:25:12 +00:00
|
|
|
{
|
|
|
|
struct map_lookup *map;
|
|
|
|
|
2013-07-03 13:25:13 +00:00
|
|
|
map = container_of(ce, struct map_lookup, ce);
|
|
|
|
kfree(map);
|
2013-07-03 13:25:12 +00:00
|
|
|
}
|
|
|
|
|
2013-07-03 13:25:13 +00:00
|
|
|
FREE_EXTENT_CACHE_BASED_TREE(mapping_cache, free_map_lookup);
|
|
|
|
|
2013-07-03 13:25:12 +00:00
|
|
|
void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info)
|
|
|
|
{
|
2013-10-01 13:00:19 +00:00
|
|
|
while (!list_empty(&fs_info->recow_ebs)) {
|
|
|
|
struct extent_buffer *eb;
|
|
|
|
eb = list_first_entry(&fs_info->recow_ebs,
|
|
|
|
struct extent_buffer, recow);
|
|
|
|
list_del_init(&eb->recow);
|
|
|
|
free_extent_buffer(eb);
|
|
|
|
}
|
2013-07-03 13:25:13 +00:00
|
|
|
free_mapping_cache_tree(&fs_info->mapping_tree.cache_tree);
|
2011-08-26 13:51:36 +00:00
|
|
|
extent_io_tree_cleanup(&fs_info->extent_cache);
|
|
|
|
extent_io_tree_cleanup(&fs_info->free_space_cache);
|
|
|
|
extent_io_tree_cleanup(&fs_info->pinned_extents);
|
|
|
|
extent_io_tree_cleanup(&fs_info->extent_ins);
|
2013-07-03 13:25:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int btrfs_scan_fs_devices(int fd, const char *path,
|
2013-07-17 16:03:40 +00:00
|
|
|
struct btrfs_fs_devices **fs_devices,
|
2016-08-19 14:36:40 +00:00
|
|
|
u64 sb_bytenr, unsigned sbflags,
|
2015-02-03 14:48:57 +00:00
|
|
|
int skip_devices)
|
2013-07-03 13:25:12 +00:00
|
|
|
{
|
|
|
|
u64 total_devs;
|
2014-11-12 05:52:11 +00:00
|
|
|
u64 dev_size;
|
2014-12-19 06:13:09 +00:00
|
|
|
off_t seek_ret;
|
2013-07-03 13:25:12 +00:00
|
|
|
int ret;
|
2013-07-17 16:03:40 +00:00
|
|
|
if (!sb_bytenr)
|
|
|
|
sb_bytenr = BTRFS_SUPER_INFO_OFFSET;
|
2013-07-03 13:25:12 +00:00
|
|
|
|
2014-12-19 06:13:09 +00:00
|
|
|
seek_ret = lseek(fd, 0, SEEK_END);
|
|
|
|
if (seek_ret < 0)
|
|
|
|
return -errno;
|
|
|
|
|
|
|
|
dev_size = seek_ret;
|
2014-11-12 05:52:11 +00:00
|
|
|
lseek(fd, 0, SEEK_SET);
|
|
|
|
if (sb_bytenr > dev_size) {
|
2016-09-09 13:56:18 +00:00
|
|
|
error("superblock bytenr %llu is larger than device size %llu",
|
|
|
|
(unsigned long long)sb_bytenr,
|
|
|
|
(unsigned long long)dev_size);
|
2014-11-12 05:52:11 +00:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2013-07-03 13:25:12 +00:00
|
|
|
ret = btrfs_scan_one_device(fd, path, fs_devices,
|
2016-08-19 14:36:40 +00:00
|
|
|
&total_devs, sb_bytenr, sbflags);
|
2013-07-03 13:25:12 +00:00
|
|
|
if (ret) {
|
|
|
|
fprintf(stderr, "No valid Btrfs found on %s\n", path);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2015-02-03 14:48:57 +00:00
|
|
|
if (!skip_devices && total_devs != 1) {
|
2019-11-25 10:39:15 +00:00
|
|
|
ret = btrfs_scan_devices(0);
|
2013-07-03 13:25:12 +00:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-03-07 04:57:41 +00:00
|
|
|
int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info,
|
|
|
|
u64 chunk_root_bytenr)
|
2013-07-03 13:25:12 +00:00
|
|
|
{
|
|
|
|
struct btrfs_super_block *sb = fs_info->super_copy;
|
|
|
|
u64 generation;
|
|
|
|
int ret;
|
|
|
|
|
2017-05-17 09:17:56 +00:00
|
|
|
btrfs_setup_root(fs_info->chunk_root, fs_info,
|
|
|
|
BTRFS_CHUNK_TREE_OBJECTID);
|
2013-07-03 13:25:12 +00:00
|
|
|
|
2017-06-13 09:19:30 +00:00
|
|
|
ret = btrfs_read_sys_array(fs_info);
|
2013-07-03 13:25:12 +00:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
generation = btrfs_super_chunk_root_generation(sb);
|
|
|
|
|
2016-03-07 04:57:41 +00:00
|
|
|
if (chunk_root_bytenr && !IS_ALIGNED(chunk_root_bytenr,
|
2017-05-17 09:17:56 +00:00
|
|
|
fs_info->sectorsize)) {
|
2016-03-07 04:57:41 +00:00
|
|
|
warning("chunk_root_bytenr %llu is unaligned to %u, ignore it",
|
2017-05-17 09:17:56 +00:00
|
|
|
chunk_root_bytenr, fs_info->sectorsize);
|
2016-03-07 04:57:41 +00:00
|
|
|
chunk_root_bytenr = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!chunk_root_bytenr)
|
|
|
|
chunk_root_bytenr = btrfs_super_chunk_root(sb);
|
|
|
|
else
|
|
|
|
generation = 0;
|
|
|
|
|
2017-05-18 02:51:08 +00:00
|
|
|
fs_info->chunk_root->node = read_tree_block(fs_info,
|
2016-03-07 04:57:41 +00:00
|
|
|
chunk_root_bytenr,
|
2017-05-17 09:17:56 +00:00
|
|
|
generation);
|
2015-01-28 02:12:55 +00:00
|
|
|
if (!extent_buffer_uptodate(fs_info->chunk_root->node)) {
|
2016-02-22 06:59:54 +00:00
|
|
|
if (fs_info->ignore_chunk_tree_error) {
|
|
|
|
warning("cannot read chunk root, continue anyway");
|
|
|
|
fs_info->chunk_root = NULL;
|
|
|
|
return 0;
|
|
|
|
} else {
|
|
|
|
error("cannot read chunk root");
|
|
|
|
return -EIO;
|
|
|
|
}
|
2013-07-03 13:25:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!(btrfs_super_flags(sb) & BTRFS_SUPER_FLAG_METADUMP)) {
|
2017-06-13 09:19:30 +00:00
|
|
|
ret = btrfs_read_chunk_tree(fs_info);
|
2013-07-03 13:25:12 +00:00
|
|
|
if (ret) {
|
|
|
|
fprintf(stderr, "Couldn't read chunk tree\n");
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-02-18 00:27:21 +00:00
|
|
|
static struct btrfs_fs_info *__open_ctree_fd(int fp, struct open_ctree_flags *ocf)
|
2013-07-03 13:25:12 +00:00
|
|
|
{
|
|
|
|
struct btrfs_fs_info *fs_info;
|
|
|
|
struct btrfs_super_block *disk_super;
|
|
|
|
struct btrfs_fs_devices *fs_devices = NULL;
|
|
|
|
struct extent_buffer *eb;
|
|
|
|
int ret;
|
2014-01-13 13:14:55 +00:00
|
|
|
int oflags;
|
2016-08-22 14:32:24 +00:00
|
|
|
unsigned sbflags = SBREAD_DEFAULT;
|
2021-02-18 00:27:21 +00:00
|
|
|
unsigned flags = ocf->flags;
|
|
|
|
u64 sb_bytenr = ocf->sb_bytenr;
|
2013-07-03 13:25:12 +00:00
|
|
|
|
|
|
|
if (sb_bytenr == 0)
|
|
|
|
sb_bytenr = BTRFS_SUPER_INFO_OFFSET;
|
|
|
|
|
|
|
|
/* try to drop all the caches */
|
|
|
|
if (posix_fadvise(fp, 0, 0, POSIX_FADV_DONTNEED))
|
|
|
|
fprintf(stderr, "Warning, could not drop caches\n");
|
|
|
|
|
2013-10-28 18:28:43 +00:00
|
|
|
fs_info = btrfs_new_fs_info(flags & OPEN_CTREE_WRITES, sb_bytenr);
|
2013-07-03 13:25:12 +00:00
|
|
|
if (!fs_info) {
|
|
|
|
fprintf(stderr, "Failed to allocate memory for fs_info\n");
|
|
|
|
return NULL;
|
|
|
|
}
|
2013-10-28 18:28:43 +00:00
|
|
|
if (flags & OPEN_CTREE_RESTORE)
|
2013-07-03 18:24:43 +00:00
|
|
|
fs_info->on_restoring = 1;
|
2015-01-16 03:04:09 +00:00
|
|
|
if (flags & OPEN_CTREE_SUPPRESS_CHECK_BLOCK_ERRORS)
|
|
|
|
fs_info->suppress_check_block_errors = 1;
|
2015-05-11 08:08:45 +00:00
|
|
|
if (flags & OPEN_CTREE_IGNORE_FSID_MISMATCH)
|
|
|
|
fs_info->ignore_fsid_mismatch = 1;
|
2016-02-22 06:59:54 +00:00
|
|
|
if (flags & OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR)
|
|
|
|
fs_info->ignore_chunk_tree_error = 1;
|
2020-03-04 14:43:42 +00:00
|
|
|
if (flags & OPEN_CTREE_HIDE_NAMES)
|
|
|
|
fs_info->hide_names = 1;
|
2013-07-03 13:25:12 +00:00
|
|
|
|
2016-08-22 14:32:24 +00:00
|
|
|
if ((flags & OPEN_CTREE_RECOVER_SUPER)
|
2018-04-11 07:29:35 +00:00
|
|
|
&& (flags & OPEN_CTREE_TEMPORARY_SUPER)) {
|
2016-08-22 14:32:24 +00:00
|
|
|
fprintf(stderr,
|
2018-04-11 07:29:35 +00:00
|
|
|
"cannot open a filesystem with temporary super block for recovery");
|
2016-08-22 14:32:24 +00:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2018-04-11 07:29:35 +00:00
|
|
|
if (flags & OPEN_CTREE_TEMPORARY_SUPER)
|
|
|
|
sbflags = SBREAD_TEMPORARY;
|
2016-08-22 14:32:24 +00:00
|
|
|
|
2018-06-18 15:11:33 +00:00
|
|
|
if (flags & OPEN_CTREE_IGNORE_FSID_MISMATCH)
|
|
|
|
sbflags |= SBREAD_IGNORE_FSID_MISMATCH;
|
|
|
|
|
2021-02-18 00:27:21 +00:00
|
|
|
ret = btrfs_scan_fs_devices(fp, ocf->filename, &fs_devices, sb_bytenr,
|
|
|
|
sbflags, (flags & OPEN_CTREE_NO_DEVICES));
|
2013-07-03 13:25:12 +00:00
|
|
|
if (ret)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
fs_info->fs_devices = fs_devices;
|
2013-10-28 18:28:43 +00:00
|
|
|
if (flags & OPEN_CTREE_WRITES)
|
2014-01-13 13:14:55 +00:00
|
|
|
oflags = O_RDWR;
|
2013-07-03 13:25:12 +00:00
|
|
|
else
|
2014-01-13 13:14:55 +00:00
|
|
|
oflags = O_RDONLY;
|
|
|
|
|
|
|
|
if (flags & OPEN_CTREE_EXCLUSIVE)
|
|
|
|
oflags |= O_EXCL;
|
|
|
|
|
2021-04-26 06:27:18 +00:00
|
|
|
ret = btrfs_open_devices(fs_info, fs_devices, oflags);
|
2013-07-03 13:25:12 +00:00
|
|
|
if (ret)
|
2014-04-20 13:17:53 +00:00
|
|
|
goto out;
|
2013-07-03 13:25:12 +00:00
|
|
|
|
|
|
|
disk_super = fs_info->super_copy;
|
2016-05-02 18:13:20 +00:00
|
|
|
if (flags & OPEN_CTREE_RECOVER_SUPER)
|
2016-08-19 14:36:40 +00:00
|
|
|
ret = btrfs_read_dev_super(fs_devices->latest_bdev, disk_super,
|
|
|
|
sb_bytenr, SBREAD_RECOVER);
|
2013-09-21 08:34:18 +00:00
|
|
|
else
|
2016-08-19 14:36:40 +00:00
|
|
|
ret = btrfs_read_dev_super(fp, disk_super, sb_bytenr,
|
2016-08-22 14:32:24 +00:00
|
|
|
sbflags);
|
2013-07-03 13:25:12 +00:00
|
|
|
if (ret) {
|
|
|
|
printk("No valid btrfs found\n");
|
|
|
|
goto out_devices;
|
|
|
|
}
|
|
|
|
|
2015-05-11 08:08:45 +00:00
|
|
|
if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_CHANGING_FSID &&
|
|
|
|
!fs_info->ignore_fsid_mismatch) {
|
|
|
|
fprintf(stderr, "ERROR: Filesystem UUID change in progress\n");
|
|
|
|
goto out_devices;
|
|
|
|
}
|
|
|
|
|
2018-10-11 15:04:02 +00:00
|
|
|
ASSERT(!memcmp(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE));
|
|
|
|
if (btrfs_fs_incompat(fs_info, METADATA_UUID))
|
|
|
|
ASSERT(!memcmp(disk_super->metadata_uuid,
|
|
|
|
fs_devices->metadata_uuid, BTRFS_FSID_SIZE));
|
|
|
|
|
2017-05-17 08:57:35 +00:00
|
|
|
fs_info->sectorsize = btrfs_super_sectorsize(disk_super);
|
|
|
|
fs_info->nodesize = btrfs_super_nodesize(disk_super);
|
|
|
|
fs_info->stripesize = btrfs_super_stripesize(disk_super);
|
2013-07-03 13:25:12 +00:00
|
|
|
|
2016-11-14 18:43:20 +00:00
|
|
|
ret = btrfs_check_fs_compatibility(fs_info->super_copy, flags);
|
2013-07-03 13:25:12 +00:00
|
|
|
if (ret)
|
|
|
|
goto out_devices;
|
|
|
|
|
2021-02-18 00:27:21 +00:00
|
|
|
ret = btrfs_setup_chunk_tree_and_device_map(fs_info, ocf->chunk_tree_bytenr);
|
2013-07-03 13:25:12 +00:00
|
|
|
if (ret)
|
|
|
|
goto out_chunk;
|
|
|
|
|
2016-02-22 06:59:54 +00:00
|
|
|
/* Chunk tree root is unable to read, return directly */
|
|
|
|
if (!fs_info->chunk_root)
|
|
|
|
return fs_info;
|
|
|
|
|
2021-04-26 06:27:21 +00:00
|
|
|
/*
|
|
|
|
* Get zone type information of zoned block devices. This will also
|
|
|
|
* handle emulation of a zoned filesystem if a regular device has the
|
|
|
|
* zoned incompat feature flag set.
|
|
|
|
*/
|
|
|
|
ret = btrfs_get_dev_zone_info_all_devices(fs_info);
|
|
|
|
if (ret) {
|
|
|
|
error("zoned: failed to read device zone info: %d", ret);
|
|
|
|
goto out_chunk;
|
|
|
|
}
|
2021-04-26 06:27:22 +00:00
|
|
|
|
|
|
|
ret = btrfs_check_zoned_mode(fs_info);
|
|
|
|
if (ret) {
|
|
|
|
error("zoned: failed to initialize zoned mode: %d", ret);
|
|
|
|
goto out_chunk;
|
|
|
|
}
|
2021-04-26 06:27:21 +00:00
|
|
|
|
2013-07-03 13:25:12 +00:00
|
|
|
eb = fs_info->chunk_root->node;
|
|
|
|
read_extent_buffer(eb, fs_info->chunk_tree_uuid,
|
2013-10-02 12:28:27 +00:00
|
|
|
btrfs_header_chunk_tree_uuid(eb),
|
2013-07-03 13:25:12 +00:00
|
|
|
BTRFS_UUID_SIZE);
|
|
|
|
|
2021-02-18 00:27:21 +00:00
|
|
|
ret = btrfs_setup_all_roots(fs_info, ocf->root_tree_bytenr, flags);
|
2016-02-22 06:59:54 +00:00
|
|
|
if (ret && !(flags & __OPEN_CTREE_RETURN_CHUNK_ROOT) &&
|
|
|
|
!fs_info->ignore_chunk_tree_error)
|
Btrfs-progs: fsck: disallow partial opening if critical roots corrupted
If btrfs tree root is corrupted, fsck will hit the following segmentation.
enabling repair mode
Check tree block failed, want=29376512, have=0
Check tree block failed, want=29376512, have=0
Check tree block failed, want=29376512, have=0
Check tree block failed, want=29376512, have=0
Check tree block failed, want=29376512, have=0
read block failed check_tree_block
Couldn't read tree root
Checking filesystem on /dev/sda9
UUID: 0e1a754d-04a5-4256-ae79-0f769751803e
Critical roots corrupted, unable to fsck the FS
Segmentation fault (core dumped)
In btrfs_setup_all_roots(), we could tolerate some trees(extent tree, csum tree)
corrupted, and we have did careful check inside that function, it will
return NULL if critial roots corrupt(for example tree root).
The problem is that we check @OPEN_CTREE_PARTIAL flag again after
calling btrfs_setup_all_roots() which will successfully return
@fs_info though critial roots corrupted.
Fix this problem by removing @OPEN_CTREE_PARTIAL flag check outsize
btrfs_setup_all_roots().
Signed-off-by: Wang Shilong <wangsl.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.cz>
2014-05-28 11:20:39 +00:00
|
|
|
goto out_chunk;
|
2013-07-03 13:25:12 +00:00
|
|
|
|
|
|
|
return fs_info;
|
|
|
|
|
|
|
|
out_chunk:
|
|
|
|
btrfs_release_all_roots(fs_info);
|
|
|
|
btrfs_cleanup_all_caches(fs_info);
|
|
|
|
out_devices:
|
|
|
|
btrfs_close_devices(fs_devices);
|
2011-08-26 13:51:36 +00:00
|
|
|
out:
|
2013-07-03 13:25:12 +00:00
|
|
|
btrfs_free_fs_info(fs_info);
|
2011-08-26 13:51:36 +00:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2021-02-18 00:27:21 +00:00
|
|
|
struct btrfs_fs_info *open_ctree_fs_info(struct open_ctree_flags *ocf)
|
2011-08-26 13:51:36 +00:00
|
|
|
{
|
|
|
|
int fp;
|
2016-03-18 15:07:09 +00:00
|
|
|
int ret;
|
2012-02-05 21:11:48 +00:00
|
|
|
struct btrfs_fs_info *info;
|
2016-05-20 11:09:17 +00:00
|
|
|
int oflags = O_RDWR;
|
2016-03-18 14:03:42 +00:00
|
|
|
struct stat st;
|
|
|
|
|
2021-02-18 00:27:21 +00:00
|
|
|
ret = stat(ocf->filename, &st);
|
2016-03-18 15:07:09 +00:00
|
|
|
if (ret < 0) {
|
2021-02-18 00:27:21 +00:00
|
|
|
error("cannot stat '%s': %m", ocf->filename);
|
2016-03-18 15:07:09 +00:00
|
|
|
return NULL;
|
|
|
|
}
|
2016-03-18 14:03:42 +00:00
|
|
|
if (!(((st.st_mode & S_IFMT) == S_IFREG) || ((st.st_mode & S_IFMT) == S_IFBLK))) {
|
2021-02-18 00:27:21 +00:00
|
|
|
error("not a regular file or block device: %s", ocf->filename);
|
2016-03-18 14:03:42 +00:00
|
|
|
return NULL;
|
|
|
|
}
|
2011-08-26 13:51:36 +00:00
|
|
|
|
2021-02-18 00:27:21 +00:00
|
|
|
if (!(ocf->flags & OPEN_CTREE_WRITES))
|
2013-10-28 18:28:43 +00:00
|
|
|
oflags = O_RDONLY;
|
2011-08-26 13:51:36 +00:00
|
|
|
|
2021-02-18 00:27:21 +00:00
|
|
|
fp = open(ocf->filename, oflags);
|
2011-08-26 13:51:36 +00:00
|
|
|
if (fp < 0) {
|
2021-02-18 00:27:21 +00:00
|
|
|
error("cannot open '%s': %m", ocf->filename);
|
2011-08-26 13:51:36 +00:00
|
|
|
return NULL;
|
|
|
|
}
|
2021-02-18 00:27:21 +00:00
|
|
|
info = __open_ctree_fd(fp, ocf);
|
2011-08-26 13:51:36 +00:00
|
|
|
close(fp);
|
2012-02-05 21:11:48 +00:00
|
|
|
return info;
|
|
|
|
}
|
2011-08-26 13:51:36 +00:00
|
|
|
|
2013-10-28 18:28:43 +00:00
|
|
|
/*
 * Open the filesystem at @filename and return one of its roots.
 *
 * @filename:  path of the device or image
 * @sb_bytenr: super block offset, passed on through open_ctree_flags
 * @flags:     OPEN_CTREE_* flags; OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR is not
 *             accepted because the resulting fs_info may have no valid root
 *             to return
 *
 * Returns the chunk root when __OPEN_CTREE_RETURN_CHUNK_ROOT is set, the fs
 * root otherwise, or NULL when the flags are invalid or the open fails.
 */
struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr,
			      unsigned flags)
{
	struct btrfs_fs_info *info;
	struct open_ctree_flags ocf = { 0 };

	/*
	 * This flags may not return fs_info with any valid root. Report it
	 * and fail the same way open_ctree_fd() does instead of aborting the
	 * whole process.
	 */
	if (flags & OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR) {
		error("invalid open_ctree flags: 0x%llx",
				(unsigned long long)flags);
		return NULL;
	}

	ocf.filename = filename;
	ocf.sb_bytenr = sb_bytenr;
	ocf.flags = flags;
	info = open_ctree_fs_info(&ocf);
	if (!info)
		return NULL;
	if (flags & __OPEN_CTREE_RETURN_CHUNK_ROOT)
		return info->chunk_root;
	return info->fs_root;
}
|
|
|
|
|
|
|
|
/*
 * Variant of open_ctree() for callers that already hold an open descriptor.
 *
 * @fp:        open descriptor of the device or image
 * @path:      path belonging to @fp, stored as the filename of the request
 * @sb_bytenr: super block offset, passed on through open_ctree_flags
 * @flags:     OPEN_CTREE_* flags; OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR is
 *             rejected
 *
 * Returns the chunk root when __OPEN_CTREE_RETURN_CHUNK_ROOT is set, the fs
 * root otherwise, or NULL when the flags are invalid or the open fails.
 */
struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr,
				 unsigned flags)
{
	struct open_ctree_flags ocf = {
		.filename = path,
		.sb_bytenr = sb_bytenr,
		.flags = flags,
	};
	struct btrfs_fs_info *fs_info;

	/* This flags may not return fs_info with any valid root */
	if (flags & OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR) {
		error("invalid open_ctree flags: 0x%llx",
		      (unsigned long long)flags);
		return NULL;
	}

	fs_info = __open_ctree_fd(fp, &ocf);
	if (!fs_info)
		return NULL;

	return (flags & __OPEN_CTREE_RETURN_CHUNK_ROOT) ?
		fs_info->chunk_root : fs_info->fs_root;
}
|
|
|
|
|
2015-05-13 09:15:35 +00:00
|
|
|
/*
|
|
|
|
* Check if the super is valid:
|
|
|
|
* - nodesize/sectorsize - minimum, maximum, alignment
|
|
|
|
* - tree block starts - alignment
|
|
|
|
* - number of devices - something sane
|
|
|
|
* - sys array size - maximum
|
|
|
|
*/
|
2019-06-06 11:06:06 +00:00
|
|
|
int btrfs_check_super(struct btrfs_super_block *sb, unsigned sbflags)
|
2015-05-13 09:15:35 +00:00
|
|
|
{
|
2016-09-17 23:10:23 +00:00
|
|
|
u8 result[BTRFS_CSUM_SIZE];
|
2015-05-13 09:15:35 +00:00
|
|
|
u16 csum_type;
|
|
|
|
int csum_size;
|
2018-10-11 15:03:59 +00:00
|
|
|
u8 *metadata_uuid;
|
2015-05-13 09:15:35 +00:00
|
|
|
|
|
|
|
if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
|
2018-04-11 07:29:35 +00:00
|
|
|
if (btrfs_super_magic(sb) == BTRFS_MAGIC_TEMPORARY) {
|
|
|
|
if (!(sbflags & SBREAD_TEMPORARY)) {
|
2016-08-22 14:32:24 +00:00
|
|
|
error("superblock magic doesn't match");
|
|
|
|
return -EIO;
|
|
|
|
}
|
|
|
|
}
|
2015-05-13 09:15:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
csum_type = btrfs_super_csum_type(sb);
|
2019-09-25 13:37:24 +00:00
|
|
|
if (csum_type >= btrfs_super_num_csums()) {
|
2016-09-08 16:42:45 +00:00
|
|
|
error("unsupported checksum algorithm %u", csum_type);
|
2015-05-13 09:15:35 +00:00
|
|
|
return -EIO;
|
|
|
|
}
|
2019-09-25 13:37:24 +00:00
|
|
|
csum_size = btrfs_super_csum_size(sb);
|
2015-05-13 09:15:35 +00:00
|
|
|
|
2021-02-18 00:38:08 +00:00
|
|
|
btrfs_csum_data(NULL, csum_type, (u8 *)sb + BTRFS_CSUM_SIZE,
|
2019-09-03 15:00:41 +00:00
|
|
|
result, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
|
2015-05-13 09:15:35 +00:00
|
|
|
|
|
|
|
if (memcmp(result, sb->csum, csum_size)) {
|
2016-05-11 13:40:38 +00:00
|
|
|
error("superblock checksum mismatch");
|
2015-05-13 09:15:35 +00:00
|
|
|
return -EIO;
|
|
|
|
}
|
|
|
|
if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
|
2016-05-11 13:40:38 +00:00
|
|
|
error("tree_root level too big: %d >= %d",
|
2015-05-13 09:15:35 +00:00
|
|
|
btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
|
2016-05-11 13:40:07 +00:00
|
|
|
goto error_out;
|
2015-05-13 09:15:35 +00:00
|
|
|
}
|
|
|
|
if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
|
2016-05-11 13:40:38 +00:00
|
|
|
error("chunk_root level too big: %d >= %d",
|
2015-05-13 09:15:35 +00:00
|
|
|
btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
|
2016-05-11 13:40:07 +00:00
|
|
|
goto error_out;
|
2015-05-13 09:15:35 +00:00
|
|
|
}
|
|
|
|
if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
|
2016-05-11 13:40:38 +00:00
|
|
|
error("log_root level too big: %d >= %d",
|
2015-05-13 09:15:35 +00:00
|
|
|
btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
|
2016-05-11 13:40:07 +00:00
|
|
|
goto error_out;
|
2015-05-13 09:15:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) {
|
2016-05-11 13:40:38 +00:00
|
|
|
error("tree_root block unaligned: %llu", btrfs_super_root(sb));
|
2016-05-11 13:40:07 +00:00
|
|
|
goto error_out;
|
2015-05-13 09:15:35 +00:00
|
|
|
}
|
|
|
|
if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) {
|
2016-05-11 13:40:38 +00:00
|
|
|
error("chunk_root block unaligned: %llu",
|
2015-05-13 09:15:35 +00:00
|
|
|
btrfs_super_chunk_root(sb));
|
2016-05-11 13:40:07 +00:00
|
|
|
goto error_out;
|
2015-05-13 09:15:35 +00:00
|
|
|
}
|
|
|
|
if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) {
|
2016-05-11 13:40:38 +00:00
|
|
|
error("log_root block unaligned: %llu",
|
2015-05-13 09:15:35 +00:00
|
|
|
btrfs_super_log_root(sb));
|
2016-05-11 13:40:07 +00:00
|
|
|
goto error_out;
|
2015-05-13 09:15:35 +00:00
|
|
|
}
|
|
|
|
if (btrfs_super_nodesize(sb) < 4096) {
|
2016-05-11 13:40:38 +00:00
|
|
|
error("nodesize too small: %u < 4096",
|
2015-05-13 09:15:35 +00:00
|
|
|
btrfs_super_nodesize(sb));
|
2016-05-11 13:40:07 +00:00
|
|
|
goto error_out;
|
2015-05-13 09:15:35 +00:00
|
|
|
}
|
|
|
|
if (!IS_ALIGNED(btrfs_super_nodesize(sb), 4096)) {
|
2016-05-11 13:40:38 +00:00
|
|
|
error("nodesize unaligned: %u", btrfs_super_nodesize(sb));
|
2016-05-11 13:40:07 +00:00
|
|
|
goto error_out;
|
2015-05-13 09:15:35 +00:00
|
|
|
}
|
|
|
|
if (btrfs_super_sectorsize(sb) < 4096) {
|
2016-05-11 13:40:38 +00:00
|
|
|
error("sectorsize too small: %u < 4096",
|
2015-05-13 09:15:35 +00:00
|
|
|
btrfs_super_sectorsize(sb));
|
2016-05-11 13:40:07 +00:00
|
|
|
goto error_out;
|
2015-05-13 09:15:35 +00:00
|
|
|
}
|
|
|
|
if (!IS_ALIGNED(btrfs_super_sectorsize(sb), 4096)) {
|
2016-05-11 13:40:38 +00:00
|
|
|
error("sectorsize unaligned: %u", btrfs_super_sectorsize(sb));
|
2016-05-11 13:40:07 +00:00
|
|
|
goto error_out;
|
|
|
|
}
|
|
|
|
if (btrfs_super_total_bytes(sb) == 0) {
|
|
|
|
error("invalid total_bytes 0");
|
|
|
|
goto error_out;
|
|
|
|
}
|
|
|
|
if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
|
|
|
|
error("invalid bytes_used %llu", btrfs_super_bytes_used(sb));
|
|
|
|
goto error_out;
|
|
|
|
}
|
2016-06-17 05:37:54 +00:00
|
|
|
if ((btrfs_super_stripesize(sb) != 4096)
|
|
|
|
&& (btrfs_super_stripesize(sb) != btrfs_super_sectorsize(sb))) {
|
2016-05-11 13:40:07 +00:00
|
|
|
error("invalid stripesize %u", btrfs_super_stripesize(sb));
|
|
|
|
goto error_out;
|
2015-05-13 09:15:35 +00:00
|
|
|
}
|
|
|
|
|
2018-10-11 15:03:59 +00:00
|
|
|
if (btrfs_super_incompat_flags(sb) & BTRFS_FEATURE_INCOMPAT_METADATA_UUID)
|
|
|
|
metadata_uuid = sb->metadata_uuid;
|
|
|
|
else
|
|
|
|
metadata_uuid = sb->fsid;
|
|
|
|
|
|
|
|
if (memcmp(metadata_uuid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) {
|
2015-05-13 09:15:35 +00:00
|
|
|
char fsid[BTRFS_UUID_UNPARSED_SIZE];
|
|
|
|
char dev_fsid[BTRFS_UUID_UNPARSED_SIZE];
|
|
|
|
|
2018-10-11 15:03:59 +00:00
|
|
|
uuid_unparse(sb->metadata_uuid, fsid);
|
2015-05-13 09:15:35 +00:00
|
|
|
uuid_unparse(sb->dev_item.fsid, dev_fsid);
|
2018-06-18 16:15:12 +00:00
|
|
|
if (sbflags & SBREAD_IGNORE_FSID_MISMATCH) {
|
|
|
|
warning("ignored: dev_item fsid mismatch: %s != %s",
|
|
|
|
dev_fsid, fsid);
|
|
|
|
} else {
|
|
|
|
error("dev_item UUID does not match fsid: %s != %s",
|
|
|
|
dev_fsid, fsid);
|
|
|
|
goto error_out;
|
|
|
|
}
|
2015-05-13 09:15:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Hint to catch really bogus numbers, bitflips or so
|
|
|
|
*/
|
|
|
|
if (btrfs_super_num_devices(sb) > (1UL << 31)) {
|
2016-05-11 13:40:38 +00:00
|
|
|
warning("suspicious number of devices: %llu",
|
2015-05-13 09:15:35 +00:00
|
|
|
btrfs_super_num_devices(sb));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (btrfs_super_num_devices(sb) == 0) {
|
2016-05-11 13:40:38 +00:00
|
|
|
error("number of devices is 0");
|
2016-05-11 13:40:07 +00:00
|
|
|
goto error_out;
|
2015-05-13 09:15:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Obvious sys_chunk_array corruptions, it must hold at least one key
|
|
|
|
* and one chunk
|
|
|
|
*/
|
|
|
|
if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
|
2016-05-11 13:40:38 +00:00
|
|
|
error("system chunk array too big %u > %u",
|
|
|
|
btrfs_super_sys_array_size(sb),
|
|
|
|
BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
|
2016-05-11 13:40:07 +00:00
|
|
|
goto error_out;
|
2015-05-13 09:15:35 +00:00
|
|
|
}
|
|
|
|
if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
|
|
|
|
+ sizeof(struct btrfs_chunk)) {
|
2016-10-06 16:04:51 +00:00
|
|
|
error("system chunk array too small %u < %zu",
|
2016-05-11 13:40:38 +00:00
|
|
|
btrfs_super_sys_array_size(sb),
|
|
|
|
sizeof(struct btrfs_disk_key) +
|
|
|
|
sizeof(struct btrfs_chunk));
|
2016-05-11 13:40:07 +00:00
|
|
|
goto error_out;
|
2015-05-13 09:15:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
2016-05-11 13:40:07 +00:00
|
|
|
|
|
|
|
error_out:
|
|
|
|
error("superblock checksum matches but it has invalid members");
|
|
|
|
return -EIO;
|
2015-05-13 09:15:35 +00:00
|
|
|
}
|
|
|
|
|
2017-12-05 08:39:47 +00:00
|
|
|
/*
|
|
|
|
* btrfs_read_dev_super - read a valid superblock from a block device
|
|
|
|
* @fd: file descriptor of the device
|
|
|
|
* @sb: buffer where the superblock is going to be read in
|
|
|
|
* @sb_bytenr: offset of the particular superblock copy we want
|
|
|
|
* @sbflags: flags controlling how the superblock is read
|
|
|
|
*
|
2018-11-26 17:01:42 +00:00
|
|
|
* This function is used by various btrfs commands to obtain a valid superblock.
|
2017-12-05 08:39:47 +00:00
|
|
|
*
|
|
|
|
* It's mode of operation is controlled by the @sb_bytenr and @sbdflags
|
|
|
|
* parameters. If SBREAD_RECOVER flag is set and @sb_bytenr is
|
|
|
|
* BTRFS_SUPER_INFO_OFFSET then the function reads all 3 superblock copies and
|
|
|
|
* returns the newest one. If SBREAD_RECOVER is not set then only a single
|
|
|
|
* copy is read, which one is decided by @sb_bytenr. If @sb_bytenr !=
|
|
|
|
* BTRFS_SUPER_INFO_OFFSET then the @sbflags is effectively ignored and only a
|
|
|
|
* single copy is read.
|
|
|
|
*/
|
2014-07-03 09:36:36 +00:00
|
|
|
int btrfs_read_dev_super(int fd, struct btrfs_super_block *sb, u64 sb_bytenr,
|
2016-08-19 14:36:40 +00:00
|
|
|
unsigned sbflags)
|
2008-12-17 21:10:07 +00:00
|
|
|
{
|
2009-06-03 15:59:47 +00:00
|
|
|
u8 fsid[BTRFS_FSID_SIZE];
|
2018-10-11 15:03:59 +00:00
|
|
|
u8 metadata_uuid[BTRFS_FSID_SIZE];
|
2012-09-04 17:59:26 +00:00
|
|
|
int fsid_is_initialized = 0;
|
2015-05-13 09:15:34 +00:00
|
|
|
char tmp[BTRFS_SUPER_INFO_SIZE];
|
|
|
|
struct btrfs_super_block *buf = (struct btrfs_super_block *)tmp;
|
2008-12-17 21:10:07 +00:00
|
|
|
int i;
|
|
|
|
int ret;
|
2016-08-19 14:36:40 +00:00
|
|
|
int max_super = sbflags & SBREAD_RECOVER ? BTRFS_SUPER_MIRROR_MAX : 1;
|
2008-12-17 21:10:07 +00:00
|
|
|
u64 transid = 0;
|
2018-10-11 15:03:59 +00:00
|
|
|
bool metadata_uuid_set = false;
|
2008-12-17 21:10:07 +00:00
|
|
|
u64 bytenr;
|
|
|
|
|
|
|
|
if (sb_bytenr != BTRFS_SUPER_INFO_OFFSET) {
|
btrfs-progs: zoned: implement log-structured superblock
Superblock (and its copies) is the only data structure in btrfs which has a
fixed location on a device. Since we cannot overwrite in a sequential write
required zone, we cannot place superblock in the zone. One easy solution
is limiting superblock and copies to be placed only in conventional zones.
However, this method has two downsides: one is reduced number of superblock
copies. The location of the second copy of superblock is 256GB, which is in
a sequential write required zone on typical devices in the market today.
So, the number of superblock and copies is limited to be two. Second
downside is that we cannot support devices which have no conventional zones
at all.
To solve these two problems, we employ superblock log writing. It uses two
adjacent zones as a circular buffer to write updated superblocks. Once the
first zone is filled up, start writing into the second one. Then, when
both zones are filled up and before starting to write to the first zone
again, reset the first zone.
We can determine the position of the latest superblock by reading write
pointer information from a device. One corner case is when both zones are
full. For this situation, we read out the last superblock of each zone, and
compare them to determine which zone is older.
The following zones are reserved as the circular buffer on ZONED btrfs.
- primary superblock: offset 0B (and the following zone)
- first copy: offset 512G (and the following zone)
- Second copy: offset 4T (4096G, and the following zone)
If these reserved zones are conventional, superblock is written fixed at
the start of the zone without logging.
Currently, superblock reading/writing is done by pread/pwrite. This
commit replace the call sites with sbread/sbwrite to wrap the functions.
For zoned btrfs, btrfs_sb_io which is called from sbread/sbwrite
reverses the IO position back to a mirror number, maps the mirror number
into the superblock logging position, and do the IO.
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2021-04-26 06:27:26 +00:00
|
|
|
ret = sbread(fd, buf, sb_bytenr);
|
2016-09-26 04:54:26 +00:00
|
|
|
/* real error */
|
|
|
|
if (ret < 0)
|
|
|
|
return -errno;
|
|
|
|
|
|
|
|
/* Not large enough sb, return -ENOENT instead of normal -EIO */
|
2015-05-13 09:15:34 +00:00
|
|
|
if (ret < BTRFS_SUPER_INFO_SIZE)
|
2016-09-26 04:54:26 +00:00
|
|
|
return -ENOENT;
|
2008-12-17 21:10:07 +00:00
|
|
|
|
2015-05-13 09:15:35 +00:00
|
|
|
if (btrfs_super_bytenr(buf) != sb_bytenr)
|
2016-09-26 04:54:26 +00:00
|
|
|
return -EIO;
|
2008-12-17 21:10:07 +00:00
|
|
|
|
2019-06-06 11:06:06 +00:00
|
|
|
ret = btrfs_check_super(buf, sbflags);
|
2016-09-26 04:54:26 +00:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
2015-05-13 09:15:34 +00:00
|
|
|
memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE);
|
2008-12-17 21:10:07 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-03-17 11:59:01 +00:00
|
|
|
/*
|
|
|
|
* we would like to check all the supers, but that would make
|
|
|
|
* a btrfs mount succeed after a mkfs from a different FS.
|
|
|
|
* So, we need to add a special mount option to scan for
|
|
|
|
* later supers, using BTRFS_SUPER_MIRROR_MAX instead
|
|
|
|
*/
|
|
|
|
|
2014-07-03 09:36:36 +00:00
|
|
|
for (i = 0; i < max_super; i++) {
|
2008-12-17 21:10:07 +00:00
|
|
|
bytenr = btrfs_sb_offset(i);
|
btrfs-progs: zoned: implement log-structured superblock
Superblock (and its copies) is the only data structure in btrfs which has a
fixed location on a device. Since we cannot overwrite in a sequential write
required zone, we cannot place superblock in the zone. One easy solution
is limiting superblock and copies to be placed only in conventional zones.
However, this method has two downsides: one is reduced number of superblock
copies. The location of the second copy of superblock is 256GB, which is in
a sequential write required zone on typical devices in the market today.
So, the number of superblock and copies is limited to be two. Second
downside is that we cannot support devices which have no conventional zones
at all.
To solve these two problems, we employ superblock log writing. It uses two
adjacent zones as a circular buffer to write updated superblocks. Once the
first zone is filled up, start writing into the second one. Then, when
both zones are filled up and before starting to write to the first zone
again, reset the first zone.
We can determine the position of the latest superblock by reading write
pointer information from a device. One corner case is when both zones are
full. For this situation, we read out the last superblock of each zone, and
compare them to determine which zone is older.
The following zones are reserved as the circular buffer on ZONED btrfs.
- primary superblock: offset 0B (and the following zone)
- first copy: offset 512G (and the following zone)
- Second copy: offset 4T (4096G, and the following zone)
If these reserved zones are conventional, superblock is written fixed at
the start of the zone without logging.
Currently, superblock reading/writing is done by pread/pwrite. This
commit replace the call sites with sbread/sbwrite to wrap the functions.
For zoned btrfs, btrfs_sb_io which is called from sbread/sbwrite
reverses the IO position back to a mirror number, maps the mirror number
into the superblock logging position, and do the IO.
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2021-04-26 06:27:26 +00:00
|
|
|
ret = sbread(fd, buf, bytenr);
|
|
|
|
|
2015-05-13 09:15:34 +00:00
|
|
|
if (ret < BTRFS_SUPER_INFO_SIZE)
|
2008-12-17 21:10:07 +00:00
|
|
|
break;
|
|
|
|
|
2015-05-13 09:15:34 +00:00
|
|
|
if (btrfs_super_bytenr(buf) != bytenr )
|
2012-09-04 17:59:26 +00:00
|
|
|
continue;
|
|
|
|
/* if magic is NULL, the device was removed */
|
2015-05-13 09:15:34 +00:00
|
|
|
if (btrfs_super_magic(buf) == 0 && i == 0)
|
2015-05-13 09:15:35 +00:00
|
|
|
break;
|
2019-06-06 11:06:06 +00:00
|
|
|
if (btrfs_check_super(buf, sbflags))
|
2008-12-17 21:10:07 +00:00
|
|
|
continue;
|
|
|
|
|
2012-09-04 17:59:26 +00:00
|
|
|
if (!fsid_is_initialized) {
|
2018-10-11 15:03:59 +00:00
|
|
|
if (btrfs_super_incompat_flags(buf) &
|
|
|
|
BTRFS_FEATURE_INCOMPAT_METADATA_UUID) {
|
|
|
|
metadata_uuid_set = true;
|
|
|
|
memcpy(metadata_uuid, buf->metadata_uuid,
|
|
|
|
sizeof(metadata_uuid));
|
|
|
|
}
|
2015-05-13 09:15:34 +00:00
|
|
|
memcpy(fsid, buf->fsid, sizeof(fsid));
|
2012-09-04 17:59:26 +00:00
|
|
|
fsid_is_initialized = 1;
|
2018-10-11 15:03:59 +00:00
|
|
|
} else if (memcmp(fsid, buf->fsid, sizeof(fsid)) ||
|
|
|
|
(metadata_uuid_set && memcmp(metadata_uuid,
|
|
|
|
buf->metadata_uuid,
|
|
|
|
sizeof(metadata_uuid)))) {
|
2012-09-04 17:59:26 +00:00
|
|
|
/*
|
|
|
|
* the superblocks (the original one and
|
|
|
|
* its backups) contain data of different
|
|
|
|
* filesystems -> the super cannot be trusted
|
|
|
|
*/
|
2009-06-03 15:59:47 +00:00
|
|
|
continue;
|
2012-09-04 17:59:26 +00:00
|
|
|
}
|
2009-06-03 15:59:47 +00:00
|
|
|
|
2015-05-13 09:15:34 +00:00
|
|
|
if (btrfs_super_generation(buf) > transid) {
|
|
|
|
memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE);
|
|
|
|
transid = btrfs_super_generation(buf);
|
2008-12-17 21:10:07 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return transid > 0 ? 0 : -1;
|
|
|
|
}
|
|
|
|
|
2017-06-13 09:19:27 +00:00
|
|
|
static int write_dev_supers(struct btrfs_fs_info *fs_info,
|
2013-08-07 01:03:33 +00:00
|
|
|
struct btrfs_super_block *sb,
|
|
|
|
struct btrfs_device *device)
|
2008-12-05 17:21:31 +00:00
|
|
|
{
|
|
|
|
u64 bytenr;
|
2019-09-03 15:00:41 +00:00
|
|
|
u8 result[BTRFS_CSUM_SIZE];
|
2008-12-05 17:21:31 +00:00
|
|
|
int i, ret;
|
2019-09-03 15:00:39 +00:00
|
|
|
u16 csum_type = btrfs_super_csum_type(sb);
|
2008-12-05 17:21:31 +00:00
|
|
|
|
btrfs-progs: disk-io: Flush to ensure super block write is FUA
[BUG]
There are tons of reports of btrfs-progs screwing up the fs, the most
recent one is "btrfs check --clear-space-cache v1" triggered BUG_ON()
and then leaving the fs with transid mismatch problem.
[CAUSE]
In kernel, we have block layer handing the flush work, even on devices
without FUA support (like most SATA device using default libata
settings), kernel handles FUA write by flushing the device, then normal
write, and finish it with another flush.
The pre-flush, write, post-flush works pretty well to implement FUA
write.
However in btrfs-progs we just use pwrite(), there is nothing keeping
the write order.
So even for basic v1 free space cache clearing, we have different vision
on the write sequence from kernel bio layer (by dm-log-writes) and user
space pwrite() calls.
In btrfs-progs, with extra debug output in write_tree_block() and
write_dev_supers(), we can see btrfs-progs follows the right write
sequence:
Opening filesystem to check...
Checking filesystem on /dev/mapper/log
UUID: 3feb3c8b-4eb3-42f3-8e9c-0af22dd58ecf
write tree block start=1708130304 gen=39
write tree block start=1708146688 gen=39
write tree block start=1708163072 gen=39
write super devid=1 gen=39
write tree block start=1708179456 gen=40
write tree block start=1708195840 gen=40
write super devid=1 gen=40
write tree block start=1708130304 gen=41
write tree block start=1708146688 gen=41
write tree block start=1708228608 gen=41
write super devid=1 gen=41
write tree block start=1708163072 gen=42
write tree block start=1708179456 gen=42
write super devid=1 gen=42
write tree block start=1708130304 gen=43
write tree block start=1708146688 gen=43
write super devid=1 gen=43
Free space cache cleared
But from dm-log-writes, the bio sequence is a different story:
replaying 1742: sector 131072, size 4096, flags 0(NONE)
replaying 1743: sector 128, size 4096, flags 0(NONE) <<< Only one sb write
replaying 1744: sector 2828480, size 4096, flags 0(NONE)
replaying 1745: sector 2828488, size 4096, flags 0(NONE)
replaying 1746: sector 2828496, size 4096, flags 0(NONE)
replaying 1787: sector 2304120, size 4096, flags 0(NONE)
......
replaying 1790: sector 2304144, size 4096, flags 0(NONE)
replaying 1791: sector 2304152, size 4096, flags 0(NONE)
replaying 1792: sector 0, size 0, flags 8(MARK)
During the free space cache clearing, we committed 3 transaction but
dm-log-write only caught one super block write.
This means all the 3 writes were merged into the last super block write.
And the super block write was the 2nd write, before all tree block
writes, completely screwing up the metadata CoW protection.
No wonder crashed btrfs-progs can make things worse.
[FIX]
Fix this super serious problem by implementing pre and post flush for
the primary super block in btrfs-progs.
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
2019-03-27 09:46:52 +00:00
|
|
|
/*
|
|
|
|
* We need to write super block after all metadata written.
|
|
|
|
* This is the equivalent of kernel pre-flush for FUA.
|
|
|
|
*/
|
|
|
|
ret = fsync(device->fd);
|
|
|
|
if (ret < 0) {
|
|
|
|
error(
|
|
|
|
"failed to write super block for devid %llu: flush error: %m",
|
|
|
|
device->devid);
|
|
|
|
return -errno;
|
|
|
|
}
|
2017-06-13 09:19:27 +00:00
|
|
|
if (fs_info->super_bytenr != BTRFS_SUPER_INFO_OFFSET) {
|
|
|
|
btrfs_set_super_bytenr(sb, fs_info->super_bytenr);
|
2021-02-18 00:38:08 +00:00
|
|
|
btrfs_csum_data(fs_info, csum_type, (u8 *)sb + BTRFS_CSUM_SIZE,
|
|
|
|
result, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
|
2019-09-03 15:00:41 +00:00
|
|
|
memcpy(&sb->csum[0], result, BTRFS_CSUM_SIZE);
|
2008-12-17 21:10:07 +00:00
|
|
|
|
2013-03-06 16:32:51 +00:00
|
|
|
/*
|
|
|
|
* super_copy is BTRFS_SUPER_INFO_SIZE bytes and is
|
|
|
|
* zero filled, we can use it directly
|
|
|
|
*/
|
btrfs-progs: zoned: implement log-structured superblock
Superblock (and its copies) is the only data structure in btrfs which has a
fixed location on a device. Since we cannot overwrite in a sequential write
required zone, we cannot place superblock in the zone. One easy solution
is limiting superblock and copies to be placed only in conventional zones.
However, this method has two downsides: one is reduced number of superblock
copies. The location of the second copy of superblock is 256GB, which is in
a sequential write required zone on typical devices in the market today.
So, the number of superblock and copies is limited to be two. Second
downside is that we cannot support devices which have no conventional zones
at all.
To solve these two problems, we employ superblock log writing. It uses two
adjacent zones as a circular buffer to write updated superblocks. Once the
first zone is filled up, start writing into the second one. Then, when
both zones are filled up and before starting to write to the first zone
again, reset the first zone.
We can determine the position of the latest superblock by reading write
pointer information from a device. One corner case is when both zones are
full. For this situation, we read out the last superblock of each zone, and
compare them to determine which zone is older.
The following zones are reserved as the circular buffer on ZONED btrfs.
- primary superblock: offset 0B (and the following zone)
- first copy: offset 512G (and the following zone)
- Second copy: offset 4T (4096G, and the following zone)
If these reserved zones are conventional, superblock is written fixed at
the start of the zone without logging.
Currently, superblock reading/writing is done by pread/pwrite. This
commit replace the call sites with sbread/sbwrite to wrap the functions.
For zoned btrfs, btrfs_sb_io which is called from sbread/sbwrite
reverses the IO position back to a mirror number, maps the mirror number
into the superblock logging position, and do the IO.
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2021-04-26 06:27:26 +00:00
|
|
|
ret = sbwrite(device->fd, fs_info->super_copy, fs_info->super_bytenr);
|
2019-03-27 09:46:51 +00:00
|
|
|
if (ret != BTRFS_SUPER_INFO_SIZE) {
|
|
|
|
errno = EIO;
|
|
|
|
error(
|
|
|
|
"failed to write super block for devid %llu: write error: %m",
|
|
|
|
device->devid);
|
|
|
|
return -EIO;
|
|
|
|
}
|
btrfs-progs: disk-io: Flush to ensure super block write is FUA
[BUG]
There are tons of reports of btrfs-progs screwing up the fs, the most
recent one is "btrfs check --clear-space-cache v1" triggered BUG_ON()
and then leaving the fs with transid mismatch problem.
[CAUSE]
In kernel, we have block layer handing the flush work, even on devices
without FUA support (like most SATA device using default libata
settings), kernel handles FUA write by flushing the device, then normal
write, and finish it with another flush.
The pre-flush, write, post-flush works pretty well to implement FUA
write.
However in btrfs-progs we just use pwrite(), there is nothing keeping
the write order.
So even for basic v1 free space cache clearing, we have different vision
on the write sequence from kernel bio layer (by dm-log-writes) and user
space pwrite() calls.
In btrfs-progs, with extra debug output in write_tree_block() and
write_dev_supers(), we can see btrfs-progs follows the right write
sequence:
Opening filesystem to check...
Checking filesystem on /dev/mapper/log
UUID: 3feb3c8b-4eb3-42f3-8e9c-0af22dd58ecf
write tree block start=1708130304 gen=39
write tree block start=1708146688 gen=39
write tree block start=1708163072 gen=39
write super devid=1 gen=39
write tree block start=1708179456 gen=40
write tree block start=1708195840 gen=40
write super devid=1 gen=40
write tree block start=1708130304 gen=41
write tree block start=1708146688 gen=41
write tree block start=1708228608 gen=41
write super devid=1 gen=41
write tree block start=1708163072 gen=42
write tree block start=1708179456 gen=42
write super devid=1 gen=42
write tree block start=1708130304 gen=43
write tree block start=1708146688 gen=43
write super devid=1 gen=43
Free space cache cleared
But from dm-log-writes, the bio sequence is a different story:
replaying 1742: sector 131072, size 4096, flags 0(NONE)
replaying 1743: sector 128, size 4096, flags 0(NONE) <<< Only one sb write
replaying 1744: sector 2828480, size 4096, flags 0(NONE)
replaying 1745: sector 2828488, size 4096, flags 0(NONE)
replaying 1746: sector 2828496, size 4096, flags 0(NONE)
replaying 1787: sector 2304120, size 4096, flags 0(NONE)
......
replaying 1790: sector 2304144, size 4096, flags 0(NONE)
replaying 1791: sector 2304152, size 4096, flags 0(NONE)
replaying 1792: sector 0, size 0, flags 8(MARK)
During the free space cache clearing, we committed 3 transaction but
dm-log-write only caught one super block write.
This means all the 3 writes were merged into the last super block write.
And the super block write was the 2nd write, before all tree block
writes, completely screwing up the metadata CoW protection.
No wonder crashed btrfs-progs can make things worse.
[FIX]
Fix this super serious problem by implementing pre and post flush for
the primary super block in btrfs-progs.
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
2019-03-27 09:46:52 +00:00
|
|
|
ret = fsync(device->fd);
|
|
|
|
if (ret < 0) {
|
|
|
|
error(
|
|
|
|
"failed to write super block for devid %llu: flush error: %m",
|
|
|
|
device->devid);
|
|
|
|
return -errno;
|
|
|
|
}
|
2013-03-06 16:32:51 +00:00
|
|
|
return 0;
|
2008-12-17 21:10:07 +00:00
|
|
|
}
|
|
|
|
|
2008-12-05 17:21:31 +00:00
|
|
|
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
|
|
|
|
bytenr = btrfs_sb_offset(i);
|
2012-10-31 16:56:56 +00:00
|
|
|
if (bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
|
2008-12-05 17:21:31 +00:00
|
|
|
break;
|
|
|
|
|
2008-12-17 21:10:07 +00:00
|
|
|
btrfs_set_super_bytenr(sb, bytenr);
|
2008-12-05 17:21:31 +00:00
|
|
|
|
2021-02-18 00:38:08 +00:00
|
|
|
btrfs_csum_data(fs_info, csum_type, (u8 *)sb + BTRFS_CSUM_SIZE,
|
|
|
|
result, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
|
2019-09-03 15:00:41 +00:00
|
|
|
memcpy(&sb->csum[0], result, BTRFS_CSUM_SIZE);
|
2008-12-17 21:10:07 +00:00
|
|
|
|
2013-03-06 16:32:51 +00:00
|
|
|
/*
|
|
|
|
* super_copy is BTRFS_SUPER_INFO_SIZE bytes and is
|
|
|
|
* zero filled, we can use it directly
|
|
|
|
*/
|
btrfs-progs: zoned: implement log-structured superblock
Superblock (and its copies) is the only data structure in btrfs which has a
fixed location on a device. Since we cannot overwrite in a sequential write
required zone, we cannot place superblock in the zone. One easy solution
is limiting superblock and copies to be placed only in conventional zones.
However, this method has two downsides: one is reduced number of superblock
copies. The location of the second copy of superblock is 256GB, which is in
a sequential write required zone on typical devices in the market today.
So, the number of superblock and copies is limited to be two. Second
downside is that we cannot support devices which have no conventional zones
at all.
To solve these two problems, we employ superblock log writing. It uses two
adjacent zones as a circular buffer to write updated superblocks. Once the
first zone is filled up, start writing into the second one. Then, when
both zones are filled up and before starting to write to the first zone
again, reset the first zone.
We can determine the position of the latest superblock by reading write
pointer information from a device. One corner case is when both zones are
full. For this situation, we read out the last superblock of each zone, and
compare them to determine which zone is older.
The following zones are reserved as the circular buffer on ZONED btrfs.
- primary superblock: offset 0B (and the following zone)
- first copy: offset 512G (and the following zone)
- Second copy: offset 4T (4096G, and the following zone)
If these reserved zones are conventional, superblock is written fixed at
the start of the zone without logging.
Currently, superblock reading/writing is done by pread/pwrite. This
commit replace the call sites with sbread/sbwrite to wrap the functions.
For zoned btrfs, btrfs_sb_io which is called from sbread/sbwrite
reverses the IO position back to a mirror number, maps the mirror number
into the superblock logging position, and do the IO.
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2021-04-26 06:27:26 +00:00
|
|
|
ret = sbwrite(device->fd, fs_info->super_copy, bytenr);
|
2019-03-27 09:46:51 +00:00
|
|
|
if (ret != BTRFS_SUPER_INFO_SIZE) {
|
|
|
|
errno = EIO;
|
|
|
|
error(
|
|
|
|
"failed to write super block for devid %llu: write error: %m",
|
|
|
|
device->devid);
|
|
|
|
return -errno;
|
|
|
|
}
|
btrfs-progs: disk-io: Flush to ensure super block write is FUA
[BUG]
There are tons of reports of btrfs-progs screwing up the fs, the most
recent one is "btrfs check --clear-space-cache v1" triggered BUG_ON()
and then leaving the fs with transid mismatch problem.
[CAUSE]
In kernel, we have block layer handing the flush work, even on devices
without FUA support (like most SATA device using default libata
settings), kernel handles FUA write by flushing the device, then normal
write, and finish it with another flush.
The pre-flush, write, post-flush works pretty well to implement FUA
write.
However in btrfs-progs we just use pwrite(), there is nothing keeping
the write order.
So even for basic v1 free space cache clearing, we have different vision
on the write sequence from kernel bio layer (by dm-log-writes) and user
space pwrite() calls.
In btrfs-progs, with extra debug output in write_tree_block() and
write_dev_supers(), we can see btrfs-progs follows the right write
sequence:
Opening filesystem to check...
Checking filesystem on /dev/mapper/log
UUID: 3feb3c8b-4eb3-42f3-8e9c-0af22dd58ecf
write tree block start=1708130304 gen=39
write tree block start=1708146688 gen=39
write tree block start=1708163072 gen=39
write super devid=1 gen=39
write tree block start=1708179456 gen=40
write tree block start=1708195840 gen=40
write super devid=1 gen=40
write tree block start=1708130304 gen=41
write tree block start=1708146688 gen=41
write tree block start=1708228608 gen=41
write super devid=1 gen=41
write tree block start=1708163072 gen=42
write tree block start=1708179456 gen=42
write super devid=1 gen=42
write tree block start=1708130304 gen=43
write tree block start=1708146688 gen=43
write super devid=1 gen=43
Free space cache cleared
But from dm-log-writes, the bio sequence is a different story:
replaying 1742: sector 131072, size 4096, flags 0(NONE)
replaying 1743: sector 128, size 4096, flags 0(NONE) <<< Only one sb write
replaying 1744: sector 2828480, size 4096, flags 0(NONE)
replaying 1745: sector 2828488, size 4096, flags 0(NONE)
replaying 1746: sector 2828496, size 4096, flags 0(NONE)
replaying 1787: sector 2304120, size 4096, flags 0(NONE)
......
replaying 1790: sector 2304144, size 4096, flags 0(NONE)
replaying 1791: sector 2304152, size 4096, flags 0(NONE)
replaying 1792: sector 0, size 0, flags 8(MARK)
During the free space cache clearing, we committed 3 transaction but
dm-log-write only caught one super block write.
This means all the 3 writes were merged into the last super block write.
And the super block write was the 2nd write, before all tree block
writes, completely screwing up the metadata CoW protection.
No wonder crashed btrfs-progs can make things worse.
[FIX]
Fix this super serious problem by implementing pre and post flush for
the primary super block in btrfs-progs.
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
2019-03-27 09:46:52 +00:00
|
|
|
/*
|
|
|
|
* Flush after the primary sb write, this is the equivalent of
|
|
|
|
* kernel post-flush for FUA write.
|
|
|
|
*/
|
|
|
|
if (i == 0) {
|
|
|
|
ret = fsync(device->fd);
|
|
|
|
if (ret < 0) {
|
|
|
|
error(
|
|
|
|
"failed to write super block for devid %llu: flush error: %m",
|
|
|
|
device->devid);
|
|
|
|
return -errno;
|
|
|
|
}
|
|
|
|
}
|
2008-12-05 17:21:31 +00:00
|
|
|
}
|
2013-03-06 16:32:51 +00:00
|
|
|
|
2008-12-05 17:21:31 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-02-27 06:05:52 +00:00
|
|
|
/*
|
|
|
|
* copy all the root pointers into the super backup array.
|
|
|
|
* this will bump the backup pointer by one when it is
|
|
|
|
* done
|
|
|
|
*/
|
|
|
|
static void backup_super_roots(struct btrfs_fs_info *info)
|
|
|
|
{
|
|
|
|
struct btrfs_root_backup *root_backup;
|
|
|
|
int next_backup;
|
|
|
|
int last_backup;
|
|
|
|
|
|
|
|
last_backup = find_best_backup_root(info->super_copy);
|
|
|
|
next_backup = (last_backup + 1) % BTRFS_NUM_BACKUP_ROOTS;
|
|
|
|
|
|
|
|
/* just overwrite the last backup if we're at the same generation */
|
|
|
|
root_backup = info->super_copy->super_roots + last_backup;
|
|
|
|
if (btrfs_backup_tree_root_gen(root_backup) ==
|
|
|
|
btrfs_header_generation(info->tree_root->node))
|
|
|
|
next_backup = last_backup;
|
|
|
|
|
|
|
|
root_backup = info->super_copy->super_roots + next_backup;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* make sure all of our padding and empty slots get zero filled
|
|
|
|
* regardless of which ones we use today
|
|
|
|
*/
|
|
|
|
memset(root_backup, 0, sizeof(*root_backup));
|
|
|
|
btrfs_set_backup_tree_root(root_backup, info->tree_root->node->start);
|
|
|
|
btrfs_set_backup_tree_root_gen(root_backup,
|
|
|
|
btrfs_header_generation(info->tree_root->node));
|
|
|
|
btrfs_set_backup_tree_root_level(root_backup,
|
|
|
|
btrfs_header_level(info->tree_root->node));
|
|
|
|
|
|
|
|
btrfs_set_backup_chunk_root(root_backup, info->chunk_root->node->start);
|
|
|
|
btrfs_set_backup_chunk_root_gen(root_backup,
|
|
|
|
btrfs_header_generation(info->chunk_root->node));
|
|
|
|
btrfs_set_backup_chunk_root_level(root_backup,
|
|
|
|
btrfs_header_level(info->chunk_root->node));
|
|
|
|
|
|
|
|
btrfs_set_backup_extent_root(root_backup, info->extent_root->node->start);
|
|
|
|
btrfs_set_backup_extent_root_gen(root_backup,
|
|
|
|
btrfs_header_generation(info->extent_root->node));
|
|
|
|
btrfs_set_backup_extent_root_level(root_backup,
|
|
|
|
btrfs_header_level(info->extent_root->node));
|
|
|
|
/*
|
|
|
|
* we might commit during log recovery, which happens before we set
|
|
|
|
* the fs_root. Make sure it is valid before we fill it in.
|
|
|
|
*/
|
|
|
|
if (info->fs_root && info->fs_root->node) {
|
|
|
|
btrfs_set_backup_fs_root(root_backup,
|
|
|
|
info->fs_root->node->start);
|
|
|
|
btrfs_set_backup_fs_root_gen(root_backup,
|
|
|
|
btrfs_header_generation(info->fs_root->node));
|
|
|
|
btrfs_set_backup_fs_root_level(root_backup,
|
|
|
|
btrfs_header_level(info->fs_root->node));
|
|
|
|
}
|
|
|
|
|
|
|
|
btrfs_set_backup_dev_root(root_backup, info->dev_root->node->start);
|
|
|
|
btrfs_set_backup_dev_root_gen(root_backup,
|
|
|
|
btrfs_header_generation(info->dev_root->node));
|
|
|
|
btrfs_set_backup_dev_root_level(root_backup,
|
|
|
|
btrfs_header_level(info->dev_root->node));
|
|
|
|
|
|
|
|
btrfs_set_backup_csum_root(root_backup, info->csum_root->node->start);
|
|
|
|
btrfs_set_backup_csum_root_gen(root_backup,
|
|
|
|
btrfs_header_generation(info->csum_root->node));
|
|
|
|
btrfs_set_backup_csum_root_level(root_backup,
|
|
|
|
btrfs_header_level(info->csum_root->node));
|
|
|
|
|
|
|
|
btrfs_set_backup_total_bytes(root_backup,
|
|
|
|
btrfs_super_total_bytes(info->super_copy));
|
|
|
|
btrfs_set_backup_bytes_used(root_backup,
|
|
|
|
btrfs_super_bytes_used(info->super_copy));
|
|
|
|
btrfs_set_backup_num_devices(root_backup,
|
|
|
|
btrfs_super_num_devices(info->super_copy));
|
|
|
|
};
|
|
|
|
|
2017-06-13 09:19:27 +00:00
|
|
|
int write_all_supers(struct btrfs_fs_info *fs_info)
|
2008-04-10 20:22:00 +00:00
|
|
|
{
|
2017-06-13 09:19:27 +00:00
|
|
|
struct list_head *head = &fs_info->fs_devices->devices;
|
2008-04-10 20:22:00 +00:00
|
|
|
struct btrfs_device *dev;
|
2008-12-17 21:10:07 +00:00
|
|
|
struct btrfs_super_block *sb;
|
2008-04-10 20:22:00 +00:00
|
|
|
struct btrfs_dev_item *dev_item;
|
|
|
|
int ret;
|
2008-12-17 21:10:07 +00:00
|
|
|
u64 flags;
|
2008-04-10 20:22:00 +00:00
|
|
|
|
2019-02-27 06:05:52 +00:00
|
|
|
backup_super_roots(fs_info);
|
2017-06-13 09:19:27 +00:00
|
|
|
sb = fs_info->super_copy;
|
2008-12-17 21:10:07 +00:00
|
|
|
dev_item = &sb->dev_item;
|
2017-12-07 09:10:05 +00:00
|
|
|
list_for_each_entry(dev, head, dev_list) {
|
2008-11-18 15:40:06 +00:00
|
|
|
if (!dev->writeable)
|
|
|
|
continue;
|
|
|
|
|
2008-12-17 21:10:07 +00:00
|
|
|
btrfs_set_stack_device_generation(dev_item, 0);
|
|
|
|
btrfs_set_stack_device_type(dev_item, dev->type);
|
|
|
|
btrfs_set_stack_device_id(dev_item, dev->devid);
|
|
|
|
btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes);
|
|
|
|
btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used);
|
|
|
|
btrfs_set_stack_device_io_align(dev_item, dev->io_align);
|
|
|
|
btrfs_set_stack_device_io_width(dev_item, dev->io_width);
|
|
|
|
btrfs_set_stack_device_sector_size(dev_item, dev->sector_size);
|
|
|
|
memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE);
|
2018-10-11 15:04:02 +00:00
|
|
|
memcpy(dev_item->fsid, fs_info->fs_devices->metadata_uuid,
|
|
|
|
BTRFS_FSID_SIZE);
|
2008-12-17 21:10:07 +00:00
|
|
|
|
|
|
|
flags = btrfs_super_flags(sb);
|
|
|
|
btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
|
|
|
|
|
2017-06-13 09:19:27 +00:00
|
|
|
ret = write_dev_supers(fs_info, sb, dev);
|
2019-04-11 05:24:26 +00:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
2008-04-10 20:22:00 +00:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-05-28 06:36:50 +00:00
|
|
|
int write_ctree_super(struct btrfs_trans_handle *trans)
|
2007-02-02 14:18:22 +00:00
|
|
|
{
|
|
|
|
int ret;
|
2018-05-28 06:36:50 +00:00
|
|
|
struct btrfs_fs_info *fs_info = trans->fs_info;
|
2017-06-13 09:19:27 +00:00
|
|
|
struct btrfs_root *tree_root = fs_info->tree_root;
|
|
|
|
struct btrfs_root *chunk_root = fs_info->chunk_root;
|
2008-05-05 13:45:26 +00:00
|
|
|
|
2017-06-13 09:19:27 +00:00
|
|
|
if (fs_info->readonly)
|
2008-05-05 13:45:26 +00:00
|
|
|
return 0;
|
|
|
|
|
2017-06-13 09:19:27 +00:00
|
|
|
btrfs_set_super_generation(fs_info->super_copy,
|
2008-01-04 15:38:22 +00:00
|
|
|
trans->transid);
|
2017-06-13 09:19:27 +00:00
|
|
|
btrfs_set_super_root(fs_info->super_copy,
|
2008-01-04 15:38:22 +00:00
|
|
|
tree_root->node->start);
|
2017-06-13 09:19:27 +00:00
|
|
|
btrfs_set_super_root_level(fs_info->super_copy,
|
2008-01-04 15:38:22 +00:00
|
|
|
btrfs_header_level(tree_root->node));
|
2017-06-13 09:19:27 +00:00
|
|
|
btrfs_set_super_chunk_root(fs_info->super_copy,
|
2008-03-24 19:03:18 +00:00
|
|
|
chunk_root->node->start);
|
2017-06-13 09:19:27 +00:00
|
|
|
btrfs_set_super_chunk_root_level(fs_info->super_copy,
|
2008-03-24 19:03:18 +00:00
|
|
|
btrfs_header_level(chunk_root->node));
|
2017-06-13 09:19:27 +00:00
|
|
|
btrfs_set_super_chunk_root_generation(fs_info->super_copy,
|
2008-10-29 18:07:47 +00:00
|
|
|
btrfs_header_generation(chunk_root->node));
|
2008-12-17 21:10:07 +00:00
|
|
|
|
2017-06-13 09:19:27 +00:00
|
|
|
ret = write_all_supers(fs_info);
|
2008-01-04 15:38:22 +00:00
|
|
|
if (ret)
|
2007-02-21 22:04:57 +00:00
|
|
|
fprintf(stderr, "failed to write new super block err %d\n", ret);
|
2008-01-04 15:38:22 +00:00
|
|
|
return ret;
|
2007-03-01 23:59:40 +00:00
|
|
|
}
|
2007-04-12 16:14:47 +00:00
|
|
|
|
2016-02-22 06:59:54 +00:00
|
|
|
int close_ctree_fs_info(struct btrfs_fs_info *fs_info)
|
2007-02-21 22:04:57 +00:00
|
|
|
{
|
2007-03-13 20:47:54 +00:00
|
|
|
int ret;
|
2017-08-29 15:22:39 +00:00
|
|
|
int err = 0;
|
2007-03-16 20:20:31 +00:00
|
|
|
struct btrfs_trans_handle *trans;
|
2016-02-22 06:59:54 +00:00
|
|
|
struct btrfs_root *root = fs_info->tree_root;
|
2008-01-04 15:38:22 +00:00
|
|
|
|
2011-08-26 13:51:36 +00:00
|
|
|
if (fs_info->last_trans_committed !=
|
|
|
|
fs_info->generation) {
|
2016-02-22 06:59:54 +00:00
|
|
|
BUG_ON(!root);
|
2011-08-26 13:51:36 +00:00
|
|
|
trans = btrfs_start_transaction(root, 1);
|
2017-08-29 15:22:39 +00:00
|
|
|
if (IS_ERR(trans)) {
|
|
|
|
err = PTR_ERR(trans);
|
|
|
|
goto skip_commit;
|
|
|
|
}
|
2011-08-26 13:51:36 +00:00
|
|
|
btrfs_commit_transaction(trans, root);
|
|
|
|
trans = btrfs_start_transaction(root, 1);
|
2017-08-28 14:48:16 +00:00
|
|
|
BUG_ON(IS_ERR(trans));
|
2011-08-26 13:51:36 +00:00
|
|
|
ret = commit_tree_roots(trans, fs_info);
|
|
|
|
BUG_ON(ret);
|
|
|
|
ret = __commit_transaction(trans, root);
|
|
|
|
BUG_ON(ret);
|
2019-04-11 05:24:26 +00:00
|
|
|
ret = write_ctree_super(trans);
|
2017-02-09 16:24:50 +00:00
|
|
|
kfree(trans);
|
2019-04-11 05:24:26 +00:00
|
|
|
if (ret) {
|
|
|
|
err = ret;
|
|
|
|
goto skip_commit;
|
|
|
|
}
|
2011-08-26 13:51:36 +00:00
|
|
|
}
|
2016-08-22 14:32:24 +00:00
|
|
|
|
|
|
|
if (fs_info->finalize_on_close) {
|
2021-04-26 06:27:40 +00:00
|
|
|
ret = btrfs_wipe_temporary_sb(fs_info->fs_devices);
|
|
|
|
if (ret) {
|
|
|
|
error("zoned: failed to wipe temporary super blocks: %m");
|
|
|
|
goto skip_commit;
|
|
|
|
}
|
|
|
|
|
2016-08-22 14:32:24 +00:00
|
|
|
btrfs_set_super_magic(fs_info->super_copy, BTRFS_MAGIC);
|
|
|
|
root->fs_info->finalize_on_close = 0;
|
2017-06-13 09:19:27 +00:00
|
|
|
ret = write_all_supers(fs_info);
|
2016-08-22 14:32:24 +00:00
|
|
|
if (ret)
|
|
|
|
fprintf(stderr,
|
|
|
|
"failed to write new super block err %d\n", ret);
|
|
|
|
}
|
2017-08-29 15:22:39 +00:00
|
|
|
|
|
|
|
skip_commit:
|
2009-05-29 20:35:30 +00:00
|
|
|
btrfs_free_block_groups(fs_info);
|
|
|
|
|
2013-07-03 13:25:14 +00:00
|
|
|
free_fs_roots_tree(&fs_info->fs_root_tree);
|
2008-01-04 15:38:22 +00:00
|
|
|
|
2013-07-03 13:25:12 +00:00
|
|
|
btrfs_release_all_roots(fs_info);
|
2017-03-03 17:02:14 +00:00
|
|
|
ret = btrfs_close_devices(fs_info->fs_devices);
|
2013-07-03 13:25:12 +00:00
|
|
|
btrfs_cleanup_all_caches(fs_info);
|
|
|
|
btrfs_free_fs_info(fs_info);
|
2017-08-29 15:22:39 +00:00
|
|
|
if (!err)
|
|
|
|
err = ret;
|
|
|
|
return err;
|
2007-02-02 14:18:22 +00:00
|
|
|
}
|
|
|
|
|
2018-05-28 06:36:45 +00:00
|
|
|
/*
 * Clear the dirty state of @eb.
 *
 * Returns whatever clear_extent_buffer_dirty() returns.
 */
int clean_tree_block(struct extent_buffer *eb)
{
	int ret = clear_extent_buffer_dirty(eb);

	return ret;
}
|
|
|
|
|
|
|
|
/*
 * Mark @eb dirty.  Any result of set_extent_buffer_dirty() is discarded.
 */
void btrfs_mark_buffer_dirty(struct extent_buffer *eb)
{
	(void)set_extent_buffer_dirty(eb);
}
|
|
|
|
|
2008-05-13 17:48:58 +00:00
|
|
|
/*
 * Check whether @buf can be used as-is.
 *
 * Returns 0 when the buffer is not marked uptodate.  Otherwise returns 1 when
 * verify_parent_transid() reports success (0) for @parent_transid, and 0
 * when it does not.
 */
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
{
	int uptodate;
	int mismatch;

	uptodate = extent_buffer_uptodate(buf);
	if (uptodate == 0)
		return uptodate;

	mismatch = verify_parent_transid(&buf->fs_info->extent_cache, buf,
					 parent_transid, 1);
	return mismatch == 0;
}
|
|
|
|
|
|
|
|
/*
 * Mark @eb uptodate.
 *
 * Returns whatever set_extent_buffer_uptodate() returns.
 */
int btrfs_set_buffer_uptodate(struct extent_buffer *eb)
{
	int ret = set_extent_buffer_uptodate(eb);

	return ret;
}
|
2019-01-03 07:32:17 +00:00
|
|
|
|
|
|
|
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
|
|
|
|
struct btrfs_fs_info *fs_info,
|
|
|
|
u64 objectid)
|
|
|
|
{
|
|
|
|
struct extent_buffer *leaf;
|
|
|
|
struct btrfs_root *tree_root = fs_info->tree_root;
|
|
|
|
struct btrfs_root *root;
|
|
|
|
struct btrfs_key key;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
root = kzalloc(sizeof(*root), GFP_KERNEL);
|
|
|
|
if (!root)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
|
|
|
btrfs_setup_root(root, fs_info, objectid);
|
|
|
|
root->root_key.objectid = objectid;
|
|
|
|
root->root_key.type = BTRFS_ROOT_ITEM_KEY;
|
|
|
|
root->root_key.offset = 0;
|
|
|
|
|
|
|
|
leaf = btrfs_alloc_free_block(trans, root, fs_info->nodesize, objectid,
|
|
|
|
NULL, 0, 0, 0);
|
|
|
|
if (IS_ERR(leaf)) {
|
|
|
|
ret = PTR_ERR(leaf);
|
|
|
|
leaf = NULL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
|
|
|
|
btrfs_set_header_bytenr(leaf, leaf->start);
|
|
|
|
btrfs_set_header_generation(leaf, trans->transid);
|
|
|
|
btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
|
|
|
|
btrfs_set_header_owner(leaf, objectid);
|
|
|
|
root->node = leaf;
|
|
|
|
write_extent_buffer(leaf, fs_info->fs_devices->metadata_uuid,
|
|
|
|
btrfs_header_fsid(), BTRFS_FSID_SIZE);
|
|
|
|
write_extent_buffer(leaf, fs_info->chunk_tree_uuid,
|
|
|
|
btrfs_header_chunk_tree_uuid(leaf),
|
|
|
|
BTRFS_UUID_SIZE);
|
|
|
|
btrfs_mark_buffer_dirty(leaf);
|
|
|
|
|
|
|
|
extent_buffer_get(root->node);
|
|
|
|
root->commit_root = root->node;
|
|
|
|
root->track_dirty = 1;
|
|
|
|
|
|
|
|
root->root_item.flags = 0;
|
|
|
|
root->root_item.byte_limit = 0;
|
|
|
|
btrfs_set_root_bytenr(&root->root_item, leaf->start);
|
|
|
|
btrfs_set_root_generation(&root->root_item, trans->transid);
|
|
|
|
btrfs_set_root_level(&root->root_item, 0);
|
|
|
|
btrfs_set_root_refs(&root->root_item, 1);
|
|
|
|
btrfs_set_root_used(&root->root_item, leaf->len);
|
|
|
|
btrfs_set_root_last_snapshot(&root->root_item, 0);
|
|
|
|
btrfs_set_root_dirid(&root->root_item, 0);
|
|
|
|
memset(root->root_item.uuid, 0, BTRFS_UUID_SIZE);
|
|
|
|
root->root_item.drop_level = 0;
|
|
|
|
|
|
|
|
key.objectid = objectid;
|
|
|
|
key.type = BTRFS_ROOT_ITEM_KEY;
|
|
|
|
key.offset = 0;
|
|
|
|
ret = btrfs_insert_root(trans, tree_root, &key, &root->root_item);
|
|
|
|
if (ret)
|
|
|
|
goto fail;
|
|
|
|
|
|
|
|
return root;
|
|
|
|
|
|
|
|
fail:
|
|
|
|
if (leaf)
|
|
|
|
free_extent_buffer(leaf);
|
|
|
|
|
|
|
|
kfree(root);
|
|
|
|
return ERR_PTR(ret);
|
|
|
|
}
|