/*
 * Copyright (C) 2007 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include "kerncompat.h"
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <errno.h>

#include "kernel-lib/list.h"
#include "kernel-lib/raid56.h"
#include "kernel-lib/bitmap.h"
#include "kernel-shared/accessors.h"
#include "kernel-shared/extent-io-tree.h"
#include "kernel-shared/extent_io.h"
#include "kernel-shared/ctree.h"
#include "kernel-shared/volumes.h"
#include "kernel-shared/disk-io.h"
#include "kernel-shared/messages.h"
#include "kernel-shared/uapi/btrfs.h"
#include "kernel-shared/uapi/btrfs_tree.h"
#include "common/messages.h"
#include "common/utils.h"
#include "common/device-utils.h"
#include "common/internal.h"

static void free_extent_buffer_final(struct extent_buffer *eb);
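
/*
 * Set up the extent buffer cache for @fs_info: cap it at a quarter of total
 * system memory and initialize the LRU list used for eviction.
 */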
void extent_buffer_init_cache(struct btrfs_fs_info *fs_info)
{
	fs_info->max_cache_size = total_memory() / 4;
	fs_info->cache_size = 0;
	INIT_LIST_HEAD(&fs_info->lru);
}
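
/*
 * Drop every cached extent buffer. Buffers that still hold references are
 * reported as leaks and forced to be freed, then the cache tree itself is
 * released.
 */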
void extent_buffer_free_cache(struct btrfs_fs_info *fs_info)
{
	struct extent_buffer *eb;

	while (!list_empty(&fs_info->lru)) {
		eb = list_entry(fs_info->lru.next, struct extent_buffer, lru);
		if (eb->refs) {
			/*
			 * Reset extent buffer refs to 1, so that
			 * free_extent_buffer_nocache() can free it for sure.
			 */
			eb->refs = 1;
			fprintf(stderr,
				"extent buffer leak: start %llu len %u\n",
				(unsigned long long)eb->start, eb->len);
			free_extent_buffer_nocache(eb);
		} else {
			free_extent_buffer_final(eb);
		}
	}

	free_extent_cache_tree(&fs_info->extent_cache);
	fs_info->cache_size = 0;
}

/*
 * extent_buffer_bitmap_set - set an area of a bitmap
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @pos: bit number of the first bit
 * @len: number of bits to set
 */
void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
			      unsigned long pos, unsigned long len)
{
	u8 *p = (u8 *)eb->data + start + BIT_BYTE(pos);
	const unsigned int size = pos + len;
	int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);

	while (len >= bits_to_set) {
		*p |= mask_to_set;
		len -= bits_to_set;
		bits_to_set = BITS_PER_BYTE;
		mask_to_set = ~0;
		p++;
	}
	if (len) {
		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
		*p |= mask_to_set;
	}
}

/*
 * extent_buffer_bitmap_clear - clear an area of a bitmap
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @pos: bit number of the first bit
 * @len: number of bits to clear
 */
void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
				unsigned long pos, unsigned long len)
{
	u8 *p = (u8 *)eb->data + start + BIT_BYTE(pos);
	const unsigned int size = pos + len;
	int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
	u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);

	while (len >= bits_to_clear) {
		*p &= ~mask_to_clear;
		len -= bits_to_clear;
		bits_to_clear = BITS_PER_BYTE;
		mask_to_clear = ~0;
		p++;
	}
	if (len) {
		mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
		*p &= ~mask_to_clear;
	}
}
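
/*
 * Allocate and zero an extent buffer of @blocksize bytes for @bytenr. The
 * buffer starts with a single reference and is not yet inserted into the
 * extent buffer cache.
 */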
static struct extent_buffer *__alloc_extent_buffer(struct btrfs_fs_info *info,
						   u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb;

	eb = calloc(1, sizeof(struct extent_buffer) + blocksize);
	if (!eb)
		return NULL;

	eb->start = bytenr;
	eb->len = blocksize;
	eb->refs = 1;
	eb->flags = 0;
	eb->cache_node.start = bytenr;
	eb->cache_node.size = blocksize;
	eb->fs_info = info;
	INIT_LIST_HEAD(&eb->recow);
	INIT_LIST_HEAD(&eb->lru);
	memset_extent_buffer(eb, 0, 0, blocksize);

	return eb;
}
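
/*
 * Create a copy of @src with the same start and length, marked
 * EXTENT_BUFFER_DUMMY so it is never tracked in the extent buffer cache.
 */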
struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
{
	struct extent_buffer *new;

	new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
	if (!new)
		return NULL;

	copy_extent_buffer_full(new, src);
	new->flags |= EXTENT_BUFFER_DUMMY;

	return new;
}
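
/*
 * Really free an extent buffer: remove it from the LRU list and, unless it
 * is a dummy buffer, from the extent cache, updating the cached size.
 */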
static void free_extent_buffer_final(struct extent_buffer *eb)
{
	BUG_ON(eb->refs);
	list_del_init(&eb->lru);
	if (!(eb->flags & EXTENT_BUFFER_DUMMY)) {
		remove_cache_extent(&eb->fs_info->extent_cache, &eb->cache_node);
		BUG_ON(eb->fs_info->cache_size < eb->len);
		eb->fs_info->cache_size -= eb->len;
	}
	kfree(eb);
}
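
/*
 * Drop one reference on @eb. When the count reaches zero the buffer is
 * removed from the recow list and, for dummy buffers or @free_now callers,
 * freed immediately; otherwise it stays cached until evicted.
 */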
static void free_extent_buffer_internal(struct extent_buffer *eb, bool free_now)
{
	if (!eb || IS_ERR(eb))
		return;

	eb->refs--;
	BUG_ON(eb->refs < 0);
	if (eb->refs == 0) {
		if (eb->flags & EXTENT_BUFFER_DIRTY) {
			warning(
				"dirty eb leak (aborted trans): start %llu len %u",
				eb->start, eb->len);
		}
		list_del_init(&eb->recow);
		if (eb->flags & EXTENT_BUFFER_DUMMY || free_now)
			free_extent_buffer_final(eb);
	}
}

void free_extent_buffer(struct extent_buffer *eb)
{
	free_extent_buffer_internal(eb, 0);
}

void free_extent_buffer_nocache(struct extent_buffer *eb)
{
	free_extent_buffer_internal(eb, 1);
}

void free_extent_buffer_stale(struct extent_buffer *eb)
{
	free_extent_buffer_internal(eb, 1);
}
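
/*
 * Look up a cached extent buffer of nodesize at @bytenr. A hit bumps the
 * reference count and moves the buffer to the tail of the LRU list.
 */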
struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
					 u64 bytenr)
{
	struct extent_buffer *eb = NULL;
	struct cache_extent *cache;

	cache = lookup_cache_extent(&fs_info->extent_cache, bytenr,
				    fs_info->nodesize);
	if (cache && cache->start == bytenr &&
	    cache->size == fs_info->nodesize) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &fs_info->lru);
		eb->refs++;
	}
	return eb;
}
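
/*
 * Return the first cached extent buffer found by searching the cache from
 * @start, taking a reference on it and refreshing its LRU position.
 */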
struct extent_buffer *find_first_extent_buffer(struct btrfs_fs_info *fs_info,
					       u64 start)
{
	struct extent_buffer *eb = NULL;
	struct cache_extent *cache;

	cache = search_cache_extent(&fs_info->extent_cache, start);
	if (cache) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &fs_info->lru);
		eb->refs++;
	}
	return eb;
}
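
/*
 * Evict unreferenced extent buffers from the LRU until the cache drops
 * below 90% of its maximum size.
 */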
static void trim_extent_buffer_cache(struct btrfs_fs_info *fs_info)
{
	struct extent_buffer *eb, *tmp;

	list_for_each_entry_safe(eb, tmp, &fs_info->lru, lru) {
		if (eb->refs == 0)
			free_extent_buffer_final(eb);
		if (fs_info->cache_size <= ((fs_info->max_cache_size * 9) / 10))
			break;
	}
}
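
/*
 * Return the cached extent buffer for @bytenr/@blocksize or allocate a new
 * one and insert it into the cache and LRU. If an overlapping entry of a
 * different size exists, a reference on it is dropped first. The cache is
 * trimmed once it reaches its size limit.
 */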
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
					  u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb;
	struct cache_extent *cache;

	cache = lookup_cache_extent(&fs_info->extent_cache, bytenr, blocksize);
	if (cache && cache->start == bytenr &&
	    cache->size == blocksize) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &fs_info->lru);
		eb->refs++;
	} else {
		int ret;

		if (cache) {
			eb = container_of(cache, struct extent_buffer,
					  cache_node);
			free_extent_buffer(eb);
		}
		eb = __alloc_extent_buffer(fs_info, bytenr, blocksize);
		if (!eb)
			return NULL;
		ret = insert_cache_extent(&fs_info->extent_cache, &eb->cache_node);
		if (ret) {
			kfree(eb);
			return NULL;
		}
		list_add_tail(&eb->lru, &fs_info->lru);
		fs_info->cache_size += blocksize;
		if (fs_info->cache_size >= fs_info->max_cache_size)
			trim_extent_buffer_cache(fs_info);
	}
	return eb;
}

/*
 * Allocate a dummy extent buffer which won't be inserted into the extent
 * buffer cache.
 *
 * This mostly allows super block read/write using the existing eb
 * infrastructure without polluting the eb cache.
 *
 * This is especially important to avoid injecting eb->start == SZ_64K, as a
 * fuzzed image could have an invalid tree bytenr covering the super block
 * range and cause a ref count underflow.
 */
struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
						u64 bytenr, u32 blocksize)
{
	struct extent_buffer *ret;

	ret = __alloc_extent_buffer(fs_info, bytenr, blocksize);
	if (!ret)
		return NULL;

	ret->flags |= EXTENT_BUFFER_DUMMY;

	return ret;
}
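
/*
 * Rebuild data in a RAID5/6 full stripe using P/Q.
 *
 * The full stripe (all data and P/Q stripes) is read into memory, stripes
 * that cannot be read (e.g. from missing devices) are recorded in a failure
 * bitmap, the missing data is recomputed with raid56_recov(), and only the
 * requested range is copied back to @buf.
 *
 * For RAID6 there is no way to exhaust all combinations, so if only the
 * requested data stripe is known to be bad, P is assumed to be the second
 * corrupted stripe. This is only a guess and can be wrong.
 */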
static int read_raid56(struct btrfs_fs_info *fs_info, void *buf, u64 logical,
		       u64 len, int mirror, struct btrfs_multi_bio *multi,
		       u64 *raid_map)
{
	const int tolerance = (multi->type & BTRFS_BLOCK_GROUP_RAID6 ? 2 : 1);
	const int num_stripes = multi->num_stripes;
	const u64 full_stripe_start = raid_map[0];
	void **pointers = NULL;
	unsigned long *failed_stripe_bitmap = NULL;
	int failed_a = -1;
	int failed_b = -1;
	int i;
	int ret;

	/* Only read repair should go this path */
	ASSERT(mirror > 1);
	ASSERT(raid_map);

	/* The read length should be inside one stripe */
	ASSERT(len <= BTRFS_STRIPE_LEN);

	pointers = calloc(num_stripes, sizeof(void *));
	if (!pointers)
		return -ENOMEM;

	/* Allocate memory for the full stripe */
	for (i = 0; i < num_stripes; i++) {
		pointers[i] = kmalloc(BTRFS_STRIPE_LEN, GFP_KERNEL);
		if (!pointers[i]) {
			ret = -ENOMEM;
			goto out;
		}
	}

	failed_stripe_bitmap = bitmap_zalloc(num_stripes);
	if (!failed_stripe_bitmap) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Read the full stripe.
	 *
	 * The stripes in @multi are not rotated, thus can be used to read
	 * from disk directly.
	 */
	for (i = 0; i < num_stripes; i++) {
		ret = btrfs_pread(multi->stripes[i].dev->fd, pointers[i],
				  BTRFS_STRIPE_LEN, multi->stripes[i].physical,
				  fs_info->zoned);
		if (ret < BTRFS_STRIPE_LEN)
			set_bit(i, failed_stripe_bitmap);
	}

	/*
	 * Get the failed index.
	 *
	 * Since we're reading using mirror_num > 1 already, it means the data
	 * stripe where @logical lies is definitely corrupted.
	 */
	set_bit((logical - full_stripe_start) / BTRFS_STRIPE_LEN, failed_stripe_bitmap);

	/*
	 * For RAID6, we don't have a good way to exhaust all the combinations,
	 * so here we can only go through the map to see if we have missing
	 * devices.
	 *
	 * If we only have one failed stripe (marked by the set_bit() above),
	 * then we have no better idea and fall back to assuming P is
	 * corrupted.
	 */
	if (multi->type & BTRFS_BLOCK_GROUP_RAID6 &&
	    bitmap_weight(failed_stripe_bitmap, num_stripes) < 2)
		set_bit(num_stripes - 2, failed_stripe_bitmap);

	/* Damaged beyond repair already. */
	if (bitmap_weight(failed_stripe_bitmap, num_stripes) > tolerance) {
		ret = -EIO;
		goto out;
	}

	for_each_set_bit(i, failed_stripe_bitmap, num_stripes) {
		if (failed_a < 0)
			failed_a = i;
		else if (failed_b < 0)
			failed_b = i;
	}

	/* Rebuild the full stripe */
	ret = raid56_recov(num_stripes, BTRFS_STRIPE_LEN, multi->type,
			   failed_a, failed_b, pointers);
	ASSERT(ret == 0);

	/* Now copy the data back to the original buf */
	memcpy(buf, pointers[failed_a] + (logical - full_stripe_start) %
			BTRFS_STRIPE_LEN, len);
	ret = 0;
out:
	kfree(failed_stripe_bitmap);
	for (i = 0; i < num_stripes; i++)
		kfree(pointers[i]);
	kfree(pointers);
	return ret;
}
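
/*
 * Read up to @*len bytes at @logical using mirror @mirror. For RAID5/6
 * chunks, a read with mirror > 1 is rebuilt from parity via read_raid56().
 */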

int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 logical,
			u64 *len, int mirror)
{
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_device *device;
	u64 read_len = *len;
	u64 *raid_map = NULL;
	int ret;

	ret = btrfs_map_block(info, READ, logical, &read_len, &multi, mirror,
			      &raid_map);
	if (ret) {
		fprintf(stderr, "Couldn't map the block %llu\n", logical);
		return -EIO;
	}
	read_len = min(*len, read_len);

	/* We need to rebuild from P/Q */
	if (mirror > 1 && multi->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
		ret = read_raid56(info, buf, logical, read_len, mirror, multi,
				  raid_map);
		kfree(multi);
		kfree(raid_map);
		*len = read_len;
		return ret;
	}
	kfree(raid_map);
	device = multi->stripes[0].dev;

	if (device->fd <= 0) {
		kfree(multi);
		return -EIO;
	}

	ret = btrfs_pread(device->fd, buf, read_len,
			  multi->stripes[0].physical, info->zoned);
	kfree(multi);
	if (ret < 0) {
		fprintf(stderr, "Error reading %llu, %d\n", logical, ret);
		return ret;
	}
	if (ret != read_len) {
		fprintf(stderr,
			"Short read for %llu, read %d, read_len %llu\n",
			logical, ret, read_len);
		return -EIO;
	}
	*len = read_len;
	return 0;
}
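
/*
 * Usage sketch (an illustration added for this writeup, not part of the
 * original source): callers such as read_tree_block() retry a read with
 * increasing mirror numbers.  For a RAID56 chunk, mirror 0/1 reads the data
 * stripes directly and any mirror above 1 goes through read_raid56() to
 * rebuild the data from P/Q.  The fs_info and bytenr values are assumed to
 * come from the caller; the block is assumed to be nodesize bytes long.
 */
#if 0
static int example_read_with_rebuild(struct btrfs_fs_info *fs_info, u64 bytenr)
{
	u64 len = fs_info->nodesize;
	char *buf = malloc(len);
	int mirror;
	int ret = -EIO;

	if (!buf)
		return -ENOMEM;
	/* Mirror 1 is the direct read, higher mirrors force a P/Q rebuild. */
	for (mirror = 1; mirror <= 3 && ret < 0; mirror++)
		ret = read_data_from_disk(fs_info, buf, bytenr, &len, mirror);
	free(buf);
	return ret;
}
#endif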

Btrfs-progs: enhance btrfs-image to restore image onto multiple disks
This adds a 'btrfs-image -m' option, which lets us restore an image built
from a btrfs filesystem of multiple disks onto several disks at once.
It addresses the following case:
$ mkfs.btrfs -m raid0 sda sdb
$ btrfs-image sda image.file
$ btrfs-image -r image.file sdc
---------
Here we can only restore the metadata onto sdc, and sdc can only be mounted
in degraded mode because we provide no information about the other disk.
Since the metadata is RAID0 and only one disk is present, the mounted
filesystem ends up read-only.
This is just annoying for people (like me) who try to restore an image only
to find it does not work.
So this makes life easier, just run
$ btrfs-image -m image.file sdc sdd
---------
and all the metadata lands at the same offsets as on the originals (of
course, you need to provide enough disk space, at least the size of the
original disks).
Besides, this also works with raid5 and raid6 metadata images.
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>

/*
 * Write the data in @buf to logical bytenr @offset.
 *
 * Such data will be written to all mirrors and RAID56 P/Q will also be
 * properly handled.
 */
int write_data_to_disk(struct btrfs_fs_info *info, const void *buf, u64 offset,
		       u64 bytes)
{
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_device *device;
	u64 bytes_left = bytes;
	u64 this_len;
	u64 total_write = 0;
	u64 *raid_map = NULL;
	u64 dev_bytenr;
	int dev_nr;
	int ret = 0;

	while (bytes_left > 0) {
		this_len = bytes_left;
		dev_nr = 0;

		ret = btrfs_map_block(info, WRITE, offset, &this_len, &multi,
				      0, &raid_map);
		if (ret) {
			fprintf(stderr, "Couldn't map the block %llu\n",
				offset);
			return -EIO;
		}

		if (raid_map) {
			struct extent_buffer *eb;
			u64 stripe_len = this_len;

			this_len = min(this_len, bytes_left);
			this_len = min(this_len, (u64)info->nodesize);

			eb = kmalloc(sizeof(struct extent_buffer) + this_len, GFP_KERNEL);
			if (!eb) {
				error_msg(ERROR_MSG_MEMORY, "extent buffer");
				ret = -ENOMEM;
				goto out;
			}

			memset(eb, 0, sizeof(struct extent_buffer) + this_len);
			eb->start = offset;
			eb->len = this_len;

			memcpy(eb->data, buf + total_write, this_len);
			ret = write_raid56_with_parity(info, eb, multi,
						       stripe_len, raid_map);
			BUG_ON(ret < 0);

			kfree(eb);
			kfree(raid_map);
			raid_map = NULL;
		} else while (dev_nr < multi->num_stripes) {
			device = multi->stripes[dev_nr].dev;
			if (device->fd <= 0) {
				kfree(multi);
				return -EIO;
			}

			dev_bytenr = multi->stripes[dev_nr].physical;
			this_len = min(this_len, bytes_left);
			dev_nr++;
			device->total_ios++;

			ret = btrfs_pwrite(device->fd, buf + total_write,
					   this_len, dev_bytenr, info->zoned);
			if (ret != this_len) {
				if (ret < 0) {
					fprintf(stderr,
						"Error writing to device %d\n",
						errno);
					ret = -errno;
					kfree(multi);
					return ret;
				} else {
					fprintf(stderr, "Short write\n");
					kfree(multi);
					return -EIO;
				}
			}
		}

		BUG_ON(bytes_left < this_len);

		bytes_left -= this_len;
		offset += this_len;
		total_write += this_len;

		kfree(multi);
		multi = NULL;
	}
	return 0;

out:
	kfree(raid_map);
	return ret;
}
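
/*
 * Usage sketch (an illustration added for this writeup, not part of the
 * original source): push one node-sized buffer back to disk, letting
 * write_data_to_disk() fan it out to every mirror and regenerate RAID56
 * parity where needed.  The fs_info, bytenr and data values are assumed to
 * come from the caller.
 */
#if 0
static int example_write_all_copies(struct btrfs_fs_info *fs_info, u64 bytenr,
				    const void *data)
{
	/*
	 * One call covers every stripe; RAID56 chunks go through
	 * write_raid56_with_parity() internally.
	 */
	return write_data_to_disk(fs_info, data, bytenr, fs_info->nodesize);
}
#endif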

int set_extent_buffer_dirty(struct extent_buffer *eb)
{
	struct extent_io_tree *tree = &eb->fs_info->dirty_buffers;

	if (!(eb->flags & EXTENT_BUFFER_DIRTY)) {
		eb->flags |= EXTENT_BUFFER_DIRTY;
		set_extent_dirty(tree, eb->start, eb->start + eb->len - 1,
				 GFP_NOFS);
		extent_buffer_get(eb);
	}
	return 0;
}

int btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
			     struct extent_buffer *eb)
{
	struct extent_io_tree *tree = &eb->fs_info->dirty_buffers;

	if (eb->flags & EXTENT_BUFFER_DIRTY) {
		eb->flags &= ~EXTENT_BUFFER_DIRTY;
		clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1,
				   NULL);
		free_extent_buffer(eb);
	}
	return 0;
}

int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
			 unsigned long start, unsigned long len)
{
	return memcmp(eb->data + start, ptrv, len);
}

void read_extent_buffer(const struct extent_buffer *eb, void *dst,
			unsigned long start, unsigned long len)
{
	memcpy(dst, eb->data + start, len);
}

void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *src)
{
	write_extent_buffer(eb, src, btrfs_header_fsid(), BTRFS_FSID_SIZE);
}

void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
					 const void *src)
{
	write_extent_buffer(eb, src, btrfs_header_chunk_tree_uuid(eb), BTRFS_FSID_SIZE);
}

void write_extent_buffer(const struct extent_buffer *eb, const void *src,
			 unsigned long start, unsigned long len)
{
	memcpy((void *)eb->data + start, src, len);
}

void copy_extent_buffer_full(const struct extent_buffer *dst,
			     const struct extent_buffer *src)
{
	copy_extent_buffer(dst, src, 0, 0, src->len);
}

void copy_extent_buffer(const struct extent_buffer *dst,
			const struct extent_buffer *src,
			unsigned long dst_offset, unsigned long src_offset,
			unsigned long len)
{
	memcpy((void *)dst->data + dst_offset, src->data + src_offset, len);
}

void memcpy_extent_buffer(const struct extent_buffer *dst, unsigned long dst_offset,
			  unsigned long src_offset, unsigned long len)
{
	memcpy((void *)dst->data + dst_offset, dst->data + src_offset, len);
}

void memmove_extent_buffer(const struct extent_buffer *dst, unsigned long dst_offset,
			   unsigned long src_offset, unsigned long len)
{
	memmove((void *)dst->data + dst_offset, dst->data + src_offset, len);
}

void memset_extent_buffer(const struct extent_buffer *eb, char c,
			  unsigned long start, unsigned long len)
{
	memset((void *)eb->data + start, c, len);
}

int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
			   unsigned long nr)
{
	return le_test_bit(nr, (u8 *)eb->data + start);
}

/*
 * btrfs_readahead_node_child - readahead a node's child block
 * @node: parent node we're reading from
 * @slot: slot in the parent node for the child we want to read
 *
 * A helper for readahead_tree_block, we simply read the bytenr pointed at the
 * slot in the node provided.
 */
void btrfs_readahead_node_child(struct extent_buffer *node, int slot)
{
	readahead_tree_block(node->fs_info, btrfs_node_blockptr(node, slot),
			     btrfs_node_ptr_generation(node, slot));
}
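
/*
 * Usage sketch (an illustration added for this writeup, not part of the
 * original source): while walking down an internal node, readahead every
 * child pointer so that the subsequent read_tree_block() calls hit warm
 * caches.  The node is assumed to be a level > 0 extent buffer the caller
 * already holds a reference on.
 */
#if 0
static void example_readahead_children(struct extent_buffer *node)
{
	int nritems = btrfs_header_nritems(node);
	int slot;

	for (slot = 0; slot < nritems; slot++)
		btrfs_readahead_node_child(node, slot);
}
#endif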