btrfs-progs/kernel-shared/ctree.h

1242 lines
39 KiB
C
Raw Normal View History

2007-06-12 13:07:11 +00:00
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __BTRFS_CTREE_H__
#define __BTRFS_CTREE_H__
2007-02-02 14:18:22 +00:00
#include "kerncompat.h"
#include <stdbool.h>
#include <stddef.h>
#include "kernel-lib/list.h"
#include "kernel-lib/bitops.h"
#include "kernel-lib/rbtree_types.h"
#include "kernel-shared/uapi/btrfs.h"
#include "kernel-shared/uapi/btrfs_tree.h"
#include "kernel-shared/extent_io.h"
#include "kernel-shared/accessors.h"
#include "kernel-shared/extent-io-tree.h"
#include "kernel-shared/locking.h"
#include "crypto/crc32c.h"
#include "common/extent-cache.h"
struct btrfs_root;
2007-03-16 20:20:31 +00:00
struct btrfs_trans_handle;
struct btrfs_free_space_ctl;
2007-02-02 14:18:22 +00:00
/*
* Fake signature for an unfinalized filesystem, which only has barebone tree
* structures (normally 6 near empty trees, on SINGLE meta/sys temporary chunks)
*
* ascii !BHRfS_M, no null
*/
#define BTRFS_MAGIC_TEMPORARY 0x4D5F536652484221ULL
#define BTRFS_MAX_MIRRORS 3
struct btrfs_mapping_tree {
struct cache_tree cache_tree;
};
static inline unsigned long btrfs_chunk_item_size(int num_stripes)
{
BUG_ON(num_stripes == 0);
return sizeof(struct btrfs_chunk) +
sizeof(struct btrfs_stripe) * (num_stripes - 1);
}
btrfs-progs: fix printf formats on 32bit x86 When compiling btrfs-progs on 32bit x86 using GCC 11.1.0, there are several warnings: In file included from ./common/utils.h:30, from check/main.c:36: check/main.c: In function 'run_next_block': ./common/messages.h:42:31: warning: format '%lu' expects argument of type 'long unsigned int', but argument 4 has type 'u32' {aka 'unsigned int'} [-Wformat=] 42 | __btrfs_error((fmt), ##__VA_ARGS__); \ | ^~~~~ check/main.c:6496:33: note: in expansion of macro 'error' 6496 | error( | ^~~~~ In file included from ./common/utils.h:30, from kernel-shared/volumes.c:32: kernel-shared/volumes.c: In function 'btrfs_check_chunk_valid': ./common/messages.h:42:31: warning: format '%lu' expects argument of type 'long unsigned int', but argument 4 has type 'u32' {aka 'unsigned int'} [-Wformat=] 42 | __btrfs_error((fmt), ##__VA_ARGS__); \ | ^~~~~ kernel-shared/volumes.c:2052:17: note: in expansion of macro 'error' 2052 | error("invalid chunk item size, have %u expect [%zu, %lu)", | ^~~~~ image/main.c: In function 'search_for_chunk_blocks': ./common/messages.h:42:31: warning: format '%lu' expects argument of type 'long unsigned int', but argument 3 has type 'size_t' {aka 'unsigned int'} [-Wformat=] 42 | __btrfs_error((fmt), ##__VA_ARGS__); \ | ^~~~~ image/main.c:2122:33: note: in expansion of macro 'error' 2122 | error( | ^~~~~ There are two types of problems: - __BTRFS_LEAF_DATA_SIZE() This macro has no type definition, making it behaves differently on different arches. Fix this by following kernel to use inline function to make its return value fixed to u32. - size_t related output For x86_64 %lu is OK but not for x86. Fix this by using %zu. Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2021-10-27 12:48:46 +00:00
static inline u32 __BTRFS_LEAF_DATA_SIZE(u32 nodesize)
{
return nodesize - sizeof(struct btrfs_header);
}
#define BTRFS_LEAF_DATA_SIZE(fs_info) (fs_info->leaf_data_size)
#define BTRFS_SUPER_INFO_OFFSET (65536)
#define BTRFS_SUPER_INFO_SIZE (4096)
/*
* The FREE_SPACE_TREE and FREE_SPACE_TREE_VALID compat_ro bits must not be
* added here until read-write support for the free space tree is implemented in
* btrfs-progs.
*/
#define BTRFS_FEATURE_COMPAT_RO_SUPP \
(BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE | \
BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID | \
BTRFS_FEATURE_COMPAT_RO_VERITY | \
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE)
#if EXPERIMENTAL
#define BTRFS_FEATURE_INCOMPAT_SUPP \
(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \
BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD | \
BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \
BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \
BTRFS_FEATURE_INCOMPAT_RAID56 | \
BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \
BTRFS_FEATURE_INCOMPAT_NO_HOLES | \
BTRFS_FEATURE_INCOMPAT_RAID1C34 | \
BTRFS_FEATURE_INCOMPAT_METADATA_UUID | \
BTRFS_FEATURE_INCOMPAT_ZONED | \
BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2 | \
BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE | \
BTRFS_FEATURE_INCOMPAT_SIMPLE_QUOTA)
#else
#define BTRFS_FEATURE_INCOMPAT_SUPP \
(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \
BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD | \
BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \
BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \
BTRFS_FEATURE_INCOMPAT_RAID56 | \
BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \
BTRFS_FEATURE_INCOMPAT_NO_HOLES | \
BTRFS_FEATURE_INCOMPAT_RAID1C34 | \
BTRFS_FEATURE_INCOMPAT_METADATA_UUID | \
BTRFS_FEATURE_INCOMPAT_ZONED | \
BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE | \
BTRFS_FEATURE_INCOMPAT_SIMPLE_QUOTA)
#endif
/*
2007-03-13 14:46:10 +00:00
* btrfs_paths remember the path taken from the root down to the leaf.
* level 0 is always the leaf, and nodes[1...BTRFS_MAX_LEVEL] will point
* to any other levels that are present.
*
* The slots array records the index of the item or block pointer
* used while walking the tree.
*/
enum {
READA_NONE,
READA_BACK,
READA_FORWARD,
/*
* Similar to READA_FORWARD but unlike it:
*
* 1) It will trigger readahead even for leaves that are not close to
* each other on disk;
* 2) It also triggers readahead for nodes;
* 3) During a search, even when a node or leaf is already in memory, it
* will still trigger readahead for other nodes and leaves that follow
* it.
*
* This is meant to be used only when we know we are iterating over the
* entire tree or a very large part of it.
*/
READA_FORWARD_ALWAYS,
};
/*
* btrfs_paths remember the path taken from the root down to the leaf.
* level 0 is always the leaf, and nodes[1...BTRFS_MAX_LEVEL] will point
* to any other levels that are present.
*
* The slots array records the index of the item or block pointer
* used while walking the tree.
*/
2007-03-13 14:46:10 +00:00
struct btrfs_path {
struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
2007-03-13 14:46:10 +00:00
int slots[BTRFS_MAX_LEVEL];
/* The kernel locking scheme is not done in userspace. */
u8 locks[BTRFS_MAX_LEVEL];
u8 reada;
/* keep some upper locks as we walk down */
u8 lowest_level;
/*
* set by btrfs_split_item, tells search_slot to keep all locks
* and to force calls to keep space in the nodes
*/
unsigned int search_for_split:1;
unsigned int keep_locks:1;
unsigned int skip_locking:1;
unsigned int search_commit_root:1;
unsigned int need_commit_sem:1;
unsigned int skip_release_on_error:1;
/*
* Indicate that new item (btrfs_search_slot) is extending already
* existing item and ins_len contains only the data size and not item
* header (ie. sizeof(struct btrfs_item) is not included).
*/
unsigned int search_for_extension:1;
/* Stop search if any locks need to be taken (for read) */
unsigned int nowait:1;
unsigned int skip_check_block:1;
2007-02-02 14:18:22 +00:00
};
2007-02-24 11:24:44 +00:00
#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) \
((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \
sizeof(struct btrfs_item))
#define BTRFS_MAX_EXTENT_SIZE 128UL * 1024 * 1024
btrfs-progs: calculate available blocks on device properly I found that mkfs.btrfs aborts when assigned multi volumes contain a small volume: # parted /dev/sdf p Model: LSI MegaRAID SAS RMB (scsi) Disk /dev/sdf: 72.8GB Sector size (logical/physical): 512B/512B Partition Table: msdos Number Start End Size Type File system Flags 1 32.3kB 72.4GB 72.4GB primary 2 72.4GB 72.8GB 461MB primary # ./mkfs.btrfs -f /dev/sdf1 /dev/sdf2 : SMALL VOLUME: forcing mixed metadata/data groups adding device /dev/sdf2 id 2 mkfs.btrfs: volumes.c:852: btrfs_alloc_chunk: Assertion `!(ret)' failed. Aborted (core dumped) This failure of btrfs_alloc_chunk was caused by following steps: 1) since there is only small space in the small device, mkfs was going to allocate a chunk from free space as much as available. So mkfs called btrfs_alloc_chunk with size = device->total_bytes - device->used_bytes. 2) (According to the comment in source code, to avoid overwriting superblock,) btrfs_alloc_chunk starts taking chunks at an offset of 1MB. It means that the layout of a disk will be like: [[1MB at beginning for sb][allocated chunks]* ... free space ... ] and you can see that the available free space for allocation is: avail = device->total_bytes - device->used_bytes - 1MB. 3) Therefore there is only free space 1MB less than requested. damn. >From further investigations I also found that this issue is easily reproduced by using -A, --alloc-start option: # truncate --size=1G testfile # ./mkfs.btrfs -A900M -f testfile : mkfs.btrfs: volumes.c:852: btrfs_alloc_chunk: Assertion `!(ret)' failed. Aborted (core dumped) In this case there is only 100MB for allocation but btrfs_alloc_chunk was going to allocate more than the 100MB. The root cause of both of above troubles is a same simple bug: btrfs_chunk_alloc does not calculate available bytes properly even though it researches how many devices have enough room to have a chunk to be allocated. So this patch introduces new function btrfs_device_avail_bytes() which returns available bytes for allocation in specified device. Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> Signed-off-by: David Sterba <dsterba@suse.cz> Signed-off-by: Chris Mason <chris.mason@fusionio.com>
2013-09-05 06:57:19 +00:00
/*
* We don't want to overwrite 1M at the beginning of device, even though
* there is our 1st superblock at 64k. Some possible reasons:
* - the first 64k blank is useful for some boot loader/manager
* - the first 1M could be scratched by buggy partitioner or somesuch
*/
#define BTRFS_BLOCK_RESERVED_1M_FOR_SUPER ((u64)1 * 1024 * 1024)
btrfs-progs: calculate available blocks on device properly I found that mkfs.btrfs aborts when assigned multi volumes contain a small volume: # parted /dev/sdf p Model: LSI MegaRAID SAS RMB (scsi) Disk /dev/sdf: 72.8GB Sector size (logical/physical): 512B/512B Partition Table: msdos Number Start End Size Type File system Flags 1 32.3kB 72.4GB 72.4GB primary 2 72.4GB 72.8GB 461MB primary # ./mkfs.btrfs -f /dev/sdf1 /dev/sdf2 : SMALL VOLUME: forcing mixed metadata/data groups adding device /dev/sdf2 id 2 mkfs.btrfs: volumes.c:852: btrfs_alloc_chunk: Assertion `!(ret)' failed. Aborted (core dumped) This failure of btrfs_alloc_chunk was caused by following steps: 1) since there is only small space in the small device, mkfs was going to allocate a chunk from free space as much as available. So mkfs called btrfs_alloc_chunk with size = device->total_bytes - device->used_bytes. 2) (According to the comment in source code, to avoid overwriting superblock,) btrfs_alloc_chunk starts taking chunks at an offset of 1MB. It means that the layout of a disk will be like: [[1MB at beginning for sb][allocated chunks]* ... free space ... ] and you can see that the available free space for allocation is: avail = device->total_bytes - device->used_bytes - 1MB. 3) Therefore there is only free space 1MB less than requested. damn. >From further investigations I also found that this issue is easily reproduced by using -A, --alloc-start option: # truncate --size=1G testfile # ./mkfs.btrfs -A900M -f testfile : mkfs.btrfs: volumes.c:852: btrfs_alloc_chunk: Assertion `!(ret)' failed. Aborted (core dumped) In this case there is only 100MB for allocation but btrfs_alloc_chunk was going to allocate more than the 100MB. The root cause of both of above troubles is a same simple bug: btrfs_chunk_alloc does not calculate available bytes properly even though it researches how many devices have enough room to have a chunk to be allocated. So this patch introduces new function btrfs_device_avail_bytes() which returns available bytes for allocation in specified device. Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> Signed-off-by: David Sterba <dsterba@suse.cz> Signed-off-by: Chris Mason <chris.mason@fusionio.com>
2013-09-05 06:57:19 +00:00
enum btrfs_raid_types {
BTRFS_RAID_RAID10,
BTRFS_RAID_RAID1,
BTRFS_RAID_DUP,
BTRFS_RAID_RAID0,
BTRFS_RAID_SINGLE,
BTRFS_RAID_RAID5,
BTRFS_RAID_RAID6,
BTRFS_RAID_RAID1C3,
BTRFS_RAID_RAID1C4,
BTRFS_NR_RAID_TYPES
};
/*
* GLOBAL_RSV does not exist as a on-disk block group type and is used
* internally for exporting info about global block reserve from space infos
*/
#define BTRFS_SPACE_INFO_GLOBAL_RSV (1ULL << 49)
#define BTRFS_QGROUP_LEVEL_SHIFT 48
static inline u64 btrfs_qgroup_subvolid(u64 qgroupid)
{
return qgroupid & ((1ULL << BTRFS_QGROUP_LEVEL_SHIFT) - 1);
}
2008-03-24 19:03:58 +00:00
struct btrfs_space_info {
u64 flags;
u64 total_bytes;
btrfs-progs: Fix false ENOSPC alert by tracking used space correctly [BUG] There is a bug report of unexpected ENOSPC from btrfs-convert, issue #123. After some debugging, even when we have enough unallocated space, we still hit ENOSPC at btrfs_reserve_extent(). [CAUSE] Btrfs-progs relies on chunk preallocator to make enough space for data/metadata. However after the introduction of delayed-ref, it's no longer reliable to rely on btrfs_space_info::bytes_used and btrfs_space_info::bytes_pinned to calculate used metadata space. For a running transaction with a lot of allocated tree blocks, btrfs_space_info::bytes_used stays its original value, and will only be updated when running delayed ref. This makes btrfs-progs chunk preallocator completely useless. And for btrfs-convert/mkfs.btrfs --rootdir, if we're going to have enough metadata to fill a metadata block group in one transaction, we will hit ENOSPC no matter whether we have enough unallocated space. [FIX] This patch will introduce btrfs_space_info::bytes_reserved to track how many space we have reserved but not yet committed to extent tree. To support this change, this commit also introduces the following modification: - More comment on btrfs_space_info::bytes_* To make code a little easier to read - Export update_space_info() to preallocate empty data/metadata space info for mkfs. For mkfs, we only have a temporary fs image with SYSTEM chunk only. Export update_space_info() so that we can preallocate empty data/metadata space info before we start a transaction. - Proper btrfs_space_info::bytes_reserved update The timing is the as kernel (except we don't need to update bytes_reserved for data extents) * Increase bytes_reserved when call alloc_reserved_tree_block() * Decrease bytes_reserved when running delayed refs With the help of head->must_insert_reserved to determine whether we need to decrease. Issue: #123 Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-05-24 23:32:43 +00:00
/*
* Space already used.
* Only accounting space in current extent tree, thus delayed ref
* won't be accounted here.
*/
2008-03-24 19:03:58 +00:00
u64 bytes_used;
btrfs-progs: Fix false ENOSPC alert by tracking used space correctly [BUG] There is a bug report of unexpected ENOSPC from btrfs-convert, issue #123. After some debugging, even when we have enough unallocated space, we still hit ENOSPC at btrfs_reserve_extent(). [CAUSE] Btrfs-progs relies on chunk preallocator to make enough space for data/metadata. However after the introduction of delayed-ref, it's no longer reliable to rely on btrfs_space_info::bytes_used and btrfs_space_info::bytes_pinned to calculate used metadata space. For a running transaction with a lot of allocated tree blocks, btrfs_space_info::bytes_used stays its original value, and will only be updated when running delayed ref. This makes btrfs-progs chunk preallocator completely useless. And for btrfs-convert/mkfs.btrfs --rootdir, if we're going to have enough metadata to fill a metadata block group in one transaction, we will hit ENOSPC no matter whether we have enough unallocated space. [FIX] This patch will introduce btrfs_space_info::bytes_reserved to track how many space we have reserved but not yet committed to extent tree. To support this change, this commit also introduces the following modification: - More comment on btrfs_space_info::bytes_* To make code a little easier to read - Export update_space_info() to preallocate empty data/metadata space info for mkfs. For mkfs, we only have a temporary fs image with SYSTEM chunk only. Export update_space_info() so that we can preallocate empty data/metadata space info before we start a transaction. - Proper btrfs_space_info::bytes_reserved update The timing is the as kernel (except we don't need to update bytes_reserved for data extents) * Increase bytes_reserved when call alloc_reserved_tree_block() * Decrease bytes_reserved when running delayed refs With the help of head->must_insert_reserved to determine whether we need to decrease. Issue: #123 Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-05-24 23:32:43 +00:00
/*
* Space being pinned down.
* So extent allocator will not try to allocate space from them.
*
* For cases like extents being freed in current transaction, or
* manually pinned bytes for re-initializing certain trees.
*/
2008-03-24 19:03:58 +00:00
u64 bytes_pinned;
btrfs-progs: Fix false ENOSPC alert by tracking used space correctly [BUG] There is a bug report of unexpected ENOSPC from btrfs-convert, issue #123. After some debugging, even when we have enough unallocated space, we still hit ENOSPC at btrfs_reserve_extent(). [CAUSE] Btrfs-progs relies on chunk preallocator to make enough space for data/metadata. However after the introduction of delayed-ref, it's no longer reliable to rely on btrfs_space_info::bytes_used and btrfs_space_info::bytes_pinned to calculate used metadata space. For a running transaction with a lot of allocated tree blocks, btrfs_space_info::bytes_used stays its original value, and will only be updated when running delayed ref. This makes btrfs-progs chunk preallocator completely useless. And for btrfs-convert/mkfs.btrfs --rootdir, if we're going to have enough metadata to fill a metadata block group in one transaction, we will hit ENOSPC no matter whether we have enough unallocated space. [FIX] This patch will introduce btrfs_space_info::bytes_reserved to track how many space we have reserved but not yet committed to extent tree. To support this change, this commit also introduces the following modification: - More comment on btrfs_space_info::bytes_* To make code a little easier to read - Export update_space_info() to preallocate empty data/metadata space info for mkfs. For mkfs, we only have a temporary fs image with SYSTEM chunk only. Export update_space_info() so that we can preallocate empty data/metadata space info before we start a transaction. - Proper btrfs_space_info::bytes_reserved update The timing is the as kernel (except we don't need to update bytes_reserved for data extents) * Increase bytes_reserved when call alloc_reserved_tree_block() * Decrease bytes_reserved when running delayed refs With the help of head->must_insert_reserved to determine whether we need to decrease. Issue: #123 Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-05-24 23:32:43 +00:00
/*
* Space being reserved.
* Space has already being reserved but not yet reach extent tree.
*
* New tree blocks allocated in current transaction goes here.
*/
u64 bytes_reserved;
2008-03-24 19:03:58 +00:00
int full;
struct list_head list;
};
struct btrfs_block_group {
2008-03-24 19:03:58 +00:00
struct btrfs_space_info *space_info;
struct btrfs_free_space_ctl *free_space_ctl;
u64 start;
u64 length;
u64 used;
u64 bytes_super;
u64 pinned;
u64 flags;
int cached;
int ro;
/*
* If the free space extent count exceeds this number, convert the block
* group to bitmaps.
*/
u32 bitmap_high_thresh;
/*
* If the free space extent count drops below this number, convert the
* block group back to extents.
*/
u32 bitmap_low_thresh;
/* Block group cache stuff */
struct rb_node cache_node;
/* For dirty block groups */
struct list_head dirty_list;
/*
* Allocation offset for the block group to implement sequential
* allocation. This is used only with ZONED mode enabled.
*/
u64 alloc_offset;
u64 write_offset;
u64 global_root_id;
2007-04-26 20:46:06 +00:00
};
struct btrfs_device;
struct btrfs_fs_devices;
struct btrfs_fs_info {
u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
u8 *new_chunk_tree_uuid;
struct btrfs_root *fs_root;
struct btrfs_root *tree_root;
struct btrfs_root *chunk_root;
struct btrfs_root *dev_root;
struct btrfs_root *quota_root;
struct btrfs_root *uuid_root;
struct btrfs_root *block_group_root;
struct btrfs_root *stripe_root;
struct rb_root global_roots_tree;
struct rb_root fs_root_tree;
/* the log root tree is a directory of all the other log roots */
struct btrfs_root *log_root_tree;
struct cache_tree extent_cache;
u64 max_cache_size;
u64 cache_size;
struct list_head lru;
struct extent_io_tree dirty_buffers;
struct extent_io_tree free_space_cache;
struct extent_io_tree pinned_extents;
struct extent_io_tree extent_ins;
struct extent_io_tree *excluded_extents;
spinlock_t trans_lock;
struct rw_semaphore commit_root_sem;
struct rb_root block_group_cache_tree;
/* logical->physical extent mapping */
struct btrfs_mapping_tree mapping_tree;
2007-03-20 19:57:25 +00:00
u64 generation;
u64 last_trans_committed;
u64 avail_data_alloc_bits;
u64 avail_metadata_alloc_bits;
u64 avail_system_alloc_bits;
u64 data_alloc_profile;
u64 metadata_alloc_profile;
u64 system_alloc_profile;
struct btrfs_trans_handle *running_transaction;
struct btrfs_super_block *super_copy;
u64 super_bytenr;
u64 total_pinned;
u64 nr_global_roots;
struct list_head dirty_cowonly_roots;
struct list_head recow_ebs;
struct btrfs_fs_devices *fs_devices;
2008-03-24 19:03:58 +00:00
struct list_head space_info;
unsigned int system_allocs:1;
unsigned int readonly:1;
unsigned int on_restoring:1;
unsigned int is_chunk_recover:1;
unsigned int quota_enabled:1;
unsigned int suppress_check_block_errors:1;
unsigned int ignore_fsid_mismatch:1;
/* Don't verify checksums at all */
unsigned int skip_csum_check:1;
unsigned int ignore_chunk_tree_error:1;
unsigned int avoid_meta_chunk_alloc:1;
unsigned int avoid_sys_chunk_alloc:1;
unsigned int finalize_on_close:1;
unsigned int hide_names:1;
btrfs-progs: introduce OPEN_CTREE_ALLOW_TRANSID_MISMATCH flag [BUG] There is a report that, btrfstune can even work while the fs has transid mismatch problems. $ btrfstune -f -u /dev/sdb1 Current fsid: b2b5ae8d-4c49-45f0-b42e-46fe7dcfcb07 New fsid: b2b5ae8d-4c49-45f0-b42e-46fe7dcfcb07 Set superblock flag CHANGING_FSID Change fsid in extents parent transid verify failed on 792854528 wanted 20103 found 20091 parent transid verify failed on 792854528 wanted 20103 found 20091 parent transid verify failed on 792854528 wanted 20103 found 20091 Ignoring transid failure parent transid verify failed on 792870912 wanted 20103 found 20091 parent transid verify failed on 792870912 wanted 20103 found 20091 parent transid verify failed on 792870912 wanted 20103 found 20091 Ignoring transid failure parent transid verify failed on 792887296 wanted 20103 found 20091 parent transid verify failed on 792887296 wanted 20103 found 20091 parent transid verify failed on 792887296 wanted 20103 found 20091 Ignoring transid failure ERROR: child eb corrupted: parent bytenr=38010880 item=69 parent level=1 child level=1 ERROR: failed to change UUID of metadata: -5 ERROR: btrfstune failed This leaves a corrupted fs even more corrupted, and due to the extra CHANGING_FSID flag, btrfs check will not even try to run on it: Opening filesystem to check... ERROR: Filesystem UUID change in progress ERROR: cannot open file system [CAUSE] Unlike kernel, btrfs-progs has a less strict check on transid mismatch. In read_tree_block() we will fall back to use the tree block even its transid mismatch if we can't find any better copy. However not all commands in btrfs-progs needs this feature, only btrfs-check (which may fix the problem) and btrfs-restore (it just tries to ignore any problems) really utilize this feature. [FIX] Introduce a new open ctree flag, OPEN_CTREE_ALLOW_TRANSID_MISMATCH, to be explicit about whether we really want to ignore transid error. Currently only btrfs-check and btrfs-restore will utilize this new flag. Also add btrfs-image to allow opening such fs with transid error. Link: https://www.reddit.com/r/btrfs/comments/pivpqk/failure_during_btrfstune_u/ Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2021-09-08 02:05:42 +00:00
unsigned int allow_transid_mismatch:1;
unsigned int skip_leaf_item_checks:1;
btrfs-progs: introduce a basic metadata free space reservation check Unlike kernel, in btrfs-progs btrfs_start_transaction() never checks if there is enough metadata space. This can lead to very dangerous situation where there is no metadata space left at all, deadlocking future tree operations. This patch introduces a very basic version of metadata/system free space check by: - Check if there is enough metadata/system space left If there is enough, go as usual. - If there is not enough space left, try allocating a new chunk - Recheck if the new space can meet our demand If not, return ERR_PTR(-ENOSPC). Otherwise, allocate a new trans handle to the caller. This is possible thanks to the simplified transaction model in btrfs-progs: - We don't allow joining a transaction This means we don't need to handle complex cases like data ordered extents, which need to reserve space first, then join the current transaction and use the reserved blocks. - We don't allow multiple transaction handles for one transaction Since btrfs-progs is single threaded, we always start a transaction and then commit it. However there is a feature that must be an exception for the new metadata/system free space check: - btrfs check --init-extent-tree As all the meta/system free space check is based on the space info, which is loaded from block group items. Thus when rebuilding extent tree, we can no longer have an accurate view, thus we have to disable the feature for the whole execution if we're rebuilding the extent tree. For now, there is no regression exposed during the self tests, but I really hope this can be an extra safety net to prevent causing ENOSPC deadlock in btrfs-progs. Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2023-10-05 07:10:04 +00:00
unsigned int rebuilding_extent_tree:1;
int transaction_aborted;
int (*free_extent_hook)(u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset,
int refs_to_drop);
struct cache_tree *fsck_extent_cache;
struct cache_tree *corrupt_blocks;
/*
* For converting to/from bg tree feature, this records the bytenr
* of the last processed block group item.
*
* Any new block group item after this bytenr is using the target
* block group item format. (e.g. if converting to bg tree, bg item
* after this bytenr should go into block group tree).
*
* Thus the number should decrease as our convert progress goes.
*/
u64 last_converted_bg_bytenr;
/* Cached block sizes */
u32 nodesize;
u32 sectorsize;
u32 stripesize;
u32 leaf_data_size;
btrfs-progs: fix stray fd close in open_ctree_fs_info() [BUG] Although commit b2a1be83b85f ("btrfs-progs: mkfs: keep file descriptors open during whole time") is making sure we're only closing the writeable fds after the fs is properly created, there is still a missing fd not following the requirement. And this explains the issue why sometimes after mkfs.btrfs, lsblk still doesn't give a valid uuid. Shown by the strace output (the command is "mkfs.btrfs -f /dev/test/scratch1"): openat(AT_FDCWD, "/dev/test/scratch1", O_RDWR) = 5 <<< Writeable open fadvise64(5, 0, 0, POSIX_FADV_DONTNEED) = 0 sysinfo({uptime=2529, loads=[8704, 6272, 2496], totalram=4104548352, freeram=3376611328, sharedram=9211904, bufferram=43016192, totalswap=3221221376, freeswap=3221221376, procs=190, totalhigh=0, freehigh=0, mem_unit=1}) = 0 lseek(5, 0, SEEK_END) = 10737418240 lseek(5, 0, SEEK_SET) = 0 ...... close(5) = 0 <<< Closed now pwrite64(6, "O\250\22\261\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 1163264) = 16384 pwrite64(6, "\201\316\272\342\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 1179648) = 16384 pwrite64(6, "K}S\t\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 1196032) = 16384 pwrite64(6, "\207j$\265\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 1212416) = 16384 pwrite64(6, "q\267;\336\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 5242880) = 16384 fsync(6) <<< But we're still writing into the disk. [CAUSE] After more digging, it turns out we have a very obvious escape in open_ctree_fs_info(): open_ctree_fs_info() |- fp = open(oca->filename, flags); |- info = __open_ctree_fd(); |- close(fp); As later we only do IO using the device fd, this close() seems fine. But the truth is, for mkfs usage, this fs_info is a temporary one, with a special magic number for the disk. And since mkfs is doing writeable operations, this close() would immediately trigger udev scan. And since at this stage, the fs is not yet fully created, udev can race with mkfs, and may get the invalid temporary superblock. [FIX] Introduce a new btrfs_fs_info member, initial_fd, for open_ctree_fs_info() to record the fd. And on close_ctree(), if we find fs_info::initial_fd is a valid fd, then close it. By this, we make sure all writeable fds are only closed after we have written valid super blocks into the disk. Issue: #734 Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2024-02-02 00:59:21 +00:00
/*
* For open_ctree_fs_info() to hold the initial fd until close.
*
* For writeable open_ctree_fs_info() call, we should not close
* the fd until the fs_info is properly closed, or it will trigger
* udev scan while our fs is not properly initialized.
*/
int initial_fd;
u16 csum_type;
u16 csum_size;
/*
* Zone size > 0 when in ZONED mode, otherwise it's used for a check
* if the mode is enabled
*/
union {
u64 zone_size;
u64 zoned;
};
struct super_block *sb;
};
static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
{
return fs_info->zoned != 0;
}
static inline bool btrfs_is_testing(const struct btrfs_fs_info *fs_info)
{
return false;
}
/*
* The state of btrfs root
*/
enum {
/*
* btrfs_record_root_in_trans is a multi-step process, and it can race
* with the balancing code. But the race is very small, and only the
* first time the root is added to each transaction. So IN_TRANS_SETUP
* is used to tell us when more checks are required
*/
BTRFS_ROOT_IN_TRANS_SETUP,
/*
* Set if tree blocks of this root can be shared by other roots.
* Only subvolume trees and their reloc trees have this bit set.
* Conflicts with TRACK_DIRTY bit.
*
* This affects two things:
*
* - How balance works
* For shareable roots, we need to use reloc tree and do path
* replacement for balance, and need various pre/post hooks for
* snapshot creation to handle them.
*
* While for non-shareable trees, we just simply do a tree search
* with COW.
*
* - How dirty roots are tracked
* For shareable roots, btrfs_record_root_in_trans() is needed to
* track them, while non-subvolume roots have TRACK_DIRTY bit, they
* don't need to set this manually.
*/
BTRFS_ROOT_SHAREABLE,
BTRFS_ROOT_TRACK_DIRTY,
BTRFS_ROOT_IN_RADIX,
BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
BTRFS_ROOT_DEFRAG_RUNNING,
BTRFS_ROOT_FORCE_COW,
BTRFS_ROOT_MULTI_LOG_TASKS,
BTRFS_ROOT_DIRTY,
BTRFS_ROOT_DELETING,
/*
* Reloc tree is orphan, only kept here for qgroup delayed subtree scan
*
* Set for the subvolume tree owning the reloc tree.
*/
BTRFS_ROOT_DEAD_RELOC_TREE,
/* Mark dead root stored on device whose cleanup needs to be resumed */
BTRFS_ROOT_DEAD_TREE,
/* The root has a log tree. Used for subvolume roots and the tree root. */
BTRFS_ROOT_HAS_LOG_TREE,
/* Qgroup flushing is in progress */
BTRFS_ROOT_QGROUP_FLUSHING,
/* We started the orphan cleanup for this root. */
BTRFS_ROOT_ORPHAN_CLEANUP,
/* This root has a drop operation that was started previously. */
BTRFS_ROOT_UNFINISHED_DROP,
/* This reloc root needs to have its buffers lockdep class reset. */
BTRFS_ROOT_RESET_LOCKDEP_CLASS,
};
/*
* in ram representation of the tree. extent_root is used for all allocations
* and for the extent tree extent_root root.
*/
struct btrfs_root {
struct rb_node rb_node;
struct extent_buffer *node;
struct extent_buffer *commit_root;
struct btrfs_root *log_root;
struct btrfs_root *reloc_root;
unsigned long state;
struct btrfs_root_item root_item;
struct btrfs_key root_key;
struct btrfs_fs_info *fs_info;
u64 objectid;
u64 last_trans;
2007-10-15 20:24:39 +00:00
u32 type;
u64 last_inode_alloc;
struct list_head unaligned_extent_recs;
/* the dirty list is only used by non-reference counted roots */
struct list_head dirty_list;
spinlock_t accounting_lock;
};
static inline u64 btrfs_root_id(const struct btrfs_root *root)
{
return root->root_key.objectid;
}
static inline u32 BTRFS_MAX_ITEM_SIZE(const struct btrfs_fs_info *info)
{
return BTRFS_LEAF_DATA_SIZE(info) - sizeof(struct btrfs_item);
}
static inline u32 BTRFS_NODEPTRS_PER_BLOCK(const struct btrfs_fs_info *info)
{
return BTRFS_LEAF_DATA_SIZE(info) / sizeof(struct btrfs_key_ptr);
}
static inline u32 BTRFS_NODEPTRS_PER_EXTENT_BUFFER(const struct extent_buffer *eb)
{
BUG_ON(!eb->fs_info);
BUG_ON(eb->fs_info->nodesize != eb->len);
return BTRFS_LEAF_DATA_SIZE(eb->fs_info) / sizeof(struct btrfs_key_ptr);
}
static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
{
return BTRFS_MAX_ITEM_SIZE(info) - sizeof(struct btrfs_dir_item);
}
2007-03-15 23:03:33 +00:00
/*
* inode items have the data typically returned from stat and store other
* info about object characteristics. There is one for every file and dir in
* the FS
*/
2007-04-26 20:46:06 +00:00
#define BTRFS_INODE_ITEM_KEY 1
#define BTRFS_INODE_REF_KEY 12
#define BTRFS_INODE_EXTREF_KEY 13
#define BTRFS_XATTR_ITEM_KEY 24
#define BTRFS_VERITY_DESC_ITEM_KEY 36
#define BTRFS_VERITY_MERKLE_ITEM_KEY 37
#define BTRFS_ORPHAN_ITEM_KEY 48
2007-03-15 23:03:33 +00:00
#define BTRFS_DIR_LOG_ITEM_KEY 60
#define BTRFS_DIR_LOG_INDEX_KEY 72
2007-03-15 23:03:33 +00:00
/*
* dir items are the name -> inode pointers in a directory. There is one
* for every name in a directory.
*/
#define BTRFS_DIR_ITEM_KEY 84
#define BTRFS_DIR_INDEX_KEY 96
2007-03-15 23:03:33 +00:00
/*
2007-04-26 20:46:06 +00:00
* extent data is for file data
2007-03-15 23:03:33 +00:00
*/
#define BTRFS_EXTENT_DATA_KEY 108
/*
* csum items have the checksums for data in the extents
*/
#define BTRFS_CSUM_ITEM_KEY 120
Btrfs: move data checksumming into a dedicated tree Btrfs stores checksums for each data block. Until now, they have been stored in the subvolume trees, indexed by the inode that is referencing the data block. This means that when we read the inode, we've probably read in at least some checksums as well. But, this has a few problems: * The checksums are indexed by logical offset in the file. When compression is on, this means we have to do the expensive checksumming on the uncompressed data. It would be faster if we could checksum the compressed data instead. * If we implement encryption, we'll be checksumming the plain text and storing that on disk. This is significantly less secure. * For either compression or encryption, we have to get the plain text back before we can verify the checksum as correct. This makes the raid layer balancing and extent moving much more expensive. * It makes the front end caching code more complex, as we have touch the subvolume and inodes as we cache extents. * There is potentitally one copy of the checksum in each subvolume referencing an extent. The solution used here is to store the extent checksums in a dedicated tree. This allows us to index the checksums by phyiscal extent start and length. It means: * The checksum is against the data stored on disk, after any compression or encryption is done. * The checksum is stored in a central location, and can be verified without following back references, or reading inodes. This makes compression significantly faster by reducing the amount of data that needs to be checksummed. It will also allow much faster raid management code in general. The checksums are indexed by a key with a fixed objectid (a magic value in ctree.h) and offset set to the starting byte of the extent. This allows us to copy the checksum items into the fsync log tree directly (or any other tree), without having to invent a second format for them. Signed-off-by: Chris Mason <chris.mason@oracle.com>
2008-12-08 22:00:31 +00:00
/*
* extent csums are stored in a separate tree and hold csums for
* an entire extent on disk.
*/
#define BTRFS_EXTENT_CSUM_KEY 128
2007-03-15 23:03:33 +00:00
/*
* root items point to tree roots. There are typically in the root
* tree used by the super block to find all the other trees
*/
#define BTRFS_ROOT_ITEM_KEY 132
/*
* root backrefs tie subvols and snapshots to the directory entries that
* reference them
*/
#define BTRFS_ROOT_BACKREF_KEY 144
/*
* root refs make a fast index for listing all of the snapshots and
* subvolumes referenced by a given root. They point directly to the
* directory item in the root that references the subvol
*/
#define BTRFS_ROOT_REF_KEY 156
2007-03-15 23:03:33 +00:00
/*
* extent items are in the extent map tree. These record which blocks
* are used, and how many references there are to each block
*/
#define BTRFS_EXTENT_ITEM_KEY 168
/*
* The same as the BTRFS_EXTENT_ITEM_KEY, except it's metadata we already know
* the length, so we save the level in key->offset instead of the length.
*/
#define BTRFS_METADATA_ITEM_KEY 169
#define BTRFS_TREE_BLOCK_REF_KEY 176
#define BTRFS_EXTENT_DATA_REF_KEY 178
/* old style extent backrefs */
#define BTRFS_EXTENT_REF_V0_KEY 180
#define BTRFS_SHARED_BLOCK_REF_KEY 182
#define BTRFS_SHARED_DATA_REF_KEY 184
2007-04-26 20:46:06 +00:00
/*
* block groups give us hints into the extent allocation trees. Which
* blocks are free etc etc
*/
#define BTRFS_BLOCK_GROUP_ITEM_KEY 192
/*
* Every block group is represented in the free space tree by a free space info
* item, which stores some accounting information. It is keyed on
* (block_group_start, FREE_SPACE_INFO, block_group_length).
*/
#define BTRFS_FREE_SPACE_INFO_KEY 198
/*
* A free space extent tracks an extent of space that is free in a block group.
* It is keyed on (start, FREE_SPACE_EXTENT, length).
*/
#define BTRFS_FREE_SPACE_EXTENT_KEY 199
/*
* When a block group becomes very fragmented, we convert it to use bitmaps
* instead of extents. A free space bitmap is keyed on
* (start, FREE_SPACE_BITMAP, length); the corresponding item is a bitmap with
* (length / sectorsize) bits.
*/
#define BTRFS_FREE_SPACE_BITMAP_KEY 200
#define BTRFS_DEV_EXTENT_KEY 204
#define BTRFS_DEV_ITEM_KEY 216
#define BTRFS_CHUNK_ITEM_KEY 228
#define BTRFS_RAID_STRIPE_KEY 230
#define BTRFS_BALANCE_ITEM_KEY 248
/*
* quota groups
*/
#define BTRFS_QGROUP_STATUS_KEY 240
#define BTRFS_QGROUP_INFO_KEY 242
#define BTRFS_QGROUP_LIMIT_KEY 244
#define BTRFS_QGROUP_RELATION_KEY 246
/*
* Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY.
*/
#define BTRFS_BALANCE_ITEM_KEY 248
/*
* The key type for tree items that are stored persistently, but do not need to
* exist for extended period of time. The items can exist in any tree.
*
* [subtype, BTRFS_TEMPORARY_ITEM_KEY, data]
*
* Existing items:
*
* - balance status item (objectid -4)
* (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0)
*
* - second csum tree for conversion (objecitd -13)
* (BTRFS_CSUM_CHANGE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, <target csum type>)
*/
#define BTRFS_TEMPORARY_ITEM_KEY 248
/*
* Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY
*/
#define BTRFS_DEV_STATS_KEY 249
/*
* The key type for tree items that are stored persistently and usually exist
* for a long period, eg. filesystem lifetime. The item kinds can be status
* information, stats or preference values. The item can exist in any tree.
*
* [subtype, BTRFS_PERSISTENT_ITEM_KEY, data]
*
* Existing items:
*
* - device statistics, store IO stats in the device tree, one key for all
* stats
* (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0)
*/
#define BTRFS_PERSISTENT_ITEM_KEY 249
/*
* Persistently stores the device replace state in the device tree.
* The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0).
*/
#define BTRFS_DEV_REPLACE_KEY 250
/*
* Stores items that allow to quickly map UUIDs to something else.
* These items are part of the filesystem UUID tree.
* The key is built like this:
* (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits).
*/
#if BTRFS_UUID_SIZE != 16
#error "UUID items require BTRFS_UUID_SIZE == 16!"
#endif
#define BTRFS_UUID_KEY_SUBVOL 251 /* for UUIDs assigned to subvols */
#define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252 /* for UUIDs assigned to
* received subvols */
2007-03-15 23:03:33 +00:00
/*
* string items are for debugging. They just store a short string of
* data in the FS
*/
2007-04-26 20:46:06 +00:00
#define BTRFS_STRING_ITEM_KEY 253
static inline unsigned long btrfs_header_fsid(void)
{
return offsetof(struct btrfs_header, fsid);
}
static inline unsigned long btrfs_header_chunk_tree_uuid(const struct extent_buffer *eb)
{
return offsetof(struct btrfs_header, chunk_tree_uuid);
}
static inline struct btrfs_timespec *
btrfs_inode_atime(struct btrfs_inode_item *inode_item)
{
unsigned long ptr = (unsigned long)inode_item;
ptr += offsetof(struct btrfs_inode_item, atime);
return (struct btrfs_timespec *)ptr;
}
static inline struct btrfs_timespec *
btrfs_inode_mtime(struct btrfs_inode_item *inode_item)
{
unsigned long ptr = (unsigned long)inode_item;
ptr += offsetof(struct btrfs_inode_item, mtime);
return (struct btrfs_timespec *)ptr;
}
static inline struct btrfs_timespec *
btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
{
unsigned long ptr = (unsigned long)inode_item;
ptr += offsetof(struct btrfs_inode_item, ctime);
return (struct btrfs_timespec *)ptr;
}
static inline struct btrfs_timespec *
btrfs_inode_otime(struct btrfs_inode_item *inode_item)
{
unsigned long ptr = (unsigned long)inode_item;
ptr += offsetof(struct btrfs_inode_item, otime);
return (struct btrfs_timespec *)ptr;
}
static inline struct btrfs_timespec* btrfs_root_ctime(
struct btrfs_root_item *root_item)
{
unsigned long ptr = (unsigned long)root_item;
ptr += offsetof(struct btrfs_root_item, ctime);
return (struct btrfs_timespec *)ptr;
}
static inline struct btrfs_timespec* btrfs_root_otime(
struct btrfs_root_item *root_item)
{
unsigned long ptr = (unsigned long)root_item;
ptr += offsetof(struct btrfs_root_item, otime);
return (struct btrfs_timespec *)ptr;
}
static inline struct btrfs_timespec* btrfs_root_stime(
struct btrfs_root_item *root_item)
{
unsigned long ptr = (unsigned long)root_item;
ptr += offsetof(struct btrfs_root_item, stime);
return (struct btrfs_timespec *)ptr;
}
static inline struct btrfs_timespec* btrfs_root_rtime(
struct btrfs_root_item *root_item)
{
unsigned long ptr = (unsigned long)root_item;
ptr += offsetof(struct btrfs_root_item, rtime);
return (struct btrfs_timespec *)ptr;
}
static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev)
{
unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid);
return (u8 *)((unsigned long)dev + ptr);
}
static inline u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
const struct btrfs_dev_stats_item *ptr,
int index)
{
u64 val;
read_extent_buffer(eb, &val,
offsetof(struct btrfs_dev_stats_item, values) +
((unsigned long)ptr) + (index * sizeof(u64)),
sizeof(val));
return val;
}
/* struct btrfs_ioctl_search_header */
static inline u64 btrfs_search_header_transid(struct btrfs_ioctl_search_header *sh)
{
return get_unaligned_64(&sh->transid);
}
static inline u64 btrfs_search_header_objectid(struct btrfs_ioctl_search_header *sh)
{
return get_unaligned_64(&sh->objectid);
}
static inline u64 btrfs_search_header_offset(struct btrfs_ioctl_search_header *sh)
{
return get_unaligned_64(&sh->offset);
}
static inline u32 btrfs_search_header_type(struct btrfs_ioctl_search_header *sh)
{
return get_unaligned_32(&sh->type);
}
static inline u32 btrfs_search_header_len(struct btrfs_ioctl_search_header *sh)
{
return get_unaligned_32(&sh->len);
}
#define btrfs_fs_incompat(fs_info, opt) \
__btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
static inline bool __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
{
struct btrfs_super_block *disk_super;
disk_super = fs_info->super_copy;
return !!(btrfs_super_incompat_flags(disk_super) & flag);
}
#define btrfs_fs_compat_ro(fs_info, opt) \
__btrfs_fs_compat_ro((fs_info), BTRFS_FEATURE_COMPAT_RO_##opt)
static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag)
{
struct btrfs_super_block *disk_super;
disk_super = fs_info->super_copy;
return !!(btrfs_super_compat_ro_flags(disk_super) & flag);
}
static inline u64 btrfs_name_hash(const char *name, int len)
{
return crc32c((u32)~1, name, len);
}
/*
* Figure the key offset of an extended inode ref
*/
static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name,
int len)
{
return (u64)crc32c(parent_objectid, name, len);
}
/* extent-tree.c */
int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 num_bytes, u64 empty_size,
u64 hint_byte, u64 search_end,
struct btrfs_key *ins, bool is_data);
int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans);
void btrfs_pin_extent(struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes);
void btrfs_unpin_extent(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes);
struct btrfs_block_group *btrfs_lookup_block_group(struct btrfs_fs_info *info,
u64 bytenr);
struct btrfs_block_group *btrfs_lookup_first_block_group(struct
btrfs_fs_info *info,
u64 bytenr);
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 parent, u64 root_objectid,
struct btrfs_disk_key *key, int level,
u64 hint, u64 empty_size,
enum btrfs_lock_nesting nest);
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 offset, int metadata, u64 *refs, u64 *flags);
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
struct extent_buffer *eb, u64 flags);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int record_parent);
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int record_parent);
int btrfs_free_tree_block(struct btrfs_trans_handle *trans, u64 root_id,
struct extent_buffer *buf, u64 parent, int last_ref);
int btrfs_free_extent(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset);
void btrfs_finish_extent_commit(struct btrfs_trans_handle *trans);
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset);
int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
u64 orig_parent, u64 parent,
u64 root_objectid, u64 ref_generation,
u64 owner_objectid);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans);
btrfs-progs: introduce a basic metadata free space reservation check Unlike kernel, in btrfs-progs btrfs_start_transaction() never checks if there is enough metadata space. This can lead to very dangerous situation where there is no metadata space left at all, deadlocking future tree operations. This patch introduces a very basic version of metadata/system free space check by: - Check if there is enough metadata/system space left If there is enough, go as usual. - If there is not enough space left, try allocating a new chunk - Recheck if the new space can meet our demand If not, return ERR_PTR(-ENOSPC). Otherwise, allocate a new trans handle to the caller. This is possible thanks to the simplified transaction model in btrfs-progs: - We don't allow joining a transaction This means we don't need to handle complex cases like data ordered extents, which need to reserve space first, then join the current transaction and use the reserved blocks. - We don't allow multiple transaction handles for one transaction Since btrfs-progs is single threaded, we always start a transaction and then commit it. However there is a feature that must be an exception for the new metadata/system free space check: - btrfs check --init-extent-tree As all the meta/system free space check is based on the space info, which is loaded from block group items. Thus when rebuilding extent tree, we can no longer have an accurate view, thus we have to disable the feature for the whole execution if we're rebuilding the extent tree. For now, there is no regression exposed during the self tests, but I really hope this can be an extra safety net to prevent causing ENOSPC deadlock in btrfs-progs. Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2023-10-05 07:10:04 +00:00
struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info, u64 flags);
btrfs-progs: Fix false ENOSPC alert by tracking used space correctly [BUG] There is a bug report of unexpected ENOSPC from btrfs-convert, issue #123. After some debugging, even when we have enough unallocated space, we still hit ENOSPC at btrfs_reserve_extent(). [CAUSE] Btrfs-progs relies on chunk preallocator to make enough space for data/metadata. However after the introduction of delayed-ref, it's no longer reliable to rely on btrfs_space_info::bytes_used and btrfs_space_info::bytes_pinned to calculate used metadata space. For a running transaction with a lot of allocated tree blocks, btrfs_space_info::bytes_used stays its original value, and will only be updated when running delayed ref. This makes btrfs-progs chunk preallocator completely useless. And for btrfs-convert/mkfs.btrfs --rootdir, if we're going to have enough metadata to fill a metadata block group in one transaction, we will hit ENOSPC no matter whether we have enough unallocated space. [FIX] This patch will introduce btrfs_space_info::bytes_reserved to track how many space we have reserved but not yet committed to extent tree. To support this change, this commit also introduces the following modification: - More comment on btrfs_space_info::bytes_* To make code a little easier to read - Export update_space_info() to preallocate empty data/metadata space info for mkfs. For mkfs, we only have a temporary fs image with SYSTEM chunk only. Export update_space_info() so that we can preallocate empty data/metadata space info before we start a transaction. - Proper btrfs_space_info::bytes_reserved update The timing is the as kernel (except we don't need to update bytes_reserved for data extents) * Increase bytes_reserved when call alloc_reserved_tree_block() * Decrease bytes_reserved when running delayed refs With the help of head->must_insert_reserved to determine whether we need to decrease. Issue: #123 Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-05-24 23:32:43 +00:00
int update_space_info(struct btrfs_fs_info *info, u64 flags,
u64 total_bytes, u64 bytes_used,
struct btrfs_space_info **space_info);
int btrfs_free_block_groups(struct btrfs_fs_info *info);
int btrfs_read_block_groups(struct btrfs_fs_info *info);
btrfs-progs: introduce a basic metadata free space reservation check Unlike kernel, in btrfs-progs btrfs_start_transaction() never checks if there is enough metadata space. This can lead to very dangerous situation where there is no metadata space left at all, deadlocking future tree operations. This patch introduces a very basic version of metadata/system free space check by: - Check if there is enough metadata/system space left If there is enough, go as usual. - If there is not enough space left, try allocating a new chunk - Recheck if the new space can meet our demand If not, return ERR_PTR(-ENOSPC). Otherwise, allocate a new trans handle to the caller. This is possible thanks to the simplified transaction model in btrfs-progs: - We don't allow joining a transaction This means we don't need to handle complex cases like data ordered extents, which need to reserve space first, then join the current transaction and use the reserved blocks. - We don't allow multiple transaction handles for one transaction Since btrfs-progs is single threaded, we always start a transaction and then commit it. However there is a feature that must be an exception for the new metadata/system free space check: - btrfs check --init-extent-tree As all the meta/system free space check is based on the space info, which is loaded from block group items. Thus when rebuilding extent tree, we can no longer have an accurate view, thus we have to disable the feature for the whole execution if we're rebuilding the extent tree. For now, there is no regression exposed during the self tests, but I really hope this can be an extra safety net to prevent causing ENOSPC deadlock in btrfs-progs. Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2023-10-05 07:10:04 +00:00
int btrfs_try_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 alloc_bytes,
u64 flags);
struct btrfs_block_group *
btrfs_add_block_group(struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type,
u64 chunk_offset, u64 size);
int btrfs_make_block_group(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytes_used,
u64 type, u64 chunk_offset, u64 size);
int btrfs_make_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_update_block_group(struct btrfs_trans_handle *trans, u64 bytenr,
u64 num, int alloc, int mark_free);
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
u64 bytenr, u64 len);
void free_excluded_extents(struct btrfs_fs_info *fs_info,
struct btrfs_block_group *cache);
int exclude_super_stripes(struct btrfs_fs_info *fs_info,
struct btrfs_block_group *cache);
u64 add_new_free_space(struct btrfs_block_group *block_group,
struct btrfs_fs_info *info, u64 start, u64 end);
u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset);
int btrfs_convert_one_bg(struct btrfs_trans_handle *trans, u64 bytenr);
/* ctree.c */
int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2);
int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_path *path, int level, int slot);
struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
int slot);
int btrfs_previous_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid,
int type);
int btrfs_previous_extent_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid);
int btrfs_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf,
struct extent_buffer *parent, int parent_slot,
struct extent_buffer **cow_ret,
enum btrfs_lock_nesting nest);
int btrfs_copy_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
struct extent_buffer **cow_ret, u64 new_root_objectid);
int btrfs_create_root(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 objectid);
void btrfs_extend_item(struct btrfs_path *path, u32 data_size);
void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end);
int btrfs_split_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
const struct btrfs_key *new_key,
unsigned long split_offset);
int btrfs_search_slot(struct btrfs_trans_handle *trans,
struct btrfs_root *root, const struct btrfs_key *key,
struct btrfs_path *p, int ins_len, int cow);
int btrfs_search_slot_for_read(struct btrfs_root *root,
const struct btrfs_key *key,
struct btrfs_path *p, int find_higher,
int return_any);
int btrfs_bin_search(struct extent_buffer *eb, int first_slot,
const struct btrfs_key *key, int *slot);
int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
u64 iobjectid, u64 ioff, u8 key_type,
struct btrfs_key *found_key);
void btrfs_release_path(struct btrfs_path *p);
void add_root_to_dirty_list(struct btrfs_root *root);
struct btrfs_path *btrfs_alloc_path(void);
void btrfs_free_path(struct btrfs_path *p);
int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_path *path, int slot, int nr);
static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path)
{
return btrfs_del_items(trans, root, path, path->slots[0], 1);
}
/*
* Describes a batch of items to insert in a btree. This is used by
* btrfs_insert_empty_items().
*/
struct btrfs_item_batch {
/*
* Pointer to an array containing the keys of the items to insert (in
* sorted order).
*/
const struct btrfs_key *keys;
/* Pointer to an array containing the data size for each item to insert. */
const u32 *data_sizes;
/*
* The sum of data sizes for all items. The caller can compute this while
* setting up the data_sizes array, so it ends up being more efficient
* than having btrfs_insert_empty_items() or setup_item_for_insert()
* doing it, as it would avoid an extra loop over a potentially large
* array, and in the case of setup_item_for_insert(), we would be doing
* it while holding a write lock on a leaf and often on upper level nodes
* too, unnecessarily increasing the size of a critical section.
*/
u32 total_data_size;
/* Size of the keys and data_sizes arrays (number of items in the batch). */
int nr;
};
int btrfs_insert_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, const struct btrfs_key *key,
void *data, u32 data_size);
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
const struct btrfs_item_batch *batch);
static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
const struct btrfs_key *key,
u32 data_size)
{
struct btrfs_item_batch batch;
batch.keys = key;
batch.data_sizes = &data_size;
batch.total_data_size = data_size;
batch.nr = 1;
return btrfs_insert_empty_items(trans, root, path, &batch);
}
int btrfs_next_sibling_tree_block(struct btrfs_fs_info *fs_info,
struct btrfs_path *path);
/*
* Walk up the tree as far as necessary to find the next leaf.
*
* returns 0 if it found something or 1 if there are no greater leaves.
* returns < 0 on io errors.
*/
static inline int btrfs_next_leaf(struct btrfs_root *root,
struct btrfs_path *path)
{
path->lowest_level = 0;
return btrfs_next_sibling_tree_block(root->fs_info, path);
}
static inline int btrfs_next_item(struct btrfs_root *root,
struct btrfs_path *p)
{
++p->slots[0];
btrfs-progs: check: fix a lowmem mode crash where fatal error is not properly handled [BUG] When a special image (diverted from fsck/012) has its unused slots (slot number >= nritems) with garbage, lowmem mode btrfs check can crash: (gdb) run check --mode=lowmem ~/downloads/good.img.restored Starting program: /home/adam/btrfs/btrfs-progs/btrfs check --mode=lowmem ~/downloads/good.img.restored ... ERROR: root 5 INODE[5044031582654955520] nlink(257228800) not equal to inode_refs(0) ERROR: root 5 INODE[5044031582654955520] nbytes 474624 not equal to extent_size 0 Program received signal SIGSEGV, Segmentation fault. 0x0000555555639b11 in btrfs_inode_size (eb=0x5555558a7540, s=0x642e6cd1) at ./kernel-shared/ctree.h:1703 1703 BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64); (gdb) bt #0 0x0000555555639b11 in btrfs_inode_size (eb=0x5555558a7540, s=0x642e6cd1) at ./kernel-shared/ctree.h:1703 #1 0x0000555555641544 in check_inode_item (root=0x5555556c2290, path=0x7fffffffd960) at check/mode-lowmem.c:2628 [CAUSE] At check_inode_item() we have path->slot[0] at 29, while the tree block only has 26 items. This happens because two reasons: - btrfs_next_item() never reverts its slots Even if we failed to read next leaf. - check_inode_item() doesn't inform the caller that a fatal error happened In check_inode_item(), if btrfs_next_item() failed, it goes to out label, which doesn't really set @err properly. This means, when check_inode_item() fails at btrfs_next_item(), it will increase path->slots[0], while it's already beyond current tree block nritems. When the slot increases furthermore, and if the unused item slots have some garbage, we will get invalid btrfs_item_ptr() result, and causing above segfault. [FIX] Fix the problems by two ways: - Make btrfs_next_item() to revert its path->slots[0] on failure - Properly detect fatal error from check_inode_item() By this, we will no longer crash on the crafted image. Reported-by: Wang Yugui <wangyugui@e16-tech.com> Issue: #412 Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2021-11-01 11:30:17 +00:00
if (p->slots[0] >= btrfs_header_nritems(p->nodes[0])) {
int ret;
ret = btrfs_next_leaf(root, p);
/*
* Revert the increased slot, or the path may point to
* an invalid item.
*/
if (ret)
p->slots[0]--;
return ret;
}
return 0;
}
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_leaf_free_space(const struct extent_buffer *leaf);
void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
const struct btrfs_key *new_key);
int btrfs_super_csum_size(const struct btrfs_super_block *sb);
const char *btrfs_super_csum_name(u16 csum_type);
const char *btrfs_super_csum_driver(u16 csum_type);
u16 btrfs_csum_type_size(u16 csum_type);
size_t __attribute_const__ btrfs_get_num_csums(void);
/* root-item.c */
int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *tree_root,
u64 root_id, u8 type, u64 ref_id,
u64 dirid, u64 sequence,
const char *name, int name_len);
2007-03-16 20:20:31 +00:00
int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_key *key, struct btrfs_root_item
*item);
int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_key *key);
2007-03-16 20:20:31 +00:00
int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_key *key, struct btrfs_root_item
*item);
int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
btrfs_root_item *item, struct btrfs_key *key);
/* dir-item.c */
2007-03-16 20:20:31 +00:00
int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
*root, const char *name, int name_len, u64 dir,
struct btrfs_key *location, u8 type, u64 index);
struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 dir,
const char *name, int name_len,
int mod);
struct btrfs_dir_item *btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 dir,
u64 objectid, const char *name, int name_len,
int mod);
int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_dir_item *di);
int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, const char *name,
u16 name_len, const void *data, u16 data_len,
u64 dir);
struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
struct btrfs_path *path,
const char *name, int name_len);
/* inode-item.c */
int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
2008-07-24 16:13:32 +00:00
u64 inode_objectid, u64 ref_objectid, u64 index);
2007-03-20 19:57:25 +00:00
int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root
*root, u64 objectid, struct btrfs_inode_item
*inode_item);
int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_path *path,
struct btrfs_key *location, int mod);
struct btrfs_inode_extref *btrfs_lookup_inode_extref(struct btrfs_trans_handle
*trans, struct btrfs_path *path, struct btrfs_root *root,
u64 ino, u64 parent_ino, u64 index, const char *name,
int namelen, int ins_len);
int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
u64 inode_objectid, u64 ref_objectid,
u64 *index);
int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
u64 inode_objectid, u64 ref_objectid, u64 index);
struct btrfs_inode_ref *btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_path *path,
const char *name, int namelen, u64 ino, u64 parent_ino,
int ins_len);
int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root, const char *name, int name_len,
u64 ino, u64 parent_ino, u64 *index);
static inline int is_fstree(u64 rootid)
{
if (rootid == BTRFS_FS_TREE_OBJECTID ||
(signed long long)rootid >= (signed long long)BTRFS_FIRST_FREE_OBJECTID)
return 1;
return 0;
}
/* inode.c */
int btrfs_find_free_dir_index(struct btrfs_root *root, u64 dir_ino,
u64 *ret_ino);
int btrfs_check_dir_conflict(struct btrfs_root *root, const char *name,
int namelen, u64 dir, u64 index);
int btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root,
u64 ino, u32 mode);
int btrfs_change_inode_flags(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 ino, u64 flags);
int btrfs_add_link(struct btrfs_trans_handle *trans, struct btrfs_root *root,
u64 ino, u64 parent_ino, const char *name, int namelen,
u8 type, u64 *index, int add_backref, int ignore_existed);
int btrfs_unlink(struct btrfs_trans_handle *trans, struct btrfs_root *root,
u64 ino, u64 parent_ino, u64 index, const char *name,
int namelen, int add_orphan);
int btrfs_add_orphan_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_path *path,
u64 ino);
int btrfs_mkdir(struct btrfs_trans_handle *trans, struct btrfs_root *root,
char *name, int namelen, u64 parent_ino, u64 *ino, int mode);
int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
struct btrfs_root *fs_root,
u64 dirid, u64 *objectid);
/* file.c */
int btrfs_get_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
u64 ino, u64 offset, u64 len, int ins_len);
int btrfs_punch_hole(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 ino, u64 offset, u64 len);
btrfs-progs: convert: Rework rollback Rework rollback to a more easy to understand way. New convert behavior makes us to have a more flex chunk layout, which only data chunk containing old fs data will be at the same physical location, while new chunks (data/meta/sys) can be mapped anywhere else. This behavior makes old rollback behavior can't handle it. As old behavior assumes all data/meta is mapped in a large chunk, which is mapped 1:1 on disk. So rework rollback to handle new convert behavior, enhance the check by only checking all file extents of convert image, only to check if these file extents and therir chunks are mapped 1:1. This new rollback check behavior can handle both new and old convert behavior, as the new behavior is a superset of old behavior. Further more, introduce a simple rollback mechanisim: 1) Read reserved data (offset = file offset) from convert image 2) Write reserved data into disk (offset = physical offset) Since old fs image is a valid fs, and we only need to rollback superblocks (btrfs reserved ranges), then we just read out data in reserved range, and write it back. Due to the fact that all other file extents of converted image is mapped 1:1 on disk, we put the missing piece back, then the fs is as good as old one. Then what we do in btrfs is just another dream. With this new rollback mechanisim, we can open btrfs read-only, so we won't cause any damage to current btrfs, until the final piece (0~1M, containing 1st super block) is put back. Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com> [ port to v4.10 ] Signed-off-by: David Sterba <dsterba@suse.com>
2017-02-23 08:21:14 +00:00
int btrfs_read_file(struct btrfs_root *root, u64 ino, u64 start, int len,
char *dest);
/* extent-tree.c */
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, unsigned long nr);
2007-02-02 14:18:22 +00:00
#endif