btrfs-progs: Fix false ENOSPC alert by tracking used space correctly

[BUG]
There is a bug report of unexpected ENOSPC from btrfs-convert, issue #123.

Debugging shows that even when we have enough unallocated space, we
still hit ENOSPC at btrfs_reserve_extent().

[CAUSE]
Btrfs-progs relies on the chunk preallocator to make enough space for
data/metadata.

However, after the introduction of delayed refs, it's no longer reliable
to use btrfs_space_info::bytes_used and
btrfs_space_info::bytes_pinned to calculate used metadata space.

For a running transaction with a lot of allocated tree blocks,
btrfs_space_info::bytes_used stays at its original value, and will only be
updated when running delayed refs.

This makes btrfs-progs chunk preallocator completely useless. And for
btrfs-convert/mkfs.btrfs --rootdir, if we're going to have enough
metadata to fill a metadata block group in one transaction, we will hit
ENOSPC no matter whether we have enough unallocated space.

[FIX]
This patch introduces btrfs_space_info::bytes_reserved to track how
much space we have reserved but not yet committed to the extent tree.

To support this change, this commit also introduces the following
modifications:

- More comments on btrfs_space_info::bytes_*
  To make the code a little easier to read

- Export update_space_info() to preallocate empty data/metadata space
  info for mkfs.
  For mkfs, we only have a temporary fs image with SYSTEM chunk only.
  Export update_space_info() so that we can preallocate empty
  data/metadata space info before we start a transaction.

- Proper btrfs_space_info::bytes_reserved update
  The timing is the same as in the kernel (except we don't need to update
  bytes_reserved for data extents)
  * Increase bytes_reserved when calling alloc_tree_block()
  * Decrease bytes_reserved when running delayed refs
    With the help of head->must_insert_reserved to determine whether we
    need to decrease.

Issue: #123
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Qu Wenruo 2019-05-25 07:32:43 +08:00 committed by David Sterba
parent ab5079c19a
commit c31edf610c
4 changed files with 83 additions and 6 deletions

24
ctree.h
View File

@ -1058,8 +1058,29 @@ struct btrfs_qgroup_limit_item {
struct btrfs_space_info { struct btrfs_space_info {
u64 flags; u64 flags;
u64 total_bytes; u64 total_bytes;
/*
* Space already used.
* Only accounting space in current extent tree, thus delayed ref
* won't be accounted here.
*/
u64 bytes_used; u64 bytes_used;
/*
* Space being pinned down.
* So extent allocator will not try to allocate space from them.
*
* For cases like extents being freed in current transaction, or
* manually pinned bytes for re-initializing certain trees.
*/
u64 bytes_pinned; u64 bytes_pinned;
/*
* Space being reserved.
* Space has already being reserved but not yet reach extent tree.
*
* New tree blocks allocated in current transaction goes here.
*/
u64 bytes_reserved;
int full; int full;
struct list_head list; struct list_head list;
}; };
@ -2513,6 +2534,9 @@ int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
u64 root_objectid, u64 ref_generation, u64 root_objectid, u64 ref_generation,
u64 owner_objectid); u64 owner_objectid);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans); int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans);
int update_space_info(struct btrfs_fs_info *info, u64 flags,
u64 total_bytes, u64 bytes_used,
struct btrfs_space_info **space_info);
int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_free_block_groups(struct btrfs_fs_info *info);
int btrfs_read_block_groups(struct btrfs_root *root); int btrfs_read_block_groups(struct btrfs_root *root);
struct btrfs_block_group_cache * struct btrfs_block_group_cache *

View File

@ -1630,9 +1630,9 @@ static int free_space_info(struct btrfs_fs_info *fs_info, u64 flags,
return 0; return 0;
} }
static int update_space_info(struct btrfs_fs_info *info, u64 flags, int update_space_info(struct btrfs_fs_info *info, u64 flags,
u64 total_bytes, u64 bytes_used, u64 total_bytes, u64 bytes_used,
struct btrfs_space_info **space_info) struct btrfs_space_info **space_info)
{ {
struct btrfs_space_info *found; struct btrfs_space_info *found;
@ -1658,6 +1658,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->total_bytes = total_bytes; found->total_bytes = total_bytes;
found->bytes_used = bytes_used; found->bytes_used = bytes_used;
found->bytes_pinned = 0; found->bytes_pinned = 0;
found->bytes_reserved = 0;
found->full = 0; found->full = 0;
*space_info = found; *space_info = found;
return 0; return 0;
@ -1703,8 +1704,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
return 0; return 0;
thresh = div_factor(space_info->total_bytes, 7); thresh = div_factor(space_info->total_bytes, 7);
if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < if ((space_info->bytes_used + space_info->bytes_pinned +
thresh) space_info->bytes_reserved + alloc_bytes) < thresh)
return 0; return 0;
/* /*
@ -2375,6 +2376,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_extent_item *extent_item; struct btrfs_extent_item *extent_item;
struct btrfs_extent_inline_ref *iref; struct btrfs_extent_inline_ref *iref;
struct btrfs_space_info *sinfo;
struct extent_buffer *leaf; struct extent_buffer *leaf;
struct btrfs_path *path; struct btrfs_path *path;
struct btrfs_key ins; struct btrfs_key ins;
@ -2382,6 +2384,9 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
u64 start, end; u64 start, end;
int ret; int ret;
sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
ASSERT(sinfo);
ins.objectid = node->bytenr; ins.objectid = node->bytenr;
if (skinny_metadata) { if (skinny_metadata) {
ins.offset = ref->level; ins.offset = ref->level;
@ -2442,6 +2447,14 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
ret = update_block_group(fs_info, ins.objectid, fs_info->nodesize, 1, ret = update_block_group(fs_info, ins.objectid, fs_info->nodesize, 1,
0); 0);
if (sinfo) {
if (fs_info->nodesize > sinfo->bytes_reserved) {
WARN_ON(1);
sinfo->bytes_reserved = 0;
} else {
sinfo->bytes_reserved -= fs_info->nodesize;
}
}
if (ref->root == BTRFS_EXTENT_TREE_OBJECTID) { if (ref->root == BTRFS_EXTENT_TREE_OBJECTID) {
clear_extent_bits(&trans->fs_info->extent_ins, start, end, clear_extent_bits(&trans->fs_info->extent_ins, start, end,
@ -2461,6 +2474,8 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
int ret; int ret;
u64 extent_size; u64 extent_size;
struct btrfs_delayed_extent_op *extent_op; struct btrfs_delayed_extent_op *extent_op;
struct btrfs_space_info *sinfo;
struct btrfs_fs_info *fs_info = root->fs_info;
bool skinny_metadata = btrfs_fs_incompat(root->fs_info, bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
SKINNY_METADATA); SKINNY_METADATA);
@ -2468,6 +2483,11 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
if (!extent_op) if (!extent_op)
return -ENOMEM; return -ENOMEM;
sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
if (!sinfo) {
error("Corrupted fs, no valid METADATA block group found");
return -EUCLEAN;
}
ret = btrfs_reserve_extent(trans, root, num_bytes, empty_size, ret = btrfs_reserve_extent(trans, root, num_bytes, empty_size,
hint_byte, search_end, ins, 0); hint_byte, search_end, ins, 0);
if (ret < 0) if (ret < 0)
@ -2500,6 +2520,7 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
BUG_ON(ret); BUG_ON(ret);
} }
sinfo->bytes_reserved += extent_size;
ret = btrfs_add_delayed_tree_ref(root->fs_info, trans, ins->objectid, ret = btrfs_add_delayed_tree_ref(root->fs_info, trans, ins->objectid,
extent_size, 0, root_objectid, extent_size, 0, root_objectid,
level, BTRFS_ADD_DELAYED_EXTENT, level, BTRFS_ADD_DELAYED_EXTENT,
@ -2837,6 +2858,10 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
sinfo = list_entry(info->space_info.next, sinfo = list_entry(info->space_info.next,
struct btrfs_space_info, list); struct btrfs_space_info, list);
list_del_init(&sinfo->list); list_del_init(&sinfo->list);
if (sinfo->bytes_reserved)
warning(
"reserved space leaked, flag=0x%llx bytes_reserved=%llu",
sinfo->flags, sinfo->bytes_reserved);
kfree(sinfo); kfree(sinfo);
} }
return 0; return 0;
@ -3943,8 +3968,17 @@ int cleanup_ref_head(struct btrfs_trans_handle *trans,
rb_erase(&head->href_node, &delayed_refs->href_root); rb_erase(&head->href_node, &delayed_refs->href_root);
RB_CLEAR_NODE(&head->href_node); RB_CLEAR_NODE(&head->href_node);
if (head->must_insert_reserved) if (head->must_insert_reserved) {
btrfs_pin_extent(fs_info, head->bytenr, head->num_bytes); btrfs_pin_extent(fs_info, head->bytenr, head->num_bytes);
if (!head->is_data) {
struct btrfs_space_info *sinfo;
sinfo = __find_space_info(trans->fs_info,
BTRFS_BLOCK_GROUP_METADATA);
ASSERT(sinfo);
sinfo->bytes_reserved -= head->num_bytes;
}
}
btrfs_put_delayed_ref_head(head); btrfs_put_delayed_ref_head(head);
return 0; return 0;

View File

@ -59,11 +59,22 @@ static int create_metadata_block_groups(struct btrfs_root *root, int mixed,
{ {
struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans; struct btrfs_trans_handle *trans;
struct btrfs_space_info *sinfo;
u64 bytes_used; u64 bytes_used;
u64 chunk_start = 0; u64 chunk_start = 0;
u64 chunk_size = 0; u64 chunk_size = 0;
int ret; int ret;
/* Create needed space info to trace extents reservation */
ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA,
0, 0, &sinfo);
if (ret < 0)
return ret;
ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA,
0, 0, &sinfo);
if (ret < 0)
return ret;
trans = btrfs_start_transaction(root, 1); trans = btrfs_start_transaction(root, 1);
BUG_ON(IS_ERR(trans)); BUG_ON(IS_ERR(trans));
bytes_used = btrfs_super_bytes_used(fs_info->super_copy); bytes_used = btrfs_super_bytes_used(fs_info->super_copy);

View File

@ -159,6 +159,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
u64 transid = trans->transid; u64 transid = trans->transid;
int ret = 0; int ret = 0;
struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_space_info *sinfo;
if (trans->fs_info->transaction_aborted) if (trans->fs_info->transaction_aborted)
return -EROFS; return -EROFS;
@ -210,6 +211,13 @@ commit_tree:
root->commit_root = NULL; root->commit_root = NULL;
fs_info->running_transaction = NULL; fs_info->running_transaction = NULL;
fs_info->last_trans_committed = transid; fs_info->last_trans_committed = transid;
list_for_each_entry(sinfo, &fs_info->space_info, list) {
if (sinfo->bytes_reserved) {
warning(
"reserved space leaked, transid=%llu flag=0x%llx bytes_reserved=%llu",
transid, sinfo->flags, sinfo->bytes_reserved);
}
}
return ret; return ret;
error: error:
btrfs_destroy_delayed_refs(trans); btrfs_destroy_delayed_refs(trans);