2017-08-28 15:27:08 +00:00
|
|
|
/*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public
|
|
|
|
* License v2 as published by the Free Software Foundation.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public
|
|
|
|
* License along with this program; if not, write to the
|
|
|
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
|
|
* Boston, MA 02111-1307, USA.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "kerncompat.h"
|
2023-08-28 20:12:13 +00:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include "kernel-lib/rbtree.h"
|
2023-08-23 14:27:49 +00:00
|
|
|
#include "kernel-lib/bitops.h"
|
2020-08-18 13:56:04 +00:00
|
|
|
#include "kernel-shared/disk-io.h"
|
2020-08-18 13:56:04 +00:00
|
|
|
#include "kernel-shared/transaction.h"
|
2020-03-04 19:55:39 +00:00
|
|
|
#include "kernel-shared/delayed-ref.h"
|
2021-04-26 06:27:31 +00:00
|
|
|
#include "kernel-shared/zoned.h"
|
2023-08-28 20:12:13 +00:00
|
|
|
#include "kernel-shared/accessors.h"
|
|
|
|
#include "kernel-shared/ctree.h"
|
|
|
|
#include "kernel-shared/extent_io.h"
|
|
|
|
#include "kernel-shared/locking.h"
|
|
|
|
#include "kernel-shared/uapi/btrfs_tree.h"
|
2019-06-19 22:44:36 +00:00
|
|
|
#include "common/messages.h"
|
2017-08-28 15:27:08 +00:00
|
|
|
|
2023-05-12 13:37:03 +00:00
|
|
|
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
|
|
|
|
unsigned int num_items)
|
2017-08-28 15:27:08 +00:00
|
|
|
{
|
|
|
|
struct btrfs_fs_info *fs_info = root->fs_info;
|
2022-04-19 11:17:41 +00:00
|
|
|
struct btrfs_trans_handle *h;
|
2017-08-28 15:27:08 +00:00
|
|
|
|
2017-08-29 12:55:37 +00:00
|
|
|
if (fs_info->transaction_aborted)
|
|
|
|
return ERR_PTR(-EROFS);
|
|
|
|
|
2017-08-28 15:27:08 +00:00
|
|
|
if (root->commit_root) {
|
2018-11-26 17:08:21 +00:00
|
|
|
error("commit_root already set when starting transaction");
|
2017-08-28 15:27:08 +00:00
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
}
|
|
|
|
if (fs_info->running_transaction) {
|
|
|
|
error("attempt to start transaction over already running one");
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
}
|
2022-04-19 11:17:41 +00:00
|
|
|
|
|
|
|
h = kzalloc(sizeof(*h), GFP_NOFS);
|
|
|
|
if (!h)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
2017-08-28 15:43:10 +00:00
|
|
|
h->fs_info = fs_info;
|
2017-08-28 15:27:08 +00:00
|
|
|
fs_info->running_transaction = h;
|
|
|
|
fs_info->generation++;
|
|
|
|
h->transid = fs_info->generation;
|
2023-05-12 13:37:03 +00:00
|
|
|
h->blocks_reserved = num_items;
|
2018-06-08 12:47:53 +00:00
|
|
|
h->reinit_extent_tree = false;
|
2019-04-16 10:21:43 +00:00
|
|
|
h->allocating_chunk = 0;
|
2017-08-28 15:27:08 +00:00
|
|
|
root->last_trans = h->transid;
|
|
|
|
root->commit_root = root->node;
|
|
|
|
extent_buffer_get(root->node);
|
2019-12-18 05:18:46 +00:00
|
|
|
INIT_LIST_HEAD(&h->dirty_bgs);
|
2017-08-28 15:27:08 +00:00
|
|
|
|
|
|
|
return h;
|
|
|
|
}
|
2017-08-28 15:39:26 +00:00
|
|
|
|
|
|
|
static int update_cowonly_root(struct btrfs_trans_handle *trans,
|
|
|
|
struct btrfs_root *root)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
u64 old_root_bytenr;
|
|
|
|
struct btrfs_root *tree_root = root->fs_info->tree_root;
|
|
|
|
|
|
|
|
while(1) {
|
|
|
|
old_root_bytenr = btrfs_root_bytenr(&root->root_item);
|
|
|
|
if (old_root_bytenr == root->node->start)
|
|
|
|
break;
|
|
|
|
btrfs_set_root_bytenr(&root->root_item,
|
|
|
|
root->node->start);
|
|
|
|
btrfs_set_root_generation(&root->root_item,
|
|
|
|
trans->transid);
|
|
|
|
root->root_item.level = btrfs_header_level(root->node);
|
|
|
|
ret = btrfs_update_root(trans, tree_root,
|
|
|
|
&root->root_key,
|
|
|
|
&root->root_item);
|
2018-07-05 07:37:29 +00:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
2019-12-18 05:18:40 +00:00
|
|
|
ret = btrfs_write_dirty_block_groups(trans);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
2017-08-28 15:39:26 +00:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int commit_tree_roots(struct btrfs_trans_handle *trans,
|
|
|
|
struct btrfs_fs_info *fs_info)
|
|
|
|
{
|
|
|
|
struct btrfs_root *root;
|
|
|
|
struct list_head *next;
|
|
|
|
struct extent_buffer *eb;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (fs_info->readonly)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
eb = fs_info->tree_root->node;
|
|
|
|
extent_buffer_get(eb);
|
2023-08-23 14:32:54 +00:00
|
|
|
ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb,
|
|
|
|
BTRFS_NESTING_NORMAL);
|
2017-08-28 15:39:26 +00:00
|
|
|
free_extent_buffer(eb);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2018-06-08 12:47:57 +00:00
|
|
|
/*
|
|
|
|
* If the above CoW is the first one to dirty the current tree_root,
|
|
|
|
* delayed refs for it won't be run until after this function has
|
|
|
|
* finished executing, meaning we won't process the extent tree root,
|
|
|
|
* which will have been added to ->dirty_cowonly_roots. So run
|
|
|
|
* delayed refs here as well.
|
|
|
|
*/
|
|
|
|
ret = btrfs_run_delayed_refs(trans, -1);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2017-08-28 15:39:26 +00:00
|
|
|
while(!list_empty(&fs_info->dirty_cowonly_roots)) {
|
|
|
|
next = fs_info->dirty_cowonly_roots.next;
|
|
|
|
list_del_init(next);
|
|
|
|
root = list_entry(next, struct btrfs_root, dirty_list);
|
2023-08-23 14:27:49 +00:00
|
|
|
clear_bit(BTRFS_ROOT_DIRTY, &root->state);
|
2018-07-05 07:37:29 +00:00
|
|
|
ret = update_cowonly_root(trans, root);
|
2017-08-28 15:39:26 +00:00
|
|
|
free_extent_buffer(root->commit_root);
|
|
|
|
root->commit_root = NULL;
|
2018-07-05 07:37:29 +00:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
2017-08-28 15:39:26 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2023-09-05 20:21:51 +00:00
|
|
|
static void clean_dirty_buffers(struct btrfs_trans_handle *trans)
|
|
|
|
{
|
|
|
|
struct btrfs_fs_info *fs_info = trans->fs_info;
|
|
|
|
struct extent_io_tree *tree = &fs_info->dirty_buffers;
|
|
|
|
struct extent_buffer *eb;
|
|
|
|
u64 start, end;
|
|
|
|
|
|
|
|
while (find_first_extent_bit(tree, 0, &start, &end, EXTENT_DIRTY,
|
|
|
|
NULL) == 0) {
|
|
|
|
while (start <= end) {
|
|
|
|
eb = find_first_extent_buffer(fs_info, start);
|
|
|
|
BUG_ON(!eb || eb->start != start);
|
|
|
|
start += eb->len;
|
|
|
|
btrfs_clear_buffer_dirty(trans, eb);
|
|
|
|
free_extent_buffer(eb);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-08-28 15:39:26 +00:00
|
|
|
int __commit_transaction(struct btrfs_trans_handle *trans,
|
|
|
|
struct btrfs_root *root)
|
|
|
|
{
|
|
|
|
u64 start;
|
|
|
|
u64 end;
|
|
|
|
struct btrfs_fs_info *fs_info = root->fs_info;
|
|
|
|
struct extent_buffer *eb;
|
2022-11-23 22:37:23 +00:00
|
|
|
struct extent_io_tree *tree = &fs_info->dirty_buffers;
|
2017-08-28 15:39:26 +00:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
while(1) {
|
2021-04-26 06:27:31 +00:00
|
|
|
again:
|
2017-08-28 15:39:26 +00:00
|
|
|
ret = find_first_extent_bit(tree, 0, &start, &end,
|
2023-04-19 21:17:19 +00:00
|
|
|
EXTENT_DIRTY, NULL);
|
2017-08-28 15:39:26 +00:00
|
|
|
if (ret)
|
|
|
|
break;
|
2021-04-26 06:27:31 +00:00
|
|
|
|
|
|
|
if (btrfs_redirty_extent_buffer_for_zoned(fs_info, start, end))
|
|
|
|
goto again;
|
|
|
|
|
2017-08-28 15:39:26 +00:00
|
|
|
while(start <= end) {
|
2022-11-23 22:37:22 +00:00
|
|
|
eb = find_first_extent_buffer(fs_info, start);
|
2017-08-28 15:39:26 +00:00
|
|
|
BUG_ON(!eb || eb->start != start);
|
|
|
|
ret = write_tree_block(trans, fs_info, eb);
|
2022-09-23 11:59:46 +00:00
|
|
|
if (ret < 0) {
|
|
|
|
free_extent_buffer(eb);
|
|
|
|
errno = -ret;
|
|
|
|
error("failed to write tree block %llu: %m",
|
|
|
|
eb->start);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2017-08-28 15:39:26 +00:00
|
|
|
start += eb->len;
|
2023-08-23 14:32:31 +00:00
|
|
|
btrfs_clear_buffer_dirty(trans, eb);
|
2017-08-28 15:39:26 +00:00
|
|
|
free_extent_buffer(eb);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
2022-09-23 11:59:46 +00:00
|
|
|
cleanup:
|
|
|
|
/*
|
|
|
|
* Mark all remaining dirty ebs clean, as they have no chance to be written
|
|
|
|
* back anymore.
|
|
|
|
*/
|
2023-09-05 20:21:51 +00:00
|
|
|
clean_dirty_buffers(trans);
|
2022-09-23 11:59:46 +00:00
|
|
|
return ret;
|
2017-08-28 15:39:26 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
|
|
|
|
struct btrfs_root *root)
|
|
|
|
{
|
|
|
|
u64 transid = trans->transid;
|
|
|
|
int ret = 0;
|
|
|
|
struct btrfs_fs_info *fs_info = root->fs_info;
|
btrfs-progs: Fix false ENOSPC alert by tracking used space correctly
[BUG]
There is a bug report of unexpected ENOSPC from btrfs-convert, issue #123.
After some debugging, even when we have enough unallocated space, we
still hit ENOSPC at btrfs_reserve_extent().
[CAUSE]
Btrfs-progs relies on chunk preallocator to make enough space for
data/metadata.
However after the introduction of delayed-ref, it's no longer reliable
to rely on btrfs_space_info::bytes_used and
btrfs_space_info::bytes_pinned to calculate used metadata space.
For a running transaction with a lot of allocated tree blocks,
btrfs_space_info::bytes_used stays its original value, and will only be
updated when running delayed ref.
This makes btrfs-progs chunk preallocator completely useless. And for
btrfs-convert/mkfs.btrfs --rootdir, if we're going to have enough
metadata to fill a metadata block group in one transaction, we will hit
ENOSPC no matter whether we have enough unallocated space.
[FIX]
This patch will introduce btrfs_space_info::bytes_reserved to track how
many space we have reserved but not yet committed to extent tree.
To support this change, this commit also introduces the following
modification:
- More comment on btrfs_space_info::bytes_*
To make code a little easier to read
- Export update_space_info() to preallocate empty data/metadata space
info for mkfs.
For mkfs, we only have a temporary fs image with SYSTEM chunk only.
Export update_space_info() so that we can preallocate empty
data/metadata space info before we start a transaction.
- Proper btrfs_space_info::bytes_reserved update
The timing is the as kernel (except we don't need to update
bytes_reserved for data extents)
* Increase bytes_reserved when call alloc_reserved_tree_block()
* Decrease bytes_reserved when running delayed refs
With the help of head->must_insert_reserved to determine whether we
need to decrease.
Issue: #123
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2019-05-24 23:32:43 +00:00
|
|
|
struct btrfs_space_info *sinfo;
|
2017-08-28 15:39:26 +00:00
|
|
|
|
2023-09-05 20:21:51 +00:00
|
|
|
if (trans->fs_info->transaction_aborted) {
|
|
|
|
ret = -EROFS;
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2018-06-08 12:47:57 +00:00
|
|
|
/*
|
|
|
|
* Flush all accumulated delayed refs so that root-tree updates are
|
|
|
|
* consistent
|
|
|
|
*/
|
|
|
|
ret = btrfs_run_delayed_refs(trans, -1);
|
2019-04-16 07:15:26 +00:00
|
|
|
if (ret < 0)
|
|
|
|
goto error;
|
2017-08-29 12:55:37 +00:00
|
|
|
|
2017-08-28 15:39:26 +00:00
|
|
|
if (root->commit_root == root->node)
|
|
|
|
goto commit_tree;
|
|
|
|
if (root == root->fs_info->tree_root)
|
|
|
|
goto commit_tree;
|
|
|
|
if (root == root->fs_info->chunk_root)
|
|
|
|
goto commit_tree;
|
2022-03-07 22:10:46 +00:00
|
|
|
if (root == root->fs_info->block_group_root)
|
|
|
|
goto commit_tree;
|
2017-08-28 15:39:26 +00:00
|
|
|
|
|
|
|
free_extent_buffer(root->commit_root);
|
|
|
|
root->commit_root = NULL;
|
|
|
|
|
|
|
|
btrfs_set_root_bytenr(&root->root_item, root->node->start);
|
|
|
|
btrfs_set_root_generation(&root->root_item, trans->transid);
|
|
|
|
root->root_item.level = btrfs_header_level(root->node);
|
|
|
|
ret = btrfs_update_root(trans, root->fs_info->tree_root,
|
|
|
|
&root->root_key, &root->root_item);
|
2019-04-16 07:15:26 +00:00
|
|
|
if (ret < 0)
|
|
|
|
goto error;
|
2018-06-08 12:47:57 +00:00
|
|
|
|
2017-08-28 15:39:26 +00:00
|
|
|
commit_tree:
|
|
|
|
ret = commit_tree_roots(trans, fs_info);
|
2019-04-16 07:15:26 +00:00
|
|
|
if (ret < 0)
|
|
|
|
goto error;
|
btrfs-progs: Exhaust delayed refs and dirty block groups to prevent delayed refs lost
[BUG]
Btrfs-progs sometimes fails to find certain extent backref when
committing transaction.
The most reliable way to reproduce it is fsck-test/013 on 64K page sized
system:
[...]
adding new data backref on 315859712 root 287 owner 292 offset 0 found 1
btrfs unable to find ref byte nr 31850496 parent 0 root 2 owner 0 offset 0
Failed to find [30867456, 168, 65536]
Also there are some github bug reports related to this problem.
[CAUSE]
Commit 909357e86799 ("btrfs-progs: Wire up delayed refs") introduced
delayed refs in btrfs-progs.
However in that commit, delayed refs are not run at correct timing.
That commit calls btrfs_run_delayed_refs() before
btrfs_write_dirty_block_groups(), which needs to update
BLOCK_GROUP_ITEMs in extent tree, thus could cause new delayed refs.
This means each time we commit a transaction, we may screw up the extent
tree by dropping some pending delayed refs, like:
Transaction 711:
btrfs_commit_transaction()
|- btrfs_run_delayed_refs()
| Now all delayed refs are written to extent tree
|
|- btrfs_write_dirty_block_groups()
| Needs to update extent tree root
| ADD_DELAYED_REF to 315859712.
| Delayed refs are attached to current trans handle.
|
|- __commit_transaction()
|- write_ctree_super()
|- btrfs_finish_extent_commit()
|- kfree(trans)
Now delayed ref for 315859712 are lost
Transaction 712:
Tree block 315859712 get dropped
btrfs_commit_transaction()
|- btrfs_run_delayed_refs()
|- run_one_delayed_ref()
|- __free_extent()
As previous ADD_DELAYED_REF to 315859712 is lost, extent tree
doesn't have any backref for 315859712, causing the bug
In fact, commit c31edf610cbe ("btrfs-progs: Fix false ENOSPC alert by
tracking used space correctly") detects the tree block leakage, but in
the reproducer we have too much noise, thus nobody notices the leakage
warning.
[FIX]
We can't just move btrfs_run_delayed_refs() after
btrfs_write_dirty_block_groups(), as during btrfs_run_delayed_refs(), we
can re-dirty block groups.
Thus we need to exhaust both delayed refs and dirty blocks.
This patch will call btrfs_write_dirty_block_groups() and
btrfs_run_delayed_refs() in a loop until both delayed refs and dirty
blocks are exhausted. Much like what we do in commit_cowonly_roots() in
kernel.
Also, to prevent such problem from happening again (and not to debug
such problem again), add extra check on delayed refs before freeing the
transaction handle.
Reported-by: Klemens Schölhorn <klemens@schoelhorn.eu>
Issue: #187
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2019-07-08 07:33:51 +00:00
|
|
|
|
2018-06-08 12:47:57 +00:00
|
|
|
/*
|
btrfs-progs: Exhaust delayed refs and dirty block groups to prevent delayed refs lost
[BUG]
Btrfs-progs sometimes fails to find certain extent backref when
committing transaction.
The most reliable way to reproduce it is fsck-test/013 on 64K page sized
system:
[...]
adding new data backref on 315859712 root 287 owner 292 offset 0 found 1
btrfs unable to find ref byte nr 31850496 parent 0 root 2 owner 0 offset 0
Failed to find [30867456, 168, 65536]
Also there are some github bug reports related to this problem.
[CAUSE]
Commit 909357e86799 ("btrfs-progs: Wire up delayed refs") introduced
delayed refs in btrfs-progs.
However in that commit, delayed refs are not run at correct timing.
That commit calls btrfs_run_delayed_refs() before
btrfs_write_dirty_block_groups(), which needs to update
BLOCK_GROUP_ITEMs in extent tree, thus could cause new delayed refs.
This means each time we commit a transaction, we may screw up the extent
tree by dropping some pending delayed refs, like:
Transaction 711:
btrfs_commit_transaction()
|- btrfs_run_delayed_refs()
| Now all delayed refs are written to extent tree
|
|- btrfs_write_dirty_block_groups()
| Needs to update extent tree root
| ADD_DELAYED_REF to 315859712.
| Delayed refs are attached to current trans handle.
|
|- __commit_transaction()
|- write_ctree_super()
|- btrfs_finish_extent_commit()
|- kfree(trans)
Now delayed ref for 315859712 are lost
Transaction 712:
Tree block 315859712 get dropped
btrfs_commit_transaction()
|- btrfs_run_delayed_refs()
|- run_one_delayed_ref()
|- __free_extent()
As previous ADD_DELAYED_REF to 315859712 is lost, extent tree
doesn't have any backref for 315859712, causing the bug
In fact, commit c31edf610cbe ("btrfs-progs: Fix false ENOSPC alert by
tracking used space correctly") detects the tree block leakage, but in
the reproducer we have too much noise, thus nobody notices the leakage
warning.
[FIX]
We can't just move btrfs_run_delayed_refs() after
btrfs_write_dirty_block_groups(), as during btrfs_run_delayed_refs(), we
can re-dirty block groups.
Thus we need to exhaust both delayed refs and dirty blocks.
This patch will call btrfs_write_dirty_block_groups() and
btrfs_run_delayed_refs() in a loop until both delayed refs and dirty
blocks are exhausted. Much like what we do in commit_cowonly_roots() in
kernel.
Also, to prevent such problem from happening again (and not to debug
such problem again), add extra check on delayed refs before freeing the
transaction handle.
Reported-by: Klemens Schölhorn <klemens@schoelhorn.eu>
Issue: #187
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2019-07-08 07:33:51 +00:00
|
|
|
* btrfs_write_dirty_block_groups() can cause COW thus new delayed
|
|
|
|
* tree refs, while run such delayed tree refs can dirty block groups
|
|
|
|
* again, we need to exhause both dirty blocks and delayed refs
|
2018-06-08 12:47:57 +00:00
|
|
|
*/
|
btrfs-progs: Exhaust delayed refs and dirty block groups to prevent delayed refs lost
[BUG]
Btrfs-progs sometimes fails to find certain extent backref when
committing transaction.
The most reliable way to reproduce it is fsck-test/013 on 64K page sized
system:
[...]
adding new data backref on 315859712 root 287 owner 292 offset 0 found 1
btrfs unable to find ref byte nr 31850496 parent 0 root 2 owner 0 offset 0
Failed to find [30867456, 168, 65536]
Also there are some github bug reports related to this problem.
[CAUSE]
Commit 909357e86799 ("btrfs-progs: Wire up delayed refs") introduced
delayed refs in btrfs-progs.
However in that commit, delayed refs are not run at correct timing.
That commit calls btrfs_run_delayed_refs() before
btrfs_write_dirty_block_groups(), which needs to update
BLOCK_GROUP_ITEMs in extent tree, thus could cause new delayed refs.
This means each time we commit a transaction, we may screw up the extent
tree by dropping some pending delayed refs, like:
Transaction 711:
btrfs_commit_transaction()
|- btrfs_run_delayed_refs()
| Now all delayed refs are written to extent tree
|
|- btrfs_write_dirty_block_groups()
| Needs to update extent tree root
| ADD_DELAYED_REF to 315859712.
| Delayed refs are attached to current trans handle.
|
|- __commit_transaction()
|- write_ctree_super()
|- btrfs_finish_extent_commit()
|- kfree(trans)
Now delayed ref for 315859712 are lost
Transaction 712:
Tree block 315859712 get dropped
btrfs_commit_transaction()
|- btrfs_run_delayed_refs()
|- run_one_delayed_ref()
|- __free_extent()
As previous ADD_DELAYED_REF to 315859712 is lost, extent tree
doesn't have any backref for 315859712, causing the bug
In fact, commit c31edf610cbe ("btrfs-progs: Fix false ENOSPC alert by
tracking used space correctly") detects the tree block leakage, but in
the reproducer we have too much noise, thus nobody notices the leakage
warning.
[FIX]
We can't just move btrfs_run_delayed_refs() after
btrfs_write_dirty_block_groups(), as during btrfs_run_delayed_refs(), we
can re-dirty block groups.
Thus we need to exhaust both delayed refs and dirty blocks.
This patch will call btrfs_write_dirty_block_groups() and
btrfs_run_delayed_refs() in a loop until both delayed refs and dirty
blocks are exhausted. Much like what we do in commit_cowonly_roots() in
kernel.
Also, to prevent such problem from happening again (and not to debug
such problem again), add extra check on delayed refs before freeing the
transaction handle.
Reported-by: Klemens Schölhorn <klemens@schoelhorn.eu>
Issue: #187
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2019-07-08 07:33:51 +00:00
|
|
|
while (!RB_EMPTY_ROOT(&trans->delayed_refs.href_root) ||
|
2019-12-18 05:18:48 +00:00
|
|
|
!list_empty(&trans->dirty_bgs)) {
|
btrfs-progs: Exhaust delayed refs and dirty block groups to prevent delayed refs lost
[BUG]
Btrfs-progs sometimes fails to find certain extent backref when
committing transaction.
The most reliable way to reproduce it is fsck-test/013 on 64K page sized
system:
[...]
adding new data backref on 315859712 root 287 owner 292 offset 0 found 1
btrfs unable to find ref byte nr 31850496 parent 0 root 2 owner 0 offset 0
Failed to find [30867456, 168, 65536]
Also there are some github bug reports related to this problem.
[CAUSE]
Commit 909357e86799 ("btrfs-progs: Wire up delayed refs") introduced
delayed refs in btrfs-progs.
However in that commit, delayed refs are not run at correct timing.
That commit calls btrfs_run_delayed_refs() before
btrfs_write_dirty_block_groups(), which needs to update
BLOCK_GROUP_ITEMs in extent tree, thus could cause new delayed refs.
This means each time we commit a transaction, we may screw up the extent
tree by dropping some pending delayed refs, like:
Transaction 711:
btrfs_commit_transaction()
|- btrfs_run_delayed_refs()
| Now all delayed refs are written to extent tree
|
|- btrfs_write_dirty_block_groups()
| Needs to update extent tree root
| ADD_DELAYED_REF to 315859712.
| Delayed refs are attached to current trans handle.
|
|- __commit_transaction()
|- write_ctree_super()
|- btrfs_finish_extent_commit()
|- kfree(trans)
Now delayed ref for 315859712 are lost
Transaction 712:
Tree block 315859712 get dropped
btrfs_commit_transaction()
|- btrfs_run_delayed_refs()
|- run_one_delayed_ref()
|- __free_extent()
As previous ADD_DELAYED_REF to 315859712 is lost, extent tree
doesn't have any backref for 315859712, causing the bug
In fact, commit c31edf610cbe ("btrfs-progs: Fix false ENOSPC alert by
tracking used space correctly") detects the tree block leakage, but in
the reproducer we have too much noise, thus nobody notices the leakage
warning.
[FIX]
We can't just move btrfs_run_delayed_refs() after
btrfs_write_dirty_block_groups(), as during btrfs_run_delayed_refs(), we
can re-dirty block groups.
Thus we need to exhaust both delayed refs and dirty blocks.
This patch will call btrfs_write_dirty_block_groups() and
btrfs_run_delayed_refs() in a loop until both delayed refs and dirty
blocks are exhausted. Much like what we do in commit_cowonly_roots() in
kernel.
Also, to prevent such problem from happening again (and not to debug
such problem again), add extra check on delayed refs before freeing the
transaction handle.
Reported-by: Klemens Schölhorn <klemens@schoelhorn.eu>
Issue: #187
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2019-07-08 07:33:51 +00:00
|
|
|
ret = btrfs_write_dirty_block_groups(trans);
|
|
|
|
if (ret < 0)
|
|
|
|
goto error;
|
|
|
|
ret = btrfs_run_delayed_refs(trans, -1);
|
|
|
|
if (ret < 0)
|
|
|
|
goto error;
|
|
|
|
}
|
2022-09-23 11:59:46 +00:00
|
|
|
ret = __commit_transaction(trans, root);
|
2018-07-05 07:37:29 +00:00
|
|
|
if (ret < 0)
|
2019-04-16 07:15:26 +00:00
|
|
|
goto error;
|
btrfs-progs: Exhaust delayed refs and dirty block groups to prevent delayed refs lost
[BUG]
Btrfs-progs sometimes fails to find certain extent backref when
committing transaction.
The most reliable way to reproduce it is fsck-test/013 on 64K page sized
system:
[...]
adding new data backref on 315859712 root 287 owner 292 offset 0 found 1
btrfs unable to find ref byte nr 31850496 parent 0 root 2 owner 0 offset 0
Failed to find [30867456, 168, 65536]
Also there are some github bug reports related to this problem.
[CAUSE]
Commit 909357e86799 ("btrfs-progs: Wire up delayed refs") introduced
delayed refs in btrfs-progs.
However in that commit, delayed refs are not run at correct timing.
That commit calls btrfs_run_delayed_refs() before
btrfs_write_dirty_block_groups(), which needs to update
BLOCK_GROUP_ITEMs in extent tree, thus could cause new delayed refs.
This means each time we commit a transaction, we may screw up the extent
tree by dropping some pending delayed refs, like:
Transaction 711:
btrfs_commit_transaction()
|- btrfs_run_delayed_refs()
| Now all delayed refs are written to extent tree
|
|- btrfs_write_dirty_block_groups()
| Needs to update extent tree root
| ADD_DELAYED_REF to 315859712.
| Delayed refs are attached to current trans handle.
|
|- __commit_transaction()
|- write_ctree_super()
|- btrfs_finish_extent_commit()
|- kfree(trans)
Now delayed ref for 315859712 are lost
Transaction 712:
Tree block 315859712 get dropped
btrfs_commit_transaction()
|- btrfs_run_delayed_refs()
|- run_one_delayed_ref()
|- __free_extent()
As previous ADD_DELAYED_REF to 315859712 is lost, extent tree
doesn't have any backref for 315859712, causing the bug
In fact, commit c31edf610cbe ("btrfs-progs: Fix false ENOSPC alert by
tracking used space correctly") detects the tree block leakage, but in
the reproducer we have too much noise, thus nobody notices the leakage
warning.
[FIX]
We can't just move btrfs_run_delayed_refs() after
btrfs_write_dirty_block_groups(), as during btrfs_run_delayed_refs(), we
can re-dirty block groups.
Thus we need to exhaust both delayed refs and dirty blocks.
This patch will call btrfs_write_dirty_block_groups() and
btrfs_run_delayed_refs() in a loop until both delayed refs and dirty
blocks are exhausted. Much like what we do in commit_cowonly_roots() in
kernel.
Also, to prevent such problem from happening again (and not to debug
such problem again), add extra check on delayed refs before freeing the
transaction handle.
Reported-by: Klemens Schölhorn <klemens@schoelhorn.eu>
Issue: #187
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2019-07-08 07:33:51 +00:00
|
|
|
|
|
|
|
/* There should be no pending delayed refs now */
|
|
|
|
if (!RB_EMPTY_ROOT(&trans->delayed_refs.href_root)) {
|
|
|
|
error("uncommitted delayed refs detected");
|
|
|
|
goto error;
|
|
|
|
}
|
2019-04-11 05:24:26 +00:00
|
|
|
ret = write_ctree_super(trans);
|
2019-04-16 07:15:25 +00:00
|
|
|
btrfs_finish_extent_commit(trans);
|
2017-08-28 15:39:26 +00:00
|
|
|
kfree(trans);
|
|
|
|
free_extent_buffer(root->commit_root);
|
|
|
|
root->commit_root = NULL;
|
|
|
|
fs_info->running_transaction = NULL;
|
|
|
|
fs_info->last_trans_committed = transid;
|
btrfs-progs: Fix false ENOSPC alert by tracking used space correctly
[BUG]
There is a bug report of unexpected ENOSPC from btrfs-convert, issue #123.
After some debugging, even when we have enough unallocated space, we
still hit ENOSPC at btrfs_reserve_extent().
[CAUSE]
Btrfs-progs relies on chunk preallocator to make enough space for
data/metadata.
However after the introduction of delayed-ref, it's no longer reliable
to rely on btrfs_space_info::bytes_used and
btrfs_space_info::bytes_pinned to calculate used metadata space.
For a running transaction with a lot of allocated tree blocks,
btrfs_space_info::bytes_used stays its original value, and will only be
updated when running delayed ref.
This makes btrfs-progs chunk preallocator completely useless. And for
btrfs-convert/mkfs.btrfs --rootdir, if we're going to have enough
metadata to fill a metadata block group in one transaction, we will hit
ENOSPC no matter whether we have enough unallocated space.
[FIX]
This patch will introduce btrfs_space_info::bytes_reserved to track how
many space we have reserved but not yet committed to extent tree.
To support this change, this commit also introduces the following
modification:
- More comment on btrfs_space_info::bytes_*
To make code a little easier to read
- Export update_space_info() to preallocate empty data/metadata space
info for mkfs.
For mkfs, we only have a temporary fs image with SYSTEM chunk only.
Export update_space_info() so that we can preallocate empty
data/metadata space info before we start a transaction.
- Proper btrfs_space_info::bytes_reserved update
The timing is the as kernel (except we don't need to update
bytes_reserved for data extents)
* Increase bytes_reserved when call alloc_reserved_tree_block()
* Decrease bytes_reserved when running delayed refs
With the help of head->must_insert_reserved to determine whether we
need to decrease.
Issue: #123
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2019-05-24 23:32:43 +00:00
|
|
|
list_for_each_entry(sinfo, &fs_info->space_info, list) {
|
|
|
|
if (sinfo->bytes_reserved) {
|
|
|
|
warning(
|
|
|
|
"reserved space leaked, transid=%llu flag=0x%llx bytes_reserved=%llu",
|
|
|
|
transid, sinfo->flags, sinfo->bytes_reserved);
|
|
|
|
}
|
|
|
|
}
|
2019-04-16 07:15:26 +00:00
|
|
|
return ret;
|
|
|
|
error:
|
2022-09-23 11:59:46 +00:00
|
|
|
btrfs_abort_transaction(trans, ret);
|
2023-09-05 20:21:51 +00:00
|
|
|
clean_dirty_buffers(trans);
|
2019-04-16 07:15:26 +00:00
|
|
|
btrfs_destroy_delayed_refs(trans);
|
|
|
|
free(trans);
|
2018-07-05 07:37:29 +00:00
|
|
|
return ret;
|
2017-08-28 15:39:26 +00:00
|
|
|
}
|
|
|
|
|
2017-08-28 15:44:41 +00:00
|
|
|
void btrfs_abort_transaction(struct btrfs_trans_handle *trans, int error)
|
|
|
|
{
|
|
|
|
trans->fs_info->transaction_aborted = error;
|
|
|
|
}
|