mirror of
https://github.com/kdave/btrfs-progs
synced 2025-01-11 16:29:42 +00:00
2a8cec4b12
[BUG] Btrfs-progs sometimes fails to find certain extent backref when committing transaction. The most reliable way to reproduce it is fsck-test/013 on a 64K page sized system: [...] adding new data backref on 315859712 root 287 owner 292 offset 0 found 1 btrfs unable to find ref byte nr 31850496 parent 0 root 2 owner 0 offset 0 Failed to find [30867456, 168, 65536] Also there are some github bug reports related to this problem. [CAUSE] Commit 909357e867
("btrfs-progs: Wire up delayed refs") introduced delayed refs in btrfs-progs. However in that commit, delayed refs are not run at the correct time. That commit calls btrfs_run_delayed_refs() before btrfs_write_dirty_block_groups(), which needs to update BLOCK_GROUP_ITEMs in extent tree, thus could cause new delayed refs. This means each time we commit a transaction, we may screw up the extent tree by dropping some pending delayed refs, like: Transaction 711: btrfs_commit_transaction() |- btrfs_run_delayed_refs() | Now all delayed refs are written to extent tree | |- btrfs_write_dirty_block_groups() | Needs to update extent tree root | ADD_DELAYED_REF to 315859712. | Delayed refs are attached to current trans handle. | |- __commit_transaction() |- write_ctree_super() |- btrfs_finish_extent_commit() |- kfree(trans) Now delayed ref for 315859712 are lost Transaction 712: Tree block 315859712 get dropped btrfs_commit_transaction() |- btrfs_run_delayed_refs() |- run_one_delayed_ref() |- __free_extent() As previous ADD_DELAYED_REF to 315859712 is lost, extent tree doesn't have any backref for 315859712, causing the bug. In fact, commit c31edf610c
("btrfs-progs: Fix false ENOSPC alert by tracking used space correctly") detects the tree block leakage, but in the reproducer we have too much noise, thus nobody notices the leakage warning. [FIX] We can't just move btrfs_run_delayed_refs() after btrfs_write_dirty_block_groups(), as during btrfs_run_delayed_refs(), we can re-dirty block groups. Thus we need to exhaust both delayed refs and dirty blocks. This patch will call btrfs_write_dirty_block_groups() and btrfs_run_delayed_refs() in a loop until both delayed refs and dirty blocks are exhausted. Much like what we do in commit_cowonly_roots() in kernel. Also, to prevent such problem from happening again (and not to debug such problem again), add extra check on delayed refs before freeing the transaction handle. Reported-by: Klemens Schölhorn <klemens@schoelhorn.eu> Issue: #187 Reviewed-by: Nikolay Borisov <nborisov@suse.com> Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
246 lines
6.5 KiB
C
246 lines
6.5 KiB
C
/*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public
|
|
* License v2 as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public
|
|
* License along with this program; if not, write to the
|
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
 * Boston, MA 02111-1307, USA.
|
|
*/
|
|
|
|
#include "kerncompat.h"
|
|
#include "disk-io.h"
|
|
#include "transaction.h"
|
|
#include "delayed-ref.h"
|
|
|
|
#include "common/messages.h"
|
|
|
|
struct btrfs_trans_handle* btrfs_start_transaction(struct btrfs_root *root,
|
|
int num_blocks)
|
|
{
|
|
struct btrfs_fs_info *fs_info = root->fs_info;
|
|
struct btrfs_trans_handle *h = kzalloc(sizeof(*h), GFP_NOFS);
|
|
|
|
if (fs_info->transaction_aborted)
|
|
return ERR_PTR(-EROFS);
|
|
|
|
if (!h)
|
|
return ERR_PTR(-ENOMEM);
|
|
if (root->commit_root) {
|
|
error("commit_root already set when starting transaction");
|
|
kfree(h);
|
|
return ERR_PTR(-EINVAL);
|
|
}
|
|
if (fs_info->running_transaction) {
|
|
error("attempt to start transaction over already running one");
|
|
kfree(h);
|
|
return ERR_PTR(-EINVAL);
|
|
}
|
|
h->fs_info = fs_info;
|
|
fs_info->running_transaction = h;
|
|
fs_info->generation++;
|
|
h->transid = fs_info->generation;
|
|
h->blocks_reserved = num_blocks;
|
|
h->reinit_extent_tree = false;
|
|
h->allocating_chunk = 0;
|
|
root->last_trans = h->transid;
|
|
root->commit_root = root->node;
|
|
extent_buffer_get(root->node);
|
|
|
|
return h;
|
|
}
|
|
|
|
static int update_cowonly_root(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root)
|
|
{
|
|
int ret;
|
|
u64 old_root_bytenr;
|
|
struct btrfs_root *tree_root = root->fs_info->tree_root;
|
|
|
|
while(1) {
|
|
old_root_bytenr = btrfs_root_bytenr(&root->root_item);
|
|
if (old_root_bytenr == root->node->start)
|
|
break;
|
|
btrfs_set_root_bytenr(&root->root_item,
|
|
root->node->start);
|
|
btrfs_set_root_generation(&root->root_item,
|
|
trans->transid);
|
|
root->root_item.level = btrfs_header_level(root->node);
|
|
ret = btrfs_update_root(trans, tree_root,
|
|
&root->root_key,
|
|
&root->root_item);
|
|
if (ret < 0)
|
|
return ret;
|
|
btrfs_write_dirty_block_groups(trans);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int commit_tree_roots(struct btrfs_trans_handle *trans,
|
|
struct btrfs_fs_info *fs_info)
|
|
{
|
|
struct btrfs_root *root;
|
|
struct list_head *next;
|
|
struct extent_buffer *eb;
|
|
int ret;
|
|
|
|
if (fs_info->readonly)
|
|
return 0;
|
|
|
|
eb = fs_info->tree_root->node;
|
|
extent_buffer_get(eb);
|
|
ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb);
|
|
free_extent_buffer(eb);
|
|
if (ret)
|
|
return ret;
|
|
|
|
/*
|
|
* If the above CoW is the first one to dirty the current tree_root,
|
|
* delayed refs for it won't be run until after this function has
|
|
* finished executing, meaning we won't process the extent tree root,
|
|
* which will have been added to ->dirty_cowonly_roots. So run
|
|
* delayed refs here as well.
|
|
*/
|
|
ret = btrfs_run_delayed_refs(trans, -1);
|
|
if (ret)
|
|
return ret;
|
|
|
|
while(!list_empty(&fs_info->dirty_cowonly_roots)) {
|
|
next = fs_info->dirty_cowonly_roots.next;
|
|
list_del_init(next);
|
|
root = list_entry(next, struct btrfs_root, dirty_list);
|
|
ret = update_cowonly_root(trans, root);
|
|
free_extent_buffer(root->commit_root);
|
|
root->commit_root = NULL;
|
|
if (ret < 0)
|
|
return ret;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int __commit_transaction(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root)
|
|
{
|
|
u64 start;
|
|
u64 end;
|
|
struct btrfs_fs_info *fs_info = root->fs_info;
|
|
struct extent_buffer *eb;
|
|
struct extent_io_tree *tree = &fs_info->extent_cache;
|
|
int ret;
|
|
|
|
while(1) {
|
|
ret = find_first_extent_bit(tree, 0, &start, &end,
|
|
EXTENT_DIRTY);
|
|
if (ret)
|
|
break;
|
|
while(start <= end) {
|
|
eb = find_first_extent_buffer(tree, start);
|
|
BUG_ON(!eb || eb->start != start);
|
|
ret = write_tree_block(trans, fs_info, eb);
|
|
BUG_ON(ret);
|
|
start += eb->len;
|
|
clear_extent_buffer_dirty(eb);
|
|
free_extent_buffer(eb);
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root)
|
|
{
|
|
u64 transid = trans->transid;
|
|
int ret = 0;
|
|
struct btrfs_fs_info *fs_info = root->fs_info;
|
|
struct btrfs_space_info *sinfo;
|
|
|
|
if (trans->fs_info->transaction_aborted)
|
|
return -EROFS;
|
|
/*
|
|
* Flush all accumulated delayed refs so that root-tree updates are
|
|
* consistent
|
|
*/
|
|
ret = btrfs_run_delayed_refs(trans, -1);
|
|
if (ret < 0)
|
|
goto error;
|
|
|
|
if (root->commit_root == root->node)
|
|
goto commit_tree;
|
|
if (root == root->fs_info->tree_root)
|
|
goto commit_tree;
|
|
if (root == root->fs_info->chunk_root)
|
|
goto commit_tree;
|
|
|
|
free_extent_buffer(root->commit_root);
|
|
root->commit_root = NULL;
|
|
|
|
btrfs_set_root_bytenr(&root->root_item, root->node->start);
|
|
btrfs_set_root_generation(&root->root_item, trans->transid);
|
|
root->root_item.level = btrfs_header_level(root->node);
|
|
ret = btrfs_update_root(trans, root->fs_info->tree_root,
|
|
&root->root_key, &root->root_item);
|
|
if (ret < 0)
|
|
goto error;
|
|
|
|
commit_tree:
|
|
ret = commit_tree_roots(trans, fs_info);
|
|
if (ret < 0)
|
|
goto error;
|
|
|
|
/*
|
|
* btrfs_write_dirty_block_groups() can cause COW thus new delayed
|
|
* tree refs, while run such delayed tree refs can dirty block groups
|
|
* again, we need to exhause both dirty blocks and delayed refs
|
|
*/
|
|
while (!RB_EMPTY_ROOT(&trans->delayed_refs.href_root) ||
|
|
test_range_bit(&fs_info->block_group_cache, 0, (u64)-1,
|
|
BLOCK_GROUP_DIRTY, 0)) {
|
|
ret = btrfs_write_dirty_block_groups(trans);
|
|
if (ret < 0)
|
|
goto error;
|
|
ret = btrfs_run_delayed_refs(trans, -1);
|
|
if (ret < 0)
|
|
goto error;
|
|
}
|
|
__commit_transaction(trans, root);
|
|
if (ret < 0)
|
|
goto error;
|
|
|
|
/* There should be no pending delayed refs now */
|
|
if (!RB_EMPTY_ROOT(&trans->delayed_refs.href_root)) {
|
|
error("uncommitted delayed refs detected");
|
|
goto error;
|
|
}
|
|
ret = write_ctree_super(trans);
|
|
btrfs_finish_extent_commit(trans);
|
|
kfree(trans);
|
|
free_extent_buffer(root->commit_root);
|
|
root->commit_root = NULL;
|
|
fs_info->running_transaction = NULL;
|
|
fs_info->last_trans_committed = transid;
|
|
list_for_each_entry(sinfo, &fs_info->space_info, list) {
|
|
if (sinfo->bytes_reserved) {
|
|
warning(
|
|
"reserved space leaked, transid=%llu flag=0x%llx bytes_reserved=%llu",
|
|
transid, sinfo->flags, sinfo->bytes_reserved);
|
|
}
|
|
}
|
|
return ret;
|
|
error:
|
|
btrfs_destroy_delayed_refs(trans);
|
|
free(trans);
|
|
return ret;
|
|
}
|
|
|
|
void btrfs_abort_transaction(struct btrfs_trans_handle *trans, int error)
|
|
{
|
|
trans->fs_info->transaction_aborted = error;
|
|
}
|