/* btrfs-progs/kernel-shared/transaction.c */
/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#include "kerncompat.h"
#include <stdlib.h>
#include "kernel-lib/rbtree.h"
#include "kernel-lib/bitops.h"
#include "kernel-shared/disk-io.h"
#include "kernel-shared/transaction.h"
#include "kernel-shared/delayed-ref.h"
#include "kernel-shared/zoned.h"
#include "kernel-shared/accessors.h"
#include "kernel-shared/ctree.h"
#include "kernel-shared/extent_io.h"
#include "kernel-shared/locking.h"
#include "kernel-shared/uapi/btrfs_tree.h"
#include "common/messages.h"
/*
* The metadata reservation code is completely different from the kernel:
*
* - No need to support reclaim
* - No support for transaction join
*
* This is because btrfs-progs is single threaded: it always starts a
* transaction, does some tree operations, and commits the transaction.
*
* So here we only need to make sure there is enough metadata space, and
* there is no metadata over-commit (which would allow extra metadata
* operations as long as there is unallocated space).
*
* The only extra step we can really do to increase metadata space is to allocate
* new metadata chunks.
*/
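/*
* Worst case, each inserted item may need to CoW one full tree path
* (BTRFS_MAX_LEVEL nodes), doubled since an insert can also split nodes
* on the way down. E.g. with a 16KiB nodesize, one item reserves
* 16KiB * 8 * 2 = 256KiB.
*/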
static unsigned int calc_insert_metadata_size(const struct btrfs_fs_info *fs_info,
unsigned int num_items)
{
return fs_info->nodesize * BTRFS_MAX_LEVEL * num_items * 2;
}
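/*
* Check whether the space info matching @profile still has at least @size
* bytes free. There is no over-commit: used + pinned + reserved + @size
* must fit within the already allocated chunks.
*/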
static bool meta_has_enough_space(struct btrfs_fs_info *fs_info,
u64 profile, unsigned int size)
{
struct btrfs_space_info *sinfo;
profile &= BTRFS_BLOCK_GROUP_TYPE_MASK;
/*
* The fs is temporary (still being created by mkfs); skip the free space
* check as not all meta/sys chunks are set up yet.
*/
if (btrfs_super_magic(fs_info->super_copy) != BTRFS_MAGIC)
return true;
/*
* The extent tree is being rebuilt; skip any free space check as the
* numbers are not reliable.
*/
if (fs_info->rebuilding_extent_tree)
return true;
sinfo = btrfs_find_space_info(fs_info, profile);
if (!sinfo) {
error("unable to find block group for profile 0x%llx", profile);
return false;
}
if (sinfo->bytes_used + sinfo->bytes_pinned + sinfo->bytes_reserved +
size < sinfo->total_bytes)
return true;
return false;
}
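/*
* Allocate and initialize a transaction handle, bump the filesystem
* generation, and pin the current root node as the commit root.
*/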
static struct btrfs_trans_handle *alloc_trans_handle(struct btrfs_root *root,
unsigned int num_items)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *h;
h = kzalloc(sizeof(*h), GFP_NOFS);
if (!h)
return ERR_PTR(-ENOMEM);
h->fs_info = fs_info;
fs_info->running_transaction = h;
fs_info->generation++;
h->transid = fs_info->generation;
h->blocks_reserved = num_items;
h->reinit_extent_tree = false;
h->allocating_chunk = 0;
root->last_trans = h->transid;
root->commit_root = root->node;
extent_buffer_get(root->node);
INIT_LIST_HEAD(&h->dirty_bgs);
return h;
}
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
unsigned int num_items)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *h;
unsigned int rsv_bytes;
bool need_retry = false;
u64 profile;
if (root->root_key.objectid == BTRFS_CHUNK_TREE_OBJECTID)
profile = BTRFS_BLOCK_GROUP_SYSTEM |
(fs_info->avail_system_alloc_bits &
fs_info->system_alloc_profile);
else
profile = BTRFS_BLOCK_GROUP_METADATA |
(fs_info->avail_metadata_alloc_bits &
fs_info->metadata_alloc_profile);
if (fs_info->transaction_aborted)
return ERR_PTR(-EROFS);
if (root->commit_root) {
error("commit_root already set when starting transaction");
return ERR_PTR(-EINVAL);
}
if (fs_info->running_transaction) {
error("attempt to start transaction over already running one");
return ERR_PTR(-EINVAL);
}
/*
* Some call sites pass num_items == 0, mostly when deleting items; in
* that case still reserve space for one item.
*/
if (num_items == 0)
num_items = 1;
rsv_bytes = calc_insert_metadata_size(fs_info, num_items);
/*
* We should never need so many items that the reservation exceeds the
* size of one metadata chunk.
*/
if (rsv_bytes > SZ_1G) {
error("too much metadata space required: num_items %u reserved bytes %u",
num_items, rsv_bytes);
return ERR_PTR(-EINVAL);
}
if (!meta_has_enough_space(fs_info, profile, rsv_bytes))
need_retry = true;
h = alloc_trans_handle(root, num_items);
if (IS_ERR(h))
return h;
if (need_retry) {
int ret;
ret = btrfs_try_chunk_alloc(h, fs_info, rsv_bytes, profile);
if (ret < 0) {
btrfs_abort_transaction(h, ret);
errno = -ret;
error("failed to allocate new chunk: %m");
return ERR_PTR(ret);
}
ret = btrfs_commit_transaction(h, root);
if (ret < 0) {
errno = -ret;
error("failed to commit transaction for the new chunk: %m");
return ERR_PTR(ret);
}
if (!meta_has_enough_space(fs_info, profile, rsv_bytes)) {
errno = ENOSPC;
error("failed to start transaction: %m");
return ERR_PTR(-ENOSPC);
}
h = alloc_trans_handle(root, num_items);
}
return h;
}
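/*
* A minimal usage sketch; btrfs_insert_item() stands in for any tree
* operation, and key/data/size are placeholders:
*
*	trans = btrfs_start_transaction(root, 1);
*	if (IS_ERR(trans))
*		return PTR_ERR(trans);
*	ret = btrfs_insert_item(trans, root, &key, data, size);
*	if (ret < 0) {
*		btrfs_abort_transaction(trans, ret);
*		return ret;
*	}
*	return btrfs_commit_transaction(trans, root);
*/
/*
* Keep updating the root item of @root in the tree root until the recorded
* bytenr matches the current root node: writing dirty block groups below
* can CoW the root again.
*/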
static int update_cowonly_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
int ret;
u64 old_root_bytenr;
struct btrfs_root *tree_root = root->fs_info->tree_root;
while (1) {
old_root_bytenr = btrfs_root_bytenr(&root->root_item);
if (old_root_bytenr == root->node->start)
break;
btrfs_set_root_bytenr(&root->root_item,
root->node->start);
btrfs_set_root_generation(&root->root_item,
trans->transid);
root->root_item.level = btrfs_header_level(root->node);
ret = btrfs_update_root(trans, tree_root,
&root->root_key,
&root->root_item);
if (ret < 0)
return ret;
ret = btrfs_write_dirty_block_groups(trans);
if (ret)
return ret;
}
return 0;
}
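/*
* Write the new bytenr/generation of every dirty CoW-only root (e.g. the
* extent and csum trees) back into the tree root, CoWing the tree root
* itself first so it belongs to this transaction.
*/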
int commit_tree_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
struct btrfs_root *root;
struct list_head *next;
struct extent_buffer *eb;
int ret;
if (fs_info->readonly)
return 0;
eb = fs_info->tree_root->node;
extent_buffer_get(eb);
ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb,
BTRFS_NESTING_NORMAL);
free_extent_buffer(eb);
if (ret)
return ret;
/*
* If the above CoW is the first one to dirty the current tree_root,
* delayed refs for it won't be run until after this function has
* finished executing, meaning we won't process the extent tree root,
* which will have been added to ->dirty_cowonly_roots. So run
* delayed refs here as well.
*/
ret = btrfs_run_delayed_refs(trans, -1);
if (ret)
return ret;
while (!list_empty(&fs_info->dirty_cowonly_roots)) {
next = fs_info->dirty_cowonly_roots.next;
list_del_init(next);
root = list_entry(next, struct btrfs_root, dirty_list);
clear_bit(BTRFS_ROOT_DIRTY, &root->state);
ret = update_cowonly_root(trans, root);
free_extent_buffer(root->commit_root);
root->commit_root = NULL;
if (ret < 0)
return ret;
}
return 0;
}
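/*
* Clear the dirty bit of every extent buffer still tracked in
* fs_info->dirty_buffers; used to discard buffers that can no longer be
* written back after a failed commit.
*/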
static void clean_dirty_buffers(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct extent_io_tree *tree = &fs_info->dirty_buffers;
struct extent_buffer *eb;
u64 start, end;
while (find_first_extent_bit(tree, 0, &start, &end, EXTENT_DIRTY,
NULL) == 0) {
while (start <= end) {
eb = find_first_extent_buffer(fs_info, start);
BUG_ON(!eb || eb->start != start);
start += eb->len;
btrfs_clear_buffer_dirty(trans, eb);
free_extent_buffer(eb);
}
}
}
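/*
* Write back every dirty tree block tracked in fs_info->dirty_buffers,
* restarting the scan whenever the zoned code re-dirties buffers in the
* range.
*/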
int __commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
u64 start;
u64 end;
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *eb;
struct extent_io_tree *tree = &fs_info->dirty_buffers;
int ret;
while (1) {
again:
ret = find_first_extent_bit(tree, 0, &start, &end,
EXTENT_DIRTY, NULL);
if (ret)
break;
if (btrfs_redirty_extent_buffer_for_zoned(fs_info, start, end))
goto again;
while (start <= end) {
eb = find_first_extent_buffer(fs_info, start);
BUG_ON(!eb || eb->start != start);
ret = write_tree_block(trans, fs_info, eb);
if (ret < 0) {
free_extent_buffer(eb);
errno = -ret;
error("failed to write tree block %llu: %m",
eb->start);
goto cleanup;
}
start += eb->len;
btrfs_clear_buffer_dirty(trans, eb);
free_extent_buffer(eb);
}
}
return 0;
cleanup:
/*
* Mark all remaining dirty extent buffers clean, as they no longer have
* any chance of being written back.
*/
clean_dirty_buffers(trans);
return ret;
}
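/*
* Commit the transaction: run all delayed refs, update the root item of
* @root, commit the CoW-only roots, write back all dirty tree blocks and
* finally the super block. The handle is freed on both the success and
* the error path.
*/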
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
u64 transid = trans->transid;
int ret = 0;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_space_info *sinfo;
if (fs_info->transaction_aborted) {
ret = -EROFS;
goto error;
}
/*
* Flush all accumulated delayed refs so that root-tree updates are
* consistent
*/
ret = btrfs_run_delayed_refs(trans, -1);
if (ret < 0)
goto error;
if (root->commit_root == root->node)
goto commit_tree;
if (root == root->fs_info->tree_root)
goto commit_tree;
if (root == root->fs_info->chunk_root)
goto commit_tree;
if (root == root->fs_info->block_group_root)
goto commit_tree;
free_extent_buffer(root->commit_root);
root->commit_root = NULL;
btrfs_set_root_bytenr(&root->root_item, root->node->start);
btrfs_set_root_generation(&root->root_item, trans->transid);
root->root_item.level = btrfs_header_level(root->node);
ret = btrfs_update_root(trans, root->fs_info->tree_root,
&root->root_key, &root->root_item);
if (ret < 0)
goto error;
commit_tree:
ret = commit_tree_roots(trans, fs_info);
if (ret < 0)
goto error;
/*
* btrfs_write_dirty_block_groups() can cause COW, thus new delayed
* tree refs, while running such delayed refs can dirty block groups
* again. We need to exhaust both the dirty block groups and the
* delayed refs.
*/
while (!RB_EMPTY_ROOT(&trans->delayed_refs.href_root) ||
!list_empty(&trans->dirty_bgs)) {
ret = btrfs_write_dirty_block_groups(trans);
if (ret < 0)
goto error;
ret = btrfs_run_delayed_refs(trans, -1);
if (ret < 0)
goto error;
}
ret = __commit_transaction(trans, root);
if (ret < 0)
goto error;
/* There should be no pending delayed refs now */
if (!RB_EMPTY_ROOT(&trans->delayed_refs.href_root)) {
error("uncommitted delayed refs detected");
ret = -EUCLEAN;
goto error;
}
ret = write_ctree_super(trans);
btrfs_finish_extent_commit(trans);
kfree(trans);
free_extent_buffer(root->commit_root);
root->commit_root = NULL;
fs_info->running_transaction = NULL;
fs_info->last_trans_committed = transid;
list_for_each_entry(sinfo, &fs_info->space_info, list) {
if (sinfo->bytes_reserved) {
warning(
"reserved space leaked, transid=%llu flag=0x%llx bytes_reserved=%llu",
transid, sinfo->flags, sinfo->bytes_reserved);
}
}
return ret;
error:
btrfs_abort_transaction(trans, ret);
clean_dirty_buffers(trans);
btrfs_destroy_delayed_refs(trans);
kfree(trans);
return ret;
}
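/*
* Mark the running transaction as aborted. The abort is sticky: any later
* attempt to start or commit a transaction fails with -EROFS.
*/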
void btrfs_abort_transaction(struct btrfs_trans_handle *trans, int error)
{
trans->fs_info->transaction_aborted = error;
}