/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include "kerncompat.h"
#include <stdlib.h>
#include "kernel-lib/rbtree.h"
#include "kernel-lib/bitops.h"
#include "kernel-shared/disk-io.h"
#include "kernel-shared/transaction.h"
#include "kernel-shared/delayed-ref.h"
#include "kernel-shared/zoned.h"
#include "kernel-shared/accessors.h"
#include "kernel-shared/ctree.h"
#include "kernel-shared/extent_io.h"
#include "kernel-shared/locking.h"
#include "kernel-shared/uapi/btrfs_tree.h"
#include "common/messages.h"

/*
 * The metadata reservation code is completely different from the kernel:
 *
 * - No need to support reclaim
 * - No support for transaction join
 *
 * This is because btrfs-progs is single threaded: it always starts a
 * transaction, does some tree operations, and commits the transaction.
 *
 * So here we only need to make sure we have enough metadata space; there
 * is no metadata over-commit (which in the kernel allows extra metadata
 * operations as long as there is unallocated space).
 *
 * The only extra step we can take to increase metadata space is to
 * allocate new metadata chunks.
 */
static unsigned int calc_insert_metadata_size(const struct btrfs_fs_info *fs_info,
					      unsigned int num_items)
{
	return fs_info->nodesize * BTRFS_MAX_LEVEL * num_items * 2;
}

static bool meta_has_enough_space(struct btrfs_fs_info *fs_info, u64 profile,
				  unsigned int size)
{
	struct btrfs_space_info *sinfo;

	profile &= BTRFS_BLOCK_GROUP_TYPE_MASK;

	/*
	 * The fs is temporary (still during mkfs), do not check free space
	 * as we don't have all meta/sys chunks set up.
	 */
	if (btrfs_super_magic(fs_info->super_copy) != BTRFS_MAGIC)
		return true;

	/*
	 * The fs is under extent tree rebuilding, do not do any free space
	 * checks, as they are not reliable.
	 */
	if (fs_info->rebuilding_extent_tree)
		return true;

	sinfo = btrfs_find_space_info(fs_info, profile);
	if (!sinfo) {
		error("unable to find block group for profile 0x%llx", profile);
		return false;
	}

	if (sinfo->bytes_used + sinfo->bytes_pinned + sinfo->bytes_reserved +
	    size < sinfo->total_bytes)
		return true;
	return false;
}
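/*
 * Worked example (a sketch, assuming the common 16KiB nodesize; the
 * fixed BTRFS_MAX_LEVEL is 8): reserving one item costs
 * 16KiB * 8 * 1 * 2 = 256KiB, enough to CoW a full tree path with room
 * for splits at every level.  meta_has_enough_space() then checks that
 * used + pinned + reserved bytes plus this reservation still fit inside
 * the space info's total_bytes for the given profile.
 */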
static struct btrfs_trans_handle *alloc_trans_handle(struct btrfs_root *root,
						     unsigned int num_items)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_trans_handle *h;

	h = kzalloc(sizeof(*h), GFP_NOFS);
	if (!h)
		return ERR_PTR(-ENOMEM);

	h->fs_info = fs_info;
	fs_info->running_transaction = h;
	fs_info->generation++;
	h->transid = fs_info->generation;
	h->blocks_reserved = num_items;
	h->reinit_extent_tree = false;
	h->allocating_chunk = 0;
	root->last_trans = h->transid;
	root->commit_root = root->node;
	extent_buffer_get(root->node);
	INIT_LIST_HEAD(&h->dirty_bgs);

	return h;
}
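/*
 * Typical caller pattern for the functions below (a sketch, with the
 * error handling of the tree operations themselves elided):
 *
 *	struct btrfs_trans_handle *trans;
 *
 *	trans = btrfs_start_transaction(root, 1);
 *	if (IS_ERR(trans))
 *		return PTR_ERR(trans);
 *	// ... insert/delete items, CoW tree blocks ...
 *	return btrfs_commit_transaction(trans, root);
 */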
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
						   unsigned int num_items)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_trans_handle *h;
	unsigned int rsv_bytes;
	bool need_retry = false;
	u64 profile;

	if (root->root_key.objectid == BTRFS_CHUNK_TREE_OBJECTID)
		profile = BTRFS_BLOCK_GROUP_SYSTEM |
			  (fs_info->avail_system_alloc_bits &
			   fs_info->system_alloc_profile);
	else
		profile = BTRFS_BLOCK_GROUP_METADATA |
			  (fs_info->avail_metadata_alloc_bits &
			   fs_info->metadata_alloc_profile);

	if (fs_info->transaction_aborted)
		return ERR_PTR(-EROFS);

	if (root->commit_root) {
		error("commit_root already set when starting transaction");
		return ERR_PTR(-EINVAL);
	}
	if (fs_info->running_transaction) {
		error("attempt to start transaction over already running one");
		return ERR_PTR(-EINVAL);
	}

	/*
	 * Those call sites are mostly deleting items; in that case just
	 * bump the count to 1.
	 */
	if (num_items == 0)
		num_items = 1;

	rsv_bytes = calc_insert_metadata_size(fs_info, num_items);

	/*
	 * We should not have so many items that the reservation would be
	 * larger than one metadata chunk.
	 */
	if (rsv_bytes > SZ_1G) {
		error("too much metadata space required: num_items %u reserved bytes %u",
		      num_items, rsv_bytes);
		return ERR_PTR(-EINVAL);
	}

	if (!meta_has_enough_space(fs_info, profile, rsv_bytes))
		need_retry = true;

	h = alloc_trans_handle(root, num_items);
	if (IS_ERR(h))
		return h;

	if (need_retry) {
		int ret;

		ret = btrfs_try_chunk_alloc(h, fs_info, rsv_bytes, profile);
		if (ret < 0) {
			btrfs_abort_transaction(h, ret);
			errno = -ret;
			error("failed to allocate new chunk: %m");
			return ERR_PTR(ret);
		}
		ret = btrfs_commit_transaction(h, root);
		if (ret < 0) {
			errno = -ret;
			error("failed to commit transaction for the new chunk: %m");
			return ERR_PTR(ret);
		}
		if (!meta_has_enough_space(fs_info, profile, rsv_bytes)) {
			errno = ENOSPC;
			error("failed to start transaction: %m");
			return ERR_PTR(-ENOSPC);
		}
		h = alloc_trans_handle(root, num_items);
	}
	return h;
}

static int update_cowonly_root(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	int ret;
	u64 old_root_bytenr;
	struct btrfs_root *tree_root = root->fs_info->tree_root;

	/*
	 * Updating the root item may CoW the tree root again, so loop
	 * until the recorded bytenr matches the current root node.
	 */
	while (1) {
		old_root_bytenr = btrfs_root_bytenr(&root->root_item);
		if (old_root_bytenr == root->node->start)
			break;
		btrfs_set_root_bytenr(&root->root_item, root->node->start);
		btrfs_set_root_generation(&root->root_item, trans->transid);
		root->root_item.level = btrfs_header_level(root->node);
		ret = btrfs_update_root(trans, tree_root, &root->root_key,
					&root->root_item);
		if (ret < 0)
			return ret;
		ret = btrfs_write_dirty_block_groups(trans);
		if (ret)
			return ret;
	}
	return 0;
}
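/*
 * Overview of the commit sequence implemented below (a sketch of the
 * actual flow in this file, not an API contract):
 *
 *	btrfs_commit_transaction()
 *	  -> btrfs_run_delayed_refs()        // flush accumulated refs
 *	  -> commit_tree_roots()             // update cow-only root items
 *	  -> btrfs_write_dirty_block_groups() + delayed refs, looped
 *	  -> __commit_transaction()          // write dirty tree blocks
 *	  -> write_ctree_super()             // point the super at new roots
 */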
int commit_tree_roots(struct btrfs_trans_handle *trans,
		      struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *root;
	struct list_head *next;
	struct extent_buffer *eb;
	int ret;

	if (fs_info->readonly)
		return 0;

	eb = fs_info->tree_root->node;
	extent_buffer_get(eb);
	ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb,
			      BTRFS_NESTING_NORMAL);
	free_extent_buffer(eb);
	if (ret)
		return ret;

	/*
	 * If the above CoW is the first one to dirty the current tree_root,
	 * delayed refs for it won't be run until after this function has
	 * finished executing, meaning we won't process the extent tree root,
	 * which will have been added to ->dirty_cowonly_roots.  So run
	 * delayed refs here as well.
	 */
	ret = btrfs_run_delayed_refs(trans, -1);
	if (ret)
		return ret;

	while (!list_empty(&fs_info->dirty_cowonly_roots)) {
		next = fs_info->dirty_cowonly_roots.next;
		list_del_init(next);
		root = list_entry(next, struct btrfs_root, dirty_list);
		clear_bit(BTRFS_ROOT_DIRTY, &root->state);
		ret = update_cowonly_root(trans, root);
		free_extent_buffer(root->commit_root);
		root->commit_root = NULL;
		if (ret < 0)
			return ret;
	}
	return 0;
}

/* Clear the dirty bit and drop our reference on all remaining dirty ebs. */
static void clean_dirty_buffers(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct extent_io_tree *tree = &fs_info->dirty_buffers;
	struct extent_buffer *eb;
	u64 start, end;

	while (find_first_extent_bit(tree, 0, &start, &end, EXTENT_DIRTY,
				     NULL) == 0) {
		while (start <= end) {
			eb = find_first_extent_buffer(fs_info, start);
			BUG_ON(!eb || eb->start != start);
			start += eb->len;
			btrfs_clear_buffer_dirty(trans, eb);
			free_extent_buffer(eb);
		}
	}
}
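/*
 * Both clean_dirty_buffers() above and __commit_transaction() below walk
 * the dirty-buffer tree with the same idiom (a sketch, names as in this
 * file):
 *
 *	while (find_first_extent_bit(tree, 0, &start, &end, EXTENT_DIRTY,
 *				     NULL) == 0) {
 *		while (start <= end) {
 *			eb = find_first_extent_buffer(fs_info, start);
 *			// ... act on eb ...
 *			start += eb->len;
 *			btrfs_clear_buffer_dirty(trans, eb);
 *			free_extent_buffer(eb);
 *		}
 *	}
 *
 * Clearing the dirty bit shrinks the range the next
 * find_first_extent_bit() call returns, so the outer loop terminates.
 */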
int __commit_transaction(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root)
{
	u64 start;
	u64 end;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *eb;
	struct extent_io_tree *tree = &fs_info->dirty_buffers;
	int ret;

	while (1) {
again:
		ret = find_first_extent_bit(tree, 0, &start, &end,
					    EXTENT_DIRTY, NULL);
		if (ret)
			break;
		if (btrfs_redirty_extent_buffer_for_zoned(fs_info, start, end))
			goto again;
		while (start <= end) {
			eb = find_first_extent_buffer(fs_info, start);
			BUG_ON(!eb || eb->start != start);
			ret = write_tree_block(trans, fs_info, eb);
			if (ret < 0) {
				errno = -ret;
				error("failed to write tree block %llu: %m",
				      eb->start);
				free_extent_buffer(eb);
				goto cleanup;
			}
			start += eb->len;
			btrfs_clear_buffer_dirty(trans, eb);
			free_extent_buffer(eb);
		}
	}
	return 0;
cleanup:
	/*
	 * Mark all remaining dirty ebs clean, as they have no chance to be
	 * written back anymore.
	 */
	clean_dirty_buffers(trans);
	return ret;
}

int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root)
{
	u64 transid = trans->transid;
	int ret = 0;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_space_info *sinfo;

	if (trans->fs_info->transaction_aborted) {
		ret = -EROFS;
		goto error;
	}

	/*
	 * Flush all accumulated delayed refs so that root-tree updates are
	 * consistent.
	 */
	ret = btrfs_run_delayed_refs(trans, -1);
	if (ret < 0)
		goto error;

	if (root->commit_root == root->node)
		goto commit_tree;
	if (root == root->fs_info->tree_root)
		goto commit_tree;
	if (root == root->fs_info->chunk_root)
		goto commit_tree;
	if (root == root->fs_info->block_group_root)
		goto commit_tree;

	free_extent_buffer(root->commit_root);
	root->commit_root = NULL;

	btrfs_set_root_bytenr(&root->root_item, root->node->start);
	btrfs_set_root_generation(&root->root_item, trans->transid);
	root->root_item.level = btrfs_header_level(root->node);
	ret = btrfs_update_root(trans, root->fs_info->tree_root,
				&root->root_key, &root->root_item);
	if (ret < 0)
		goto error;

commit_tree:
	ret = commit_tree_roots(trans, fs_info);
	if (ret < 0)
		goto error;

	/*
	 * btrfs_write_dirty_block_groups() can cause CoW, thus new delayed
	 * tree refs, while running such delayed tree refs can dirty block
	 * groups again.  We need to exhaust both dirty block groups and
	 * delayed refs.
	 */
	while (!RB_EMPTY_ROOT(&trans->delayed_refs.href_root) ||
	       !list_empty(&trans->dirty_bgs)) {
		ret = btrfs_write_dirty_block_groups(trans);
		if (ret < 0)
			goto error;
		ret = btrfs_run_delayed_refs(trans, -1);
		if (ret < 0)
			goto error;
	}

	ret = __commit_transaction(trans, root);
	if (ret < 0)
		goto error;

	/* There should be no pending delayed refs now. */
	if (!RB_EMPTY_ROOT(&trans->delayed_refs.href_root)) {
		error("uncommitted delayed refs detected");
		ret = -EUCLEAN;
		goto error;
	}

	ret = write_ctree_super(trans);
	btrfs_finish_extent_commit(trans);
	kfree(trans);
	free_extent_buffer(root->commit_root);
	root->commit_root = NULL;
	fs_info->running_transaction = NULL;
	fs_info->last_trans_committed = transid;
	list_for_each_entry(sinfo, &fs_info->space_info, list) {
		if (sinfo->bytes_reserved) {
			warning(
"reserved space leaked, transid=%llu flag=0x%llx bytes_reserved=%llu",
				transid, sinfo->flags, sinfo->bytes_reserved);
		}
	}
	return ret;
error:
	btrfs_abort_transaction(trans, ret);
	clean_dirty_buffers(trans);
	btrfs_destroy_delayed_refs(trans);
	kfree(trans);
	return ret;
}

/*
 * Record @error in the fs_info, so that any further attempt to start or
 * commit a transaction fails with -EROFS.
 */
void btrfs_abort_transaction(struct btrfs_trans_handle *trans, int error)
{
	trans->fs_info->transaction_aborted = error;
}
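/*
 * Abort usage (a sketch; btrfs_insert_item stands in for any failing tree
 * operation): the caller records the error and still calls
 * btrfs_commit_transaction(), which observes the aborted state, cleans up,
 * and frees the handle:
 *
 *	ret = btrfs_insert_item(trans, root, &key, data, size);
 *	if (ret < 0)
 *		btrfs_abort_transaction(trans, ret);
 *	ret2 = btrfs_commit_transaction(trans, root);	// returns -EROFS
 */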