mirror of
https://github.com/kdave/btrfs-progs
synced 2024-12-26 16:12:34 +00:00
e3f227c6c6
This is how the kernel initializes blocks, so anybody who uses btrfs_alloc_tree_block in the kernel expects the blocks to be already initialized. Put this init code into btrfs-progs so as we sync code from the kernel we get the correct behavior. Signed-off-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: David Sterba <dsterba@suse.com>
3929 lines
102 KiB
C
3929 lines
102 KiB
C
/*
|
|
* Copyright (C) 2007 Oracle. All rights reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public
|
|
* License v2 as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public
|
|
* License along with this program; if not, write to the
|
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
* Boston, MA 02111-1307, USA.
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <stdint.h>
|
|
#include <math.h>
|
|
#include "kerncompat.h"
|
|
#include "kernel-lib/list.h"
|
|
#include "kernel-lib/rbtree.h"
|
|
#include "kernel-shared/ctree.h"
|
|
#include "kernel-shared/disk-io.h"
|
|
#include "kernel-shared/print-tree.h"
|
|
#include "kernel-shared/transaction.h"
|
|
#include "kernel-shared/volumes.h"
|
|
#include "kernel-shared/free-space-cache.h"
|
|
#include "kernel-shared/free-space-tree.h"
|
|
#include "kernel-shared/zoned.h"
|
|
#include "kernel-shared/file-item.h"
|
|
#include "crypto/crc32c.h"
|
|
#include "common/utils.h"
|
|
|
|
/*
 * Operation codes for a deferred extent operation.
 * NOTE(review): presumably these are the values stored in
 * pending_extent_op::type below; no user is visible in this part of the
 * file -- confirm.
 */
#define PENDING_EXTENT_INSERT 0
#define PENDING_EXTENT_DELETE 1
#define PENDING_BACKREF_UPDATE 2

/*
 * Description of one deferred extent operation.
 * NOTE(review): the field meanings below are inferred from the names only;
 * verify against the code that fills this structure in.
 */
struct pending_extent_op {
	int type;			/* presumably one of PENDING_* */
	u64 bytenr;			/* presumably start of the extent */
	u64 num_bytes;			/* presumably length of the extent */
	u64 flags;
	struct btrfs_disk_key key;
	int level;
};
|
|
|
|
/*
 * Forward declarations of file-local (static) helpers, so that code placed
 * above their definitions can call them.
 */
static int __free_extent(struct btrfs_trans_handle *trans,
			 u64 bytenr, u64 num_bytes, u64 parent,
			 u64 root_objectid, u64 owner_objectid,
			 u64 owner_offset, int refs_to_drop);
static struct btrfs_block_group *
btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group
		       *hint, u64 search_start, u64 profile, int owner);
|
|
|
|
static int remove_sb_from_cache(struct btrfs_root *root,
|
|
struct btrfs_block_group *cache)
|
|
{
|
|
u64 bytenr;
|
|
u64 *logical;
|
|
int stripe_len;
|
|
int i, nr, ret;
|
|
struct btrfs_fs_info *fs_info = root->fs_info;
|
|
struct extent_io_tree *free_space_cache;
|
|
|
|
free_space_cache = &fs_info->free_space_cache;
|
|
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
|
|
bytenr = btrfs_sb_offset(i);
|
|
ret = btrfs_rmap_block(fs_info, cache->start, bytenr,
|
|
&logical, &nr, &stripe_len);
|
|
BUG_ON(ret);
|
|
while (nr--) {
|
|
clear_extent_dirty(free_space_cache, logical[nr],
|
|
logical[nr] + stripe_len - 1, NULL);
|
|
}
|
|
kfree(logical);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int cache_block_group(struct btrfs_root *root,
|
|
struct btrfs_block_group *block_group)
|
|
{
|
|
struct btrfs_path *path;
|
|
int ret;
|
|
struct btrfs_key key;
|
|
struct extent_buffer *leaf;
|
|
struct extent_io_tree *free_space_cache;
|
|
int slot;
|
|
u64 last;
|
|
u64 hole_size;
|
|
|
|
if (!block_group)
|
|
return 0;
|
|
|
|
root = btrfs_extent_root(root->fs_info, 0);
|
|
free_space_cache = &root->fs_info->free_space_cache;
|
|
|
|
if (block_group->cached)
|
|
return 0;
|
|
|
|
path = btrfs_alloc_path();
|
|
if (!path)
|
|
return -ENOMEM;
|
|
|
|
path->reada = READA_FORWARD;
|
|
last = max_t(u64, block_group->start, BTRFS_SUPER_INFO_OFFSET);
|
|
key.objectid = last;
|
|
key.offset = 0;
|
|
key.type = 0;
|
|
|
|
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
|
if (ret < 0)
|
|
goto err;
|
|
|
|
while(1) {
|
|
leaf = path->nodes[0];
|
|
slot = path->slots[0];
|
|
if (slot >= btrfs_header_nritems(leaf)) {
|
|
ret = btrfs_next_leaf(root, path);
|
|
if (ret < 0)
|
|
goto err;
|
|
if (ret == 0) {
|
|
continue;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
btrfs_item_key_to_cpu(leaf, &key, slot);
|
|
if (key.objectid < block_group->start) {
|
|
goto next;
|
|
}
|
|
if (key.objectid >= block_group->start + block_group->length) {
|
|
break;
|
|
}
|
|
|
|
if (key.type == BTRFS_EXTENT_ITEM_KEY ||
|
|
key.type == BTRFS_METADATA_ITEM_KEY) {
|
|
if (key.objectid > last) {
|
|
hole_size = key.objectid - last;
|
|
set_extent_dirty(free_space_cache, last,
|
|
last + hole_size - 1, GFP_NOFS);
|
|
}
|
|
if (key.type == BTRFS_METADATA_ITEM_KEY)
|
|
last = key.objectid + root->fs_info->nodesize;
|
|
else
|
|
last = key.objectid + key.offset;
|
|
}
|
|
next:
|
|
path->slots[0]++;
|
|
}
|
|
|
|
if (block_group->start + block_group->length > last) {
|
|
hole_size = block_group->start + block_group->length - last;
|
|
set_extent_dirty(free_space_cache, last, last + hole_size - 1,
|
|
GFP_NOFS);
|
|
}
|
|
remove_sb_from_cache(root, block_group);
|
|
block_group->cached = 1;
|
|
err:
|
|
btrfs_free_path(path);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* This adds the block group to the fs_info rb tree for the block group cache
|
|
*/
|
|
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
|
|
struct btrfs_block_group *block_group)
|
|
{
|
|
struct rb_node **p;
|
|
struct rb_node *parent = NULL;
|
|
struct btrfs_block_group *cache;
|
|
|
|
ASSERT(block_group->length != 0);
|
|
p = &info->block_group_cache_tree.rb_node;
|
|
|
|
while (*p) {
|
|
parent = *p;
|
|
cache = rb_entry(parent, struct btrfs_block_group,
|
|
cache_node);
|
|
if (block_group->start < cache->start)
|
|
p = &(*p)->rb_left;
|
|
else if (block_group->start > cache->start)
|
|
p = &(*p)->rb_right;
|
|
else
|
|
return -EEXIST;
|
|
}
|
|
|
|
rb_link_node(&block_group->cache_node, parent, p);
|
|
rb_insert_color(&block_group->cache_node,
|
|
&info->block_group_cache_tree);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* This will return the block group which contains @bytenr if it exists.
|
|
* If found nothing, the return depends on @next.
|
|
*
|
|
* @next:
|
|
* if 0, return NULL if there's no block group containing the bytenr.
|
|
* if 1, return the block group which starts after @bytenr.
|
|
*/
|
|
static struct btrfs_block_group *block_group_cache_tree_search(
|
|
struct btrfs_fs_info *info, u64 bytenr, int next)
|
|
{
|
|
struct btrfs_block_group *cache, *ret = NULL;
|
|
struct rb_node *n;
|
|
u64 end, start;
|
|
|
|
n = info->block_group_cache_tree.rb_node;
|
|
|
|
while (n) {
|
|
cache = rb_entry(n, struct btrfs_block_group,
|
|
cache_node);
|
|
end = cache->start + cache->length - 1;
|
|
start = cache->start;
|
|
|
|
if (bytenr < start) {
|
|
if (next && (!ret || start < ret->start))
|
|
ret = cache;
|
|
n = n->rb_left;
|
|
} else if (bytenr > start) {
|
|
if (bytenr <= end) {
|
|
ret = cache;
|
|
break;
|
|
}
|
|
n = n->rb_right;
|
|
} else {
|
|
ret = cache;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Return the block group that contains @bytenr, otherwise return the next one
|
|
* that starts after @bytenr
|
|
*/
|
|
struct btrfs_block_group *btrfs_lookup_first_block_group(
|
|
struct btrfs_fs_info *info, u64 bytenr)
|
|
{
|
|
return block_group_cache_tree_search(info, bytenr, 1);
|
|
}
|
|
|
|
/*
|
|
* Return the block group that contains the given bytenr
|
|
*/
|
|
struct btrfs_block_group *btrfs_lookup_block_group(
|
|
struct btrfs_fs_info *info, u64 bytenr)
|
|
{
|
|
return block_group_cache_tree_search(info, bytenr, 0);
|
|
}
|
|
|
|
/*
 * Return 1 when every bit of @bits is set in cache->flags, 0 otherwise.
 */
static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
{
	u64 present = cache->flags & bits;

	if (present != bits)
		return 0;
	return 1;
}
|
|
|
|
/*
 * Find room for @num bytes in a writable block group carrying @profile.
 *
 * The search begins in *@cache_ret at offset *@start_ret.  Block groups that
 * are read-only, lack the @profile bits or have no large enough free range
 * are skipped in favour of the next one; once the end of the block group tree
 * is reached the lookup restarts a single time from the original
 * *@start_ret.
 *
 * On success 0 is returned, *@start_ret holds the start of the free range and
 * *@cache_ret is the last block group the search moved on to.  On zoned
 * filesystems the range is taken at the block group's alloc_offset, which is
 * advanced by @num.
 *
 * On failure *@start_ret is set to the current search position and the result
 * is -ENOENT when no block group contains the original start offset, or
 * -ENOSPC otherwise (including when cache_block_group() fails).
 */
static int noinline find_search_start(struct btrfs_root *root,
			      struct btrfs_block_group **cache_ret,
			      u64 *start_ret, int num, u64 profile)
{
	struct btrfs_block_group *bg = *cache_ret;
	u64 search_start = *start_ret;
	u64 cursor = *start_ret;
	u64 free_start = 0;
	u64 free_end = 0;
	int did_wrap = 0;
	int ret;

	if (!bg)
		goto no_space;

scan_group:
	ret = cache_block_group(root, bg);
	if (ret)
		goto no_space;

	cursor = max(search_start, bg->start);
	if (bg->ro || !block_group_bits(bg, profile))
		goto next_group;

	if (btrfs_is_zoned(root->fs_info)) {
		/* Zoned: allocate sequentially at the block group's write position. */
		if (bg->length - bg->alloc_offset < num)
			goto next_group;
		*start_ret = bg->start + bg->alloc_offset;
		bg->alloc_offset += num;
		return 0;
	}

	for (;;) {
		ret = find_first_extent_bit(&root->fs_info->free_space_cache,
					    cursor, &free_start, &free_end,
					    EXTENT_DIRTY, NULL);
		if (ret)
			goto next_group;

		/* Clamp the range to the cursor, then step past it for the next try. */
		free_start = max(cursor, free_start);
		cursor = free_end + 1;
		if (cursor - free_start < num)
			continue;
		if (free_start + num > bg->start + bg->length)
			goto next_group;
		*start_ret = free_start;
		return 0;
	}

no_space:
	*start_ret = cursor;
	bg = btrfs_lookup_block_group(root->fs_info, search_start);
	if (!bg) {
		printk("Unable to find block group for %llu\n",
		       (unsigned long long)search_start);
		return -ENOENT;
	}
	return -ENOSPC;

next_group:
	cursor = bg->start + bg->length;
retry_lookup:
	bg = btrfs_lookup_first_block_group(root->fs_info, cursor);
	if (!bg) {
		/* Ran off the end: retry once from the original start offset. */
		if (!did_wrap) {
			did_wrap = 1;
			cursor = search_start;
			goto retry_lookup;
		}
		goto no_space;
	}
	*cache_ret = bg;
	goto scan_group;
}
|
|
|
|
static struct btrfs_block_group *
|
|
btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group
|
|
*hint, u64 search_start, u64 profile, int owner)
|
|
{
|
|
struct btrfs_block_group *cache;
|
|
struct btrfs_block_group *found_group = NULL;
|
|
struct btrfs_fs_info *info = root->fs_info;
|
|
u64 used;
|
|
u64 last = 0;
|
|
u64 hint_last;
|
|
u64 free_check;
|
|
int full_search = 0;
|
|
int factor = 10;
|
|
|
|
if (!owner)
|
|
factor = 10;
|
|
|
|
if (search_start) {
|
|
struct btrfs_block_group *shint;
|
|
shint = btrfs_lookup_block_group(info, search_start);
|
|
if (shint && !shint->ro && block_group_bits(shint, profile)) {
|
|
used = shint->used;
|
|
if (used + shint->pinned <
|
|
div_factor(shint->length, factor)) {
|
|
return shint;
|
|
}
|
|
}
|
|
}
|
|
if (hint && !hint->ro && block_group_bits(hint, profile)) {
|
|
used = hint->used;
|
|
if (used + hint->pinned <
|
|
div_factor(hint->length, factor)) {
|
|
return hint;
|
|
}
|
|
last = hint->start + hint->length;
|
|
hint_last = last;
|
|
} else {
|
|
if (hint)
|
|
hint_last = max(hint->start , search_start);
|
|
else
|
|
hint_last = search_start;
|
|
|
|
last = hint_last;
|
|
}
|
|
again:
|
|
while(1) {
|
|
cache = btrfs_lookup_first_block_group(info, last);
|
|
if (!cache)
|
|
break;
|
|
|
|
last = cache->start + cache->length;
|
|
used = cache->used;
|
|
|
|
if (!cache->ro && block_group_bits(cache, profile)) {
|
|
if (full_search)
|
|
free_check = cache->length;
|
|
else
|
|
free_check = div_factor(cache->length, factor);
|
|
|
|
if (used + cache->pinned < free_check) {
|
|
found_group = cache;
|
|
goto found;
|
|
}
|
|
}
|
|
cond_resched();
|
|
}
|
|
if (!full_search) {
|
|
last = search_start;
|
|
full_search = 1;
|
|
goto again;
|
|
}
|
|
found:
|
|
return found_group;
|
|
}
|
|
|
|
/*
|
|
* Back reference rules. Back refs have three main goals:
|
|
*
|
|
* 1) differentiate between all holders of references to an extent so that
|
|
* when a reference is dropped we can make sure it was a valid reference
|
|
* before freeing the extent.
|
|
*
|
|
* 2) Provide enough information to quickly find the holders of an extent
|
|
* if we notice a given block is corrupted or bad.
|
|
*
|
|
* 3) Make it easy to migrate blocks for FS shrinking or storage pool
|
|
* maintenance. This is actually the same as #2, but with a slightly
|
|
* different use case.
|
|
*
|
|
* There are two kinds of back refs. The implicit back refs is optimized
|
|
* for pointers in non-shared tree blocks. For a given pointer in a block,
|
|
* back refs of this kind provide information about the block's owner tree
|
|
* and the pointer's key. These information allow us to find the block by
|
|
* b-tree searching. The full back refs is for pointers in tree blocks not
|
|
* referenced by their owner trees. The location of tree block is recorded
|
|
* in the back refs. Actually the full back refs is generic, and can be
|
|
* used in all cases the implicit back refs is used. The major shortcoming
|
|
* of the full back refs is its overhead. Every time a tree block gets
|
|
* COWed, we have to update back refs entry for all pointers in it.
|
|
*
|
|
* For a newly allocated tree block, we use implicit back refs for
|
|
* pointers in it. This means most tree related operations only involve
|
|
* implicit back refs. For a tree block created in old transaction, the
|
|
* only way to drop a reference to it is COW it. So we can detect the
|
|
* event that tree block loses its owner tree's reference and do the
|
|
* back refs conversion.
|
|
*
|
|
* When a tree block is COW'd through a tree, there are four cases:
|
|
*
|
|
* The reference count of the block is one and the tree is the block's
|
|
* owner tree. Nothing to do in this case.
|
|
*
|
|
* The reference count of the block is one and the tree is not the
|
|
* block's owner tree. In this case, full back refs is used for pointers
|
|
* in the block. Remove these full back refs, add implicit back refs for
|
|
* every pointers in the new block.
|
|
*
|
|
* The reference count of the block is greater than one and the tree is
|
|
* the block's owner tree. In this case, implicit back refs is used for
|
|
* pointers in the block. Add full back refs for every pointers in the
|
|
* block, increase lower level extents' reference counts. The original
|
|
* implicit back refs are entailed to the new block.
|
|
*
|
|
* The reference count of the block is greater than one and the tree is
|
|
* not the block's owner tree. Add implicit back refs for every pointer in
|
|
* the new block, increase lower level extents' reference count.
|
|
*
|
|
* Back Reference Key composing:
|
|
*
|
|
* The key objectid corresponds to the first byte in the extent,
|
|
* The key type is used to differentiate between types of back refs.
|
|
* There are different meanings of the key offset for different types
|
|
* of back refs.
|
|
*
|
|
* File extents can be referenced by:
|
|
*
|
|
* - multiple snapshots, subvolumes, or different generations in one subvol
|
|
* - different files inside a single subvolume
|
|
* - different offsets inside a file (bookend extents in file.c)
|
|
*
|
|
* The extent ref structure for the implicit back refs has fields for:
|
|
*
|
|
* - Objectid of the subvolume root
|
|
* - objectid of the file holding the reference
|
|
* - original offset in the file
|
|
* - how many bookend extents
|
|
*
|
|
* The key offset for the implicit back refs is hash of the first
|
|
* three fields.
|
|
*
|
|
* The extent ref structure for the full back refs has field for:
|
|
*
|
|
* - number of pointers in the tree leaf
|
|
*
|
|
* The key offset for the full back refs is the first byte of
|
|
* the tree leaf
|
|
*
|
|
* When a file extent is allocated, the implicit back refs is used.
|
|
* The fields are filled in:
|
|
*
|
|
* (root_key.objectid, inode objectid, offset in file, 1)
|
|
*
|
|
* When a file extent is removed by file truncation, we find the
|
|
* corresponding implicit back refs and check the following fields:
|
|
*
|
|
* (btrfs_header_owner(leaf), inode objectid, offset in file)
|
|
*
|
|
* Btree extents can be referenced by:
|
|
*
|
|
* - Different subvolumes
|
|
*
|
|
* Both the implicit back refs and the full back refs for tree blocks
|
|
* only consist of key. The key offset for the implicit back refs is
|
|
* objectid of block's owner tree. The key offset for the full back refs
|
|
* is the first byte of parent block.
|
|
*
|
|
* When implicit back refs is used, information about the lowest key and
|
|
* level of the tree block are required. These information are stored in
|
|
* tree block info structure.
|
|
*/
|
|
|
|
/*
 * Hash (root_objectid, owner, offset) into a 64-bit value.
 *
 * One crc32c is computed over the little-endian @root_objectid, a second one
 * over the little-endian @owner followed by @offset; both start from a seed
 * of all ones.  The first CRC is shifted left by 31 bits and XORed with the
 * second.
 *
 * NOTE(review): the shift is 31, not 32.  The result is used as a key offset
 * elsewhere in this file, so presumably it must stay compatible with values
 * already stored on disk -- do not "correct" it without confirming.
 */
u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
	__le64 le_root = cpu_to_le64(root_objectid);
	__le64 le_owner = cpu_to_le64(owner);
	__le64 le_offset = cpu_to_le64(offset);
	u32 crc_root;
	u32 crc_owner_offset;

	crc_root = crc32c(~(u32)0, &le_root, sizeof(le_root));

	crc_owner_offset = crc32c(~(u32)0, &le_owner, sizeof(le_owner));
	crc_owner_offset = crc32c(crc_owner_offset, &le_offset,
				  sizeof(le_offset));

	return ((u64)crc_root << 31) ^ (u64)crc_owner_offset;
}
|
|
|
|
/*
 * Compute hash_extent_data_ref() for the root, objectid and offset stored in
 * the extent data ref @ref inside @leaf.
 */
static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
				     struct btrfs_extent_data_ref *ref)
{
	u64 ref_root = btrfs_extent_data_ref_root(leaf, ref);
	u64 ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
	u64 ref_offset = btrfs_extent_data_ref_offset(leaf, ref);

	return hash_extent_data_ref(ref_root, ref_objectid, ref_offset);
}
|
|
|
|
/*
 * Return 1 when the extent data ref @ref in @leaf stores exactly
 * (@root_objectid, @owner, @offset), 0 otherwise.
 */
static int match_extent_data_ref(struct extent_buffer *leaf,
				 struct btrfs_extent_data_ref *ref,
				 u64 root_objectid, u64 owner, u64 offset)
{
	return btrfs_extent_data_ref_root(leaf, ref) == root_objectid &&
	       btrfs_extent_data_ref_objectid(leaf, ref) == owner &&
	       btrfs_extent_data_ref_offset(leaf, ref) == offset;
}
|
|
|
|
/*
 * Position @path on the keyed data back ref of extent @bytenr.
 *
 * With @parent != 0 the key is (bytenr, SHARED_DATA_REF, parent) and only an
 * exact key match counts.  Otherwise the search starts at
 * (bytenr, EXTENT_DATA_REF, hash) and walks forward over the EXTENT_DATA_REF
 * items of @bytenr until one stores (@root_objectid, @owner, @offset).
 *
 * If the match was only reached after moving to a following leaf, the path
 * is released and the search is repeated with the matching item's key.
 *
 * Returns 0 with @path pointing at the item, -ENOENT when there is no such
 * back ref, or a negative error from the tree search.
 */
static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid,
					   u64 owner, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
	}

	for (;;) {
		int moved_leaf = 0;

		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0)
			return ret;

		/* Shared refs have a unique key: it is either there or not. */
		if (parent)
			return ret ? -ENOENT : 0;

		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		for (;; path->slots[0]++) {
			if (path->slots[0] >= nritems) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					return ret;
				if (ret)
					return -ENOENT;

				leaf = path->nodes[0];
				nritems = btrfs_header_nritems(leaf);
				moved_leaf = 1;
			}

			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
			if (key.objectid != bytenr ||
			    key.type != BTRFS_EXTENT_DATA_REF_KEY)
				return -ENOENT;

			ref = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_data_ref);
			if (!match_extent_data_ref(leaf, ref, root_objectid,
						   owner, offset))
				continue;

			if (!moved_leaf)
				return 0;

			/* Found on a later leaf: search again using this item's key. */
			break;
		}
		btrfs_release_path(path);
	}
}
|
|
|
|
/*
 * Add @refs_to_add references to the keyed data back ref of extent @bytenr,
 * creating the item if it does not exist yet.
 *
 * With @parent != 0 the item is (bytenr, SHARED_DATA_REF, parent).
 * Otherwise it is (bytenr, EXTENT_DATA_REF, hash); if an item with that key
 * exists but stores a different (root, owner, offset) triple, key.offset is
 * incremented and the insertion is retried until a free key or a matching
 * item is found.
 *
 * @path is released before returning.  Returns 0 on success or the negative
 * error from btrfs_insert_empty_item().
 */
static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid, u64 owner,
					   u64 offset, int refs_to_add)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u32 item_len;
	u32 count;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
		item_len = sizeof(struct btrfs_shared_data_ref);
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
		item_len = sizeof(struct btrfs_extent_data_ref);
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, item_len);
	if (ret && ret != -EEXIST)
		goto out;

	leaf = path->nodes[0];
	if (parent) {
		struct btrfs_shared_data_ref *sref;

		sref = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		if (ret == 0) {
			/* Fresh item: the count starts at refs_to_add. */
			btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		} else {
			count = btrfs_shared_data_ref_count(leaf, sref);
			count += refs_to_add;
			btrfs_set_shared_data_ref_count(leaf, sref, count);
		}
	} else {
		struct btrfs_extent_data_ref *dref;

		/* Skip over items that share the key but hold another triple. */
		while (ret == -EEXIST) {
			dref = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_data_ref);
			if (match_extent_data_ref(leaf, dref, root_objectid,
						  owner, offset))
				break;
			btrfs_release_path(path);

			key.offset++;
			ret = btrfs_insert_empty_item(trans, root, path, &key,
						      item_len);
			if (ret && ret != -EEXIST)
				goto out;

			leaf = path->nodes[0];
		}

		dref = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		if (ret == 0) {
			btrfs_set_extent_data_ref_root(leaf, dref,
						       root_objectid);
			btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
			btrfs_set_extent_data_ref_offset(leaf, dref, offset);
			btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
		} else {
			count = btrfs_extent_data_ref_count(leaf, dref);
			count += refs_to_add;
			btrfs_set_extent_data_ref_count(leaf, dref, count);
		}
	}
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
out:
	btrfs_release_path(path);
	return ret;
}
|
|
|
|
static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root,
|
|
struct btrfs_path *path,
|
|
int refs_to_drop)
|
|
{
|
|
struct btrfs_key key;
|
|
struct btrfs_extent_data_ref *ref1 = NULL;
|
|
struct btrfs_shared_data_ref *ref2 = NULL;
|
|
struct extent_buffer *leaf;
|
|
u32 num_refs = 0;
|
|
int ret = 0;
|
|
|
|
leaf = path->nodes[0];
|
|
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
|
|
|
|
if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
|
|
ref1 = btrfs_item_ptr(leaf, path->slots[0],
|
|
struct btrfs_extent_data_ref);
|
|
num_refs = btrfs_extent_data_ref_count(leaf, ref1);
|
|
} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
|
|
ref2 = btrfs_item_ptr(leaf, path->slots[0],
|
|
struct btrfs_shared_data_ref);
|
|
num_refs = btrfs_shared_data_ref_count(leaf, ref2);
|
|
} else {
|
|
BUG();
|
|
}
|
|
|
|
BUG_ON(num_refs < refs_to_drop);
|
|
num_refs -= refs_to_drop;
|
|
|
|
if (num_refs == 0) {
|
|
ret = btrfs_del_item(trans, root, path);
|
|
} else {
|
|
if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
|
|
btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
|
|
else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
|
|
btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
|
|
btrfs_mark_buffer_dirty(leaf);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Return the reference count stored in a data back ref.
 *
 * When @iref is non-NULL the count is read from that inline ref: as an
 * extent data ref if its type is EXTENT_DATA_REF, as a shared data ref for
 * any other type.  When @iref is NULL the count is read from the keyed item
 * @path points at, which must be an EXTENT_DATA_REF or SHARED_DATA_REF item
 * (anything else hits BUG()).
 */
static noinline u32 extent_data_ref_count(struct btrfs_path *path,
					  struct btrfs_extent_inline_ref *iref)
{
	struct extent_buffer *leaf = path->nodes[0];
	struct btrfs_extent_data_ref *dref;
	struct btrfs_shared_data_ref *sref;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (iref) {
		if (btrfs_extent_inline_ref_type(leaf, iref) ==
		    BTRFS_EXTENT_DATA_REF_KEY) {
			/* The data ref overlays the inline ref's offset field. */
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			return btrfs_extent_data_ref_count(leaf, dref);
		}
		/* The shared data ref follows the inline ref header. */
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		return btrfs_shared_data_ref_count(leaf, sref);
	}

	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		dref = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		return btrfs_extent_data_ref_count(leaf, dref);
	}

	if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		sref = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		return btrfs_shared_data_ref_count(leaf, sref);
	}

	BUG();
	return 0;
}
|
|
|
|
/*
 * Position @path on the keyed tree block back ref of @bytenr:
 * (bytenr, SHARED_BLOCK_REF, parent) when @parent != 0, otherwise
 * (bytenr, TREE_BLOCK_REF, root_objectid).
 *
 * Returns 0 when the item exists, -ENOENT when it does not, or a negative
 * error from btrfs_search_slot().
 */
static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	key.type = parent ? BTRFS_SHARED_BLOCK_REF_KEY :
			    BTRFS_TREE_BLOCK_REF_KEY;
	key.offset = parent ? parent : root_objectid;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);

	/* A positive result means "key not present". */
	return ret > 0 ? -ENOENT : ret;
}
|
|
|
|
/*
 * Insert the keyed tree block back ref of @bytenr as an item without payload:
 * (bytenr, SHARED_BLOCK_REF, parent) when @parent != 0, otherwise
 * (bytenr, TREE_BLOCK_REF, root_objectid).
 *
 * @path is released before returning.  Returns whatever
 * btrfs_insert_empty_item() returned.
 */
static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	key.type = parent ? BTRFS_SHARED_BLOCK_REF_KEY :
			    BTRFS_TREE_BLOCK_REF_KEY;
	key.offset = parent ? parent : root_objectid;

	ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
	btrfs_release_path(path);

	return ret;
}
|
|
|
|
/*
 * Select the back ref key type for a reference.
 *
 * @owner below BTRFS_FIRST_FREE_OBJECTID selects the block ref types, any
 * other @owner the data ref types; @parent > 0 selects the shared variant.
 */
static inline int extent_ref_type(u64 parent, u64 owner)
{
	const int shared = parent > 0;

	if (owner < BTRFS_FIRST_FREE_OBJECTID)
		return shared ? BTRFS_SHARED_BLOCK_REF_KEY :
				BTRFS_TREE_BLOCK_REF_KEY;

	return shared ? BTRFS_SHARED_DATA_REF_KEY :
			BTRFS_EXTENT_DATA_REF_KEY;
}
|
|
|
|
/*
 * Look for an inline back ref inside the extent item of @bytenr.
 *
 * The extent item is searched as (bytenr, METADATA_ITEM, owner) for tree
 * blocks on SKINNY_METADATA filesystems, falling back to
 * (bytenr, EXTENT_ITEM, num_bytes), which is also the key used in all other
 * cases.  The inline refs after the extent item header are then walked, in
 * their sorted order, for the ref described by
 * (@parent, @root_objectid, @owner, @offset).
 *
 * @insert: non-zero when the caller intends to add an inline ref.  The tree
 *          search then reserves room for it and path->search_for_extension
 *          is set for the duration of this call; it is cleared again on
 *          every exit path.
 *
 * Returns:
 *   0        ref found, *@ref_ret points at it
 *   -ENOENT  the extent item was not found (*@ref_ret untouched), or the ref
 *            is not inline (*@ref_ret points at the position where it would
 *            be inserted)
 *   -EAGAIN  @insert was set but the ref cannot be added inline: the item
 *            would grow too large, or keyed back ref items follow the extent
 *            item (*@ref_ret untouched)
 *   -EINVAL  the item is smaller than an extent item header
 *   -EIO     the extent flags mark neither a tree block nor data
 *   < 0      error from btrfs_search_slot()
 */
static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int insert)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	u64 flags;
	u32 item_size;
	unsigned long ptr;
	unsigned long end;
	int extra_size;
	int type;
	int want;
	int ret;
	int err = 0;
	int skinny_metadata =
		btrfs_fs_incompat(root->fs_info, SKINNY_METADATA);

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;

	want = extent_ref_type(parent, owner);
	if (insert) {
		extra_size = btrfs_extent_inline_ref_size(want);
		path->search_for_extension = 1;
	} else {
		extra_size = -1;
	}

	if (owner < BTRFS_FIRST_FREE_OBJECTID && skinny_metadata) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = owner;
	} else if (skinny_metadata) {
		skinny_metadata = 0;
	}

again:
	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}

	/*
	 * We may be a newly converted file system which still has the old fat
	 * extent entries for metadata, so try and see if we have one of those.
	 */
	if (ret > 0 && skinny_metadata) {
		skinny_metadata = 0;
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes)
				ret = 0;
		}
		if (ret) {
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = num_bytes;
			btrfs_release_path(path);
			goto again;
		}
	}

	if (ret) {
		printf("Failed to find [%llu, %u, %llu]\n", key.objectid, key.type, key.offset);
		/* Leave through out: so search_for_extension is reset. */
		err = -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size(leaf, path->slots[0]);
	if (item_size < sizeof(*ei)) {
		printf("Size is %u, needs to be %u, slot %d\n",
		       (unsigned)item_size,
		       (unsigned)sizeof(*ei), path->slots[0]);
		btrfs_print_leaf(leaf);
		err = -EINVAL;
		goto out;
	}

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	/* Inline refs start right after the extent item header ... */
	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
		/* ... or after the tree block info of a non-skinny tree block. */
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	} else if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
		if (!(flags & BTRFS_EXTENT_FLAG_DATA)) {
			err = -EIO;
			goto out;
		}
	}

	err = -ENOENT;
	while (1) {
		if (ptr >= end) {
			WARN_ON(ptr > end);
			break;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		/* Past the slot where the wanted type would be. */
		if (want < type)
			break;
		if (want > type) {
			ptr += btrfs_extent_inline_ref_size(type);
			continue;
		}

		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			struct btrfs_extent_data_ref *dref;
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			if (match_extent_data_ref(leaf, dref, root_objectid,
						  owner, offset)) {
				err = 0;
				break;
			}
			if (hash_extent_data_ref_item(leaf, dref) <
			    hash_extent_data_ref(root_objectid, owner, offset))
				break;
		} else {
			u64 ref_offset;
			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
			if (parent > 0) {
				if (parent == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < parent)
					break;
			} else {
				if (root_objectid == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < root_objectid)
					break;
			}
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	if (err == -ENOENT && insert) {
		if (item_size + extra_size >=
		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
			err = -EAGAIN;
			goto out;
		}
		/*
		 * To add new inline back ref, we have to make sure
		 * there is no corresponding back ref item.
		 * For simplicity, we just do not add new inline back
		 * ref if there is any back ref item.
		 */
		if (find_next_key(path, &key) == 0 && key.objectid == bytenr &&
		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
			err = -EAGAIN;
			goto out;
		}
	}
	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
	if (insert)
		path->search_for_extension = 0;
	return err;
}
|
|
|
|
/*
 * Insert a new inline back ref at position @iref of the extent item that
 * @path points at.
 *
 * The item is extended by the size of the new ref, the extent's reference
 * count is raised by @refs_to_add, any inline refs at or behind @iref are
 * moved up to make room, and the new ref is filled in according to the type
 * chosen by extent_ref_type(@parent, @owner).  The leaf is marked dirty.
 *
 * Always returns 0.
 */
static int setup_inline_extent_backref(struct btrfs_root *root,
				struct btrfs_path *path,
				struct btrfs_extent_inline_ref *iref,
				u64 parent, u64 root_objectid,
				u64 owner, u64 offset, int refs_to_add)
{
	struct extent_buffer *leaf = path->nodes[0];
	struct btrfs_extent_item *ei;
	unsigned long iref_offset;
	unsigned long new_ref;
	unsigned long item_end;
	u64 refs;
	int type;
	int size;

	/* Remember the position relative to the item before the item changes. */
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	iref_offset = (unsigned long)iref - (unsigned long)ei;

	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	btrfs_extend_item(path, size);

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);

	new_ref = (unsigned long)ei + iref_offset;
	item_end = (unsigned long)ei + btrfs_item_size(leaf, path->slots[0]);
	/* Shift the refs behind the insertion point towards the item end. */
	if (new_ref < item_end - size)
		memmove_extent_buffer(leaf, new_ref + size, new_ref,
				      item_end - size - new_ref);

	iref = (struct btrfs_extent_inline_ref *)new_ref;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);

	switch (type) {
	case BTRFS_EXTENT_DATA_REF_KEY: {
		struct btrfs_extent_data_ref *dref;

		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
		break;
	}
	case BTRFS_SHARED_DATA_REF_KEY: {
		struct btrfs_shared_data_ref *sref;

		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
		break;
	}
	case BTRFS_SHARED_BLOCK_REF_KEY:
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
		break;
	default:
		/* Remaining type (tree block ref): the offset is the root. */
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
		break;
	}

	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
|
|
|
|
/*
 * Find the backref described by (@parent, @root_objectid, @owner, @offset)
 * for the extent at @bytenr.
 *
 * The inline lookup is tried first.  Only when it reports -ENOENT is the path
 * released, *ref_ret cleared, and the lookup repeated through
 * lookup_tree_block_ref() (owner below BTRFS_FIRST_FREE_OBJECTID) or
 * lookup_extent_data_ref() (any other owner).
 *
 * Returns whatever the last lookup performed returned.
 */
static int lookup_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner, u64 offset)
{
	int err;

	err = lookup_inline_extent_backref(trans, root, path, ref_ret,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 0);
	if (err == -ENOENT) {
		/* Nothing inline: start over with a clean path. */
		btrfs_release_path(path);
		*ref_ret = NULL;

		if (owner >= BTRFS_FIRST_FREE_OBJECTID)
			err = lookup_extent_data_ref(trans, root, path, bytenr,
						     parent, root_objectid,
						     owner, offset);
		else
			err = lookup_tree_block_ref(trans, root, path, bytenr,
						    parent, root_objectid);
	}
	return err;
}
|
|
|
|
static int update_inline_extent_backref(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root,
|
|
struct btrfs_path *path,
|
|
struct btrfs_extent_inline_ref *iref,
|
|
int refs_to_mod)
|
|
{
|
|
struct extent_buffer *leaf;
|
|
struct btrfs_extent_item *ei;
|
|
struct btrfs_extent_data_ref *dref = NULL;
|
|
struct btrfs_shared_data_ref *sref = NULL;
|
|
unsigned long ptr;
|
|
unsigned long end;
|
|
u32 item_size;
|
|
int size;
|
|
int type;
|
|
u64 refs;
|
|
|
|
leaf = path->nodes[0];
|
|
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
|
|
refs = btrfs_extent_refs(leaf, ei);
|
|
WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
|
|
refs += refs_to_mod;
|
|
btrfs_set_extent_refs(leaf, ei, refs);
|
|
|
|
type = btrfs_extent_inline_ref_type(leaf, iref);
|
|
|
|
if (type == BTRFS_EXTENT_DATA_REF_KEY) {
|
|
dref = (struct btrfs_extent_data_ref *)(&iref->offset);
|
|
refs = btrfs_extent_data_ref_count(leaf, dref);
|
|
} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
|
|
sref = (struct btrfs_shared_data_ref *)(iref + 1);
|
|
refs = btrfs_shared_data_ref_count(leaf, sref);
|
|
} else {
|
|
refs = 1;
|
|
BUG_ON(refs_to_mod != -1);
|
|
}
|
|
|
|
BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
|
|
refs += refs_to_mod;
|
|
|
|
if (refs > 0) {
|
|
if (type == BTRFS_EXTENT_DATA_REF_KEY)
|
|
btrfs_set_extent_data_ref_count(leaf, dref, refs);
|
|
else
|
|
btrfs_set_shared_data_ref_count(leaf, sref, refs);
|
|
} else {
|
|
size = btrfs_extent_inline_ref_size(type);
|
|
item_size = btrfs_item_size(leaf, path->slots[0]);
|
|
ptr = (unsigned long)iref;
|
|
end = (unsigned long)ei + item_size;
|
|
if (ptr + size < end)
|
|
memmove_extent_buffer(leaf, ptr, ptr + size,
|
|
end - ptr - size);
|
|
item_size -= size;
|
|
btrfs_truncate_item(path, item_size, 1);
|
|
}
|
|
btrfs_mark_buffer_dirty(leaf);
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Add @refs_to_add references through an inline backref.
 *
 * If lookup_inline_extent_backref() finds a matching inline ref (only valid
 * for owners at or above BTRFS_FIRST_FREE_OBJECTID) its count is bumped; if it
 * returns -ENOENT a new inline ref is set up at the position the lookup left
 * in iref.  Any other lookup result is returned to the caller unchanged.
 */
static int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner,
				 u64 offset, int refs_to_add)
{
	struct btrfs_extent_inline_ref *iref;
	int ret;

	ret = lookup_inline_extent_backref(trans, root, path, &iref,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 1);
	switch (ret) {
	case 0:
		BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
		return update_inline_extent_backref(trans, root, path, iref,
						    refs_to_add);
	case -ENOENT:
		return setup_inline_extent_backref(root, path, iref,
						   parent, root_objectid,
						   owner, offset, refs_to_add);
	default:
		return ret;
	}
}
|
|
|
|
/*
 * Insert a non-inline backref for the extent at @bytenr.
 *
 * Owners below BTRFS_FIRST_FREE_OBJECTID go through insert_tree_block_ref()
 * and must add exactly one reference; all other owners go through
 * insert_extent_data_ref() with @refs_to_add.
 */
static int insert_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 u64 bytenr, u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add)
{
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		BUG_ON(refs_to_add != 1);
		return insert_tree_block_ref(trans, root, path, bytenr,
					     parent, root_objectid);
	}

	return insert_extent_data_ref(trans, root, path, bytenr,
				      parent, root_objectid,
				      owner, offset, refs_to_add);
}
|
|
|
|
/*
 * Drop @refs_to_drop references from the backref @path points at.
 *
 * With an inline ref (@iref non-NULL) the inline count is decremented.
 * Otherwise a data backref is handed to remove_extent_data_ref() and anything
 * else has its item deleted outright.  Non-data backrefs may only drop a
 * single reference.
 */
static int remove_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 int refs_to_drop, int is_data)
{
	BUG_ON(!is_data && refs_to_drop != 1);

	if (iref)
		return update_inline_extent_backref(trans, root, path, iref,
						    -refs_to_drop);
	if (is_data)
		return remove_extent_data_ref(trans, root, path, refs_to_drop);

	return btrfs_del_item(trans, root, path);
}
|
|
|
|
/*
 * Add one reference to the extent at @bytenr for the given backref.
 *
 * An inline backref is attempted first.  If that attempt returns -EAGAIN the
 * extent item's ref count is bumped by hand and the backref is inserted as a
 * separate item instead.
 *
 * Returns -ENOMEM when no path can be allocated.  Every other failure trips
 * BUG_ON(), so a normal return is always 0.
 */
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			 u64 bytenr, u64 num_bytes, u64 parent,
			 u64 root_objectid, u64 owner, u64 offset)
{
	struct btrfs_root *extent_root = btrfs_extent_root(trans->fs_info,
							   bytenr);
	struct btrfs_path *path;
	int err;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	err = insert_inline_extent_backref(trans, extent_root, path, bytenr,
					   num_bytes, parent, root_objectid,
					   owner, offset, 1);
	if (err == -EAGAIN) {
		struct extent_buffer *leaf = path->nodes[0];
		struct btrfs_extent_item *item;
		u64 refs;

		/* The path still points at the extent item: count the ref. */
		item = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_item);
		refs = btrfs_extent_refs(leaf, item);
		btrfs_set_extent_refs(leaf, item, refs + 1);

		btrfs_mark_buffer_dirty(leaf);
		btrfs_release_path(path);

		/* now insert the actual backref */
		err = insert_extent_backref(trans, extent_root, path, bytenr,
					    parent, root_objectid, owner,
					    offset, 1);
	}

	btrfs_free_path(path);
	BUG_ON(err);
	return err;
}
|
|
|
|
/*
 * Look up the extent item for @bytenr and report its ref count and flags.
 *
 * @trans:    transaction handle, passed through to btrfs_search_slot()
 * @fs_info:  filesystem to search
 * @bytenr:   start of the extent
 * @offset:   key offset to search for; replaced by fs_info->nodesize when
 *            @metadata is set but the filesystem lacks SKINNY_METADATA
 * @metadata: non-zero to look for a METADATA_ITEM key first
 * @refs:     optional, receives the extent's reference count
 * @flags:    optional, receives the extent's flags
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, a negative
 * error from the tree search, -EIO if no matching item exists, or -EUCLEAN if
 * the item found is too small to hold an extent item.
 */
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info, u64 bytenr,
			     u64 offset, int metadata, u64 *refs, u64 *flags)
{
	struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bytenr);
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_extent_item *item;
	u32 item_size;

	if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
		offset = fs_info->nodesize;
		metadata = 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = bytenr;
	key.offset = offset;
	if (metadata)
		key.type = BTRFS_METADATA_ITEM_KEY;
	else
		key.type = BTRFS_EXTENT_ITEM_KEY;

again:
	ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	/*
	 * Deal with the fact that we may have mixed SKINNY and normal refs.  If
	 * we didn't find what we wanted check and see if we have a normal ref
	 * right next to us, or re-search if we are on the edge of the leaf just
	 * to make sure.
	 */
	if (ret > 0 && metadata) {
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == fs_info->nodesize)
				ret = 0;
		}

		if (ret) {
			btrfs_release_path(path);
			/*
			 * key may have been overwritten with the neighbouring
			 * item's key above, so restore the objectid as well;
			 * otherwise the retry would look up a different
			 * extent.
			 */
			key.objectid = bytenr;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = fs_info->nodesize;
			metadata = 0;
			goto again;
		}
	}

	if (ret != 0) {
		ret = -EIO;
		goto out;
	}

	l = path->nodes[0];
	item_size = btrfs_item_size(l, path->slots[0]);
	if (item_size < sizeof(*item)) {
		/* Report corruption to the caller instead of aborting. */
		error(
"unsupported or corrupted extent item, item size=%u expect minimal size=%zu",
		      item_size, sizeof(*item));
		ret = -EUCLEAN;
		goto out;
	}

	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
	if (refs)
		*refs = btrfs_extent_refs(l, item);
	if (flags)
		*flags = btrfs_extent_flags(l, item);
out:
	btrfs_free_path(path);
	return ret;
}
|
|
|
|
/*
 * OR @flags into the flags of the extent item that describes tree block @eb.
 *
 * With SKINNY_METADATA the item is searched as (eb->start, METADATA_ITEM,
 * level of @eb); if that is not found, or without the feature, it is searched
 * as (eb->start, EXTENT_ITEM, nodesize).
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, a negative
 * error from the tree search, or -EUCLEAN if the item is too small to be an
 * extent item.  A missing item is fatal (BUG()).
 *
 * NOTE(review): the search is issued with both trailing arguments 0 and the
 * leaf is not marked dirty in this function -- confirm callers guarantee the
 * modified leaf gets written out.
 */
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
				struct extent_buffer *eb, u64 flags)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *extent_root;
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_extent_item *item;
	u32 item_size;
	u64 bytenr = eb->start;
	int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	extent_root = btrfs_extent_root(fs_info, bytenr);

	key.objectid = bytenr;
	if (skinny_metadata) {
		key.offset = btrfs_header_level(eb);
		key.type = BTRFS_METADATA_ITEM_KEY;
	} else {
		key.offset = fs_info->nodesize;
		key.type = BTRFS_EXTENT_ITEM_KEY;
	}

again:
	ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	if (ret > 0 && skinny_metadata) {
		/* Only one fallback attempt is made. */
		skinny_metadata = 0;
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.offset == fs_info->nodesize &&
			    key.type == BTRFS_EXTENT_ITEM_KEY)
				ret = 0;
		}
		if (ret) {
			btrfs_release_path(path);
			/*
			 * key may now hold the neighbouring item's key, so the
			 * objectid has to be restored too or the retry would
			 * target, and possibly modify, another extent.
			 */
			key.objectid = bytenr;
			key.offset = fs_info->nodesize;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			goto again;
		}
	}

	if (ret != 0) {
		btrfs_print_leaf(path->nodes[0]);
		printk("failed to find block number %llu\n",
			(unsigned long long)bytenr);
		BUG();
	}
	l = path->nodes[0];
	item_size = btrfs_item_size(l, path->slots[0]);
	if (item_size < sizeof(*item)) {
		error(
"unsupported or corrupted extent item, item size=%u expect minimal size=%zu",
		      item_size, sizeof(*item));
		ret = -EUCLEAN;
		goto out;
	}
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
	flags |= btrfs_extent_flags(l, item);
	btrfs_set_extent_flags(l, item, flags);
out:
	btrfs_free_path(path);
	return ret;
}
|
|
|
|
static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root,
|
|
struct extent_buffer *buf,
|
|
int record_parent, int inc)
|
|
{
|
|
u64 bytenr;
|
|
u64 num_bytes;
|
|
u64 parent;
|
|
u64 ref_root;
|
|
u32 nritems;
|
|
struct btrfs_key key;
|
|
struct btrfs_file_extent_item *fi;
|
|
int i;
|
|
int level;
|
|
int ret = 0;
|
|
int (*process_func)(struct btrfs_trans_handle *trans,
|
|
u64, u64, u64, u64, u64, u64);
|
|
|
|
ref_root = btrfs_header_owner(buf);
|
|
nritems = btrfs_header_nritems(buf);
|
|
level = btrfs_header_level(buf);
|
|
|
|
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && level == 0)
|
|
return 0;
|
|
|
|
if (inc)
|
|
process_func = btrfs_inc_extent_ref;
|
|
else
|
|
process_func = btrfs_free_extent;
|
|
|
|
if (record_parent)
|
|
parent = buf->start;
|
|
else
|
|
parent = 0;
|
|
|
|
for (i = 0; i < nritems; i++) {
|
|
cond_resched();
|
|
if (level == 0) {
|
|
btrfs_item_key_to_cpu(buf, &key, i);
|
|
if (key.type != BTRFS_EXTENT_DATA_KEY)
|
|
continue;
|
|
fi = btrfs_item_ptr(buf, i,
|
|
struct btrfs_file_extent_item);
|
|
if (btrfs_file_extent_type(buf, fi) ==
|
|
BTRFS_FILE_EXTENT_INLINE)
|
|
continue;
|
|
bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
|
|
if (bytenr == 0)
|
|
continue;
|
|
|
|
num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
|
|
key.offset -= btrfs_file_extent_offset(buf, fi);
|
|
ret = process_func(trans, bytenr, num_bytes, parent,
|
|
ref_root, key.objectid, key.offset);
|
|
if (ret) {
|
|
WARN_ON(1);
|
|
goto fail;
|
|
}
|
|
} else {
|
|
bytenr = btrfs_node_blockptr(buf, i);
|
|
num_bytes = root->fs_info->nodesize;
|
|
ret = process_func(trans, bytenr, num_bytes, parent,
|
|
ref_root, level - 1, 0);
|
|
if (ret) {
|
|
WARN_ON(1);
|
|
goto fail;
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
fail:
|
|
WARN_ON(1);
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Add one reference to everything @buf points to; see __btrfs_mod_ref().
 */
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int record_parent)
{
	const int inc = 1;

	return __btrfs_mod_ref(trans, root, buf, record_parent, inc);
}
|
|
|
|
/*
 * Drop one reference from everything @buf points to; see __btrfs_mod_ref().
 */
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int record_parent)
{
	const int inc = 0;

	return __btrfs_mod_ref(trans, root, buf, record_parent, inc);
}
|
|
|
|
static int update_block_group_item(struct btrfs_trans_handle *trans,
|
|
struct btrfs_path *path,
|
|
struct btrfs_block_group *cache)
|
|
{
|
|
int ret;
|
|
struct btrfs_fs_info *fs_info = trans->fs_info;
|
|
struct btrfs_root *root = btrfs_block_group_root(fs_info);
|
|
unsigned long bi;
|
|
struct btrfs_block_group_item bgi;
|
|
struct extent_buffer *leaf;
|
|
struct btrfs_key key;
|
|
|
|
/*
|
|
* If we're doing convert and the bg is beyond our last converted bg,
|
|
* it should go to the new root.
|
|
*/
|
|
if (btrfs_super_flags(fs_info->super_copy) &
|
|
BTRFS_SUPER_FLAG_CHANGING_BG_TREE &&
|
|
cache->start >= fs_info->last_converted_bg_bytenr) {
|
|
if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE))
|
|
/* Converting back to extent tree. */
|
|
root = btrfs_extent_root(fs_info, 0);
|
|
else
|
|
/* Convert to new bg tree.*/
|
|
root = fs_info->block_group_root;
|
|
}
|
|
|
|
key.objectid = cache->start;
|
|
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
|
|
key.offset = cache->length;
|
|
|
|
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
|
|
if (ret > 0)
|
|
ret = -ENOENT;
|
|
if (ret < 0)
|
|
goto fail;
|
|
|
|
leaf = path->nodes[0];
|
|
bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
|
|
btrfs_set_stack_block_group_used(&bgi, cache->used);
|
|
btrfs_set_stack_block_group_flags(&bgi, cache->flags);
|
|
btrfs_set_stack_block_group_chunk_objectid(&bgi,
|
|
cache->global_root_id);
|
|
write_extent_buffer(leaf, &bgi, bi, sizeof(bgi));
|
|
btrfs_mark_buffer_dirty(leaf);
|
|
fail:
|
|
btrfs_release_path(path);
|
|
return ret;
|
|
|
|
}
|
|
|
|
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
|
|
{
|
|
struct btrfs_block_group *cache;
|
|
struct btrfs_path *path;
|
|
int ret = 0;
|
|
|
|
path = btrfs_alloc_path();
|
|
if (!path)
|
|
return -ENOMEM;
|
|
|
|
while (!list_empty(&trans->dirty_bgs)) {
|
|
cache = list_first_entry(&trans->dirty_bgs,
|
|
struct btrfs_block_group, dirty_list);
|
|
list_del_init(&cache->dirty_list);
|
|
ret = update_block_group_item(trans, path, cache);
|
|
if (ret)
|
|
break;
|
|
}
|
|
btrfs_free_path(path);
|
|
return ret;
|
|
}
|
|
|
|
static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
|
|
u64 flags)
|
|
{
|
|
struct btrfs_space_info *found;
|
|
|
|
flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
|
|
|
|
list_for_each_entry(found, &info->space_info, list) {
|
|
if (found->flags & flags)
|
|
return found;
|
|
}
|
|
return NULL;
|
|
|
|
}
|
|
|
|
/*
 * Subtract @total_bytes from the space_info matching @flags.
 *
 * Only empty block groups can be released this way: a non-zero @bytes_used is
 * rejected with -ENOTEMPTY.  Returns -ENOENT if no space_info matches and
 * -EINVAL if the space_info holds fewer than @total_bytes.  On success returns
 * 0 and, when @space_info is non-NULL, stores the space_info found there.
 */
static int free_space_info(struct btrfs_fs_info *fs_info, u64 flags,
			   u64 total_bytes, u64 bytes_used,
			   struct btrfs_space_info **space_info)
{
	struct btrfs_space_info *sinfo;

	/* only support free block group which is empty */
	if (bytes_used != 0)
		return -ENOTEMPTY;

	sinfo = __find_space_info(fs_info, flags);
	if (sinfo == NULL)
		return -ENOENT;

	if (total_bytes > sinfo->total_bytes) {
		fprintf(stderr,
			"WARNING: bad space info to free %llu only have %llu\n",
			total_bytes, sinfo->total_bytes);
		return -EINVAL;
	}

	sinfo->total_bytes -= total_bytes;
	if (space_info != NULL)
		*space_info = sinfo;
	return 0;
}
|
|
|
|
/*
 * Account @total_bytes and @bytes_used to the space_info matching @flags,
 * creating that space_info if none exists yet.
 *
 * A new space_info starts with the given totals, nothing pinned or reserved
 * and the full flag cleared.  If an existing one ends up with more bytes used
 * than it has in total, a warning is printed but the update still succeeds.
 *
 * @space_info must be non-NULL; it receives the space_info on success.
 * Returns 0 on success or -ENOMEM if a new space_info cannot be allocated.
 */
int update_space_info(struct btrfs_fs_info *info, u64 flags,
		      u64 total_bytes, u64 bytes_used,
		      struct btrfs_space_info **space_info)
{
	struct btrfs_space_info *sinfo = __find_space_info(info, flags);

	if (!sinfo) {
		sinfo = kmalloc(sizeof(*sinfo), GFP_NOFS);
		if (!sinfo)
			return -ENOMEM;

		sinfo->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
		sinfo->total_bytes = total_bytes;
		sinfo->bytes_used = bytes_used;
		sinfo->bytes_pinned = 0;
		sinfo->bytes_reserved = 0;
		sinfo->full = 0;
		list_add(&sinfo->list, &info->space_info);

		*space_info = sinfo;
		return 0;
	}

	sinfo->total_bytes += total_bytes;
	sinfo->bytes_used += bytes_used;
	if (sinfo->bytes_used > sinfo->total_bytes) {
		fprintf(stderr, "warning, bad space info total_bytes "
			"%llu used %llu\n",
			(unsigned long long)sinfo->total_bytes,
			(unsigned long long)sinfo->bytes_used);
	}

	*space_info = sinfo;
	return 0;
}
|
|
|
|
|
|
/*
 * Record the profile bits (RAID0, RAID1*, RAID10, RAID5/6, DUP) present in
 * @flags as available for each block group type (data, metadata, system) that
 * @flags also names.  Does nothing when @flags carries no profile bit.
 */
static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
	const u64 profile_mask = BTRFS_BLOCK_GROUP_RAID0 |
				 BTRFS_BLOCK_GROUP_RAID1_MASK |
				 BTRFS_BLOCK_GROUP_RAID10 |
				 BTRFS_BLOCK_GROUP_RAID56_MASK |
				 BTRFS_BLOCK_GROUP_DUP;
	const u64 extra_flags = flags & profile_mask;

	if (!extra_flags)
		return;

	if (flags & BTRFS_BLOCK_GROUP_DATA)
		fs_info->avail_data_alloc_bits |= extra_flags;
	if (flags & BTRFS_BLOCK_GROUP_METADATA)
		fs_info->avail_metadata_alloc_bits |= extra_flags;
	if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
		fs_info->avail_system_alloc_bits |= extra_flags;
}
|
|
|
|
/*
 * Allocate a new chunk and block group of type @flags if the matching
 * space_info is running short.
 *
 * Nothing is allocated when the space_info is already marked full, when used +
 * pinned + reserved + @alloc_bytes stays below div_factor(total_bytes, 7),
 * when allocation of this chunk type is disabled in @fs_info, or when a chunk
 * allocation is already in progress on @trans.
 *
 * Always returns 0.  -ENOSPC from the chunk allocator just marks the
 * space_info full; any other failure is fatal (BUG_ON).
 */
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_fs_info *fs_info, u64 alloc_bytes,
			  u64 flags)
{
	struct btrfs_space_info *sinfo;
	u64 threshold;
	u64 in_use;
	u64 chunk_start;
	u64 chunk_len;
	int ret;

	sinfo = __find_space_info(fs_info, flags);
	if (!sinfo) {
		ret = update_space_info(fs_info, flags, 0, 0, &sinfo);
		BUG_ON(ret);
	}
	BUG_ON(!sinfo);

	if (sinfo->full)
		return 0;

	threshold = div_factor(sinfo->total_bytes, 7);
	in_use = sinfo->bytes_used + sinfo->bytes_pinned +
		 sinfo->bytes_reserved + alloc_bytes;
	if (in_use < threshold)
		return 0;

	/*
	 * Avoid allocating given chunk type
	 */
	if ((flags & BTRFS_BLOCK_GROUP_METADATA) &&
	    fs_info->avoid_meta_chunk_alloc)
		return 0;
	if ((flags & BTRFS_BLOCK_GROUP_SYSTEM) &&
	    fs_info->avoid_sys_chunk_alloc)
		return 0;

	/*
	 * We're going to allocate new chunk, during the process, we will
	 * allocate new tree blocks, which can trigger new chunk allocation
	 * again. Avoid the recursion.
	 */
	if (trans->allocating_chunk)
		return 0;
	trans->allocating_chunk = 1;

	/*
	 * The space_info only has block group type (data/meta/sys), doesn't
	 * have the proper profile.
	 * While we still want to handle mixed block groups properly.
	 * So here add the extra bits for mixed profile.
	 */
	flags |= sinfo->flags;
	ret = btrfs_alloc_chunk(trans, fs_info, &chunk_start, &chunk_len,
				flags);
	if (ret == -ENOSPC) {
		sinfo->full = 1;
		trans->allocating_chunk = 0;
		return 0;
	}

	BUG_ON(ret);

	ret = btrfs_make_block_group(trans, fs_info, 0, flags, chunk_start,
				     chunk_len);
	BUG_ON(ret);
	trans->allocating_chunk = 0;
	return 0;
}
|
|
|
|
/*
 * Account an allocation (@alloc != 0) or a free (@alloc == 0) of the range
 * [@bytenr, @bytenr + @num_bytes).
 *
 * The super block's bytes_used is adjusted first.  The range is then walked
 * block group by block group: each group touched is queued on trans->dirty_bgs
 * and has its own and its space_info's used counters adjusted.  On a free with
 * @mark_free set the range is also marked dirty in info->free_space_cache.
 *
 * Returns 0 on success or -1 if part of the range has no block group; the
 * accounting already done at that point is not rolled back.
 */
static int update_block_group(struct btrfs_trans_handle *trans, u64 bytenr,
			      u64 num_bytes, int alloc, int mark_free)
{
	struct btrfs_fs_info *info = trans->fs_info;
	u64 remaining = num_bytes;
	u64 super_used;

	/* block accounting for super block */
	super_used = btrfs_super_bytes_used(info->super_copy);
	if (alloc)
		super_used += num_bytes;
	else
		super_used -= num_bytes;
	btrfs_set_super_bytes_used(info->super_copy, super_used);

	while (remaining) {
		struct btrfs_block_group *cache;
		u64 byte_in_group;
		u64 used;
		u64 len;

		cache = btrfs_lookup_block_group(info, bytenr);
		if (!cache)
			return -1;

		byte_in_group = bytenr - cache->start;
		WARN_ON(byte_in_group > cache->length);
		if (list_empty(&cache->dirty_list))
			list_add_tail(&cache->dirty_list, &trans->dirty_bgs);

		/* Portion of the range that falls inside this block group. */
		len = min(remaining, cache->length - byte_in_group);
		used = cache->used;

		if (alloc) {
			used += len;
			cache->space_info->bytes_used += len;
		} else {
			used -= len;
			cache->space_info->bytes_used -= len;
			if (mark_free) {
				set_extent_dirty(&info->free_space_cache,
						 bytenr, bytenr + len - 1,
						 GFP_NOFS);
			}
		}
		cache->used = used;

		remaining -= len;
		bytenr += len;
	}
	return 0;
}
|
|
|
|
/*
 * Pin (@pin != 0) or unpin (@pin == 0) the range [@bytenr, @bytenr + @num).
 *
 * The range is first set or cleared in fs_info->pinned_extents.  The pinned
 * byte counters of every block group covering the range, of its space_info
 * and of @fs_info are then adjusted.  Parts of the range not covered by any
 * block group are stepped over one sector at a time without accounting.
 *
 * Always returns 0.
 */
static int update_pinned_extents(struct btrfs_fs_info *fs_info,
				 u64 bytenr, u64 num, int pin)
{
	u64 len;
	struct btrfs_block_group *cache;

	if (pin) {
		set_extent_dirty(&fs_info->pinned_extents,
				 bytenr, bytenr + num - 1, GFP_NOFS);
	} else {
		clear_extent_dirty(&fs_info->pinned_extents,
				   bytenr, bytenr + num - 1, NULL);
	}

	while (num > 0) {
		cache = btrfs_lookup_block_group(fs_info, bytenr);
		if (!cache) {
			/* No block group here: skip ahead by one sector. */
			len = min((u64)fs_info->sectorsize, num);
		} else {
			len = min(num,
				  cache->length - (bytenr - cache->start));
			if (pin) {
				cache->pinned += len;
				cache->space_info->bytes_pinned += len;
				fs_info->total_pinned += len;
			} else {
				cache->pinned -= len;
				cache->space_info->bytes_pinned -= len;
				fs_info->total_pinned -= len;
			}
		}
		bytenr += len;
		num -= len;
	}
	return 0;
}
|
|
|
|
void btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
|
|
{
|
|
u64 start;
|
|
u64 end;
|
|
int ret;
|
|
struct btrfs_fs_info *fs_info = trans->fs_info;
|
|
struct extent_io_tree *free_space_cache = &fs_info->free_space_cache;
|
|
struct extent_io_tree *pinned_extents = &fs_info->pinned_extents;
|
|
|
|
while(1) {
|
|
ret = find_first_extent_bit(pinned_extents, 0, &start, &end,
|
|
EXTENT_DIRTY, NULL);
|
|
if (ret)
|
|
break;
|
|
update_pinned_extents(trans->fs_info, start, end + 1 - start,
|
|
0);
|
|
clear_extent_dirty(pinned_extents, start, end, NULL);
|
|
set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
|
|
}
|
|
}
|
|
|
|
/*
 * Pin the freed range [@bytenr, @bytenr + @num_bytes), unless it is a tree
 * block that can be reused straight away.
 *
 * Returns 1 if the range holds an up-to-date tree block that was created in
 * this transaction, has not been written and does not belong to the tree log;
 * its dirty state is cleared and nothing is pinned.  Returns 0 after pinning
 * the range in every other case (data extents, blocks not found, and so on).
 */
static int pin_down_bytes(struct btrfs_trans_handle *trans, u64 bytenr,
			  u64 num_bytes, int is_data)
{
	struct extent_buffer *buf;

	if (is_data)
		goto pinit;

	buf = btrfs_find_tree_block(trans->fs_info, bytenr, num_bytes);
	if (!buf)
		goto pinit;

	/* we can reuse a block if it hasn't been written
	 * and it is from this transaction.  We can't
	 * reuse anything from the tree log root because
	 * it has tiny sub-transactions.
	 */
	if (btrfs_buffer_uptodate(buf, 0, 0)) {
		u64 header_owner = btrfs_header_owner(buf);
		u64 header_transid = btrfs_header_generation(buf);

		if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
		    header_transid == trans->transid &&
		    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
			btrfs_clear_buffer_dirty(trans, buf);
			free_extent_buffer(buf);
			return 1;
		}
	}
	free_extent_buffer(buf);
pinit:
	update_pinned_extents(trans->fs_info, bytenr, num_bytes, 1);
	return 0;
}
|
|
|
|
/*
 * Mark [@bytenr, @bytenr + @num_bytes) as pinned and account it.
 */
void btrfs_pin_extent(struct btrfs_fs_info *fs_info,
		      u64 bytenr, u64 num_bytes)
{
	const int pin = 1;

	update_pinned_extents(fs_info, bytenr, num_bytes, pin);
}
|
|
|
|
/*
 * Remove the pinned mark from [@bytenr, @bytenr + @num_bytes) and undo its
 * accounting.
 */
void btrfs_unpin_extent(struct btrfs_fs_info *fs_info,
			u64 bytenr, u64 num_bytes)
{
	const int pin = 0;

	update_pinned_extents(fs_info, bytenr, num_bytes, pin);
}
|
|
|
|
/*
|
|
* remove an extent from the root, returns 0 on success
|
|
*/
|
|
static int __free_extent(struct btrfs_trans_handle *trans,
|
|
u64 bytenr, u64 num_bytes, u64 parent,
|
|
u64 root_objectid, u64 owner_objectid,
|
|
u64 owner_offset, int refs_to_drop)
|
|
{
|
|
|
|
struct btrfs_root *extent_root = btrfs_extent_root(trans->fs_info,
|
|
bytenr);
|
|
struct btrfs_key key;
|
|
struct btrfs_path *path;
|
|
struct extent_buffer *leaf;
|
|
struct btrfs_extent_item *ei;
|
|
struct btrfs_extent_inline_ref *iref;
|
|
int ret;
|
|
int is_data;
|
|
int extent_slot = 0;
|
|
int found_extent = 0;
|
|
int num_to_del = 1;
|
|
u32 item_size;
|
|
u64 refs;
|
|
int skinny_metadata =
|
|
btrfs_fs_incompat(extent_root->fs_info, SKINNY_METADATA);
|
|
|
|
if (trans->fs_info->free_extent_hook) {
|
|
trans->fs_info->free_extent_hook(bytenr, num_bytes,
|
|
parent, root_objectid, owner_objectid,
|
|
owner_offset, refs_to_drop);
|
|
|
|
}
|
|
path = btrfs_alloc_path();
|
|
if (!path)
|
|
return -ENOMEM;
|
|
|
|
is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
|
|
if (is_data)
|
|
skinny_metadata = 0;
|
|
BUG_ON(!is_data && refs_to_drop != 1);
|
|
|
|
ret = lookup_extent_backref(trans, extent_root, path, &iref,
|
|
bytenr, num_bytes, parent,
|
|
root_objectid, owner_objectid,
|
|
owner_offset);
|
|
if (ret == 0) {
|
|
extent_slot = path->slots[0];
|
|
while (extent_slot >= 0) {
|
|
btrfs_item_key_to_cpu(path->nodes[0], &key,
|
|
extent_slot);
|
|
if (key.objectid != bytenr)
|
|
break;
|
|
if (key.type == BTRFS_EXTENT_ITEM_KEY &&
|
|
key.offset == num_bytes) {
|
|
found_extent = 1;
|
|
break;
|
|
}
|
|
if (key.type == BTRFS_METADATA_ITEM_KEY &&
|
|
key.offset == owner_objectid) {
|
|
found_extent = 1;
|
|
break;
|
|
}
|
|
if (path->slots[0] - extent_slot > 5)
|
|
break;
|
|
extent_slot--;
|
|
}
|
|
if (!found_extent) {
|
|
BUG_ON(iref);
|
|
ret = remove_extent_backref(trans, extent_root, path,
|
|
NULL, refs_to_drop,
|
|
is_data);
|
|
BUG_ON(ret);
|
|
btrfs_release_path(path);
|
|
|
|
key.objectid = bytenr;
|
|
|
|
if (skinny_metadata) {
|
|
key.type = BTRFS_METADATA_ITEM_KEY;
|
|
key.offset = owner_objectid;
|
|
} else {
|
|
key.type = BTRFS_EXTENT_ITEM_KEY;
|
|
key.offset = num_bytes;
|
|
}
|
|
|
|
ret = btrfs_search_slot(trans, extent_root,
|
|
&key, path, -1, 1);
|
|
if (ret > 0 && skinny_metadata && path->slots[0]) {
|
|
path->slots[0]--;
|
|
btrfs_item_key_to_cpu(path->nodes[0],
|
|
&key,
|
|
path->slots[0]);
|
|
if (key.objectid == bytenr &&
|
|
key.type == BTRFS_EXTENT_ITEM_KEY &&
|
|
key.offset == num_bytes)
|
|
ret = 0;
|
|
}
|
|
|
|
if (ret > 0 && skinny_metadata) {
|
|
skinny_metadata = 0;
|
|
btrfs_release_path(path);
|
|
key.type = BTRFS_EXTENT_ITEM_KEY;
|
|
key.offset = num_bytes;
|
|
ret = btrfs_search_slot(trans, extent_root,
|
|
&key, path, -1, 1);
|
|
}
|
|
|
|
if (ret) {
|
|
printk(KERN_ERR "umm, got %d back from search"
|
|
", was looking for %llu\n", ret,
|
|
(unsigned long long)bytenr);
|
|
btrfs_print_leaf(path->nodes[0]);
|
|
}
|
|
BUG_ON(ret);
|
|
extent_slot = path->slots[0];
|
|
}
|
|
} else {
|
|
printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
|
|
"parent %llu root %llu owner %llu offset %llu\n",
|
|
(unsigned long long)bytenr,
|
|
(unsigned long long)parent,
|
|
(unsigned long long)root_objectid,
|
|
(unsigned long long)owner_objectid,
|
|
(unsigned long long)owner_offset);
|
|
printf("path->slots[0]: %d path->nodes[0]:\n", path->slots[0]);
|
|
btrfs_print_leaf(path->nodes[0]);
|
|
ret = -EIO;
|
|
goto fail;
|
|
}
|
|
|
|
leaf = path->nodes[0];
|
|
item_size = btrfs_item_size(leaf, extent_slot);
|
|
if (item_size < sizeof(*ei)) {
|
|
error(
|
|
"unsupported or corrupted extent item, item size=%u expect minimal size=%zu",
|
|
item_size, sizeof(*ei));
|
|
ret = -EUCLEAN;
|
|
goto fail;
|
|
}
|
|
ei = btrfs_item_ptr(leaf, extent_slot,
|
|
struct btrfs_extent_item);
|
|
if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
|
|
key.type == BTRFS_EXTENT_ITEM_KEY) {
|
|
struct btrfs_tree_block_info *bi;
|
|
BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
|
|
bi = (struct btrfs_tree_block_info *)(ei + 1);
|
|
WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
|
|
}
|
|
|
|
refs = btrfs_extent_refs(leaf, ei);
|
|
BUG_ON(refs < refs_to_drop);
|
|
refs -= refs_to_drop;
|
|
|
|
if (refs > 0) {
|
|
/*
|
|
* In the case of inline back ref, reference count will
|
|
* be updated by remove_extent_backref
|
|
*/
|
|
if (iref) {
|
|
BUG_ON(!found_extent);
|
|
} else {
|
|
btrfs_set_extent_refs(leaf, ei, refs);
|
|
btrfs_mark_buffer_dirty(leaf);
|
|
}
|
|
if (found_extent) {
|
|
ret = remove_extent_backref(trans, extent_root, path,
|
|
iref, refs_to_drop,
|
|
is_data);
|
|
BUG_ON(ret);
|
|
}
|
|
} else {
|
|
int mark_free = 0;
|
|
|
|
if (found_extent) {
|
|
BUG_ON(is_data && refs_to_drop !=
|
|
extent_data_ref_count(path, iref));
|
|
if (iref) {
|
|
BUG_ON(path->slots[0] != extent_slot);
|
|
} else {
|
|
BUG_ON(path->slots[0] != extent_slot + 1);
|
|
path->slots[0] = extent_slot;
|
|
num_to_del = 2;
|
|
}
|
|
}
|
|
|
|
ret = pin_down_bytes(trans, bytenr, num_bytes,
|
|
is_data);
|
|
if (ret > 0)
|
|
mark_free = 1;
|
|
BUG_ON(ret < 0);
|
|
|
|
ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
|
|
num_to_del);
|
|
BUG_ON(ret);
|
|
btrfs_release_path(path);
|
|
|
|
if (is_data) {
|
|
struct btrfs_root *csum_root;
|
|
|
|
csum_root = btrfs_csum_root(trans->fs_info, bytenr);
|
|
ret = btrfs_del_csums(trans, csum_root, bytenr,
|
|
num_bytes);
|
|
BUG_ON(ret);
|
|
}
|
|
|
|
ret = add_to_free_space_tree(trans, bytenr, num_bytes);
|
|
if (ret) {
|
|
goto fail;
|
|
}
|
|
|
|
update_block_group(trans, bytenr, num_bytes, 0, mark_free);
|
|
}
|
|
fail:
|
|
btrfs_free_path(path);
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Free the extent backing tree block @buf on behalf of root @root_id.
 *
 * The block's level is passed as the owner and the offset is 0.  @last_ref is
 * accepted but not used by this implementation.
 */
int btrfs_free_tree_block(struct btrfs_trans_handle *trans, u64 root_id,
			  struct extent_buffer *buf, u64 parent, int last_ref)
{
	const u64 level = btrfs_header_level(buf);

	return btrfs_free_extent(trans, buf->start, buf->len, parent, root_id,
				 level, 0);
}
|
|
|
|
/*
|
|
* remove an extent from the root, returns 0 on success
|
|
*/
|
|
|
|
int btrfs_free_extent(struct btrfs_trans_handle *trans,
|
|
u64 bytenr, u64 num_bytes, u64 parent,
|
|
u64 root_objectid, u64 owner, u64 offset)
|
|
{
|
|
int ret;
|
|
|
|
WARN_ON(num_bytes < trans->fs_info->sectorsize);
|
|
/*
|
|
* tree log blocks never actually go into the extent allocation
|
|
* tree, just update pinning info and exit early.
|
|
*/
|
|
if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
|
|
printf("PINNING EXTENTS IN LOG TREE\n");
|
|
WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
|
|
btrfs_pin_extent(trans->fs_info, bytenr, num_bytes);
|
|
ret = 0;
|
|
} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
|
|
BUG_ON(offset);
|
|
ret = btrfs_add_delayed_tree_ref(trans->fs_info, trans,
|
|
bytenr, num_bytes, parent,
|
|
root_objectid, (int)owner,
|
|
BTRFS_DROP_DELAYED_REF,
|
|
NULL, NULL, NULL);
|
|
} else {
|
|
ret = __free_extent(trans, bytenr, num_bytes, parent,
|
|
root_objectid, owner, offset, 1);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Round @val up with round_up() to the filesystem's stripesize.
 */
static u64 stripe_align(struct btrfs_root *root, u64 val)
{
	const u64 stripesize = root->fs_info->stripesize;

	return round_up(val, stripesize);
}
|
|
|
|
/*
|
|
* walks the btree of allocated extents and find a hole of a given size.
|
|
* The key ins is changed to record the hole:
|
|
* ins->objectid == block start
|
|
* ins->type == BTRFS_EXTENT_ITEM_KEY
|
|
* ins->offset == number of bytes
|
|
* Any available blocks before search_start are skipped.
|
|
*/
|
|
static int noinline find_free_extent(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *orig_root,
|
|
u64 num_bytes, u64 empty_size,
|
|
u64 search_start, u64 search_end,
|
|
u64 hint_byte, struct btrfs_key *ins,
|
|
u64 exclude_start, u64 exclude_nr,
|
|
u64 profile)
|
|
{
|
|
int ret;
|
|
u64 orig_search_start = search_start;
|
|
struct btrfs_root *root = orig_root->fs_info->tree_root;
|
|
struct btrfs_fs_info *info = root->fs_info;
|
|
u64 total_needed = num_bytes;
|
|
struct btrfs_block_group *block_group;
|
|
int full_scan = 0;
|
|
int wrapped = 0;
|
|
|
|
WARN_ON(num_bytes < info->sectorsize);
|
|
ins->type = BTRFS_EXTENT_ITEM_KEY;
|
|
|
|
search_start = stripe_align(root, search_start);
|
|
|
|
if (hint_byte) {
|
|
block_group = btrfs_lookup_first_block_group(info, hint_byte);
|
|
if (!block_group)
|
|
hint_byte = search_start;
|
|
block_group = btrfs_find_block_group(root, block_group,
|
|
hint_byte, profile, 1);
|
|
} else {
|
|
block_group = btrfs_find_block_group(root,
|
|
trans->block_group,
|
|
search_start, profile, 1);
|
|
}
|
|
|
|
total_needed += empty_size;
|
|
|
|
check_failed:
|
|
search_start = stripe_align(root, search_start);
|
|
if (!block_group) {
|
|
block_group = btrfs_lookup_first_block_group(info,
|
|
search_start);
|
|
if (!block_group)
|
|
block_group = btrfs_lookup_first_block_group(info,
|
|
orig_search_start);
|
|
}
|
|
ret = find_search_start(root, &block_group, &search_start,
|
|
total_needed, profile);
|
|
if (ret)
|
|
goto new_group;
|
|
|
|
ins->objectid = search_start;
|
|
ins->offset = num_bytes;
|
|
|
|
if (ins->objectid + num_bytes >
|
|
block_group->start + block_group->length) {
|
|
search_start = block_group->start + block_group->length;
|
|
goto new_group;
|
|
}
|
|
|
|
if (test_range_bit(&info->extent_ins, ins->objectid,
|
|
ins->objectid + num_bytes -1, EXTENT_LOCKED, 0,
|
|
NULL)) {
|
|
search_start = ins->objectid + num_bytes;
|
|
goto new_group;
|
|
}
|
|
|
|
if (test_range_bit(&info->pinned_extents, ins->objectid,
|
|
ins->objectid + num_bytes -1, EXTENT_DIRTY, 0,
|
|
NULL)) {
|
|
search_start = ins->objectid + num_bytes;
|
|
goto new_group;
|
|
}
|
|
|
|
if (info->excluded_extents &&
|
|
test_range_bit(info->excluded_extents, ins->objectid,
|
|
ins->objectid + num_bytes -1, EXTENT_DIRTY, 0,
|
|
NULL)) {
|
|
search_start = ins->objectid + num_bytes;
|
|
goto new_group;
|
|
}
|
|
|
|
if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
|
|
ins->objectid < exclude_start + exclude_nr)) {
|
|
search_start = exclude_start + exclude_nr;
|
|
goto new_group;
|
|
}
|
|
|
|
if (!(profile & BTRFS_BLOCK_GROUP_DATA)) {
|
|
if (check_crossing_stripes(info, ins->objectid, num_bytes)) {
|
|
struct btrfs_block_group *bg_cache;
|
|
u64 bg_offset;
|
|
|
|
bg_cache = btrfs_lookup_block_group(info, ins->objectid);
|
|
if (!bg_cache)
|
|
goto no_bg_cache;
|
|
bg_offset = ins->objectid - bg_cache->start;
|
|
|
|
search_start = round_up(
|
|
bg_offset + num_bytes, BTRFS_STRIPE_LEN) +
|
|
bg_cache->start;
|
|
goto new_group;
|
|
}
|
|
no_bg_cache:
|
|
block_group = btrfs_lookup_block_group(info, ins->objectid);
|
|
if (block_group)
|
|
trans->block_group = block_group;
|
|
}
|
|
ins->offset = num_bytes;
|
|
return 0;
|
|
|
|
new_group:
|
|
block_group = btrfs_lookup_first_block_group(info, search_start);
|
|
if (!block_group) {
|
|
search_start = orig_search_start;
|
|
if (full_scan) {
|
|
ret = -ENOSPC;
|
|
goto error;
|
|
}
|
|
if (wrapped) {
|
|
if (!full_scan)
|
|
total_needed -= empty_size;
|
|
full_scan = 1;
|
|
} else
|
|
wrapped = 1;
|
|
}
|
|
cond_resched();
|
|
block_group = btrfs_find_block_group(root, block_group,
|
|
search_start, profile, 0);
|
|
goto check_failed;
|
|
|
|
error:
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Pick a free extent of @num_bytes for @root and store it in @ins.
 *
 * The block group type is DATA when @is_data is set, SYSTEM when
 * info->system_allocs is 1 or @root is the chunk root, METADATA otherwise;
 * it is combined with the matching available/allowed profile bits.
 *
 * On success the chosen range is also cleared from info->free_space_cache.
 * Returns the result of find_free_extent().
 */
int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 num_bytes, u64 empty_size,
			 u64 hint_byte, u64 search_end,
			 struct btrfs_key *ins, bool is_data)
{
	struct btrfs_fs_info *info = root->fs_info;
	const u64 search_start = 0;
	u64 alloc_profile;
	u64 profile;
	int ret;

	if (is_data) {
		alloc_profile = info->avail_data_alloc_bits &
				info->data_alloc_profile;
		profile = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
	} else if (info->system_allocs == 1 || root == info->chunk_root) {
		alloc_profile = info->avail_system_alloc_bits &
				info->system_alloc_profile;
		profile = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
	} else {
		alloc_profile = info->avail_metadata_alloc_bits &
				info->metadata_alloc_profile;
		profile = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
	}

	/*
	 * Also preallocate metadata for csum tree and fs trees
	 * (BTRFS_ROOT_SHAREABLE already set) as they can consume a lot of
	 * metadata space. Pre-allocate to avoid unexpected ENOSPC.
	 */
	if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) ||
	    root->root_key.objectid == BTRFS_CSUM_TREE_OBJECTID) {
		const int want_metadata =
			!(profile & BTRFS_BLOCK_GROUP_METADATA);

		if (want_metadata) {
			ret = do_chunk_alloc(trans, info, num_bytes,
					     BTRFS_BLOCK_GROUP_METADATA);
			BUG_ON(ret);
		}
		/* Ask for SZ_2M on top of the request for the chosen profile. */
		ret = do_chunk_alloc(trans, info, num_bytes + SZ_2M, profile);
		BUG_ON(ret);
	}

	WARN_ON(num_bytes < info->sectorsize);
	ret = find_free_extent(trans, root, num_bytes, empty_size,
			       search_start, search_end, hint_byte, ins,
			       trans->alloc_exclude_start,
			       trans->alloc_exclude_nr, profile);
	if (ret >= 0)
		clear_extent_dirty(&info->free_space_cache, ins->objectid,
				   ins->objectid + ins->offset - 1, NULL);
	return ret;
}
|
|
|
|
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
|
|
struct btrfs_delayed_ref_node *node,
|
|
struct btrfs_delayed_extent_op *extent_op)
|
|
{
|
|
|
|
struct btrfs_root *extent_root = btrfs_extent_root(trans->fs_info,
|
|
node->bytenr);
|
|
struct btrfs_delayed_tree_ref *ref = btrfs_delayed_node_to_tree_ref(node);
|
|
bool skinny_metadata = btrfs_fs_incompat(trans->fs_info, SKINNY_METADATA);
|
|
struct btrfs_fs_info *fs_info = trans->fs_info;
|
|
struct btrfs_extent_item *extent_item;
|
|
struct btrfs_extent_inline_ref *iref;
|
|
struct btrfs_space_info *sinfo;
|
|
struct extent_buffer *leaf;
|
|
struct btrfs_path *path;
|
|
struct btrfs_key ins;
|
|
u32 size = sizeof(*extent_item) + sizeof(*iref);
|
|
u64 start, end;
|
|
int ret;
|
|
|
|
sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
|
|
ASSERT(sinfo);
|
|
|
|
ins.objectid = node->bytenr;
|
|
if (skinny_metadata) {
|
|
ins.offset = ref->level;
|
|
ins.type = BTRFS_METADATA_ITEM_KEY;
|
|
} else {
|
|
ins.offset = node->num_bytes;
|
|
ins.type = BTRFS_EXTENT_ITEM_KEY;
|
|
|
|
size += sizeof(struct btrfs_tree_block_info);
|
|
}
|
|
|
|
if (ref->root == BTRFS_EXTENT_TREE_OBJECTID) {
|
|
ret = find_first_extent_bit(&trans->fs_info->extent_ins,
|
|
node->bytenr, &start, &end,
|
|
EXTENT_LOCKED, NULL);
|
|
ASSERT(!ret);
|
|
ASSERT(start == node->bytenr);
|
|
ASSERT(end == node->bytenr + node->num_bytes - 1);
|
|
}
|
|
|
|
path = btrfs_alloc_path();
|
|
if (!path)
|
|
return -ENOMEM;
|
|
|
|
ret = btrfs_insert_empty_item(trans, extent_root, path, &ins, size);
|
|
if (ret)
|
|
return ret;
|
|
|
|
leaf = path->nodes[0];
|
|
extent_item = btrfs_item_ptr(leaf, path->slots[0],
|
|
struct btrfs_extent_item);
|
|
btrfs_set_extent_refs(leaf, extent_item, 1);
|
|
btrfs_set_extent_generation(leaf, extent_item, trans->transid);
|
|
btrfs_set_extent_flags(leaf, extent_item,
|
|
extent_op->flags_to_set |
|
|
BTRFS_EXTENT_FLAG_TREE_BLOCK);
|
|
|
|
if (skinny_metadata) {
|
|
iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
|
|
} else {
|
|
struct btrfs_tree_block_info *block_info;
|
|
block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
|
|
btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
|
|
btrfs_set_tree_block_level(leaf, block_info, ref->level);
|
|
iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
|
|
}
|
|
|
|
btrfs_set_extent_inline_ref_type(leaf, iref, BTRFS_TREE_BLOCK_REF_KEY);
|
|
btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
|
|
|
|
btrfs_mark_buffer_dirty(leaf);
|
|
btrfs_free_path(path);
|
|
|
|
ret = remove_from_free_space_tree(trans, ins.objectid, fs_info->nodesize);
|
|
if (ret)
|
|
return ret;
|
|
|
|
ret = update_block_group(trans, ins.objectid, fs_info->nodesize, 1, 0);
|
|
if (sinfo) {
|
|
if (fs_info->nodesize > sinfo->bytes_reserved) {
|
|
WARN_ON(1);
|
|
sinfo->bytes_reserved = 0;
|
|
} else {
|
|
sinfo->bytes_reserved -= fs_info->nodesize;
|
|
}
|
|
}
|
|
|
|
if (ref->root == BTRFS_EXTENT_TREE_OBJECTID) {
|
|
clear_extent_bits(&trans->fs_info->extent_ins, start, end,
|
|
EXTENT_LOCKED);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Reserve space for one tree block (nodesize bytes) and queue the delayed
 * ref that will insert its extent item.
 *
 * @ins receives the reserved range; with SKINNY_METADATA its offset/type are
 * rewritten to (level, BTRFS_METADATA_ITEM_KEY).  For extent tree blocks the
 * range is additionally marked EXTENT_LOCKED in fs_info->extent_ins so the
 * allocator does not hand it out again.  The reserved size is added to the
 * metadata space_info's bytes_reserved.
 *
 * Returns 0 on success or a negative errno.  @generation is unused.
 */
static int alloc_tree_block(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root, u64 parent,
			    u64 root_objectid, u64 generation,
			    u64 flags, struct btrfs_disk_key *key,
			    int level, u64 empty_size, u64 hint_byte,
			    u64 search_end, struct btrfs_key *ins)
{
	int ret;
	u64 extent_size;
	struct btrfs_delayed_extent_op *extent_op;
	struct btrfs_space_info *sinfo;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 num_bytes = fs_info->nodesize;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	extent_op = btrfs_alloc_delayed_extent_op();
	if (!extent_op)
		return -ENOMEM;

	sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
	if (!sinfo) {
		error("Corrupted fs, no valid METADATA block group found");
		ret = -EUCLEAN;
		goto free_op;
	}
	ret = btrfs_reserve_extent(trans, root, num_bytes, empty_size,
				   hint_byte, search_end, ins, 0);
	if (ret < 0)
		goto free_op;

	if (key)
		memcpy(&extent_op->key, key, sizeof(extent_op->key));
	else
		memset(&extent_op->key, 0, sizeof(extent_op->key));
	extent_op->flags_to_set = flags;
	extent_op->update_key = !skinny_metadata;
	extent_op->update_flags = true;
	extent_op->is_data = false;
	extent_op->level = level;

	/* Remember the byte size before ins->offset is reused for the level. */
	extent_size = ins->offset;

	if (skinny_metadata) {
		ins->offset = level;
		ins->type = BTRFS_METADATA_ITEM_KEY;
	}

	/* Ensure this reserved extent is not found by the allocator */
	if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID) {
		ret = set_extent_bits(&trans->fs_info->extent_ins,
				      ins->objectid,
				      ins->objectid + extent_size - 1,
				      EXTENT_LOCKED);

		BUG_ON(ret);
	}

	sinfo->bytes_reserved += extent_size;
	/*
	 * NOTE(review): extent_op is handed to the delayed ref here; whether
	 * the callee releases it when it fails is not visible from this
	 * function, so it is deliberately not freed on that path.
	 */
	ret = btrfs_add_delayed_tree_ref(root->fs_info, trans, ins->objectid,
					 extent_size, parent, root_objectid,
					 level, BTRFS_ADD_DELAYED_EXTENT,
					 extent_op, NULL, NULL);
	return ret;

free_op:
	/* Nothing references extent_op yet, release it instead of leaking. */
	kfree(extent_op);
	return ret;
}
|
|
|
|
/*
|
|
* helper function to allocate a block for a given tree
|
|
* returns the tree buffer or NULL.
|
|
*/
|
|
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root,
|
|
u64 parent, u64 root_objectid,
|
|
struct btrfs_disk_key *key, int level,
|
|
u64 hint, u64 empty_size,
|
|
enum btrfs_lock_nesting nest)
|
|
{
|
|
struct btrfs_fs_info *fs_info = trans->fs_info;
|
|
struct btrfs_key ins;
|
|
int ret;
|
|
struct extent_buffer *buf;
|
|
|
|
ret = alloc_tree_block(trans, root, parent, root_objectid,
|
|
trans->transid, 0, key, level,
|
|
empty_size, hint, (u64)-1, &ins);
|
|
if (ret) {
|
|
BUG_ON(ret > 0);
|
|
return ERR_PTR(ret);
|
|
}
|
|
|
|
buf = btrfs_find_create_tree_block(root->fs_info, ins.objectid);
|
|
if (!buf) {
|
|
btrfs_free_extent(trans, ins.objectid, ins.offset, 0,
|
|
root->root_key.objectid, level, 0);
|
|
BUG_ON(1);
|
|
return ERR_PTR(-ENOMEM);
|
|
}
|
|
btrfs_set_buffer_uptodate(buf);
|
|
memset_extent_buffer(buf, 0, 0, sizeof(struct btrfs_header));
|
|
btrfs_set_header_level(buf, level);
|
|
btrfs_set_header_bytenr(buf, buf->start);
|
|
btrfs_set_header_generation(buf, trans->transid);
|
|
btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
|
|
btrfs_set_header_owner(buf, root_objectid);
|
|
write_extent_buffer_fsid(buf, fs_info->fs_devices->metadata_uuid);
|
|
write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
|
|
trans->blocks_used++;
|
|
|
|
return buf;
|
|
}
|
|
|
|
int btrfs_free_block_groups(struct btrfs_fs_info *info)
|
|
{
|
|
struct btrfs_space_info *sinfo;
|
|
struct btrfs_block_group *cache, *next;
|
|
u64 start;
|
|
u64 end;
|
|
int ret;
|
|
|
|
rbtree_postorder_for_each_entry_safe(cache, next,
|
|
&info->block_group_cache_tree, cache_node) {
|
|
if (!list_empty(&cache->dirty_list))
|
|
list_del_init(&cache->dirty_list);
|
|
RB_CLEAR_NODE(&cache->cache_node);
|
|
if (cache->free_space_ctl) {
|
|
btrfs_remove_free_space_cache(cache);
|
|
kfree(cache->free_space_ctl);
|
|
}
|
|
kfree(cache);
|
|
}
|
|
|
|
while(1) {
|
|
ret = find_first_extent_bit(&info->free_space_cache, 0,
|
|
&start, &end, EXTENT_DIRTY, NULL);
|
|
if (ret)
|
|
break;
|
|
clear_extent_dirty(&info->free_space_cache, start, end, NULL);
|
|
}
|
|
|
|
while (!list_empty(&info->space_info)) {
|
|
sinfo = list_entry(info->space_info.next,
|
|
struct btrfs_space_info, list);
|
|
list_del_init(&sinfo->list);
|
|
if (sinfo->bytes_reserved)
|
|
warning(
|
|
"reserved space leaked, flag=0x%llx bytes_reserved=%llu",
|
|
sinfo->flags, sinfo->bytes_reserved);
|
|
kfree(sinfo);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Find a block group which starts >= @key->objectid in extent tree.
|
|
*
|
|
* Return 0 for found
|
|
* Return >0 for not found
|
|
* Return <0 for error
|
|
*/
|
|
static int find_first_block_group(struct btrfs_root *root,
|
|
struct btrfs_path *path, struct btrfs_key *key)
|
|
{
|
|
int ret;
|
|
struct btrfs_key found_key;
|
|
struct extent_buffer *leaf;
|
|
int slot;
|
|
|
|
ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
|
|
if (ret < 0)
|
|
return ret;
|
|
while(1) {
|
|
slot = path->slots[0];
|
|
leaf = path->nodes[0];
|
|
if (slot >= btrfs_header_nritems(leaf)) {
|
|
ret = btrfs_next_leaf(root, path);
|
|
if (ret == 0)
|
|
continue;
|
|
if (ret < 0)
|
|
goto error;
|
|
break;
|
|
}
|
|
btrfs_item_key_to_cpu(leaf, &found_key, slot);
|
|
|
|
if (found_key.objectid >= key->objectid &&
|
|
found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
|
|
return 0;
|
|
path->slots[0]++;
|
|
}
|
|
ret = 1;
|
|
error:
|
|
return ret;
|
|
}
|
|
|
|
static int read_block_group_item(struct btrfs_block_group *cache,
|
|
struct btrfs_path *path,
|
|
const struct btrfs_key *key)
|
|
{
|
|
struct extent_buffer *leaf = path->nodes[0];
|
|
struct btrfs_block_group_item bgi;
|
|
int slot = path->slots[0];
|
|
|
|
ASSERT(key->type == BTRFS_BLOCK_GROUP_ITEM_KEY);
|
|
|
|
cache->start = key->objectid;
|
|
cache->length = key->offset;
|
|
|
|
read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
|
|
sizeof(bgi));
|
|
cache->used = btrfs_stack_block_group_used(&bgi);
|
|
cache->flags = btrfs_stack_block_group_flags(&bgi);
|
|
cache->global_root_id = btrfs_stack_block_group_chunk_objectid(&bgi);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Read out one BLOCK_GROUP_ITEM and insert it into block group cache.
|
|
*
|
|
* Return 0 if nothing wrong (either insert the bg cache or skip 0 sized bg)
|
|
* Return <0 for error.
|
|
*/
|
|
static int read_one_block_group(struct btrfs_fs_info *fs_info,
|
|
struct btrfs_path *path)
|
|
{
|
|
struct extent_buffer *leaf = path->nodes[0];
|
|
struct btrfs_space_info *space_info;
|
|
struct btrfs_block_group *cache;
|
|
struct btrfs_key key;
|
|
int slot = path->slots[0];
|
|
int ret;
|
|
|
|
btrfs_item_key_to_cpu(leaf, &key, slot);
|
|
ASSERT(key.type == BTRFS_BLOCK_GROUP_ITEM_KEY);
|
|
|
|
/*
|
|
* Skip 0 sized block group, don't insert them into block group cache
|
|
* tree, as its length is 0, it won't get freed at close_ctree() time.
|
|
*/
|
|
if (key.offset == 0)
|
|
return 0;
|
|
|
|
cache = kzalloc(sizeof(*cache), GFP_NOFS);
|
|
if (!cache)
|
|
return -ENOMEM;
|
|
ret = read_block_group_item(cache, path, &key);
|
|
if (ret < 0) {
|
|
free(cache);
|
|
return ret;
|
|
}
|
|
set_free_space_tree_thresholds(fs_info, cache);
|
|
INIT_LIST_HEAD(&cache->dirty_list);
|
|
|
|
set_avail_alloc_bits(fs_info, cache->flags);
|
|
ret = btrfs_chunk_readonly(fs_info, cache->start);
|
|
if (ret < 0) {
|
|
free(cache);
|
|
return ret;
|
|
}
|
|
if (ret)
|
|
cache->ro = 1;
|
|
exclude_super_stripes(fs_info, cache);
|
|
|
|
ret = update_space_info(fs_info, cache->flags, cache->length,
|
|
cache->used, &space_info);
|
|
if (ret < 0) {
|
|
free(cache);
|
|
return ret;
|
|
}
|
|
cache->space_info = space_info;
|
|
|
|
ret = btrfs_load_block_group_zone_info(fs_info, cache);
|
|
if (ret)
|
|
return ret;
|
|
|
|
btrfs_add_block_group_cache(fs_info, cache);
|
|
return 0;
|
|
}
|
|
|
|
static int get_last_converted_bg(struct btrfs_fs_info *fs_info)
|
|
{
|
|
struct btrfs_root *bg_root = fs_info->block_group_root;
|
|
struct btrfs_path path = {0};
|
|
struct btrfs_key key = {0};
|
|
int ret;
|
|
|
|
/* Converting back to extent tree case. */
|
|
if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) {
|
|
struct btrfs_root *extent_root = btrfs_extent_root(fs_info, 0);
|
|
|
|
/* Load the first bg in bg tree. */
|
|
ret = btrfs_search_slot(NULL, bg_root, &key, &path, 0, 0);
|
|
if (ret < 0)
|
|
return ret;
|
|
ASSERT(ret > 0);
|
|
/* We should always be at the slot 0 of the first leaf. */
|
|
ASSERT(path.slots[0] == 0);
|
|
|
|
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
|
|
|
|
/* Empty bg tree, all converted, then grab the first bg. */
|
|
if (btrfs_header_nritems(path.nodes[0]) == 0) {
|
|
btrfs_release_path(&path);
|
|
ret = find_first_block_group(extent_root, &path, &key);
|
|
/* We should have at least one bg item in extent tree. */
|
|
ASSERT(ret == 0);
|
|
|
|
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
|
|
fs_info->last_converted_bg_bytenr = key.objectid;
|
|
goto out;
|
|
}
|
|
btrfs_release_path(&path);
|
|
|
|
/* Grab the last bg in extent tree as the last converted one. */
|
|
key.objectid = (u64)-1;
|
|
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
|
|
key.offset = (u64)-1;
|
|
|
|
ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
|
|
if (ret < 0)
|
|
goto out;
|
|
ASSERT(ret > 0);
|
|
ret = btrfs_previous_item(extent_root, &path, 0, BTRFS_BLOCK_GROUP_ITEM_KEY);
|
|
if (ret < 0)
|
|
goto out;
|
|
|
|
/* No converted bg item in extent tree.*/
|
|
if (ret > 0) {
|
|
ret = 0;
|
|
fs_info->last_converted_bg_bytenr = (u64)-1;
|
|
goto out;
|
|
}
|
|
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
|
|
fs_info->last_converted_bg_bytenr = key.objectid;
|
|
goto out;
|
|
}
|
|
/* Load the first bg in bg tree, that would be our last converted bg. */
|
|
ret = btrfs_search_slot(NULL, bg_root, &key, &path, 0, 0);
|
|
if (ret < 0)
|
|
return ret;
|
|
ASSERT(ret > 0);
|
|
/* We should always be at the slot 0 of the first leaf. */
|
|
ASSERT(path.slots[0] == 0);
|
|
|
|
/* Empty bg tree, no converted bg item at all. */
|
|
if (btrfs_header_nritems(path.nodes[0]) == 0) {
|
|
fs_info->last_converted_bg_bytenr = (u64)-1;
|
|
ret = 0;
|
|
goto out;
|
|
}
|
|
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
|
|
ASSERT(key.type == BTRFS_BLOCK_GROUP_ITEM_KEY);
|
|
fs_info->last_converted_bg_bytenr = key.objectid;
|
|
out:
|
|
btrfs_release_path(&path);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Helper to read old block groups items from specified root.
|
|
*
|
|
* The difference between this and read_block_groups_from_root() is,
|
|
* we will exit if we have already read the last bg in the old root.
|
|
*
|
|
* This is to avoid wasting time finding bg items which should be in the
|
|
* new root.
|
|
*/
|
|
static int read_old_block_groups_from_root(struct btrfs_fs_info *fs_info,
|
|
struct btrfs_root *root)
|
|
{
|
|
struct btrfs_path path = {0};
|
|
struct btrfs_key key;
|
|
struct cache_extent *ce;
|
|
/* The last block group bytenr in the old root. */
|
|
u64 last_bg_in_old_root;
|
|
int ret;
|
|
|
|
if (fs_info->last_converted_bg_bytenr != (u64)-1) {
|
|
/*
|
|
* We know the last converted bg in the other tree, load the chunk
|
|
* before that last converted as our last bg in the tree.
|
|
*/
|
|
ce = search_cache_extent(&fs_info->mapping_tree.cache_tree,
|
|
fs_info->last_converted_bg_bytenr);
|
|
if (!ce || ce->start != fs_info->last_converted_bg_bytenr) {
|
|
error("no chunk found for bytenr %llu",
|
|
fs_info->last_converted_bg_bytenr);
|
|
return -ENOENT;
|
|
}
|
|
ce = prev_cache_extent(ce);
|
|
/*
|
|
* We should have previous unconverted chunk, or we have
|
|
* already finished the convert.
|
|
*/
|
|
ASSERT(ce);
|
|
|
|
last_bg_in_old_root = ce->start;
|
|
} else {
|
|
last_bg_in_old_root = (u64)-1;
|
|
}
|
|
|
|
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
|
|
|
|
while (true) {
|
|
ret = find_first_block_group(root, &path, &key);
|
|
if (ret > 0) {
|
|
ret = 0;
|
|
goto out;
|
|
}
|
|
if (ret != 0) {
|
|
goto out;
|
|
}
|
|
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
|
|
|
|
ret = read_one_block_group(fs_info, &path);
|
|
if (ret < 0 && ret != -ENOENT)
|
|
goto out;
|
|
|
|
/* We have reached last bg in the old root, no need to continue */
|
|
if (key.objectid >= last_bg_in_old_root)
|
|
break;
|
|
|
|
if (key.offset == 0)
|
|
key.objectid++;
|
|
else
|
|
key.objectid = key.objectid + key.offset;
|
|
key.offset = 0;
|
|
btrfs_release_path(&path);
|
|
}
|
|
ret = 0;
|
|
out:
|
|
btrfs_release_path(&path);
|
|
return ret;
|
|
}
|
|
|
|
/* Helper to read all block groups items from specified root. */
|
|
static int read_block_groups_from_root(struct btrfs_fs_info *fs_info,
|
|
struct btrfs_root *root)
|
|
{
|
|
struct btrfs_path path = {0};
|
|
struct btrfs_key key = {0};
|
|
int ret;
|
|
|
|
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
|
|
|
|
while (true) {
|
|
ret = find_first_block_group(root, &path, &key);
|
|
if (ret > 0) {
|
|
ret = 0;
|
|
goto out;
|
|
}
|
|
if (ret != 0) {
|
|
goto out;
|
|
}
|
|
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
|
|
|
|
ret = read_one_block_group(fs_info, &path);
|
|
if (ret < 0 && ret != -ENOENT)
|
|
goto out;
|
|
|
|
if (key.offset == 0)
|
|
key.objectid++;
|
|
else
|
|
key.objectid = key.objectid + key.offset;
|
|
key.offset = 0;
|
|
btrfs_release_path(&path);
|
|
}
|
|
ret = 0;
|
|
out:
|
|
btrfs_release_path(&path);
|
|
return ret;
|
|
}
|
|
|
|
static int read_converting_block_groups(struct btrfs_fs_info *fs_info)
|
|
{
|
|
struct btrfs_root *old_root;
|
|
struct btrfs_root *new_root;
|
|
int ret;
|
|
|
|
if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) {
|
|
/* Converting back to extent tree. */
|
|
old_root = fs_info->block_group_root;
|
|
new_root = btrfs_extent_root(fs_info, 0);
|
|
} else {
|
|
/* Converting to block group tree. */
|
|
old_root = btrfs_extent_root(fs_info, 0);
|
|
new_root = fs_info->block_group_root;
|
|
}
|
|
|
|
ret = get_last_converted_bg(fs_info);
|
|
if (ret < 0) {
|
|
error("failed to load the last converted bg: %d", ret);
|
|
return ret;
|
|
}
|
|
|
|
ret = read_old_block_groups_from_root(fs_info, old_root);
|
|
if (ret < 0) {
|
|
error("failed to load block groups from the old root: %d", ret);
|
|
return ret;
|
|
}
|
|
|
|
/* For block group items in the new tree, just read them all. */
|
|
ret = read_block_groups_from_root(fs_info, new_root);
|
|
if (ret < 0) {
|
|
error("failed to load block groups from the new root: %d", ret);
|
|
return ret;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
int btrfs_read_block_groups(struct btrfs_fs_info *fs_info)
|
|
{
|
|
struct btrfs_root *root;
|
|
|
|
if (btrfs_super_flags(fs_info->super_copy) &
|
|
BTRFS_SUPER_FLAG_CHANGING_BG_TREE)
|
|
return read_converting_block_groups(fs_info);
|
|
|
|
root = btrfs_block_group_root(fs_info);
|
|
return read_block_groups_from_root(fs_info, root);
|
|
}
|
|
|
|
/*
|
|
* For extent tree v2 we use the block_group_item->chunk_offset to point at our
|
|
* global root id. For v1 it's always set to BTRFS_FIRST_CHUNK_TREE_OBJECTID.
|
|
*/
|
|
static u64 calculate_global_root_id(struct btrfs_fs_info *fs_info, u64 offset)
|
|
{
|
|
u64 div = SZ_1G;
|
|
|
|
if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
|
|
return BTRFS_FIRST_CHUNK_TREE_OBJECTID;
|
|
|
|
/* If we have a smaller fs index based on 128m. */
|
|
if (btrfs_super_total_bytes(fs_info->super_copy) <= (SZ_1G * 10ULL))
|
|
div = SZ_128M;
|
|
|
|
return (div_u64(offset, div) % fs_info->nr_global_roots);
|
|
}
|
|
|
|
struct btrfs_block_group *
|
|
btrfs_add_block_group(struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type,
|
|
u64 chunk_offset, u64 size)
|
|
{
|
|
int ret;
|
|
struct btrfs_block_group *cache;
|
|
|
|
cache = kzalloc(sizeof(*cache), GFP_NOFS);
|
|
BUG_ON(!cache);
|
|
cache->start = chunk_offset;
|
|
cache->length = size;
|
|
cache->global_root_id = calculate_global_root_id(fs_info, chunk_offset);
|
|
|
|
ret = btrfs_load_block_group_zone_info(fs_info, cache);
|
|
BUG_ON(ret);
|
|
|
|
cache->used = bytes_used;
|
|
cache->flags = type;
|
|
INIT_LIST_HEAD(&cache->dirty_list);
|
|
|
|
exclude_super_stripes(fs_info, cache);
|
|
ret = update_space_info(fs_info, cache->flags, size, bytes_used,
|
|
&cache->space_info);
|
|
BUG_ON(ret);
|
|
|
|
ret = btrfs_add_block_group_cache(fs_info, cache);
|
|
BUG_ON(ret);
|
|
set_avail_alloc_bits(fs_info, type);
|
|
|
|
return cache;
|
|
}
|
|
|
|
static int insert_block_group_item(struct btrfs_trans_handle *trans,
|
|
struct btrfs_block_group *block_group)
|
|
{
|
|
struct btrfs_fs_info *fs_info = trans->fs_info;
|
|
struct btrfs_block_group_item bgi;
|
|
struct btrfs_root *root;
|
|
struct btrfs_key key;
|
|
|
|
btrfs_set_stack_block_group_used(&bgi, block_group->used);
|
|
btrfs_set_stack_block_group_chunk_objectid(&bgi,
|
|
block_group->global_root_id);
|
|
btrfs_set_stack_block_group_flags(&bgi, block_group->flags);
|
|
key.objectid = block_group->start;
|
|
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
|
|
key.offset = block_group->length;
|
|
|
|
root = btrfs_block_group_root(fs_info);
|
|
/*
|
|
* If we're doing convert and the bg is beyond our last converted bg,
|
|
* it should go to the new root.
|
|
*/
|
|
if (btrfs_super_flags(fs_info->super_copy) &
|
|
BTRFS_SUPER_FLAG_CHANGING_BG_TREE &&
|
|
block_group->start >= fs_info->last_converted_bg_bytenr) {
|
|
if (btrfs_super_compat_ro_flags(fs_info->super_copy) &
|
|
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE)
|
|
/* Converting to extent tree, return extent root. */
|
|
root = btrfs_extent_root(fs_info, block_group->start);
|
|
else
|
|
/* Converting to bg tree, return bg root. */
|
|
root = fs_info->block_group_root;
|
|
}
|
|
|
|
return btrfs_insert_item(trans, root, &key, &bgi, sizeof(bgi));
|
|
}
|
|
|
|
/*
 * Create a new block group: add it to the in-memory cache, set its free
 * space tree thresholds, insert its on-disk item and finally add its free
 * space.  Returns 0 on success or the error from inserting the item.
 */
int btrfs_make_block_group(struct btrfs_trans_handle *trans,
			   struct btrfs_fs_info *fs_info, u64 bytes_used,
			   u64 type, u64 chunk_offset, u64 size)
{
	struct btrfs_block_group *bg;
	int err;

	bg = btrfs_add_block_group(fs_info, bytes_used, type, chunk_offset,
				   size);
	set_free_space_tree_thresholds(fs_info, bg);

	err = insert_block_group_item(trans, bg);
	if (!err)
		add_block_group_free_space(trans, bg);

	return err;
}
|
|
|
|
/*
|
|
* This is for converter use only.
|
|
*
|
|
* In that case, we don't know where are free blocks located.
|
|
* Therefore all block group cache entries must be setup properly
|
|
* before doing any block allocation.
|
|
*/
|
|
int btrfs_make_block_groups(struct btrfs_trans_handle *trans,
|
|
struct btrfs_fs_info *fs_info)
|
|
{
|
|
u64 total_bytes;
|
|
u64 cur_start;
|
|
u64 group_type;
|
|
u64 group_size;
|
|
u64 group_align;
|
|
u64 total_data = 0;
|
|
u64 total_metadata = 0;
|
|
int ret;
|
|
struct btrfs_block_group *cache;
|
|
|
|
total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
|
|
group_align = 64 * fs_info->sectorsize;
|
|
|
|
cur_start = 0;
|
|
while (cur_start < total_bytes) {
|
|
group_size = total_bytes / 12;
|
|
group_size = min_t(u64, group_size, total_bytes - cur_start);
|
|
if (cur_start == 0) {
|
|
group_type = BTRFS_BLOCK_GROUP_SYSTEM;
|
|
group_size /= 4;
|
|
group_size &= ~(group_align - 1);
|
|
group_size = max_t(u64, group_size, SZ_8M);
|
|
group_size = min_t(u64, group_size, SZ_32M);
|
|
} else {
|
|
group_size &= ~(group_align - 1);
|
|
if (total_data >= total_metadata * 2) {
|
|
group_type = BTRFS_BLOCK_GROUP_METADATA;
|
|
group_size = min_t(u64, group_size, SZ_1G);
|
|
total_metadata += group_size;
|
|
} else {
|
|
group_type = BTRFS_BLOCK_GROUP_DATA;
|
|
group_size = min_t(u64, group_size,
|
|
5ULL * SZ_1G);
|
|
total_data += group_size;
|
|
}
|
|
if ((total_bytes - cur_start) * 4 < group_size * 5)
|
|
group_size = total_bytes - cur_start;
|
|
}
|
|
|
|
cache = kzalloc(sizeof(*cache), GFP_NOFS);
|
|
BUG_ON(!cache);
|
|
|
|
cache->start = cur_start;
|
|
cache->length = group_size;
|
|
cache->used = 0;
|
|
cache->flags = group_type;
|
|
INIT_LIST_HEAD(&cache->dirty_list);
|
|
|
|
ret = update_space_info(fs_info, group_type, group_size,
|
|
0, &cache->space_info);
|
|
BUG_ON(ret);
|
|
set_avail_alloc_bits(fs_info, group_type);
|
|
btrfs_add_block_group_cache(fs_info, cache);
|
|
cur_start += group_size;
|
|
}
|
|
/* then insert all the items */
|
|
cur_start = 0;
|
|
while(cur_start < total_bytes) {
|
|
cache = btrfs_lookup_block_group(fs_info, cur_start);
|
|
BUG_ON(!cache);
|
|
|
|
ret = insert_block_group_item(trans, cache);
|
|
BUG_ON(ret);
|
|
|
|
cur_start = cache->start + cache->length;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* Public wrapper that forwards all arguments to update_block_group(). */
int btrfs_update_block_group(struct btrfs_trans_handle *trans,
			     u64 bytenr, u64 num_bytes, int alloc,
			     int mark_free)
{
	int ret;

	ret = update_block_group(trans, bytenr, num_bytes, alloc, mark_free);
	return ret;
}
|
|
|
|
/*
|
|
* Just remove a block group item in extent tree
|
|
* Caller should ensure the block group is empty and all space is pinned.
|
|
* Or new tree block/data may be allocated into it.
|
|
*/
|
|
static int remove_block_group_item(struct btrfs_trans_handle *trans,
|
|
struct btrfs_path *path,
|
|
struct btrfs_block_group *block_group)
|
|
{
|
|
struct btrfs_fs_info *fs_info = trans->fs_info;
|
|
struct btrfs_key key;
|
|
struct btrfs_root *root = btrfs_block_group_root(fs_info);
|
|
int ret = 0;
|
|
|
|
key.objectid = block_group->start;
|
|
key.offset = block_group->length;
|
|
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
|
|
|
|
/*
|
|
* If we're doing convert and the bg is beyond our last converted bg,
|
|
* it should go to the new root.
|
|
*/
|
|
if (btrfs_super_flags(fs_info->super_copy) &
|
|
BTRFS_SUPER_FLAG_CHANGING_BG_TREE &&
|
|
block_group->start >= fs_info->last_converted_bg_bytenr)
|
|
root = fs_info->block_group_root;
|
|
|
|
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
|
|
if (ret > 0)
|
|
ret = -ENOENT;
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
return btrfs_del_item(trans, root, path);
|
|
}
|
|
|
|
/*
 * Delete the DEV_EXTENT item keyed (@devid, BTRFS_DEV_EXTENT_KEY,
 * @dev_offset) from the device tree.
 *
 * Returns 0 on success, -ENOENT when no such item exists, -ENOMEM when the
 * path cannot be allocated, or another negative errno.
 */
static int free_dev_extent_item(struct btrfs_trans_handle *trans,
				struct btrfs_fs_info *fs_info,
				u64 devid, u64 dev_offset)
{
	struct btrfs_root *dev_root = fs_info->dev_root;
	struct btrfs_path *path;
	struct btrfs_key key;
	int err;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = devid;
	key.type = BTRFS_DEV_EXTENT_KEY;
	key.offset = dev_offset;

	err = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
	if (err == 0)
		err = btrfs_del_item(trans, dev_root, path);
	else if (err > 0)
		err = -ENOENT;

	btrfs_free_path(path);
	return err;
}
|
|
|
|
static int free_chunk_dev_extent_items(struct btrfs_trans_handle *trans,
|
|
struct btrfs_fs_info *fs_info,
|
|
u64 chunk_offset)
|
|
{
|
|
struct btrfs_chunk *chunk = NULL;
|
|
struct btrfs_root *root= fs_info->chunk_root;
|
|
struct btrfs_path *path;
|
|
struct btrfs_key key;
|
|
u16 num_stripes;
|
|
int i;
|
|
int ret;
|
|
|
|
path = btrfs_alloc_path();
|
|
if (!path)
|
|
return -ENOMEM;
|
|
|
|
key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
|
|
key.type = BTRFS_CHUNK_ITEM_KEY;
|
|
key.offset = chunk_offset;
|
|
|
|
ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
|
|
if (ret < 0)
|
|
goto out;
|
|
if (ret > 0) {
|
|
ret = -ENOENT;
|
|
goto out;
|
|
}
|
|
chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
|
|
struct btrfs_chunk);
|
|
num_stripes = btrfs_chunk_num_stripes(path->nodes[0], chunk);
|
|
for (i = 0; i < num_stripes; i++) {
|
|
u64 devid = btrfs_stripe_devid_nr(path->nodes[0], chunk, i);
|
|
u64 offset = btrfs_stripe_offset_nr(path->nodes[0], chunk, i);
|
|
u64 length = btrfs_stripe_length(fs_info, path->nodes[0], chunk);
|
|
|
|
ret = btrfs_reset_chunk_zones(fs_info, devid, offset, length);
|
|
if (ret < 0)
|
|
goto out;
|
|
|
|
ret = free_dev_extent_item(trans, fs_info,
|
|
btrfs_stripe_devid_nr(path->nodes[0], chunk, i),
|
|
btrfs_stripe_offset_nr(path->nodes[0], chunk, i));
|
|
if (ret < 0)
|
|
goto out;
|
|
}
|
|
out:
|
|
btrfs_free_path(path);
|
|
return ret;
|
|
}
|
|
|
|
static int free_system_chunk_item(struct btrfs_super_block *super,
|
|
struct btrfs_key *key)
|
|
{
|
|
struct btrfs_disk_key *disk_key;
|
|
struct btrfs_key cpu_key;
|
|
u32 array_size = btrfs_super_sys_array_size(super);
|
|
char *ptr = (char *)super->sys_chunk_array;
|
|
int cur = 0;
|
|
int ret = -ENOENT;
|
|
|
|
while (cur < btrfs_super_sys_array_size(super)) {
|
|
struct btrfs_chunk *chunk;
|
|
u32 num_stripes;
|
|
u32 chunk_len;
|
|
|
|
disk_key = (struct btrfs_disk_key *)(ptr + cur);
|
|
btrfs_disk_key_to_cpu(&cpu_key, disk_key);
|
|
if (cpu_key.type != BTRFS_CHUNK_ITEM_KEY) {
|
|
/* just in case */
|
|
ret = -EIO;
|
|
goto out;
|
|
}
|
|
|
|
chunk = (struct btrfs_chunk *)(ptr + cur + sizeof(*disk_key));
|
|
num_stripes = btrfs_stack_chunk_num_stripes(chunk);
|
|
chunk_len = btrfs_chunk_item_size(num_stripes) +
|
|
sizeof(*disk_key);
|
|
|
|
if (key->objectid == cpu_key.objectid &&
|
|
key->offset == cpu_key.offset &&
|
|
key->type == cpu_key.type) {
|
|
memmove(ptr + cur, ptr + cur + chunk_len,
|
|
array_size - cur - chunk_len);
|
|
array_size -= chunk_len;
|
|
btrfs_set_super_sys_array_size(super, array_size);
|
|
ret = 0;
|
|
goto out;
|
|
}
|
|
|
|
cur += chunk_len;
|
|
}
|
|
out:
|
|
return ret;
|
|
}
|
|
|
|
static int free_chunk_item(struct btrfs_trans_handle *trans,
|
|
struct btrfs_fs_info *fs_info,
|
|
u64 bytenr)
|
|
{
|
|
struct btrfs_path *path;
|
|
struct btrfs_key key;
|
|
struct btrfs_root *root = fs_info->chunk_root;
|
|
struct btrfs_chunk *chunk;
|
|
u64 chunk_type;
|
|
int ret;
|
|
|
|
key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
|
|
key.offset = bytenr;
|
|
key.type = BTRFS_CHUNK_ITEM_KEY;
|
|
|
|
path = btrfs_alloc_path();
|
|
if (!path)
|
|
return -ENOMEM;
|
|
|
|
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
|
|
if (ret > 0) {
|
|
ret = -ENOENT;
|
|
goto out;
|
|
}
|
|
if (ret < 0)
|
|
goto out;
|
|
chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
|
|
struct btrfs_chunk);
|
|
chunk_type = btrfs_chunk_type(path->nodes[0], chunk);
|
|
|
|
ret = btrfs_del_item(trans, root, path);
|
|
if (ret < 0)
|
|
goto out;
|
|
|
|
if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
|
|
ret = free_system_chunk_item(fs_info->super_copy, &key);
|
|
out:
|
|
btrfs_free_path(path);
|
|
return ret;
|
|
}
|
|
|
|
static u64 get_dev_extent_len(struct map_lookup *map)
|
|
{
|
|
int div = 1;
|
|
|
|
switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
|
|
case 0: /* Single */
|
|
case BTRFS_BLOCK_GROUP_DUP:
|
|
case BTRFS_BLOCK_GROUP_RAID1:
|
|
case BTRFS_BLOCK_GROUP_RAID1C3:
|
|
case BTRFS_BLOCK_GROUP_RAID1C4:
|
|
/* The default value can already handle it. */
|
|
break;
|
|
case BTRFS_BLOCK_GROUP_RAID5:
|
|
case BTRFS_BLOCK_GROUP_RAID6:
|
|
div = map->num_stripes - btrfs_bg_type_to_nparity(map->type);
|
|
break;
|
|
case BTRFS_BLOCK_GROUP_RAID10:
|
|
div = (map->num_stripes / map->sub_stripes);
|
|
break;
|
|
default:
|
|
/* normally, read chunk security hook should handled it */
|
|
BUG_ON(1);
|
|
}
|
|
return map->ce.size / div;
|
|
}
|
|
|
|
/* free block group/chunk related caches */
|
|
static int free_block_group_cache(struct btrfs_trans_handle *trans,
|
|
struct btrfs_fs_info *fs_info,
|
|
u64 bytenr, u64 len)
|
|
{
|
|
struct btrfs_block_group *cache;
|
|
struct cache_extent *ce;
|
|
struct map_lookup *map;
|
|
int ret;
|
|
int i;
|
|
u64 flags;
|
|
|
|
/* Free block group cache first */
|
|
cache = btrfs_lookup_block_group(fs_info, bytenr);
|
|
if (!cache)
|
|
return -ENOENT;
|
|
flags = cache->flags;
|
|
if (cache->free_space_ctl) {
|
|
btrfs_remove_free_space_cache(cache);
|
|
kfree(cache->free_space_ctl);
|
|
}
|
|
if (!list_empty(&cache->dirty_list))
|
|
list_del(&cache->dirty_list);
|
|
rb_erase(&cache->cache_node, &fs_info->block_group_cache_tree);
|
|
ret = free_space_info(fs_info, flags, len, 0, NULL);
|
|
if (ret < 0)
|
|
goto out;
|
|
kfree(cache);
|
|
|
|
/* Then free mapping info and dev usage info */
|
|
ce = search_cache_extent(&fs_info->mapping_tree.cache_tree, bytenr);
|
|
if (!ce || ce->start != bytenr) {
|
|
ret = -ENOENT;
|
|
goto out;
|
|
}
|
|
map = container_of(ce, struct map_lookup, ce);
|
|
for (i = 0; i < map->num_stripes; i++) {
|
|
struct btrfs_device *device;
|
|
|
|
device = map->stripes[i].dev;
|
|
device->bytes_used -= get_dev_extent_len(map);
|
|
ret = btrfs_update_device(trans, device);
|
|
if (ret < 0)
|
|
goto out;
|
|
}
|
|
remove_cache_extent(&fs_info->mapping_tree.cache_tree, ce);
|
|
free(map);
|
|
out:
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Remove the empty block group [@bytenr, @bytenr + @len) together with its
 * on-disk items and in-memory caches.
 *
 * The range is pinned for the duration of the removal and unpinned again
 * before returning, on success as well as on failure. In order, the
 * function deletes the block group item, the dev extent items of the
 * chunk and the chunk item, and finally releases the cached block
 * group/chunk state.
 *
 * Returns 0 on success, -ENOENT if no block group matches exactly
 * (@bytenr, @len), -EUCLEAN if the block group still has used bytes, or
 * the negative error of the step that failed.
 */
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
			     u64 bytenr, u64 len)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_block_group *block_group;
	struct btrfs_path path = { 0 };
	int ret = 0;

	block_group = btrfs_lookup_block_group(fs_info, bytenr);
	if (!block_group || block_group->start != bytenr ||
	    block_group->length != len)
		return -ENOENT;
	/* Double check the block group to ensure it's empty */
	if (block_group->used) {
		fprintf(stderr,
			"WARNING: block group [%llu,%llu) is not empty\n",
			bytenr, bytenr + len);
		return -EUCLEAN;
	}

	/*
	 * Now pin all space in the block group, so that nothing below can
	 * allocate space from it while it is being removed.
	 */
	btrfs_pin_extent(fs_info, bytenr, len);

	/* delete block group item and chunk item */
	ret = remove_block_group_item(trans, &path, block_group);
	btrfs_release_path(&path);
	if (ret < 0) {
		fprintf(stderr,
			"failed to free block group item for [%llu,%llu)\n",
			bytenr, bytenr + len);
		goto out_unpin;
	}

	ret = free_chunk_dev_extent_items(trans, fs_info, bytenr);
	if (ret < 0) {
		fprintf(stderr,
			"failed to free dev extents belonging to [%llu,%llu)\n",
			bytenr, bytenr + len);
		goto out_unpin;
	}

	ret = free_chunk_item(trans, fs_info, bytenr);
	if (ret < 0) {
		fprintf(stderr,
			"failed to free chunk for [%llu,%llu)\n",
			bytenr, bytenr + len);
		goto out_unpin;
	}

	/* Now release the block_group_cache */
	ret = free_block_group_cache(trans, fs_info, bytenr, len);

out_unpin:
	/* Single exit for every path taken after the range was pinned. */
	btrfs_unpin_extent(fs_info, bytenr, len);
	return ret;
}
|
|
|
|
/*
 * Set EXTENT_UPTODATE on [@start, @start + @num_bytes - 1] in
 * fs_info->pinned_extents. Always returns 0.
 */
static int add_excluded_extent(struct btrfs_fs_info *fs_info,
			       u64 start, u64 num_bytes)
{
	/* The extent bit API takes an inclusive end offset. */
	const u64 last = start + num_bytes - 1;

	set_extent_bits(&fs_info->pinned_extents, start, last,
			EXTENT_UPTODATE);
	return 0;
}
|
|
|
|
void free_excluded_extents(struct btrfs_fs_info *fs_info,
|
|
struct btrfs_block_group *cache)
|
|
{
|
|
u64 start, end;
|
|
|
|
start = cache->start;
|
|
end = start + cache->length - 1;
|
|
|
|
clear_extent_bits(&fs_info->pinned_extents,
|
|
start, end, EXTENT_UPTODATE);
|
|
}
|
|
|
|
int exclude_super_stripes(struct btrfs_fs_info *fs_info,
|
|
struct btrfs_block_group *cache)
|
|
{
|
|
u64 bytenr;
|
|
u64 *logical;
|
|
int stripe_len;
|
|
int i, nr, ret;
|
|
|
|
if (cache->start < BTRFS_SUPER_INFO_OFFSET) {
|
|
stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->start;
|
|
cache->bytes_super += stripe_len;
|
|
ret = add_excluded_extent(fs_info, cache->start, stripe_len);
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
|
|
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
|
|
bytenr = btrfs_sb_offset(i);
|
|
ret = btrfs_rmap_block(fs_info, cache->start, bytenr,
|
|
&logical, &nr, &stripe_len);
|
|
if (ret)
|
|
return ret;
|
|
|
|
while (nr--) {
|
|
u64 start, len;
|
|
|
|
if (logical[nr] >= cache->start + cache->length)
|
|
continue;
|
|
|
|
if (logical[nr] + stripe_len <= cache->start)
|
|
continue;
|
|
|
|
start = logical[nr];
|
|
if (start < cache->start) {
|
|
start = cache->start;
|
|
len = (logical[nr] + stripe_len) - start;
|
|
} else {
|
|
len = min_t(u64, stripe_len, cache->start +
|
|
cache->length - start);
|
|
}
|
|
|
|
cache->bytes_super += len;
|
|
ret = add_excluded_extent(fs_info, start, len);
|
|
if (ret) {
|
|
kfree(logical);
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
kfree(logical);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Add the parts of [@start, @end) that carry neither EXTENT_DIRTY nor
 * EXTENT_UPTODATE in info->pinned_extents to @block_group's free space
 * control.
 *
 * Returns the total number of bytes added. A failure of
 * btrfs_add_free_space() triggers BUG_ON().
 */
u64 add_new_free_space(struct btrfs_block_group *block_group,
		       struct btrfs_fs_info *info, u64 start, u64 end)
{
	u64 extent_start, extent_end;
	u64 gap;
	u64 total_added = 0;
	int ret;

	while (start < end) {
		if (find_first_extent_bit(&info->pinned_extents, start,
					  &extent_start, &extent_end,
					  EXTENT_DIRTY | EXTENT_UPTODATE,
					  NULL))
			break;

		if (extent_start > start) {
			/* The next marked extent lies beyond our range. */
			if (extent_start >= end)
				break;

			/* Free space between the cursor and the marked extent. */
			gap = extent_start - start;
			total_added += gap;
			ret = btrfs_add_free_space(block_group->free_space_ctl,
						   start, gap);
			BUG_ON(ret); /* -ENOMEM or logic error */
		}

		/* Continue right after the marked extent. */
		start = extent_end + 1;
	}

	/* Whatever is left behind the last marked extent is free as well. */
	if (start < end) {
		gap = end - start;
		total_added += gap;
		ret = btrfs_add_free_space(block_group->free_space_ctl, start,
					   gap);
		BUG_ON(ret); /* -ENOMEM or logic error */
	}

	return total_added;
}
|
|
|
|
static void cleanup_extent_op(struct btrfs_trans_handle *trans,
|
|
struct btrfs_fs_info *fs_info,
|
|
struct btrfs_delayed_ref_head *head)
|
|
{
|
|
struct btrfs_delayed_extent_op *extent_op = head->extent_op;
|
|
|
|
if (!extent_op)
|
|
return;
|
|
head->extent_op = NULL;
|
|
btrfs_free_delayed_extent_op(extent_op);
|
|
}
|
|
|
|
static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
|
|
struct btrfs_delayed_ref_head *head)
|
|
{
|
|
head->processing = 0;
|
|
delayed_refs->num_heads_ready++;
|
|
}
|
|
|
|
int cleanup_ref_head(struct btrfs_trans_handle *trans,
|
|
struct btrfs_fs_info *fs_info,
|
|
struct btrfs_delayed_ref_head *head)
|
|
{
|
|
struct btrfs_delayed_ref_root *delayed_refs;
|
|
|
|
delayed_refs = &trans->delayed_refs;
|
|
|
|
cleanup_extent_op(trans, fs_info, head);
|
|
|
|
/*
|
|
* Need to drop our head ref lock and re-acquire the delayed ref lock
|
|
* and then re-check to make sure nobody got added.
|
|
*/
|
|
if (!RB_EMPTY_ROOT(&head->ref_tree) || head->extent_op)
|
|
return 1;
|
|
|
|
delayed_refs->num_heads--;
|
|
rb_erase(&head->href_node, &delayed_refs->href_root);
|
|
RB_CLEAR_NODE(&head->href_node);
|
|
|
|
if (head->must_insert_reserved) {
|
|
btrfs_pin_extent(fs_info, head->bytenr, head->num_bytes);
|
|
if (!head->is_data) {
|
|
struct btrfs_space_info *sinfo;
|
|
|
|
sinfo = __find_space_info(trans->fs_info,
|
|
BTRFS_BLOCK_GROUP_METADATA);
|
|
ASSERT(sinfo);
|
|
sinfo->bytes_reserved -= head->num_bytes;
|
|
}
|
|
}
|
|
|
|
btrfs_put_delayed_ref_head(head);
|
|
return 0;
|
|
}
|
|
|
|
static inline struct btrfs_delayed_ref_node *
|
|
select_delayed_ref(struct btrfs_delayed_ref_head *head)
|
|
{
|
|
struct btrfs_delayed_ref_node *ref;
|
|
|
|
if (RB_EMPTY_ROOT(&head->ref_tree))
|
|
return NULL;
|
|
/*
|
|
* Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
|
|
* This is to prevent a ref count from going down to zero, which deletes
|
|
* the extent item from the extent tree, when there still are references
|
|
* to add, which would fail because they would not find the extent item.
|
|
*/
|
|
if (!list_empty(&head->ref_add_list))
|
|
return list_first_entry(&head->ref_add_list,
|
|
struct btrfs_delayed_ref_node,
|
|
add_list);
|
|
ref = rb_entry(rb_first(&head->ref_tree),
|
|
struct btrfs_delayed_ref_node, ref_node);
|
|
ASSERT(list_empty(&ref->add_list));
|
|
return ref;
|
|
}
|
|
|
|
|
|
static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
|
|
struct btrfs_fs_info *fs_info,
|
|
struct btrfs_delayed_ref_node *node,
|
|
struct btrfs_delayed_extent_op *extent_op,
|
|
int insert_reserved)
|
|
{
|
|
int ret = 0;
|
|
struct btrfs_delayed_tree_ref *ref;
|
|
u64 parent = 0;
|
|
u64 ref_root = 0;
|
|
|
|
ref = btrfs_delayed_node_to_tree_ref(node);
|
|
|
|
if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
|
|
parent = ref->parent;
|
|
ref_root = ref->root;
|
|
|
|
if (node->ref_mod != 1) {
|
|
printf("btree block(%llu) has %d references rather than 1: action %u ref_root %llu parent %llu",
|
|
node->bytenr, node->ref_mod, node->action, ref_root,
|
|
parent);
|
|
return -EIO;
|
|
}
|
|
if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
|
|
BUG_ON(!extent_op || !extent_op->update_flags);
|
|
ret = alloc_reserved_tree_block(trans, node, extent_op);
|
|
} else if (node->action == BTRFS_DROP_DELAYED_REF) {
|
|
struct btrfs_delayed_tree_ref *ref = btrfs_delayed_node_to_tree_ref(node);
|
|
ret = __free_extent(trans, node->bytenr, node->num_bytes,
|
|
ref->parent, ref->root, ref->level, 0, 1);
|
|
} else {
|
|
BUG();
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/* helper function to actually process a single delayed ref entry */
|
|
static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
|
|
struct btrfs_fs_info *fs_info,
|
|
struct btrfs_delayed_ref_node *node,
|
|
struct btrfs_delayed_extent_op *extent_op,
|
|
int insert_reserved)
|
|
{
|
|
int ret = 0;
|
|
|
|
if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
|
|
node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
|
|
ret = run_delayed_tree_ref(trans, fs_info, node, extent_op,
|
|
insert_reserved);
|
|
} else
|
|
BUG();
|
|
return ret;
|
|
}
|
|
|
|
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, unsigned long nr)
|
|
{
|
|
struct btrfs_fs_info *fs_info = trans->fs_info;
|
|
struct btrfs_delayed_ref_root *delayed_refs;
|
|
struct btrfs_delayed_ref_node *ref;
|
|
struct btrfs_delayed_ref_head *locked_ref = NULL;
|
|
struct btrfs_delayed_extent_op *extent_op;
|
|
int ret;
|
|
int must_insert_reserved = 0;
|
|
|
|
delayed_refs = &trans->delayed_refs;
|
|
while (1) {
|
|
if (!locked_ref) {
|
|
locked_ref = btrfs_select_ref_head(trans);
|
|
if (!locked_ref)
|
|
break;
|
|
}
|
|
/*
|
|
* We need to try and merge add/drops of the same ref since we
|
|
* can run into issues with relocate dropping the implicit ref
|
|
* and then it being added back again before the drop can
|
|
* finish. If we merged anything we need to re-loop so we can
|
|
* get a good ref.
|
|
* Or we can get node references of the same type that weren't
|
|
* merged when created due to bumps in the tree mod seq, and
|
|
* we need to merge them to prevent adding an inline extent
|
|
* backref before dropping it (triggering a BUG_ON at
|
|
* insert_inline_extent_backref()).
|
|
*/
|
|
btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
|
|
ref = select_delayed_ref(locked_ref);
|
|
/*
|
|
* We're done processing refs in this ref_head, clean everything
|
|
* up and move on to the next ref_head.
|
|
*/
|
|
if (!ref) {
|
|
ret = cleanup_ref_head(trans, fs_info, locked_ref);
|
|
if (ret > 0 ) {
|
|
/* We dropped our lock, we need to loop. */
|
|
ret = 0;
|
|
continue;
|
|
}
|
|
locked_ref = NULL;
|
|
continue;
|
|
}
|
|
|
|
ref->in_tree = 0;
|
|
rb_erase(&ref->ref_node, &locked_ref->ref_tree);
|
|
RB_CLEAR_NODE(&ref->ref_node);
|
|
if (!list_empty(&ref->add_list))
|
|
list_del(&ref->add_list);
|
|
/*
|
|
* When we play the delayed ref, also correct the ref_mod on
|
|
* head
|
|
*/
|
|
switch (ref->action) {
|
|
case BTRFS_ADD_DELAYED_REF:
|
|
case BTRFS_ADD_DELAYED_EXTENT:
|
|
locked_ref->ref_mod -= ref->ref_mod;
|
|
break;
|
|
case BTRFS_DROP_DELAYED_REF:
|
|
locked_ref->ref_mod += ref->ref_mod;
|
|
break;
|
|
default:
|
|
WARN_ON(1);
|
|
}
|
|
|
|
/*
|
|
* Record the must-insert_reserved flag before we drop the spin
|
|
* lock.
|
|
*/
|
|
must_insert_reserved = locked_ref->must_insert_reserved;
|
|
locked_ref->must_insert_reserved = 0;
|
|
|
|
extent_op = locked_ref->extent_op;
|
|
locked_ref->extent_op = NULL;
|
|
|
|
ret = run_one_delayed_ref(trans, fs_info, ref, extent_op,
|
|
must_insert_reserved);
|
|
|
|
btrfs_free_delayed_extent_op(extent_op);
|
|
/*
|
|
* If we are re-initing extent tree in this transaction
|
|
* failure in freeing old roots are expected (because we don't
|
|
* have the old extent tree, hence backref resolution will
|
|
* return -EIO).
|
|
*/
|
|
if (ret && (!trans->reinit_extent_tree ||
|
|
ref->action != BTRFS_DROP_DELAYED_REF)) {
|
|
unselect_delayed_ref_head(delayed_refs, locked_ref);
|
|
btrfs_put_delayed_ref(ref);
|
|
return ret;
|
|
}
|
|
|
|
btrfs_put_delayed_ref(ref);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Move the block group item of the block group containing @bytenr from the
 * old root to the block group root.
 *
 * Requires BTRFS_SUPER_FLAG_CHANGING_BG_TREE to be set in the superblock
 * copy and both roots to exist (asserted).
 *
 * The order matters: the item is deleted while last_converted_bg_bytenr
 * still has its previous value, so the delete goes to the old tree; only
 * then is last_converted_bg_bytenr set to @bytenr, so that the following
 * insert goes to the new root.
 *
 * Returns 0 on success, -ENOENT if no block group is found, or the
 * negative error of the failed delete/insert.
 */
int btrfs_convert_one_bg(struct btrfs_trans_handle *trans, u64 bytenr)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *new_root = fs_info->block_group_root;
	struct btrfs_root *old_root = btrfs_extent_root(fs_info, 0);
	struct btrfs_path path = {0};
	struct btrfs_block_group *bg;
	int ret;

	ASSERT(new_root);
	ASSERT(old_root);
	ASSERT(btrfs_super_flags(fs_info->super_copy) &
	       BTRFS_SUPER_FLAG_CHANGING_BG_TREE);

	bg = btrfs_lookup_block_group(fs_info, bytenr);
	if (!bg) {
		error("failed to find block group for bytenr %llu", bytenr);
		return -ENOENT;
	}

	/* Step 1: remove the item from the old tree. */
	ret = remove_block_group_item(trans, &path, bg);
	btrfs_release_path(&path);
	if (ret < 0) {
		error("failed to delete block group item from the old root: %d",
		      ret);
		return ret;
	}

	/* Step 2: record the conversion point, redirecting the insert below. */
	fs_info->last_converted_bg_bytenr = bytenr;

	/* Step 3: insert the item into the new root. */
	ret = insert_block_group_item(trans, bg);
	if (ret < 0)
		error("failed to insert block group item into the new root: %d",
		      ret);

	return ret;
}
|