btrfs-progs/mkfs/main.c

2170 lines
55 KiB
C

/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include "kerncompat.h"
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <getopt.h>
#include <errno.h>
#include <stdbool.h>
#include <string.h>
#include <pthread.h>
#include <uuid/uuid.h>
#include <blkid/blkid.h>
#include "kernel-lib/list.h"
#include "kernel-lib/list_sort.h"
#include "kernel-lib/rbtree.h"
#include "kernel-lib/sizes.h"
#include "kernel-shared/accessors.h"
#include "kernel-shared/extent_io.h"
#include "kernel-shared/uapi/btrfs_tree.h"
#include "kernel-shared/ctree.h"
#include "kernel-shared/disk-io.h"
#include "kernel-shared/volumes.h"
#include "kernel-shared/transaction.h"
#include "kernel-shared/zoned.h"
#include "kernel-shared/uuid-tree.h"
#include "crypto/hash.h"
#include "common/defs.h"
#include "common/internal.h"
#include "common/messages.h"
#include "common/cpu-utils.h"
#include "common/utils.h"
#include "common/path-utils.h"
#include "common/device-utils.h"
#include "common/device-scan.h"
#include "common/help.h"
#include "common/rbtree-utils.h"
#include "common/parse-utils.h"
#include "common/fsfeatures.h"
#include "common/box.h"
#include "common/units.h"
#include "common/string-utils.h"
#include "common/string-table.h"
#include "common/root-tree-utils.h"
#include "cmds/commands.h"
#include "check/qgroup-verify.h"
#include "mkfs/common.h"
#include "mkfs/rootdir.h"
struct mkfs_allocation {
u64 data;
u64 metadata;
u64 mixed;
u64 system;
};
static bool opt_zero_end = true;
static bool opt_discard = true;
static bool opt_zoned = true;
static int opt_oflags = O_RDWR;
struct prepare_device_progress {
int fd;
char *file;
u64 dev_byte_count;
u64 byte_count;
int ret;
};
static int create_metadata_block_groups(struct btrfs_root *root, bool mixed,
struct mkfs_allocation *allocation)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans;
struct btrfs_space_info *sinfo;
u64 flags = BTRFS_BLOCK_GROUP_METADATA;
u64 chunk_start = 0;
u64 chunk_size = 0;
u64 system_group_size = BTRFS_MKFS_SYSTEM_GROUP_SIZE;
int ret;
if (btrfs_is_zoned(fs_info)) {
/* Two zones are reserved for superblock */
system_group_size = fs_info->zone_size;
}
if (mixed)
flags |= BTRFS_BLOCK_GROUP_DATA;
/* Create needed space info to trace extents reservation */
ret = update_space_info(fs_info, flags, 0, 0, &sinfo);
if (ret < 0)
return ret;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error_msg(ERROR_MSG_START_TRANS, "%m");
return ret;
}
root->fs_info->system_allocs = 1;
/*
* We already created the block group item for our temporary system
* chunk in make_btrfs(), so account for the size here.
*/
allocation->system += system_group_size;
if (ret)
return ret;
if (mixed) {
ret = btrfs_alloc_chunk(trans, fs_info,
&chunk_start, &chunk_size,
BTRFS_BLOCK_GROUP_METADATA |
BTRFS_BLOCK_GROUP_DATA);
if (ret == -ENOSPC) {
error("no space to allocate data/metadata chunk");
goto err;
}
if (ret)
return ret;
ret = btrfs_make_block_group(trans, fs_info, 0,
BTRFS_BLOCK_GROUP_METADATA |
BTRFS_BLOCK_GROUP_DATA,
chunk_start, chunk_size);
if (ret)
return ret;
allocation->mixed += chunk_size;
} else {
ret = btrfs_alloc_chunk(trans, fs_info,
&chunk_start, &chunk_size,
BTRFS_BLOCK_GROUP_METADATA);
if (ret == -ENOSPC) {
error("no space to allocate metadata chunk");
goto err;
}
if (ret)
return ret;
ret = btrfs_make_block_group(trans, fs_info, 0,
BTRFS_BLOCK_GROUP_METADATA,
chunk_start, chunk_size);
allocation->metadata += chunk_size;
if (ret)
return ret;
}
root->fs_info->system_allocs = 0;
ret = btrfs_commit_transaction(trans, root);
if (ret) {
errno = -ret;
error_msg(ERROR_MSG_COMMIT_TRANS, "%m");
}
err:
return ret;
}
static int create_data_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root, bool mixed,
struct mkfs_allocation *allocation)
{
struct btrfs_fs_info *fs_info = root->fs_info;
u64 chunk_start = 0;
u64 chunk_size = 0;
int ret = 0;
if (!mixed) {
struct btrfs_space_info *sinfo;
ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA,
0, 0, &sinfo);
if (ret < 0)
return ret;
ret = btrfs_alloc_chunk(trans, fs_info,
&chunk_start, &chunk_size,
BTRFS_BLOCK_GROUP_DATA);
if (ret == -ENOSPC) {
error("no space to allocate data chunk");
goto err;
}
if (ret)
return ret;
ret = btrfs_make_block_group(trans, fs_info, 0,
BTRFS_BLOCK_GROUP_DATA,
chunk_start, chunk_size);
allocation->data += chunk_size;
if (ret)
return ret;
}
err:
return ret;
}
static int make_root_dir(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_key location;
int ret;
ret = btrfs_make_root_dir(trans, root->fs_info->tree_root,
BTRFS_ROOT_TREE_DIR_OBJECTID);
if (ret)
goto err;
ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
if (ret)
goto err;
memcpy(&location, &root->fs_info->fs_root->root_key, sizeof(location));
location.offset = (u64)-1;
ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
"default", 7,
btrfs_super_root_dir(root->fs_info->super_copy),
&location, BTRFS_FT_DIR, 0);
if (ret)
goto err;
ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
"default", 7, location.objectid,
BTRFS_ROOT_TREE_DIR_OBJECTID, 0);
if (ret)
goto err;
err:
return ret;
}
static int __recow_root(struct btrfs_trans_handle *trans, struct btrfs_root *root)
{
struct btrfs_path path = { 0 };
struct btrfs_key key;
int ret;
key.objectid = 0;
key.type = 0;
key.offset = 0;
/* Get a path to the left-most leaves */
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0)
return ret;
while (true) {
struct btrfs_key found_key;
/*
* Our parent nodes must not be newer than the leaf, thus if
* the leaf is as new as the transaction, no need to re-COW.
*/
if (btrfs_header_generation(path.nodes[0]) == trans->transid)
goto next;
/*
* Grab the key of current tree block and do a COW search to
* the current tree block.
*/
btrfs_item_key_to_cpu(path.nodes[0], &key, 0);
btrfs_release_path(&path);
/* This will ensure this leaf and all its parent get COWed */
ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
if (ret < 0)
goto out;
ret = 0;
btrfs_item_key_to_cpu(path.nodes[0], &found_key, 0);
UASSERT(btrfs_comp_cpu_keys(&key, &found_key) == 0);
next:
ret = btrfs_next_leaf(root, &path);
if (ret < 0)
goto out;
if (ret > 0) {
ret = 0;
goto out;
}
}
out:
btrfs_release_path(&path);
return ret;
}
static int recow_global_roots(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root;
struct rb_node *n;
int ret = 0;
for (n = rb_first(&fs_info->global_roots_tree); n; n = rb_next(n)) {
root = rb_entry(n, struct btrfs_root, rb_node);
ret = __recow_root(trans, root);
if (ret)
return ret;
}
return ret;
}
static int recow_roots(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_fs_info *info = root->fs_info;
int ret;
ret = __recow_root(trans, info->fs_root);
if (ret)
return ret;
ret = __recow_root(trans, info->tree_root);
if (ret)
return ret;
ret = __recow_root(trans, info->chunk_root);
if (ret)
return ret;
ret = __recow_root(trans, info->dev_root);
if (ret)
return ret;
if (btrfs_fs_compat_ro(info, BLOCK_GROUP_TREE)) {
ret = __recow_root(trans, info->block_group_root);
if (ret)
return ret;
}
ret = recow_global_roots(trans);
if (ret)
return ret;
return 0;
}
static int create_one_raid_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 type,
struct mkfs_allocation *allocation)
{
struct btrfs_fs_info *fs_info = root->fs_info;
u64 chunk_start;
u64 chunk_size;
int ret;
ret = btrfs_alloc_chunk(trans, fs_info,
&chunk_start, &chunk_size, type);
if (ret == -ENOSPC) {
error("not enough free space to allocate chunk");
exit(1);
}
if (ret)
return ret;
ret = btrfs_make_block_group(trans, fs_info, 0,
type, chunk_start, chunk_size);
type &= BTRFS_BLOCK_GROUP_TYPE_MASK;
if (type == BTRFS_BLOCK_GROUP_DATA) {
allocation->data += chunk_size;
} else if (type == BTRFS_BLOCK_GROUP_METADATA) {
allocation->metadata += chunk_size;
} else if (type == BTRFS_BLOCK_GROUP_SYSTEM) {
allocation->system += chunk_size;
} else if (type ==
(BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA)) {
allocation->mixed += chunk_size;
} else {
error("unrecognized profile type: 0x%llx", type);
ret = -EINVAL;
}
return ret;
}
static int create_raid_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 data_profile,
u64 metadata_profile, bool mixed,
struct mkfs_allocation *allocation)
{
int ret = 0;
if (metadata_profile) {
u64 meta_flags = BTRFS_BLOCK_GROUP_METADATA;
ret = create_one_raid_group(trans, root,
BTRFS_BLOCK_GROUP_SYSTEM |
metadata_profile, allocation);
if (ret)
return ret;
if (mixed)
meta_flags |= BTRFS_BLOCK_GROUP_DATA;
ret = create_one_raid_group(trans, root, meta_flags |
metadata_profile, allocation);
if (ret)
return ret;
}
if (!mixed && data_profile) {
ret = create_one_raid_group(trans, root,
BTRFS_BLOCK_GROUP_DATA |
data_profile, allocation);
if (ret)
return ret;
}
return ret;
}
static const char * const mkfs_usage[] = {
"mkfs.btrfs [options] <dev> [<dev...>]",
"Create a BTRFS filesystem on a device or multiple devices",
"",
"Allocation profiles:",
OPTLINE("-d|--data PROFILE", "data profile, raid0, raid1, raid1c3, raid1c4, raid5, raid6, raid10, dup or single"),
OPTLINE("-m|--metadata PROFILE", "metadata profile, values like for data profile"),
OPTLINE("-M|--mixed","mix metadata and data together"),
"Features:",
OPTLINE("--csum TYPE", ""),
OPTLINE("--checksum TYPE", "checksum algorithm to use, crc32c (default), xxhash, sha256, blake2"),
OPTLINE("-n|--nodesize SIZE", "size of btree nodes"),
OPTLINE("-s|--sectorsize SIZE", "data block size (may not be mountable by current kernel)"),
OPTLINE("-O|--features LIST", "comma separated list of filesystem features (use '-O list-all' to list features)"),
OPTLINE("-L|--label LABEL", "set the filesystem label"),
OPTLINE("-U|--uuid UUID", "specify the filesystem UUID (must be unique for a filesystem with multiple devices)"),
OPTLINE("--device-uuid UUID", "Specify the filesystem device UUID (a.k.a sub-uuid) (for single device filesystem only)"),
"Creation:",
OPTLINE("-b|--byte-count SIZE", "set size of each device to SIZE (filesystem size is sum of all device sizes)"),
OPTLINE("-r|--rootdir DIR", "copy files from DIR to the image root directory"),
OPTLINE("-u|--subvol SUBDIR:FLAGS", "create SUBDIR as subvolume rather than normal directory, can be specified multiple times"),
OPTLINE("--shrink", "(with --rootdir) shrink the filled filesystem to minimal size"),
OPTLINE("-K|--nodiscard", "do not perform whole device TRIM"),
OPTLINE("-f|--force", "force overwrite of existing filesystem"),
"General:",
OPTLINE("-q|--quiet", "no messages except errors"),
OPTLINE("-v|--verbose", "increase verbosity level, default is 1"),
OPTLINE("-V|--version", "print the mkfs.btrfs version and exit"),
OPTLINE("--help", "print this help and exit"),
"Deprecated:",
OPTLINE("-l|--leafsize SIZE", "removed in 6.0, use --nodesize"),
OPTLINE("-R|--runtime-features LIST", "removed in 6.3, use -O|--features"),
NULL
};
static const struct cmd_struct mkfs_cmd = {
.usagestr = mkfs_usage
};
static int zero_output_file(int out_fd, u64 size)
{
int loop_num;
u64 location = 0;
char buf[SZ_4K];
int ret = 0, i;
ssize_t written;
memset(buf, 0, SZ_4K);
/* Only zero out the first 1M */
loop_num = SZ_1M / SZ_4K;
for (i = 0; i < loop_num; i++) {
written = pwrite(out_fd, buf, SZ_4K, location);
if (written != SZ_4K)
ret = -EIO;
location += SZ_4K;
}
/* Then enlarge the file to size */
written = pwrite(out_fd, buf, 1, size - 1);
if (written < 1)
ret = -EIO;
return ret;
}
static int _cmp_device_by_id(void *priv, struct list_head *a,
struct list_head *b)
{
return list_entry(a, struct btrfs_device, dev_list)->devid -
list_entry(b, struct btrfs_device, dev_list)->devid;
}
static void list_all_devices(struct btrfs_root *root, bool is_zoned)
{
struct btrfs_fs_devices *fs_devices;
struct btrfs_device *device;
int number_of_devices = 0;
struct string_table *tab;
int row, col;
fs_devices = root->fs_info->fs_devices;
list_for_each_entry(device, &fs_devices->devices, dev_list)
number_of_devices++;
list_sort(NULL, &fs_devices->devices, _cmp_device_by_id);
printf("Number of devices: %d\n", number_of_devices);
printf("Devices:\n");
if (is_zoned)
tab = table_create(4, number_of_devices + 1);
else
tab = table_create(3, number_of_devices + 1);
tab->spacing = STRING_TABLE_SPACING_2;
col = 0;
table_printf(tab, col++, 0, "> ID");
table_printf(tab, col++, 0, "> SIZE");
if (is_zoned)
table_printf(tab, col++, 0, ">ZONES");
table_printf(tab, col++, 0, "<PATH");
row = 1;
list_for_each_entry(device, &fs_devices->devices, dev_list) {
col = 0;
table_printf(tab, col++, row, ">%llu", device->devid);
table_printf(tab, col++, row, ">%s", pretty_size(device->total_bytes));
if (is_zoned)
table_printf(tab, col++, row, ">%u", device->zone_info->nr_zones);
table_printf(tab, col++, row, "<%s", device->name);
row++;
}
table_dump(tab);
printf("\n");
table_free(tab);
}
static bool is_temp_block_group(struct extent_buffer *node,
struct btrfs_block_group_item *bgi,
u64 data_profile, u64 meta_profile,
u64 sys_profile)
{
u64 flag = btrfs_block_group_flags(node, bgi);
u64 flag_type = flag & BTRFS_BLOCK_GROUP_TYPE_MASK;
u64 flag_profile = flag & BTRFS_BLOCK_GROUP_PROFILE_MASK;
u64 used = btrfs_block_group_used(node, bgi);
/*
* Chunks meets all the following conditions is a temp chunk
* 1) Empty chunk
* Temp chunk is always empty.
*
* 2) profile mismatch with mkfs profile.
* Temp chunk is always in SINGLE
*
* 3) Size differs with mkfs_alloc
* Special case for SINGLE/SINGLE btrfs.
* In that case, temp data chunk and real data chunk are always empty.
* So we need to use mkfs_alloc to be sure which chunk is the newly
* allocated.
*
* Normally, new chunk size is equal to mkfs one (One chunk)
* If it has multiple chunks, we just refuse to delete any one.
* As they are all single, so no real problem will happen.
* So only use condition 1) and 2) to judge them.
*/
if (used != 0)
return false;
switch (flag_type) {
case BTRFS_BLOCK_GROUP_DATA:
case BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA:
data_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK;
if (flag_profile != data_profile)
return true;
break;
case BTRFS_BLOCK_GROUP_METADATA:
meta_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK;
if (flag_profile != meta_profile)
return true;
break;
case BTRFS_BLOCK_GROUP_SYSTEM:
sys_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK;
if (flag_profile != sys_profile)
return true;
break;
}
return false;
}
/* Note: if current is a block group, it will skip it anyway */
static int next_block_group(struct btrfs_root *root,
struct btrfs_path *path)
{
struct btrfs_key key;
int ret = 0;
while (1) {
ret = btrfs_next_item(root, path);
if (ret)
goto out;
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
goto out;
}
out:
return ret;
}
/* This function will cleanup */
static int cleanup_temp_chunks(struct btrfs_fs_info *fs_info,
struct mkfs_allocation *alloc,
u64 data_profile, u64 meta_profile,
u64 sys_profile)
{
struct btrfs_trans_handle *trans = NULL;
struct btrfs_block_group_item *bgi;
struct btrfs_root *root = btrfs_block_group_root(fs_info);
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_path path = { 0 };
int ret = 0;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error_msg(ERROR_MSG_START_TRANS, "%m");
return ret;
}
key.objectid = 0;
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
key.offset = 0;
while (1) {
/*
* as the rest of the loop may modify the tree, we need to
* start a new search each time.
*/
ret = btrfs_search_slot(trans, root, &key, &path, 0, 0);
if (ret < 0)
goto out;
/* Don't pollute ret for >0 case */
if (ret > 0)
ret = 0;
btrfs_item_key_to_cpu(path.nodes[0], &found_key,
path.slots[0]);
if (found_key.objectid < key.objectid)
goto out;
if (found_key.type != BTRFS_BLOCK_GROUP_ITEM_KEY) {
ret = next_block_group(root, &path);
if (ret < 0)
goto out;
if (ret > 0) {
ret = 0;
goto out;
}
btrfs_item_key_to_cpu(path.nodes[0], &found_key,
path.slots[0]);
}
bgi = btrfs_item_ptr(path.nodes[0], path.slots[0],
struct btrfs_block_group_item);
if (is_temp_block_group(path.nodes[0], bgi,
data_profile, meta_profile,
sys_profile)) {
u64 flags = btrfs_block_group_flags(path.nodes[0], bgi);
ret = btrfs_remove_block_group(trans,
found_key.objectid, found_key.offset);
if (ret < 0)
goto out;
if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) ==
BTRFS_BLOCK_GROUP_DATA)
alloc->data -= found_key.offset;
else if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) ==
BTRFS_BLOCK_GROUP_METADATA)
alloc->metadata -= found_key.offset;
else if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) ==
BTRFS_BLOCK_GROUP_SYSTEM)
alloc->system -= found_key.offset;
else if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) ==
(BTRFS_BLOCK_GROUP_METADATA |
BTRFS_BLOCK_GROUP_DATA))
alloc->mixed -= found_key.offset;
}
btrfs_release_path(&path);
key.objectid = found_key.objectid + found_key.offset;
}
out:
if (trans) {
ret = btrfs_commit_transaction(trans, root);
if (ret) {
errno = -ret;
error_msg(ERROR_MSG_COMMIT_TRANS, "%m");
}
}
btrfs_release_path(&path);
return ret;
}
/*
* Just update chunk allocation info, since --rootdir may allocate new
* chunks which is not updated in @allocation structure.
*/
static void update_chunk_allocation(struct btrfs_fs_info *fs_info,
struct mkfs_allocation *allocation)
{
struct btrfs_block_group *bg_cache;
const u64 mixed_flag = BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA;
u64 search_start = 0;
allocation->mixed = 0;
allocation->data = 0;
allocation->metadata = 0;
allocation->system = 0;
while (1) {
bg_cache = btrfs_lookup_first_block_group(fs_info,
search_start);
if (!bg_cache)
break;
if ((bg_cache->flags & mixed_flag) == mixed_flag)
allocation->mixed += bg_cache->length;
else if (bg_cache->flags & BTRFS_BLOCK_GROUP_DATA)
allocation->data += bg_cache->length;
else if (bg_cache->flags & BTRFS_BLOCK_GROUP_METADATA)
allocation->metadata += bg_cache->length;
else
allocation->system += bg_cache->length;
search_start = bg_cache->start + bg_cache->length;
}
}
static int create_global_root(struct btrfs_trans_handle *trans, u64 objectid,
int root_id)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root;
struct btrfs_key key = {
.objectid = objectid,
.type = BTRFS_ROOT_ITEM_KEY,
.offset = root_id,
};
int ret = 0;
root = btrfs_create_tree(trans, &key);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out;
}
ret = btrfs_global_root_insert(fs_info, root);
out:
if (ret)
btrfs_abort_transaction(trans, ret);
return ret;
}
static int create_global_roots(struct btrfs_trans_handle *trans,
int nr_global_roots)
{
int ret, i;
for (i = 1; i < nr_global_roots; i++) {
ret = create_global_root(trans, BTRFS_EXTENT_TREE_OBJECTID, i);
if (ret)
return ret;
ret = create_global_root(trans, BTRFS_CSUM_TREE_OBJECTID, i);
if (ret)
return ret;
ret = create_global_root(trans, BTRFS_FREE_SPACE_TREE_OBJECTID, i);
if (ret)
return ret;
}
btrfs_set_super_nr_global_roots(trans->fs_info->super_copy,
nr_global_roots);
return 0;
}
static int insert_qgroup_items(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
u64 qgroupid)
{
struct btrfs_path path = { 0 };
struct btrfs_root *quota_root = fs_info->quota_root;
struct btrfs_key key;
int ret;
if (qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT) {
error("qgroup level other than 0 is not supported yet");
return -ENOTTY;
}
key.objectid = 0;
key.type = BTRFS_QGROUP_INFO_KEY;
key.offset = qgroupid;
ret = btrfs_insert_empty_item(trans, quota_root, &path, &key,
sizeof(struct btrfs_qgroup_info_item));
btrfs_release_path(&path);
if (ret < 0)
return ret;
key.objectid = 0;
key.type = BTRFS_QGROUP_LIMIT_KEY;
key.offset = qgroupid;
ret = btrfs_insert_empty_item(trans, quota_root, &path, &key,
sizeof(struct btrfs_qgroup_limit_item));
btrfs_release_path(&path);
return ret;
}
/*
* Workaround for squota so the enable_gen can be properly used.
*/
static int touch_root_subvol(struct btrfs_fs_info *fs_info)
{
struct btrfs_trans_handle *trans;
struct btrfs_key key = {
.objectid = BTRFS_FIRST_FREE_OBJECTID,
.type = BTRFS_INODE_ITEM_KEY,
.offset = 0,
};
struct extent_buffer *leaf;
int slot;
struct btrfs_path path = { 0 };
int ret;
trans = btrfs_start_transaction(fs_info->fs_root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error_msg(ERROR_MSG_START_TRANS, "%m");
return ret;
}
ret = btrfs_search_slot(trans, fs_info->fs_root, &key, &path, 0, 1);
if (ret)
goto fail;
leaf = path.nodes[0];
slot = path.slots[0];
btrfs_item_key_to_cpu(leaf, &key, slot);
btrfs_mark_buffer_dirty(leaf);
ret = btrfs_commit_transaction(trans, fs_info->fs_root);
if (ret < 0) {
errno = -ret;
error_msg(ERROR_MSG_COMMIT_TRANS, "%m");
return ret;
}
btrfs_release_path(&path);
return 0;
fail:
btrfs_abort_transaction(trans, ret);
btrfs_release_path(&path);
return ret;
}
static int setup_quota_root(struct btrfs_fs_info *fs_info)
{
struct btrfs_trans_handle *trans;
struct btrfs_qgroup_status_item *qsi;
struct btrfs_root *quota_root;
struct btrfs_path path = { 0 };
struct btrfs_key key;
int qgroup_repaired = 0;
bool simple = btrfs_fs_incompat(fs_info, SIMPLE_QUOTA);
int flags;
int ret;
/* One to modify tree root, one for quota root */
trans = btrfs_start_transaction(fs_info->tree_root, 2);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error_msg(ERROR_MSG_START_TRANS, "%m");
return ret;
}
ret = btrfs_create_root(trans, fs_info, BTRFS_QUOTA_TREE_OBJECTID);
if (ret < 0) {
error("failed to create quota root: %d (%m)", ret);
goto fail;
}
quota_root = fs_info->quota_root;
key.objectid = 0;
key.type = BTRFS_QGROUP_STATUS_KEY;
key.offset = 0;
ret = btrfs_insert_empty_item(trans, quota_root, &path, &key,
sizeof(*qsi));
if (ret < 0) {
error("failed to insert qgroup status item: %d (%m)", ret);
goto fail;
}
qsi = btrfs_item_ptr(path.nodes[0], path.slots[0],
struct btrfs_qgroup_status_item);
btrfs_set_qgroup_status_generation(path.nodes[0], qsi, trans->transid);
btrfs_set_qgroup_status_rescan(path.nodes[0], qsi, 0);
flags = BTRFS_QGROUP_STATUS_FLAG_ON;
if (simple) {
btrfs_set_qgroup_status_enable_gen(path.nodes[0], qsi, trans->transid);
flags |= BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE;
}
else {
flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
}
btrfs_set_qgroup_status_version(path.nodes[0], qsi, 1);
btrfs_set_qgroup_status_flags(path.nodes[0], qsi, flags);
btrfs_release_path(&path);
/* Currently mkfs will only create one subvolume */
ret = insert_qgroup_items(trans, fs_info, BTRFS_FS_TREE_OBJECTID);
if (ret < 0) {
error("failed to insert qgroup items: %d (%m)", ret);
goto fail;
}
ret = btrfs_commit_transaction(trans, fs_info->tree_root);
if (ret < 0) {
errno = -ret;
error_msg(ERROR_MSG_COMMIT_TRANS, "%m");
return ret;
}
/* Hack to count the default subvol metadata by dirtying it */
if (simple) {
ret = touch_root_subvol(fs_info);
if (ret) {
error("failed to touch root dir for simple quota accounting %d (%m)", ret);
goto fail;
}
}
/*
* Qgroup is setup but with wrong info, use qgroup-verify
* infrastructure to repair them. (Just acts as offline rescan)
*/
ret = qgroup_verify_all(fs_info);
if (ret < 0) {
error("qgroup rescan failed: %d (%m)", ret);
return ret;
}
ret = repair_qgroups(fs_info, &qgroup_repaired, true);
if (ret < 0)
error("failed to fill qgroup info: %d (%m)", ret);
return ret;
fail:
btrfs_abort_transaction(trans, ret);
return ret;
}
static int setup_raid_stripe_tree_root(struct btrfs_fs_info *fs_info)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *stripe_root;
struct btrfs_key key = {
.objectid = BTRFS_RAID_STRIPE_TREE_OBJECTID,
.type = BTRFS_ROOT_ITEM_KEY,
};
int ret;
trans = btrfs_start_transaction(fs_info->tree_root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error_msg(ERROR_MSG_START_TRANS, "%m");
return ret;
}
stripe_root = btrfs_create_tree(trans, &key);
if (IS_ERR(stripe_root)) {
ret = PTR_ERR(stripe_root);
btrfs_abort_transaction(trans, ret);
return ret;
}
fs_info->stripe_root = stripe_root;
add_root_to_dirty_list(stripe_root);
ret = btrfs_commit_transaction(trans, fs_info->tree_root);
if (ret) {
errno = -ret;
error_msg(ERROR_MSG_COMMIT_TRANS, "%m");
return ret;
}
return 0;
}
/* Thread callback for device preparation */
static void *prepare_one_device(void *ctx)
{
struct prepare_device_progress *prepare_ctx = ctx;
prepare_ctx->fd = open(prepare_ctx->file, opt_oflags);
if (prepare_ctx->fd < 0) {
error("unable to open %s: %m", prepare_ctx->file);
prepare_ctx->ret = -errno;
return NULL;
}
prepare_ctx->ret = btrfs_prepare_device(prepare_ctx->fd,
prepare_ctx->file,
&prepare_ctx->dev_byte_count,
prepare_ctx->byte_count,
(bconf.verbose ? PREP_DEVICE_VERBOSE : 0) |
(opt_zero_end ? PREP_DEVICE_ZERO_END : 0) |
(opt_discard ? PREP_DEVICE_DISCARD : 0) |
(opt_zoned ? PREP_DEVICE_ZONED : 0));
return NULL;
}
static int parse_subvol_flags(struct rootdir_subvol *subvol, const char *flags)
{
char *buf, *orig_buf;
int ret;
buf = orig_buf = strdup(flags);
if (!buf) {
error_msg(ERROR_MSG_MEMORY, NULL);
ret = -ENOMEM;
goto out;
}
while (true) {
char *comma = strstr(buf, ",");
if (comma)
*comma = 0;
if (!strcmp(buf, "default")) {
subvol->is_default = true;
} else if (!strcmp(buf, "ro")) {
subvol->readonly = true;
} else if (buf[0] != 0) {
error("unrecognized subvol flag \"%s\"", buf);
ret = 1;
goto out;
}
if (comma)
buf = comma + 1;
else
break;
}
ret = 0;
out:
free(orig_buf);
return ret;
}
int BOX_MAIN(mkfs)(int argc, char **argv)
{
char *file;
struct btrfs_root *root;
struct btrfs_fs_info *fs_info;
struct btrfs_trans_handle *trans;
struct open_ctree_args oca = { 0 };
int ret = 0;
int close_ret;
int i;
bool ssd = false;
bool shrink_rootdir = false;
u64 source_dir_size = 0;
u64 min_dev_size;
u64 shrink_size;
int device_count = 0;
int saved_optind;
pthread_t *t_prepare = NULL;
struct prepare_device_progress *prepare_ctx = NULL;
struct mkfs_allocation allocation = { 0 };
struct btrfs_mkfs_config mkfs_cfg;
/* Options */
bool force_overwrite = false;
struct btrfs_mkfs_features features = btrfs_mkfs_default_features;
enum btrfs_csum_type csum_type = BTRFS_CSUM_TYPE_CRC32;
char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = { 0 };
char dev_uuid[BTRFS_UUID_UNPARSED_SIZE] = { 0 };
u32 nodesize = 0;
bool nodesize_forced = false;
u32 sectorsize = 0;
u32 stripesize = 4096;
u64 metadata_profile = 0;
bool metadata_profile_set = false;
u64 data_profile = 0;
bool data_profile_set = false;
u64 byte_count = 0;
u64 dev_byte_count = 0;
bool mixed = false;
char *label = NULL;
int nr_global_roots = sysconf(_SC_NPROCESSORS_ONLN);
char *source_dir = NULL;
size_t source_dir_len = 0;
struct rootdir_subvol *rds;
bool has_default_subvol = false;
LIST_HEAD(subvols);
cpu_detect_flags();
hash_init_accel();
btrfs_config_init();
btrfs_assert_feature_buf_size();
while(1) {
int c;
enum {
GETOPT_VAL_SHRINK = GETOPT_VAL_FIRST,
GETOPT_VAL_CHECKSUM,
GETOPT_VAL_GLOBAL_ROOTS,
GETOPT_VAL_DEVICE_UUID,
};
static const struct option long_options[] = {
{ "byte-count", required_argument, NULL, 'b' },
{ "csum", required_argument, NULL,
GETOPT_VAL_CHECKSUM },
{ "checksum", required_argument, NULL,
GETOPT_VAL_CHECKSUM },
{ "force", no_argument, NULL, 'f' },
{ "leafsize", required_argument, NULL, 'l' },
{ "label", required_argument, NULL, 'L'},
{ "metadata", required_argument, NULL, 'm' },
{ "mixed", no_argument, NULL, 'M' },
{ "nodesize", required_argument, NULL, 'n' },
{ "sectorsize", required_argument, NULL, 's' },
{ "data", required_argument, NULL, 'd' },
{ "version", no_argument, NULL, 'V' },
{ "rootdir", required_argument, NULL, 'r' },
{ "subvol", required_argument, NULL, 'u' },
{ "nodiscard", no_argument, NULL, 'K' },
{ "features", required_argument, NULL, 'O' },
{ "runtime-features", required_argument, NULL, 'R' },
{ "uuid", required_argument, NULL, 'U' },
{ "device-uuid", required_argument, NULL,
GETOPT_VAL_DEVICE_UUID },
{ "quiet", 0, NULL, 'q' },
{ "verbose", 0, NULL, 'v' },
{ "shrink", no_argument, NULL, GETOPT_VAL_SHRINK },
#if EXPERIMENTAL
{ "param", required_argument, NULL, GETOPT_VAL_PARAM },
{ "num-global-roots", required_argument, NULL, GETOPT_VAL_GLOBAL_ROOTS },
#endif
{ "help", no_argument, NULL, GETOPT_VAL_HELP },
{ NULL, 0, NULL, 0}
};
c = getopt_long(argc, argv, "A:b:fl:n:s:m:d:L:R:O:r:U:VvMKqu:",
long_options, NULL);
if (c < 0)
break;
switch(c) {
case 'f':
force_overwrite = true;
break;
case 'd':
ret = parse_bg_profile(optarg, &data_profile);
if (ret) {
error("unknown data profile %s", optarg);
exit(1);
}
data_profile_set = true;
break;
case 'l':
/* Deprecated in 4.0 */
error("--leafsize has been removed in 6.0, use --nodesize");
ret = 1;
goto error;
case 'n':
nodesize = arg_strtou64_with_suffix(optarg);
nodesize_forced = true;
break;
case 'L':
free(label);
ret = strlen(optarg);
if (ret >= BTRFS_LABEL_SIZE) {
error("label %s is too long (max %d)",
optarg, BTRFS_LABEL_SIZE - 1);
exit(1);
}
label = strdup(optarg);
break;
case 'm':
ret = parse_bg_profile(optarg, &metadata_profile);
if (ret) {
error("unknown metadata profile %s", optarg);
exit(1);
}
metadata_profile_set = true;
break;
case 'M':
mixed = true;
break;
case 'O': {
char *orig = strdup(optarg);
char *tmp = orig;
tmp = btrfs_parse_fs_features(tmp, &features);
if (tmp) {
error("unrecognized filesystem feature '%s'",
tmp);
free(orig);
ret = 1;
goto error;
}
free(orig);
if (features.runtime_flags &
BTRFS_FEATURE_RUNTIME_LIST_ALL) {
btrfs_list_all_fs_features(NULL);
goto success;
}
break;
}
case 'R': {
char *orig = strdup(optarg);
char *tmp = orig;
warning("runtime features are deprecated, use -O|--features instead");
tmp = btrfs_parse_runtime_features(tmp,
&features);
if (tmp) {
error("unrecognized runtime feature '%s'",
tmp);
free(orig);
ret = 1;
goto error;
}
free(orig);
if (features.runtime_flags &
BTRFS_FEATURE_RUNTIME_LIST_ALL) {
btrfs_list_all_runtime_features(NULL);
goto success;
}
break;
}
case 's':
sectorsize = arg_strtou64_with_suffix(optarg);
break;
case 'b':
byte_count = arg_strtou64_with_suffix(optarg);
opt_zero_end = false;
break;
case 'v':
bconf_be_verbose();
break;
case 'V':
printf("mkfs.btrfs, part of %s\n",
PACKAGE_STRING);
goto success;
case 'r':
free(source_dir);
source_dir = strdup(optarg);
break;
case 'u': {
struct rootdir_subvol *subvol;
char *colon;
subvol = calloc(1, sizeof(struct rootdir_subvol));
if (!subvol) {
error_msg(ERROR_MSG_MEMORY, NULL);
ret = 1;
goto error;
}
colon = strstr(optarg, ":");
if (colon) {
/* Make sure we choose the last colon in
* optarg, in case the subvol name
* itself contains a colon. */
do {
char *colon2;
colon2 = strstr(colon + 1, ":");
if (colon2)
colon = colon2;
else
break;
} while (true);
subvol->dir = strndup(optarg, colon - optarg);
if (parse_subvol_flags(subvol, colon + 1)) {
ret = 1;
goto error;
}
} else {
subvol->dir = strdup(optarg);
}
if (subvol->is_default) {
if (has_default_subvol) {
error("subvol default flag can only be specified once");
ret = 1;
goto error;
}
has_default_subvol = true;
}
list_add_tail(&subvol->list, &subvols);
break;
}
case 'U':
strncpy_null(fs_uuid, optarg, BTRFS_UUID_UNPARSED_SIZE);
break;
case 'K':
opt_discard = false;
break;
case 'q':
bconf_be_quiet();
break;
case GETOPT_VAL_DEVICE_UUID:
strncpy_null(dev_uuid, optarg, BTRFS_UUID_UNPARSED_SIZE);
break;
case GETOPT_VAL_SHRINK:
shrink_rootdir = true;
break;
case GETOPT_VAL_CHECKSUM:
csum_type = parse_csum_type(optarg);
break;
case GETOPT_VAL_GLOBAL_ROOTS:
btrfs_warn_experimental("Feature: num-global-roots is part of exten-tree-v2");
nr_global_roots = (int)arg_strtou64(optarg);
break;
case GETOPT_VAL_PARAM:
bconf_save_param(optarg);
break;
case GETOPT_VAL_HELP:
default:
usage(&mkfs_cmd, c != GETOPT_VAL_HELP);
}
}
if (bconf.verbose) {
printf("%s\n", PACKAGE_STRING);
printf("See %s for more information.\n\n", PACKAGE_URL);
}
if (!sectorsize)
sectorsize = (u32)SZ_4K;
if (btrfs_check_sectorsize(sectorsize)) {
ret = 1;
goto error;
}
if (!nodesize)
nodesize = max_t(u32, sectorsize, BTRFS_MKFS_DEFAULT_NODE_SIZE);
stripesize = sectorsize;
saved_optind = optind;
device_count = argc - optind;
if (device_count == 0)
usage(&mkfs_cmd, 1);
opt_zoned = !!(features.incompat_flags & BTRFS_FEATURE_INCOMPAT_ZONED);
if (source_dir && device_count > 1) {
error("the option -r is limited to a single device");
ret = 1;
goto error;
}
if (shrink_rootdir && source_dir == NULL) {
error("the option --shrink must be used with --rootdir");
ret = 1;
goto error;
}
if (!list_empty(&subvols) && source_dir == NULL) {
error("option --subvol must be used with --rootdir");
ret = 1;
goto error;
}
if (source_dir) {
char *canonical = realpath(source_dir, NULL);
if (!canonical) {
error("could not get canonical path to %s", source_dir);
ret = 1;
goto error;
}
free(source_dir);
source_dir = canonical;
source_dir_len = strlen(source_dir);
}
list_for_each_entry(rds, &subvols, list) {
char *path, *canonical;
struct rootdir_subvol *rds2;
size_t dir_len;
dir_len = strlen(rds->dir);
path = malloc(source_dir_len + 1 + dir_len + 1);
if (!path) {
error_msg(ERROR_MSG_MEMORY, NULL);
ret = 1;
goto error;
}
memcpy(path, source_dir, source_dir_len);
path[source_dir_len] = '/';
memcpy(path + source_dir_len + 1, rds->dir, dir_len + 1);
canonical = realpath(path, NULL);
if (!canonical) {
error("could not get canonical path to %s", rds->dir);
free(path);
ret = 1;
goto error;
}
free(path);
path = canonical;
if (!path_exists(path)) {
error("subvolume %s does not exist", rds->dir);
free(path);
ret = 1;
goto error;
}
if (!path_is_dir(path)) {
error("subvolume %s is not a directory", rds->dir);
free(path);
ret = 1;
goto error;
}
rds->full_path = path;
if (strlen(path) < source_dir_len + 1 ||
memcmp(path, source_dir, source_dir_len) != 0 ||
path[source_dir_len] != '/') {
error("subvolume %s is not a child of %s", rds->dir, source_dir);
ret = 1;
goto error;
}
for (rds2 = list_first_entry(&subvols, struct rootdir_subvol, list);
rds2 != rds;
rds2 = list_next_entry(rds2, list)) {
if (strcmp(rds2->full_path, path) == 0) {
error("subvolume %s specified more than once", rds->dir);
ret = 1;
goto error;
}
}
}
if (*fs_uuid) {
uuid_t dummy_uuid;
if (uuid_parse(fs_uuid, dummy_uuid) != 0) {
error("could not parse UUID: %s", fs_uuid);
ret = 1;
goto error;
}
/* We allow non-unique fsid for single device btrfs filesystem. */
if (device_count != 1 && !test_uuid_unique(fs_uuid)) {
error("non-unique UUID: %s", fs_uuid);
ret = 1;
goto error;
}
}
if (*dev_uuid) {
uuid_t dummy_uuid;
if (uuid_parse(dev_uuid, dummy_uuid) != 0) {
error("could not parse device UUID: %s", dev_uuid);
ret = 1;
goto error;
}
/* We allow non-unique device uuid for single device filesystem. */
if (device_count != 1 && !test_uuid_unique(dev_uuid)) {
error("the option --device-uuid %s can be used only for a single device filesystem",
dev_uuid);
ret = 1;
goto error;
}
}
for (i = 0; i < device_count; i++) {
file = argv[optind++];
if (source_dir && path_exists(file) == 0)
ret = 0;
else if (path_is_block_device(file) == 1)
ret = test_dev_for_mkfs(file, force_overwrite);
else
ret = test_status_for_mkfs(file, force_overwrite);
if (ret)
goto error;
}
optind = saved_optind;
device_count = argc - optind;
file = argv[optind++];
ssd = device_get_rotational(file);
if (opt_zoned) {
if (!zone_size(file)) {
error("zoned: %s: zone size undefined", file);
exit(1);
}
} else if (zoned_model(file) == ZONED_HOST_MANAGED) {
if (bconf.verbose)
printf(
"Zoned: %s: host-managed device detected, setting zoned feature\n",
file);
opt_zoned = true;
features.incompat_flags |= BTRFS_FEATURE_INCOMPAT_ZONED;
}
/*
* Set default profiles according to number of added devices.
* For mixed groups defaults are single/single.
*/
if (!mixed) {
u64 tmp;
if (!metadata_profile_set) {
if (device_count > 1)
tmp = BTRFS_MKFS_DEFAULT_META_MULTI_DEVICE;
else
tmp = BTRFS_MKFS_DEFAULT_META_ONE_DEVICE;
metadata_profile = tmp;
}
if (!data_profile_set) {
if (device_count > 1)
tmp = BTRFS_MKFS_DEFAULT_DATA_MULTI_DEVICE;
else
tmp = BTRFS_MKFS_DEFAULT_DATA_ONE_DEVICE;
data_profile = tmp;
}
} else {
if (metadata_profile_set || data_profile_set) {
if (metadata_profile != data_profile) {
error(
"with mixed block groups data and metadata profiles must be the same");
ret = 1;
goto error;
}
}
if (!nodesize_forced)
nodesize = sectorsize;
}
/*
* FS features that can be set by other means than -O
* just set the bit here
*/
if (mixed)
features.incompat_flags |= BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS;
if ((data_profile | metadata_profile) & BTRFS_BLOCK_GROUP_RAID56_MASK) {
features.incompat_flags |= BTRFS_FEATURE_INCOMPAT_RAID56;
warning("RAID5/6 support has known problems is strongly discouraged\n"
"\t to be used besides testing or evaluation.\n");
}
if ((data_profile | metadata_profile) &
(BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4)) {
features.incompat_flags |= BTRFS_FEATURE_INCOMPAT_RAID1C34;
}
/* Extent tree v2 comes with a set of mandatory features. */
if (features.incompat_flags & BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2) {
features.incompat_flags |= BTRFS_FEATURE_INCOMPAT_NO_HOLES;
features.compat_ro_flags |=
BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE |
BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID |
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE;
if (!nr_global_roots) {
error("you must set a non-zero num-global-roots value");
exit(1);
}
}
/* Block group tree feature requires no-holes and free-space-tree. */
if (features.compat_ro_flags & BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE &&
(!(features.incompat_flags & BTRFS_FEATURE_INCOMPAT_NO_HOLES) ||
!(features.compat_ro_flags & BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))) {
error("block group tree requires no-holes and free-space-tree features");
exit(1);
}
if (opt_zoned) {
const int blkid_version = blkid_get_library_version(NULL, NULL);
if (source_dir) {
error("the option -r and zoned mode are incompatible");
exit(1);
}
if (features.incompat_flags & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) {
error("cannot enable mixed-bg in zoned mode");
exit(1);
}
if (features.incompat_flags & BTRFS_FEATURE_INCOMPAT_RAID56) {
error("cannot enable RAID5/6 in zoned mode");
exit(1);
}
if (blkid_version < 2380)
warning("libblkid < 2.38 does not support zoned mode's superblock location, update recommended");
}
if (btrfs_check_nodesize(nodesize, sectorsize, &features)) {
ret = 1;
goto error;
}
if (sectorsize < sizeof(struct btrfs_super_block)) {
error("sectorsize smaller than superblock: %u < %zu",
sectorsize, sizeof(struct btrfs_super_block));
ret = 1;
goto error;
}
min_dev_size = btrfs_min_dev_size(nodesize, mixed,
opt_zoned ? zone_size(file) : 0,
metadata_profile, data_profile);
if (byte_count) {
byte_count = round_down(byte_count, sectorsize);
if (opt_zoned)
byte_count = round_down(byte_count, zone_size(file));
}
/*
* Enlarge the destination file or create a new one, using the size
* calculated from source dir.
*
* This must be done before minimal device size checks.
*/
if (source_dir) {
int oflags = O_RDWR;
struct stat statbuf;
int fd;
if (path_exists(file) == 0)
oflags |= O_CREAT;
fd = open(file, oflags, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP |
S_IROTH);
if (fd < 0) {
error("unable to open %s: %m", file);
ret = 1;
goto error;
}
ret = fstat(fd, &statbuf);
if (ret < 0) {
error("unable to stat %s: %m", file);
ret = -errno;
goto error;
}
/*
* Block_count not specified, use file/device size first.
* Or we will always use source_dir_size calculated for mkfs.
*/
if (!byte_count)
byte_count = round_down(device_get_partition_size_fd_stat(fd, &statbuf),
sectorsize);
source_dir_size = btrfs_mkfs_size_dir(source_dir, sectorsize,
min_dev_size, metadata_profile, data_profile);
UASSERT(IS_ALIGNED(source_dir_size, sectorsize));
if (byte_count < source_dir_size) {
if (S_ISREG(statbuf.st_mode)) {
byte_count = source_dir_size;
} else {
warning(
"the target device %llu (%s) is smaller than the calculated source directory size %llu (%s), mkfs may fail",
byte_count, pretty_size(byte_count),
source_dir_size, pretty_size(source_dir_size));
}
}
ret = zero_output_file(fd, byte_count);
if (ret) {
error("unable to zero the output file");
close(fd);
goto error;
}
/* our "device" is the new image file */
dev_byte_count = byte_count;
close(fd);
}
/* Check device/byte_count after the nodesize is determined */
if (byte_count && byte_count < min_dev_size) {
error("size %llu is too small to make a usable filesystem", byte_count);
error("minimum size for a %sbtrfs filesystem is %llu",
opt_zoned ? "zoned mode " : "", min_dev_size);
ret = 1;
goto error;
}
for (i = saved_optind; i < saved_optind + device_count; i++) {
char *path;
path = argv[i];
ret = test_minimum_size(path, min_dev_size);
if (ret < 0) {
error("failed to check size for %s: %m", path);
goto error;
}
if (ret > 0) {
error("'%s' is too small to make a usable filesystem",
path);
error("minimum size for each btrfs device is %llu",
min_dev_size);
goto error;
}
}
ret = test_num_disk_vs_raid(metadata_profile, data_profile,
device_count, mixed, ssd);
if (ret)
goto error;
if (opt_zoned && device_count) {
switch (data_profile & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
case BTRFS_BLOCK_GROUP_DUP:
case BTRFS_BLOCK_GROUP_RAID1:
case BTRFS_BLOCK_GROUP_RAID1C3:
case BTRFS_BLOCK_GROUP_RAID1C4:
case BTRFS_BLOCK_GROUP_RAID0:
case BTRFS_BLOCK_GROUP_RAID10:
#if EXPERIMENTAL
features.incompat_flags |= BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE;
#endif
break;
default:
break;
}
}
if (opt_zoned) {
u64 metadata = BTRFS_BLOCK_GROUP_METADATA | metadata_profile;
u64 data = BTRFS_BLOCK_GROUP_DATA | data_profile;
bool rst = false;
if (features.incompat_flags & BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE)
rst = true;
if (!zoned_profile_supported(metadata, rst) ||
!zoned_profile_supported(data, rst)) {
error("zoned mode does not yet support the selected RAID profiles");
ret = 1;
goto error;
}
}
t_prepare = calloc(device_count, sizeof(*t_prepare));
prepare_ctx = calloc(device_count, sizeof(*prepare_ctx));
if (!t_prepare || !prepare_ctx) {
error_msg(ERROR_MSG_MEMORY, "thread for preparing devices");
ret = 1;
goto error;
}
opt_oflags = O_RDWR;
for (i = 0; i < device_count; i++) {
if (opt_zoned &&
zoned_model(argv[optind + i - 1]) == ZONED_HOST_MANAGED) {
opt_oflags |= O_DIRECT;
break;
}
}
/* Start threads */
for (i = 0; i < device_count; i++) {
prepare_ctx[i].file = argv[optind + i - 1];
prepare_ctx[i].byte_count = byte_count;
prepare_ctx[i].dev_byte_count = byte_count;
ret = pthread_create(&t_prepare[i], NULL, prepare_one_device,
&prepare_ctx[i]);
if (ret) {
errno = -ret;
error("failed to create thread for prepare device %s: %m",
prepare_ctx[i].file);
goto error;
}
}
/* Wait for threads */
for (i = 0; i < device_count; i++)
pthread_join(t_prepare[i], NULL);
ret = prepare_ctx[0].ret;
if (ret) {
error("unable prepare device: %s", prepare_ctx[0].file);
goto error;
}
dev_byte_count = prepare_ctx[0].dev_byte_count;
if (byte_count && byte_count > dev_byte_count) {
error("%s is smaller than requested size, expected %llu, found %llu",
file, byte_count, dev_byte_count);
ret = 1;
goto error;
}
if (btrfs_bg_type_to_tolerated_failures(metadata_profile) <
btrfs_bg_type_to_tolerated_failures(data_profile))
warning("metadata has lower redundancy than data!\n");
if (bconf.verbose) {
printf("NOTE: several default settings have changed in version 5.15, please make sure\n");
printf(" this does not affect your deployments:\n");
printf(" - DUP for metadata (-m dup)\n");
printf(" - enabled no-holes (-O no-holes)\n");
printf(" - enabled free-space-tree (-R free-space-tree)\n");
printf("\n");
}
mkfs_cfg.label = label;
memcpy(mkfs_cfg.fs_uuid, fs_uuid, sizeof(mkfs_cfg.fs_uuid));
memcpy(mkfs_cfg.dev_uuid, dev_uuid, sizeof(mkfs_cfg.dev_uuid));
mkfs_cfg.num_bytes = dev_byte_count;
mkfs_cfg.nodesize = nodesize;
mkfs_cfg.sectorsize = sectorsize;
mkfs_cfg.stripesize = stripesize;
mkfs_cfg.features = features;
mkfs_cfg.csum_type = csum_type;
mkfs_cfg.leaf_data_size = __BTRFS_LEAF_DATA_SIZE(nodesize);
if (opt_zoned)
mkfs_cfg.zone_size = zone_size(file);
else
mkfs_cfg.zone_size = 0;
ret = make_btrfs(prepare_ctx[0].fd, &mkfs_cfg);
if (ret) {
errno = -ret;
error("error during mkfs: %m");
goto error;
}
oca.filename = file;
oca.flags = OPEN_CTREE_WRITES | OPEN_CTREE_TEMPORARY_SUPER |
OPEN_CTREE_EXCLUSIVE;
fs_info = open_ctree_fs_info(&oca);
if (!fs_info) {
error("open ctree failed");
ret = 1;
goto error;
}
root = fs_info->fs_root;
ret = create_metadata_block_groups(root, mixed, &allocation);
if (ret) {
error("failed to create default block groups: %d", ret);
goto error;
}
if (features.incompat_flags & BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE) {
ret = setup_raid_stripe_tree_root(fs_info);
if (ret < 0) {
error("failed to initialize raid-stripe-tree: %d (%m)", ret);
goto out;
}
}
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
errno = -PTR_ERR(trans);
error_msg(ERROR_MSG_START_TRANS, "%m");
ret = 1;
goto error;
}
ret = create_data_block_groups(trans, root, mixed, &allocation);
if (ret) {
error("failed to create default data block groups: %d", ret);
goto error;
}
if (features.incompat_flags & BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2) {
ret = create_global_roots(trans, nr_global_roots);
if (ret) {
error("failed to create global roots: %d", ret);
goto error;
}
}
ret = make_root_dir(trans, root);
if (ret) {
error("failed to setup the root directory: %d", ret);
goto error;
}
ret = btrfs_commit_transaction(trans, root);
if (ret) {
errno = -ret;
error_msg(ERROR_MSG_COMMIT_TRANS, "%m");
goto out;
}
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
errno = -PTR_ERR(trans);
error_msg(ERROR_MSG_START_TRANS, "%m");
ret = 1;
goto error;
}
if (device_count == 0)
goto raid_groups;
for (i = 1; i < device_count; i++) {
ret = btrfs_device_already_in_root(root, prepare_ctx[i].fd,
BTRFS_SUPER_INFO_OFFSET);
if (ret) {
error("skipping duplicate device %s in the filesystem",
file);
continue;
}
dev_byte_count = prepare_ctx[i].dev_byte_count;
if (prepare_ctx[i].ret) {
errno = -prepare_ctx[i].ret;
error("unable to prepare device %s: %m", prepare_ctx[i].file);
ret = 1;
goto error;
}
ret = btrfs_add_to_fsid(trans, root, prepare_ctx[i].fd,
prepare_ctx[i].file, dev_byte_count,
sectorsize, sectorsize, sectorsize);
if (ret) {
error("unable to add %s to filesystem: %d",
prepare_ctx[i].file, ret);
goto error;
}
if (bconf.verbose >= 2) {
struct btrfs_device *device;
device = container_of(fs_info->fs_devices->devices.next,
struct btrfs_device, dev_list);
printf("adding device %s id %llu\n", file, device->devid);
}
}
if (opt_zoned)
btrfs_get_dev_zone_info_all_devices(fs_info);
raid_groups:
ret = create_raid_groups(trans, root, data_profile,
metadata_profile, mixed, &allocation);
if (ret) {
error("unable to create raid groups: %d", ret);
goto out;
}
/*
* Commit current transaction so we can COW all existing tree blocks
* to newly created raid groups.
* As currently we use btrfs_search_slot() to COW tree blocks in
* recow_roots(), if a tree block is already modified in current trans,
* it won't be re-COWed, thus it will stay in temporary chunks.
*/
ret = btrfs_commit_transaction(trans, root);
if (ret) {
errno = -ret;
error_msg(ERROR_MSG_COMMIT_TRANS, "before recowing trees: %m");
goto out;
}
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
errno = -PTR_ERR(trans);
error_msg(ERROR_MSG_START_TRANS, "%m");
ret = 1;
goto error;
}
/* COW all tree blocks to newly created chunks */
ret = recow_roots(trans, root);
if (ret) {
errno = -ret;
error("unable to COW tree blocks to new profiles: %m");
goto out;
}
ret = btrfs_make_subvolume(trans, BTRFS_DATA_RELOC_TREE_OBJECTID,
false);
if (ret) {
error("unable to create data reloc tree: %d", ret);
goto out;
}
ret = btrfs_commit_transaction(trans, root);
if (ret) {
errno = -ret;
error_msg(ERROR_MSG_START_TRANS, "%m");
goto out;
}
if (source_dir) {
pr_verbose(LOG_DEFAULT, "Rootdir from: %s\n", source_dir);
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
errno = -PTR_ERR(trans);
error_msg(ERROR_MSG_START_TRANS, "%m");
goto out;
}
ret = btrfs_mkfs_fill_dir(trans, source_dir, root,
&subvols);
if (ret) {
error("error while filling filesystem: %d", ret);
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = btrfs_commit_transaction(trans, root);
if (ret) {
errno = -ret;
error_msg(ERROR_MSG_COMMIT_TRANS, "%m");
goto out;
}
list_for_each_entry(rds, &subvols, list) {
pr_verbose(LOG_DEFAULT, " Subvolume: %s\n",
rds->full_path);
}
if (shrink_rootdir) {
pr_verbose(LOG_DEFAULT, " Shrink: yes\n");
ret = btrfs_mkfs_shrink_fs(fs_info, &shrink_size,
shrink_rootdir);
if (ret < 0) {
error("error while shrinking filesystem: %d",
ret);
goto out;
}
} else {
pr_verbose(LOG_DEFAULT, " Shrink: no\n");
}
}
ret = btrfs_rebuild_uuid_tree(fs_info);
if (ret < 0)
goto out;
ret = cleanup_temp_chunks(fs_info, &allocation, data_profile,
metadata_profile, metadata_profile);
if (ret < 0) {
error("failed to cleanup temporary chunks: %d", ret);
goto out;
}
if (features.runtime_flags & BTRFS_FEATURE_RUNTIME_QUOTA ||
features.incompat_flags & BTRFS_FEATURE_INCOMPAT_SIMPLE_QUOTA) {
ret = setup_quota_root(fs_info);
if (ret < 0) {
error("failed to initialize quota: %d (%m)", ret);
goto out;
}
}
if (bconf.verbose) {
char features_buf[BTRFS_FEATURE_STRING_BUF_SIZE];
update_chunk_allocation(fs_info, &allocation);
printf("Label: %s\n", label);
printf("UUID: %s\n", mkfs_cfg.fs_uuid);
if (dev_uuid[0] != 0)
printf("Device UUID: %s\n", mkfs_cfg.dev_uuid);
printf("Node size: %u\n", nodesize);
printf("Sector size: %u\t(CPU page size: %lu)\n",
sectorsize, sysconf(_SC_PAGESIZE));
printf("Filesystem size: %s\n",
pretty_size(btrfs_super_total_bytes(fs_info->super_copy)));
printf("Block group profiles:\n");
if (allocation.data)
printf(" Data: %-8s %16s\n",
btrfs_group_profile_str(data_profile),
pretty_size(allocation.data));
if (allocation.metadata)
printf(" Metadata: %-8s %16s\n",
btrfs_group_profile_str(metadata_profile),
pretty_size(allocation.metadata));
if (allocation.mixed)
printf(" Data+Metadata: %-8s %16s\n",
btrfs_group_profile_str(data_profile),
pretty_size(allocation.mixed));
printf(" System: %-8s %16s\n",
btrfs_group_profile_str(metadata_profile),
pretty_size(allocation.system));
printf("SSD detected: %s\n", ssd ? "yes" : "no");
printf("Zoned device: %s\n", opt_zoned ? "yes" : "no");
if (opt_zoned)
printf(" Zone size: %s\n",
pretty_size(fs_info->zone_size));
btrfs_parse_fs_features_to_string(features_buf, &features);
printf("Features: %s\n", features_buf);
printf("Checksum: %s\n",
btrfs_super_csum_name(mkfs_cfg.csum_type));
list_all_devices(root, opt_zoned);
if (mkfs_cfg.csum_type == BTRFS_CSUM_TYPE_SHA256) {
printf(
"NOTE: you may need to manually load kernel module implementing accelerated SHA256 in case\n"
" the generic implementation is built-in, before mount. Check lsmod or /proc/crypto\n\n"
);
}
}
/*
* The filesystem is now fully set up, commit the remaining changes and
* fix the signature as the last step before closing the devices.
*/
fs_info->finalize_on_close = 1;
out:
close_ret = close_ctree(root);
if (!close_ret) {
optind = saved_optind;
device_count = argc - optind;
while (device_count-- > 0) {
file = argv[optind++];
if (path_is_block_device(file) == 1)
btrfs_register_one_device(file);
}
}
if (!ret && close_ret) {
ret = close_ret;
error("failed to close ctree, the filesystem may be inconsistent: %d",
ret);
}
btrfs_close_all_devices();
error:
if (prepare_ctx) {
for (i = 0; i < device_count; i++)
close(prepare_ctx[i].fd);
}
free(t_prepare);
free(prepare_ctx);
free(label);
free(source_dir);
while (!list_empty(&subvols)) {
struct rootdir_subvol *head;
head = list_entry(subvols.next, struct rootdir_subvol, list);
free(head->dir);
free(head->full_path);
list_del(&head->list);
free(head);
}
return !!ret;
success:
exit(0);
}