/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

/*
 * NOTE(review): the header names of the following bare #include directives
 * are missing in this copy of the file (presumably <...> system headers were
 * stripped) - restore them from the original source.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "kernel-shared/ctree.h"
#include "kernel-shared/accessors.h"
#include "kernel-shared/disk-io.h"
#include "kernel-shared/volumes.h"
#include "kernel-shared/transaction.h"
#include "kernel-shared/extent_io.h"
#include "kernel-shared/zoned.h"
#include "common/fsfeatures.h"
#include "common/internal.h"
#include "common/messages.h"
#include "common/path-utils.h"
#include "common/device-utils.h"
#include "common/open-utils.h"
#include "common/string-utils.h"
#include "mkfs/common.h"

/*
 * Objectid of the tree stored in each initial mkfs block, indexed by the
 * MKFS_* block number.  It is used as the key objectid of the root items and
 * as the key offset of the tree block backrefs written by this file.
 */
static u64 reference_root_table[] = {
	[MKFS_ROOT_TREE]	=	BTRFS_ROOT_TREE_OBJECTID,
	[MKFS_EXTENT_TREE]	=	BTRFS_EXTENT_TREE_OBJECTID,
	[MKFS_CHUNK_TREE]	=	BTRFS_CHUNK_TREE_OBJECTID,
	[MKFS_DEV_TREE]		=	BTRFS_DEV_TREE_OBJECTID,
	[MKFS_FS_TREE]		=	BTRFS_FS_TREE_OBJECTID,
	[MKFS_CSUM_TREE]	=	BTRFS_CSUM_TREE_OBJECTID,
	[MKFS_FREE_SPACE_TREE]	=	BTRFS_FREE_SPACE_TREE_OBJECTID,
	[MKFS_BLOCK_GROUP_TREE]	=	BTRFS_BLOCK_GROUP_TREE_OBJECTID,
};

/*
 * Write an empty leaf (zero items) owned by @objectid to byte offset @block.
 *
 * The header already present in @buf is kept; everything after the header is
 * zeroed, then bytenr, owner and nritems are updated before the block is
 * checksummed and written (cfg->nodesize bytes).
 *
 * Returns 0 on success, -errno if the write failed, -EIO on a short write.
 */
static int btrfs_write_empty_tree(int fd, struct btrfs_mkfs_config *cfg,
				  struct extent_buffer *buf, u64 objectid,
				  u64 block)
{
	int ret;

	/* Wipe the item area left over from the previously written block. */
	memset(buf->data + sizeof(struct btrfs_header), 0,
	       cfg->nodesize - sizeof(struct btrfs_header));
	btrfs_set_header_bytenr(buf, block);
	btrfs_set_header_owner(buf, objectid);
	btrfs_set_header_nritems(buf, 0);
csum_tree_block_size(buf, btrfs_csum_type_size(cfg->csum_type), 0,
			     cfg->csum_type);
	ret = btrfs_pwrite(fd, buf->data, cfg->nodesize, block, cfg->zone_size);
	/* Anything other than a full nodesize write is treated as a failure. */
	if (ret != cfg->nodesize)
		return ret < 0 ? -errno : -EIO;
	return 0;
}

/*
 * Fill @buf with the root tree leaf and write it to cfg->blocks[MKFS_ROOT_TREE].
 *
 * One root item is emitted for every entry of @blocks except the root tree
 * and the chunk tree themselves (and the block group tree when that feature
 * is not enabled).  The caller is expected to have set up the header in @buf.
 *
 * Returns a negative errno on failure, a non-negative value on success.
 */
static int btrfs_create_tree_root(int fd, struct btrfs_mkfs_config *cfg,
				  struct extent_buffer *buf,
				  const enum btrfs_mkfs_block *blocks,
				  int blocks_nr)
{
	struct btrfs_root_item root_item;
	struct btrfs_inode_item *inode_item;
	struct btrfs_disk_key disk_key;
	u32 nritems = 0;
	u32 itemoff;
	int ret = 0;
	int blk;
	int i;
	u8 uuid[BTRFS_UUID_SIZE];
	bool block_group_tree = !!(cfg->features.compat_ro_flags &
				   BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE);

	memset(buf->data + sizeof(struct btrfs_header), 0,
	       cfg->nodesize - sizeof(struct btrfs_header));
	memset(&root_item, 0, sizeof(root_item));
	memset(&disk_key, 0, sizeof(disk_key));

	/* create the items for the root tree */
	inode_item = &root_item.inode;
	btrfs_set_stack_inode_generation(inode_item, 1);
	btrfs_set_stack_inode_size(inode_item, 3);
	btrfs_set_stack_inode_nlink(inode_item, 1);
	btrfs_set_stack_inode_nbytes(inode_item, cfg->nodesize);
	btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
	btrfs_set_root_refs(&root_item, 1);
	btrfs_set_root_used(&root_item, cfg->nodesize);
	btrfs_set_root_generation(&root_item, 1);

	/* Type and offset are shared by all root items, only objectid varies. */
	btrfs_set_disk_key_type(&disk_key, BTRFS_ROOT_ITEM_KEY);
	btrfs_set_disk_key_offset(&disk_key, 0);
	/* Item data is placed from the end of the leaf data area downwards. */
	itemoff = cfg->leaf_data_size - sizeof(root_item);

	for (i = 0; i < blocks_nr; i++) {
		blk = blocks[i];
		/* No root item is written for the root tree or the chunk tree. */
		if (blk == MKFS_ROOT_TREE || blk == MKFS_CHUNK_TREE)
			continue;

		if (!block_group_tree && blk == MKFS_BLOCK_GROUP_TREE)
			continue;

		btrfs_set_root_bytenr(&root_item, cfg->blocks[blk]);
		btrfs_set_disk_key_objectid(&disk_key,
			reference_root_table[blk]);
		btrfs_set_item_key(buf, &disk_key, nritems);
		btrfs_set_item_offset(buf, nritems, itemoff);
		btrfs_set_item_size(buf, nritems, sizeof(root_item));
		/* Only the FS tree gets a generated UUID and creation times. */
		if (blk == MKFS_FS_TREE) {
			time_t now = time(NULL);

			uuid_generate(uuid);
			memcpy(root_item.uuid, uuid,
BTRFS_UUID_SIZE); btrfs_set_stack_timespec_sec(&root_item.otime, now); btrfs_set_stack_timespec_sec(&root_item.ctime, now); } else { memset(uuid, 0, BTRFS_UUID_SIZE); memcpy(root_item.uuid, uuid, BTRFS_UUID_SIZE); btrfs_set_stack_timespec_sec(&root_item.otime, 0); btrfs_set_stack_timespec_sec(&root_item.ctime, 0); } write_extent_buffer(buf, &root_item, btrfs_item_ptr_offset(buf, nritems), sizeof(root_item)); nritems++; itemoff -= sizeof(root_item); } btrfs_set_header_nritems(buf, nritems); /* generate checksum */ csum_tree_block_size(buf, btrfs_csum_type_size(cfg->csum_type), 0, cfg->csum_type); /* write back root tree */ ret = btrfs_pwrite(fd, buf->data, cfg->nodesize, cfg->blocks[MKFS_ROOT_TREE], cfg->zone_size); if (ret != cfg->nodesize) return (ret < 0 ? -errno : -EIO); return ret; } static int create_free_space_tree(int fd, struct btrfs_mkfs_config *cfg, struct extent_buffer *buf, u64 group_start, u64 group_size, u64 free_start) { struct btrfs_free_space_info *info; struct btrfs_disk_key disk_key; int itemoff = cfg->leaf_data_size; int nritems = 0; int ret; memset(buf->data + sizeof(struct btrfs_header), 0, cfg->nodesize - sizeof(struct btrfs_header)); itemoff -= sizeof(*info); btrfs_set_disk_key_objectid(&disk_key, group_start); btrfs_set_disk_key_offset(&disk_key, group_size); btrfs_set_disk_key_type(&disk_key, BTRFS_FREE_SPACE_INFO_KEY); btrfs_set_item_key(buf, &disk_key, nritems); btrfs_set_item_offset(buf, nritems, itemoff); btrfs_set_item_size(buf, nritems, sizeof(*info)); info = btrfs_item_ptr(buf, nritems, struct btrfs_free_space_info); btrfs_set_free_space_extent_count(buf, info, 1); btrfs_set_free_space_flags(buf, info, 0); nritems++; btrfs_set_disk_key_objectid(&disk_key, free_start); btrfs_set_disk_key_offset(&disk_key, group_start + group_size - free_start); btrfs_set_disk_key_type(&disk_key, BTRFS_FREE_SPACE_EXTENT_KEY); btrfs_set_item_key(buf, &disk_key, nritems); btrfs_set_item_offset(buf, nritems, itemoff); btrfs_set_item_size(buf, nritems, 
0); nritems++; btrfs_set_header_bytenr(buf, cfg->blocks[MKFS_FREE_SPACE_TREE]); btrfs_set_header_owner(buf, BTRFS_FREE_SPACE_TREE_OBJECTID); btrfs_set_header_nritems(buf, nritems); csum_tree_block_size(buf, btrfs_csum_type_size(cfg->csum_type), 0, cfg->csum_type); ret = btrfs_pwrite(fd, buf->data, cfg->nodesize, cfg->blocks[MKFS_FREE_SPACE_TREE], cfg->zone_size); if (ret != cfg->nodesize) return ret < 0 ? -errno : -EIO; return 0; } static void write_block_group_item(struct extent_buffer *buf, u32 nr, u64 objectid, u64 offset, u64 used, u64 chunk_objectid, u32 itemoff) { struct btrfs_block_group_item *bg_item; struct btrfs_disk_key disk_key; btrfs_set_disk_key_objectid(&disk_key, objectid); btrfs_set_disk_key_offset(&disk_key, offset); btrfs_set_disk_key_type(&disk_key, BTRFS_BLOCK_GROUP_ITEM_KEY); btrfs_set_item_key(buf, &disk_key, nr); btrfs_set_item_offset(buf, nr, itemoff); btrfs_set_item_size(buf, nr, sizeof(*bg_item)); bg_item = btrfs_item_ptr(buf, nr, struct btrfs_block_group_item); btrfs_set_block_group_used(buf, bg_item, used); btrfs_set_block_group_flags(buf, bg_item, BTRFS_BLOCK_GROUP_SYSTEM); btrfs_set_block_group_chunk_objectid(buf, bg_item, chunk_objectid); } static int create_block_group_tree(int fd, struct btrfs_mkfs_config *cfg, struct extent_buffer *buf, u64 bg_offset, u64 bg_size, u64 bg_used) { int ret; u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; /* * For extent-tree-v2, chunk_objectid of block group item is reused * to indicate which extent-tree the block group is in. * * Thus for the initial image, we should set the chunk_objectid to 0, * as all initial bgs are in the extent tree with global id 0. 
*/ if (cfg->features.incompat_flags & BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2) chunk_objectid = 0; memset(buf->data + sizeof(struct btrfs_header), 0, cfg->nodesize - sizeof(struct btrfs_header)); write_block_group_item(buf, 0, bg_offset, bg_size, bg_used, chunk_objectid, cfg->leaf_data_size - sizeof(struct btrfs_block_group_item)); btrfs_set_header_bytenr(buf, cfg->blocks[MKFS_BLOCK_GROUP_TREE]); btrfs_set_header_owner(buf, BTRFS_BLOCK_GROUP_TREE_OBJECTID); btrfs_set_header_nritems(buf, 1); csum_tree_block_size(buf, btrfs_csum_type_size(cfg->csum_type), 0, cfg->csum_type); ret = btrfs_pwrite(fd, buf->data, cfg->nodesize, cfg->blocks[MKFS_BLOCK_GROUP_TREE], cfg->zone_size); if (ret != cfg->nodesize) return ret < 0 ? -errno : -EIO; return 0; } static u64 zoned_system_group_offset(u64 zone_size) { const int zone_shift = ilog2(zone_size); u32 zone_num = BTRFS_NR_SB_LOG_ZONES; u64 start = (u64)zone_num * zone_size; u32 sb_zones[BTRFS_SUPER_MIRROR_MAX]; int i; for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) sb_zones[i] = sb_zone_number(zone_shift, i); for (;;) { for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { if (zone_num == sb_zones[i] || !(btrfs_sb_offset(i) + BTRFS_SUPER_INFO_SIZE <= start || start + zone_size <= btrfs_sb_offset(i))) goto next; } return start; next: zone_num++; start += zone_size; } __builtin_unreachable(); } /* * Add @block into the @blocks array. * * The @blocks should already be in ascending order and no duplicate. */ static void mkfs_blocks_add(enum btrfs_mkfs_block *blocks, int *blocks_nr, enum btrfs_mkfs_block to_add) { int i; for (i = 0; i < *blocks_nr; i++) { /* The target is already in the array. */ if (blocks[i] == to_add) return; /* * We find the first one past @to_add, move the array one slot * right, insert a new one. */ if (blocks[i] > to_add) { memmove(blocks + i + 1, blocks + i, *blocks_nr - i); blocks[i] = to_add; (*blocks_nr)++; return; } /* Current one still smaller than @to_add, go to next slot. 
*/ } /* All slots iterated and not match, insert into the last slot. */ blocks[i] = to_add; (*blocks_nr)++; return; } /* * Remove @block from the @blocks array. * * The @blocks should already be in ascending order and no duplicate. */ static void mkfs_blocks_remove(enum btrfs_mkfs_block *blocks, int *blocks_nr, enum btrfs_mkfs_block to_remove) { int i; for (i = 0; i < *blocks_nr; i++) { /* Found the target, move the array one slot left. */ if (blocks[i] == to_remove) { memmove(blocks + i, blocks + i + 1, *blocks_nr - i - 1); (*blocks_nr)--; } } /* Nothing found, exit directly. */ return; } /* * @fs_uuid - if NULL, generates a UUID, returns back the new filesystem UUID * * The superblock signature is not valid, denotes a partially created * filesystem, needs to be finalized. * * The temporary fs will have the following chunk layout: * Device extent: * 0 1M 5M ...... * | Reserved | dev extent for SYS chunk | * * And chunk mapping will be: * Chunk mapping: * 0 1M 5M * | | System chunk, 1:1 mapped | * * That's to say, there will only be *ONE* system chunk, mapped to * [1M, 5M) physical offset. * And the only chunk is also in logical address [1M, 5M), containing * all essential tree blocks. 
*/
int make_btrfs(int fd, struct btrfs_mkfs_config *cfg)
{
	struct btrfs_super_block super;
	struct extent_buffer *buf;
	struct btrfs_disk_key disk_key;
	struct btrfs_extent_item *extent_item;
	struct btrfs_chunk *chunk;
	struct btrfs_dev_item *dev_item;
	struct btrfs_dev_extent *dev_extent;
	enum btrfs_mkfs_block blocks[MKFS_BLOCK_COUNT];
	u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
	u8 *ptr;
	int i;
	int ret;
	int blocks_nr;
	int blk;
	u32 itemoff;
	u32 nritems = 0;
	u64 first_free;
	u64 ref_root;
	u32 array_size;
	u32 item_size;
	u64 total_used = 0;
	int skinny_metadata = !!(cfg->features.incompat_flags &
				 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
	u64 num_bytes;
	u64 system_group_offset = BTRFS_BLOCK_RESERVED_1M_FOR_SUPER;
	u64 system_group_size = BTRFS_MKFS_SYSTEM_GROUP_SIZE;
	bool add_block_group = true;
	bool free_space_tree = !!(cfg->features.compat_ro_flags &
				  BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE);
	bool block_group_tree = !!(cfg->features.compat_ro_flags &
				   BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE);
	bool extent_tree_v2 = !!(cfg->features.incompat_flags &
				 BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2);

	/* Start from the default block list and adjust it to the features. */
	memcpy(blocks, default_blocks,
	       sizeof(enum btrfs_mkfs_block) * ARRAY_SIZE(default_blocks));
	blocks_nr = ARRAY_SIZE(default_blocks);

	/*
	 * Add one new block for block group tree.
	 * And for block group tree, we don't need to add block group item
	 * into extent tree, the item will be handled in block group tree
	 * initialization.
	 */
	if (block_group_tree) {
		mkfs_blocks_add(blocks, &blocks_nr, MKFS_BLOCK_GROUP_TREE);
		add_block_group = false;
	}

	/* Don't include the free space tree in the blocks to process. */
	if (!free_space_tree)
		mkfs_blocks_remove(blocks, &blocks_nr, MKFS_FREE_SPACE_TREE);

	/* On zoned devices the system group is exactly one zone. */
	if ((cfg->features.incompat_flags & BTRFS_FEATURE_INCOMPAT_ZONED)) {
		system_group_offset = zoned_system_group_offset(cfg->zone_size);
		system_group_size = cfg->zone_size;
	}

	/*
	 * One scratch buffer is reused for every tree block and finally for
	 * the superblock.
	 * NOTE(review): the memory comes from malloc(); only buf->data and
	 * buf->len are initialized in this function - confirm that the helpers
	 * called with @buf do not read any other extent_buffer field.
	 */
	buf = malloc(sizeof(*buf) + max(cfg->sectorsize, cfg->nodesize));
	if (!buf)
		return -ENOMEM;

	/*
	 * BTRFS_SUPER_INFO_OFFSET + sectorsize rounded up to a sectorsize
	 * boundary (the mask assumes sectorsize is a power of two); no tree
	 * block may start below this offset, see the check further down.
	 */
	first_free = BTRFS_SUPER_INFO_OFFSET + cfg->sectorsize * 2 - 1;
	first_free &= ~((u64)cfg->sectorsize - 1);

	memset(&super, 0, sizeof(super));

	/* Round the device size down to a multiple of sectorsize. */
	num_bytes = (cfg->num_bytes / cfg->sectorsize) * cfg->sectorsize;
	/*
	 * An empty UUID string means "generate one and hand it back".
	 * NOTE(review): the results of uuid_parse() are not checked here;
	 * presumably the strings were validated by the caller - confirm.
	 */
	if (!*cfg->fs_uuid) {
		uuid_generate(super.fsid);
		uuid_unparse(super.fsid, cfg->fs_uuid);
	} else {
		uuid_parse(cfg->fs_uuid, super.fsid);
	}
	if (!*cfg->dev_uuid) {
		uuid_generate(super.dev_item.uuid);
		uuid_unparse(super.dev_item.uuid, cfg->dev_uuid);
	} else {
		uuid_parse(cfg->dev_uuid, super.dev_item.uuid);
	}
	uuid_generate(chunk_tree_uuid);

	/*
	 * Lay the tree blocks out back to back from the start of the system
	 * group, one nodesize each, in the order of the @blocks array.
	 */
	for (i = 0; i < blocks_nr; i++) {
		blk = blocks[i];

		cfg->blocks[blk] = system_group_offset + cfg->nodesize * i;
		total_used += cfg->nodesize;
	}

	/* Fill the in-memory superblock; it is written out last. */
	btrfs_set_super_bytenr(&super, BTRFS_SUPER_INFO_OFFSET);
	btrfs_set_super_num_devices(&super, 1);
	btrfs_set_super_magic(&super, BTRFS_MAGIC_TEMPORARY);
	btrfs_set_super_generation(&super, 1);
	btrfs_set_super_root(&super, cfg->blocks[MKFS_ROOT_TREE]);
	btrfs_set_super_chunk_root(&super, cfg->blocks[MKFS_CHUNK_TREE]);
	btrfs_set_super_total_bytes(&super, num_bytes);
	btrfs_set_super_bytes_used(&super, total_used);
	btrfs_set_super_sectorsize(&super, cfg->sectorsize);
	super.__unused_leafsize = cpu_to_le32(cfg->nodesize);
	btrfs_set_super_nodesize(&super, cfg->nodesize);
	btrfs_set_super_stripesize(&super, cfg->stripesize);
	btrfs_set_super_csum_type(&super, cfg->csum_type);
	btrfs_set_super_chunk_root_generation(&super, 1);
	/* cache_generation is 0 for zoned or free-space-tree, -1 otherwise. */
	if (cfg->features.incompat_flags & BTRFS_FEATURE_INCOMPAT_ZONED)
		btrfs_set_super_cache_generation(&super, 0);
	else
		btrfs_set_super_cache_generation(&super, -1);
	btrfs_set_super_incompat_flags(&super, cfg->features.incompat_flags);
	if (free_space_tree)
		btrfs_set_super_cache_generation(&super, 0);
	btrfs_set_super_compat_ro_flags(&super, cfg->features.compat_ro_flags);
	if (extent_tree_v2)
		btrfs_set_super_nr_global_roots(&super, 1);
	if (cfg->label)
		strncpy_null(super.label, cfg->label, BTRFS_LABEL_SIZE);

	/* create the tree of root objects */
	memset(buf->data, 0, cfg->nodesize);
	buf->len = cfg->nodesize;
	/*
	 * The header fields set here (generation, backref rev, fsid, chunk
	 * tree uuid) are kept for all following tree blocks; only bytenr,
	 * owner and nritems are rewritten per block.
	 */
	btrfs_set_header_bytenr(buf, cfg->blocks[MKFS_ROOT_TREE]);
	btrfs_set_header_generation(buf, 1);
	btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(buf, BTRFS_ROOT_TREE_OBJECTID);
	write_extent_buffer(buf, super.fsid,
			    btrfs_header_fsid(), BTRFS_FSID_SIZE);

	write_extent_buffer(buf, chunk_tree_uuid,
			    btrfs_header_chunk_tree_uuid(buf),
			    BTRFS_UUID_SIZE);

	ret = btrfs_create_tree_root(fd, cfg, buf, blocks, blocks_nr);
	if (ret < 0)
		goto out;

	/* create the items for the extent tree */
	memset(buf->data + sizeof(struct btrfs_header), 0,
	       cfg->nodesize - sizeof(struct btrfs_header));
	nritems = 0;
	itemoff = cfg->leaf_data_size;
	for (i = 0; i < blocks_nr; i++) {
		blk = blocks[i];

		/* Add the block group item for our temporary chunk. */
		/*
		 * It is emitted once, before the items of the first block
		 * located past the start of the system group.
		 */
		if (cfg->blocks[blk] > system_group_offset && add_block_group) {
			itemoff -= sizeof(struct btrfs_block_group_item);
			write_block_group_item(buf, nritems,
					       system_group_offset,
					       system_group_size, total_used,
					       BTRFS_FIRST_CHUNK_TREE_OBJECTID,
					       itemoff);
			add_block_group = false;
			nritems++;
		}

		/* Non-skinny extent items carry a btrfs_tree_block_info. */
		item_size = sizeof(struct btrfs_extent_item);
		if (!skinny_metadata)
			item_size += sizeof(struct btrfs_tree_block_info);

		/* Sanity checks on the block layout computed above. */
		if (cfg->blocks[blk] < first_free) {
			error("block[%d] below first free: %llu < %llu",
					i, cfg->blocks[blk], first_free);
			ret = -EINVAL;
			goto out;
		}
		if (i > 0 && cfg->blocks[blk] < cfg->blocks[blocks[i - 1]]) {
			error("blocks %d and %d in reverse order: %llu < %llu",
				blk, blocks[i - 1],
				cfg->blocks[blk],
				cfg->blocks[blocks[i - 1]]);
			ret = -EINVAL;
			goto out;
		}

		/* create extent item */
		itemoff -= item_size;
		btrfs_set_disk_key_objectid(&disk_key, cfg->blocks[blk]);
		if (skinny_metadata) {
			btrfs_set_disk_key_type(&disk_key,
						BTRFS_METADATA_ITEM_KEY);
			btrfs_set_disk_key_offset(&disk_key, 0);
		} else {
			btrfs_set_disk_key_type(&disk_key,
						BTRFS_EXTENT_ITEM_KEY);
			btrfs_set_disk_key_offset(&disk_key, cfg->nodesize);
		}
		btrfs_set_item_key(buf, &disk_key, nritems);
		btrfs_set_item_offset(buf, nritems, itemoff);
		btrfs_set_item_size(buf, nritems, item_size);
		extent_item = btrfs_item_ptr(buf, nritems,
					     struct btrfs_extent_item);
		btrfs_set_extent_refs(buf, extent_item, 1);
		btrfs_set_extent_generation(buf, extent_item, 1);
		btrfs_set_extent_flags(buf, extent_item,
				       BTRFS_EXTENT_FLAG_TREE_BLOCK);
		nritems++;

		/* create extent ref */
		/* A zero-sized item: the owning root is the key offset. */
		ref_root = reference_root_table[blk];
		btrfs_set_disk_key_objectid(&disk_key, cfg->blocks[blk]);
		btrfs_set_disk_key_offset(&disk_key, ref_root);
		btrfs_set_disk_key_type(&disk_key, BTRFS_TREE_BLOCK_REF_KEY);
		btrfs_set_item_key(buf, &disk_key, nritems);
		btrfs_set_item_offset(buf, nritems, itemoff);
		btrfs_set_item_size(buf, nritems, 0);
		nritems++;
	}
	btrfs_set_header_bytenr(buf, cfg->blocks[MKFS_EXTENT_TREE]);
	btrfs_set_header_owner(buf, BTRFS_EXTENT_TREE_OBJECTID);
	btrfs_set_header_nritems(buf, nritems);
	csum_tree_block_size(buf, btrfs_csum_type_size(cfg->csum_type), 0,
			     cfg->csum_type);
	ret = btrfs_pwrite(fd, buf->data, cfg->nodesize,
			   cfg->blocks[MKFS_EXTENT_TREE], cfg->zone_size);
	if (ret != cfg->nodesize) {
		ret = (ret < 0 ? -errno : -EIO);
		goto out;
	}

	/* create the chunk tree */
	memset(buf->data + sizeof(struct btrfs_header), 0,
	       cfg->nodesize - sizeof(struct btrfs_header));
	nritems = 0;
	item_size = sizeof(*dev_item);
	itemoff = cfg->leaf_data_size - item_size;

	/* first device 1 (there is no device 0) */
	btrfs_set_disk_key_objectid(&disk_key, BTRFS_DEV_ITEMS_OBJECTID);
	btrfs_set_disk_key_offset(&disk_key, 1);
	btrfs_set_disk_key_type(&disk_key, BTRFS_DEV_ITEM_KEY);
	btrfs_set_item_key(buf, &disk_key, nritems);
	btrfs_set_item_offset(buf, nritems, itemoff);
	btrfs_set_item_size(buf, nritems, item_size);

	dev_item = btrfs_item_ptr(buf, nritems, struct btrfs_dev_item);
	btrfs_set_device_id(buf, dev_item, 1);
	btrfs_set_device_generation(buf, dev_item, 0);
	btrfs_set_device_total_bytes(buf, dev_item, num_bytes);
	btrfs_set_device_bytes_used(buf, dev_item, system_group_size);
	btrfs_set_device_io_align(buf, dev_item, cfg->sectorsize);
	btrfs_set_device_io_width(buf, dev_item, cfg->sectorsize);
	btrfs_set_device_sector_size(buf, dev_item, cfg->sectorsize);
	btrfs_set_device_type(buf, dev_item, 0);

	write_extent_buffer(buf, super.dev_item.uuid,
			    (unsigned long)btrfs_device_uuid(dev_item),
			    BTRFS_UUID_SIZE);
	write_extent_buffer(buf, super.fsid,
			    (unsigned long)btrfs_device_fsid(dev_item),
			    BTRFS_UUID_SIZE);
	/* Mirror the finished dev item into the superblock. */
	read_extent_buffer(buf, &super.dev_item, (unsigned long)dev_item,
			   sizeof(*dev_item));

	nritems++;
	item_size = btrfs_chunk_item_size(1);
	itemoff = itemoff - item_size;

	/* then we have chunk 0 */
	btrfs_set_disk_key_objectid(&disk_key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
	btrfs_set_disk_key_offset(&disk_key, system_group_offset);
	btrfs_set_disk_key_type(&disk_key, BTRFS_CHUNK_ITEM_KEY);
	btrfs_set_item_key(buf, &disk_key, nritems);
	btrfs_set_item_offset(buf, nritems, itemoff);
	btrfs_set_item_size(buf, nritems, item_size);

	chunk = btrfs_item_ptr(buf, nritems, struct btrfs_chunk);
	btrfs_set_chunk_length(buf, chunk, system_group_size);
	btrfs_set_chunk_owner(buf, chunk, BTRFS_EXTENT_TREE_OBJECTID);
	btrfs_set_chunk_stripe_len(buf, chunk, BTRFS_STRIPE_LEN);
	btrfs_set_chunk_type(buf, chunk, BTRFS_BLOCK_GROUP_SYSTEM);
	btrfs_set_chunk_io_align(buf, chunk, cfg->sectorsize);
	btrfs_set_chunk_io_width(buf, chunk, cfg->sectorsize);
	btrfs_set_chunk_sector_size(buf, chunk, cfg->sectorsize);
	/* Single stripe on device 1, physical offset == logical offset. */
	btrfs_set_chunk_num_stripes(buf, chunk, 1);
	btrfs_set_stripe_devid_nr(buf, chunk, 0, 1);
	btrfs_set_stripe_offset_nr(buf, chunk, 0, system_group_offset);
	nritems++;

	write_extent_buffer(buf, super.dev_item.uuid,
			    (unsigned long)btrfs_stripe_dev_uuid(&chunk->stripe),
			    BTRFS_UUID_SIZE);

	/* copy the key for the chunk to the system array */
	ptr = super.sys_chunk_array;
	array_size = sizeof(disk_key);

	memcpy(ptr, &disk_key, sizeof(disk_key));
	ptr += sizeof(disk_key);

	/* copy the chunk to the system array */
	read_extent_buffer(buf, ptr, (unsigned long)chunk, item_size);
	array_size += item_size;
	ptr += item_size;
	btrfs_set_super_sys_array_size(&super, array_size);

	btrfs_set_header_bytenr(buf, cfg->blocks[MKFS_CHUNK_TREE]);
	btrfs_set_header_owner(buf, BTRFS_CHUNK_TREE_OBJECTID);
	btrfs_set_header_nritems(buf, nritems);
	csum_tree_block_size(buf, btrfs_csum_type_size(cfg->csum_type), 0,
			     cfg->csum_type);
	ret = btrfs_pwrite(fd, buf->data, cfg->nodesize,
			   cfg->blocks[MKFS_CHUNK_TREE], cfg->zone_size);
	if (ret != cfg->nodesize) {
		ret = (ret < 0 ? -errno : -EIO);
		goto out;
	}

	/* create the device tree */
	memset(buf->data + sizeof(struct btrfs_header), 0,
	       cfg->nodesize - sizeof(struct btrfs_header));
	nritems = 0;
	itemoff = cfg->leaf_data_size - sizeof(struct btrfs_dev_extent);

	/* One dev extent on device 1 backing the system chunk. */
	btrfs_set_disk_key_objectid(&disk_key, 1);
	btrfs_set_disk_key_offset(&disk_key, system_group_offset);
	btrfs_set_disk_key_type(&disk_key, BTRFS_DEV_EXTENT_KEY);
	btrfs_set_item_key(buf, &disk_key, nritems);
	btrfs_set_item_offset(buf, nritems, itemoff);
	btrfs_set_item_size(buf, nritems,
			    sizeof(struct btrfs_dev_extent));
	dev_extent = btrfs_item_ptr(buf, nritems, struct btrfs_dev_extent);
	btrfs_set_dev_extent_chunk_tree(buf, dev_extent,
					BTRFS_CHUNK_TREE_OBJECTID);
	btrfs_set_dev_extent_chunk_objectid(buf, dev_extent,
					BTRFS_FIRST_CHUNK_TREE_OBJECTID);
	btrfs_set_dev_extent_chunk_offset(buf, dev_extent, system_group_offset);

	write_extent_buffer(buf, chunk_tree_uuid,
		    (unsigned long)btrfs_dev_extent_chunk_tree_uuid(dev_extent),
		    BTRFS_UUID_SIZE);

	btrfs_set_dev_extent_length(buf, dev_extent, system_group_size);
	nritems++;

	btrfs_set_header_bytenr(buf, cfg->blocks[MKFS_DEV_TREE]);
	btrfs_set_header_owner(buf, BTRFS_DEV_TREE_OBJECTID);
	btrfs_set_header_nritems(buf, nritems);
	csum_tree_block_size(buf, btrfs_csum_type_size(cfg->csum_type), 0,
			     cfg->csum_type);
	ret = btrfs_pwrite(fd, buf->data, cfg->nodesize,
			   cfg->blocks[MKFS_DEV_TREE], cfg->zone_size);
	if (ret != cfg->nodesize) {
		ret = (ret < 0 ?
-errno : -EIO); goto out; } /* create the FS root */ ret = btrfs_write_empty_tree(fd, cfg, buf, BTRFS_FS_TREE_OBJECTID, cfg->blocks[MKFS_FS_TREE]); if (ret) goto out; /* finally create the csum root */ ret = btrfs_write_empty_tree(fd, cfg, buf, BTRFS_CSUM_TREE_OBJECTID, cfg->blocks[MKFS_CSUM_TREE]); if (ret) goto out; if (free_space_tree) { ret = create_free_space_tree(fd, cfg, buf, system_group_offset, system_group_size, system_group_offset + total_used); if (ret) goto out; } if (block_group_tree) { ret = create_block_group_tree(fd, cfg, buf, system_group_offset, system_group_size, total_used); if (ret) goto out; } /* and write out the super block */ memset(buf->data, 0, BTRFS_SUPER_INFO_SIZE); memcpy(buf->data, &super, sizeof(super)); buf->len = BTRFS_SUPER_INFO_SIZE; csum_tree_block_size(buf, btrfs_csum_type_size(cfg->csum_type), 0, cfg->csum_type); ret = sbwrite(fd, buf->data, BTRFS_SUPER_INFO_OFFSET); if (ret != BTRFS_SUPER_INFO_SIZE) { ret = (ret < 0 ? -errno : -EIO); goto out; } ret = fsync(fd); if (ret) goto out; ret = 0; out: free(buf); return ret; } /* * Btrfs minimum size calculation is complicated, it should include at least: * 1. system group size * 2. minimum global block reserve * 3. metadata used at mkfs * 4. space reservation to create uuid for first mount. * Also, raid factor should also be taken into consideration. * To avoid the overkill calculation, (system group + global block rsv) * 2 * for *EACH* device should be good enough. */ static u64 btrfs_min_global_blk_rsv_size(u32 nodesize) { return (u64)nodesize << 10; } u64 btrfs_min_dev_size(u32 nodesize, bool mixed, u64 zone_size, u64 meta_profile, u64 data_profile) { u64 reserved = 0; u64 meta_size; u64 data_size; u64 dev_stripes; if (zone_size) { /* 2 zones for the primary superblock. */ reserved += 2 * zone_size; /* * 1 zone each for the initial SINGLE system, SINGLE metadata, * and SINGLE data block group. 
*/
		reserved += 3 * zone_size;

		/*
		 * On non-SINGLE profile, we need to add real system and
		 * metadata block group. And, we also need to add a space for a
		 * tree-log block group.
		 *
		 * SINGLE profile can reuse the initial block groups and only
		 * need to add a tree-log block group
		 */
		/* DUP keeps two stripes on this device, everything else one. */
		dev_stripes = ((meta_profile & BTRFS_BLOCK_GROUP_DUP) ? 2 : 1);
		if (meta_profile & BTRFS_BLOCK_GROUP_PROFILE_MASK)
			meta_size = 3 * dev_stripes * zone_size;
		else
			meta_size = dev_stripes * zone_size;
		reserved += meta_size;

		/*
		 * On non-SINGLE profile, we need to add real data block group.
		 * And, we also need to add a space for a data relocation block
		 * group.
		 *
		 * SINGLE profile can reuse the initial block groups and only
		 * need to add a data relocation block group.
		 */
		dev_stripes = (data_profile & BTRFS_BLOCK_GROUP_DUP) ? 2 : 1;
		if (data_profile & BTRFS_BLOCK_GROUP_PROFILE_MASK)
			data_size = 2 * dev_stripes * zone_size;
		else
			data_size = dev_stripes * zone_size;
		reserved += data_size;

		return reserved;
	}

	/* Mixed block groups: twice (system group + global block reserve). */
	if (mixed)
		return 2 * (BTRFS_MKFS_SYSTEM_GROUP_SIZE +
			    btrfs_min_global_blk_rsv_size(nodesize));

	/*
	 * Minimal size calculation is complex due to several factors:
	 * 0) Reserved 1M range.
	 *
	 * 1) Temporary chunk reuse
	 *    If specified chunk profile is SINGLE, we can reuse
	 *    temporary chunks, no need to allocate new chunks.
	 *
	 * 2) Different minimal chunk size for different profiles:
	 *    For initial sys chunk, chunk size is fixed to 4M.
	 *    For single profile, minimal chunk size is 8M for all.
	 *    For other profiles, minimal chunk and stripe size ranges from 8M
	 *    to 64M.
	 *
	 * To calculate it a little easier, here we assume we don't reuse any
	 * temporary chunk, and calculate the size completely by ourselves.
	 *
	 * Temporary chunks sizes are always fixed:
	 * One initial sys chunk, one SINGLE meta, and one SINGLE data.
	 * The latter two are all 8M, according to @calc_size of
	 * btrfs_alloc_chunk().
*/ reserved += BTRFS_BLOCK_RESERVED_1M_FOR_SUPER + BTRFS_MKFS_SYSTEM_GROUP_SIZE + SZ_8M * 2; /* * For real chunks, we need to select different sizes: * For SINGLE, it's still fixed to 8M (@calc_size). * For other profiles, refer to max(@min_stripe_size, @calc_size). * * And use the stripe size to calculate its physical used space. */ dev_stripes = ((meta_profile & BTRFS_BLOCK_GROUP_DUP) ? 2 : 1); if (meta_profile & BTRFS_BLOCK_GROUP_PROFILE_MASK) meta_size = dev_stripes * (SZ_8M + SZ_32M); else meta_size = dev_stripes * (SZ_8M + SZ_8M); reserved += meta_size; dev_stripes = ((data_profile & BTRFS_BLOCK_GROUP_DUP) ? 2 : 1); if (data_profile & BTRFS_BLOCK_GROUP_PROFILE_MASK) data_size = dev_stripes * SZ_64M; else data_size = dev_stripes * SZ_8M; reserved += data_size; return reserved; } #define isoctal(c) (((c) & ~7) == '0') static inline void translate(char *f, char *t) { while (*f != '\0') { if (*f == '\\' && isoctal(f[1]) && isoctal(f[2]) && isoctal(f[3])) { *t++ = 64*(f[1] & 7) + 8*(f[2] & 7) + (f[3] & 7); f += 4; } else *t++ = *f++; } *t = '\0'; return; } /* * Checks if the swap device. * Returns 1 if swap device, < 0 on error or 0 if not swap device. 
*/ static int is_swap_device(const char *file) { FILE *f; struct stat st_buf; dev_t dev; ino_t ino = 0; char tmp[PATH_MAX]; char buf[PATH_MAX]; char *cp; int ret = 0; if (stat(file, &st_buf) < 0) return -errno; if (S_ISBLK(st_buf.st_mode)) dev = st_buf.st_rdev; else if (S_ISREG(st_buf.st_mode)) { dev = st_buf.st_dev; ino = st_buf.st_ino; } else return 0; if ((f = fopen("/proc/swaps", "r")) == NULL) return 0; /* skip the first line */ if (fgets(tmp, sizeof(tmp), f) == NULL) goto out; while (fgets(tmp, sizeof(tmp), f) != NULL) { if ((cp = strchr(tmp, ' ')) != NULL) *cp = '\0'; if ((cp = strchr(tmp, '\t')) != NULL) *cp = '\0'; translate(tmp, buf); if (stat(buf, &st_buf) != 0) continue; if (S_ISBLK(st_buf.st_mode)) { if (dev == st_buf.st_rdev) { ret = 1; break; } } else if (S_ISREG(st_buf.st_mode)) { if (dev == st_buf.st_dev && ino == st_buf.st_ino) { ret = 1; break; } } } out: fclose(f); return ret; } /* * Check for signature at the offset 0 that would be present in case of zoned * device. Workaround for old blkid that do not recognize the format to avoid * accidental overwrites. */ static int check_btrfs_signature_zoned(const char *device) { int fd; int ret; struct btrfs_super_block sb; fd = open(device, O_RDONLY); if (fd < 0) return -1; ret = pread(fd, &sb, BTRFS_SUPER_INFO_SIZE, 0); if (ret < 0) { ret = -1; goto out; } if (btrfs_super_magic(&sb) == BTRFS_MAGIC) ret = 1; else ret = 0; out: close(fd); return ret; } /* * Check for existing filesystem or partition table on device. 
* Returns:
 *	 1 for existing fs or partition
 *	 0 for nothing found
 *	-1 for internal error
 */
static int check_overwrite(const char *device)
{
	const char	*type;
	blkid_probe	pr = NULL;
	int		ret;
	blkid_loff_t	size;

	if (!device || !*device)
		return 0;

	ret = -1; /* will reset on success of all setup calls */

	pr = blkid_new_probe_from_filename(device);
	if (!pr)
		goto out;

	size = blkid_probe_get_size(pr);
	if (size < 0)
		goto out;

	/* nothing to overwrite on a 0-length device */
	if (size == 0) {
		ret = 0;
		goto out;
	}

	ret = blkid_probe_enable_partitions(pr, 1);
	if (ret < 0)
		goto out;

	ret = blkid_do_fullprobe(pr);
	if (ret < 0)
		goto out;

	/*
	 * Blkid returns 1 for nothing found and 0 when it finds a signature,
	 * but we want the exact opposite, so reverse the return value here.
	 *
	 * In addition print some useful diagnostics about what actually is
	 * on the device.
	 */
	if (ret) {
		ret = 0;
		goto out;
	}

	if (!blkid_probe_lookup_value(pr, "TYPE", &type, NULL)) {
		error("%s appears to contain an existing filesystem (%s)",
			device, type);
	} else if (!blkid_probe_lookup_value(pr, "PTTYPE", &type, NULL)) {
		error("%s appears to contain a partition table (%s)",
			device, type);
	} else {
		error("%s appears to contain something weird according to blkid",
			device);
	}
	ret = 1;

out:
	if (pr)
		blkid_free_probe(pr);
	if (ret == -1)
		error("probe of %s failed, cannot detect existing filesystem",
			device);

	/* Either nothing found or there was an error is a reason to double check */
	if (ret == 0 || ret == -1) {
		/* From here on the result of the signature check decides. */
		ret = check_btrfs_signature_zoned(device);
		if (ret > 0) {
			warning(
	"%s contains zoned btrfs signature but was not detected by blkid, please update",
				device);
			ret = 1;
		} else if (ret < 0) {
			/* No trailing newline, like every other message here. */
			warning(
	"cannot read superblock on %s, please check manually",
				device);
			ret = -1;
		}
	}

	return ret;
}

/*
 * Check if a device is suitable for btrfs
 * returns:
 *  true:  something is wrong, an error is printed
 *  false: all is fine
 */
bool test_dev_for_mkfs(const char *file, int force_overwrite)
{
	int ret, fd;
	struct stat st;

	ret =
is_swap_device(file); if (ret < 0) { errno = -ret; error("checking status of %s: %m", file); return true; } if (ret == 1) { error("%s is a swap device", file); return true; } ret = test_status_for_mkfs(file, force_overwrite); if (ret) return true; /* * Check if the device is busy. Open it in read-only mode to avoid triggering * udev events. */ fd = open(file, O_RDONLY | O_EXCL); if (fd < 0) { error("unable to open %s: %m", file); return true; } if (fstat(fd, &st)) { error("unable to stat %s: %m", file); close(fd); return true; } if (!S_ISBLK(st.st_mode)) { error("%s is not a block device", file); close(fd); return true; } close(fd); return false; } /* * check if the file (device) is formatted or mounted */ bool test_status_for_mkfs(const char *file, bool force_overwrite) { int ret; if (!force_overwrite) { if (check_overwrite(file)) { error("use the -f option to force overwrite of %s", file); return true; } } ret = check_mounted(file); if (ret < 0) { errno = -ret; if (force_overwrite) { warning("forced overwrite but cannot check mount status of %s: %m", file); return false; } error("cannot check mount status of %s: %m", file); return true; } if (ret == 1) { error("%s is mounted", file); return true; } return false; } int is_vol_small(const char *file) { int fd = -1; int e; struct stat st; u64 size; fd = open(file, O_RDONLY); if (fd < 0) return -errno; if (fstat(fd, &st) < 0) { e = -errno; close(fd); return e; } size = device_get_partition_size_fd_stat(fd, &st); if (size == 0) { close(fd); return -1; } if (size < BTRFS_MKFS_SMALL_VOLUME_SIZE) { close(fd); return 1; } else { close(fd); return 0; } } int test_minimum_size(const char *file, u64 min_dev_size) { int fd; struct stat statbuf; fd = open(file, O_RDONLY); if (fd < 0) return -errno; if (stat(file, &statbuf) < 0) { close(fd); return -errno; } if (device_get_partition_size_fd_stat(fd, &statbuf) < min_dev_size) { close(fd); return 1; } close(fd); return 0; }