btrfs-progs/mkfs.c

538 lines
13 KiB
C
Raw Normal View History

2007-06-12 13:07:11 +00:00
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
2007-02-20 21:41:09 +00:00
#define _XOPEN_SOURCE 500
#define _GNU_SOURCE
#ifndef __CHECKER__
#include <sys/ioctl.h>
#include <sys/mount.h>
#include "ioctl.h"
#endif
2007-02-20 21:41:09 +00:00
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <getopt.h>
2007-04-05 18:29:12 +00:00
#include <uuid/uuid.h>
2007-06-12 15:39:09 +00:00
#include <linux/fs.h>
#include <ctype.h>
2007-06-12 15:39:09 +00:00
#include "kerncompat.h"
2007-02-20 21:41:09 +00:00
#include "ctree.h"
#include "disk-io.h"
#include "volumes.h"
2007-03-21 15:13:29 +00:00
#include "transaction.h"
#include "utils.h"
#include "version.h"
2007-02-20 21:41:09 +00:00
/*
 * Convert a size string such as "4096", "16k", "256M" or "1g" into a
 * byte count.
 *
 * A trailing, case-insensitive unit suffix selects the multiplier:
 * 'b' = 1, 'k' = 1024, 'm' = 1024^2, 'g' = 1024^3.  When a suffix is
 * present it is overwritten with '\0', so @s must be writable.
 *
 * Prints a message to stderr and exits with status 1 when the string is
 * empty, does not start with a digit, carries an unknown suffix, has
 * trailing characters after the number, or when applying the multiplier
 * would overflow u64.
 */
static u64 parse_size(char *s)
{
	size_t len = strlen(s);
	unsigned long long value;
	u64 mult = 1;
	char *end;
	char c;

	/* s[len - 1] below would read before the buffer for "" */
	if (len == 0) {
		fprintf(stderr, "Empty size value\n");
		exit(1);
	}

	/* <ctype.h> functions require a value representable as unsigned char */
	if (!isdigit((unsigned char)s[len - 1])) {
		c = tolower((unsigned char)s[len - 1]);
		switch (c) {
		case 'g':
			mult *= 1024;
			/* fallthrough */
		case 'm':
			mult *= 1024;
			/* fallthrough */
		case 'k':
			mult *= 1024;
			/* fallthrough */
		case 'b':
			break;
		default:
			fprintf(stderr, "Unknown size descriptor %c\n", c);
			exit(1);
		}
		s[len - 1] = '\0';
	}

	/* Rejects "", signs and leading junk before strtoull() sees them */
	if (!isdigit((unsigned char)s[0])) {
		fprintf(stderr, "Invalid size value %s\n", s);
		exit(1);
	}

	/*
	 * strtoull() covers the full 64-bit range; atol() is limited to
	 * long, which is only 32 bits wide on ILP32 targets.
	 */
	value = strtoull(s, &end, 10);
	if (*end != '\0') {
		fprintf(stderr, "Invalid size value %s\n", s);
		exit(1);
	}
	if (value > (u64)-1 / mult) {
		fprintf(stderr, "Size value %s is too large\n", s);
		exit(1);
	}
	return value * mult;
}
/*
 * Create the initial block groups and the root directories of a freshly
 * made filesystem.
 *
 * First transaction: a SYSTEM block group is made at offset 0 with size
 * BTRFS_MKFS_SYSTEM_GROUP_SIZE (accounting for the bytes already used
 * according to the super block), then a METADATA chunk is allocated and
 * its block group made.  fs_info->system_allocs is set for the duration
 * of these calls.
 *
 * Second transaction: a DATA chunk and block group are created, the root
 * directories of the tree root and of @root are made, and a "default"
 * dir item plus inode ref are inserted into the tree root.
 *
 * Returns 0 on success.  If one of the directory/inode helpers fails its
 * non-zero return value is passed back and the second transaction is left
 * uncommitted.  Allocation failures trip BUG_ON().
 */
static int make_root_dir(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_key location;
	u64 bytes_used;
	u64 chunk_start = 0;
	u64 chunk_size = 0;
	int ret;

	trans = btrfs_start_transaction(root, 1);
	/* same check the second transaction below already had */
	BUG_ON(!trans);
	bytes_used = btrfs_super_bytes_used(&root->fs_info->super_copy);

	root->fs_info->system_allocs = 1;
	ret = btrfs_make_block_group(trans, root, bytes_used,
				     BTRFS_BLOCK_GROUP_SYSTEM,
				     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
				     0, BTRFS_MKFS_SYSTEM_GROUP_SIZE);
	BUG_ON(ret);

	ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
				&chunk_start, &chunk_size,
				BTRFS_BLOCK_GROUP_METADATA);
	BUG_ON(ret);
	ret = btrfs_make_block_group(trans, root, 0,
				     BTRFS_BLOCK_GROUP_METADATA,
				     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
				     chunk_start, chunk_size);
	BUG_ON(ret);

	root->fs_info->system_allocs = 0;
	btrfs_commit_transaction(trans, root);

	trans = btrfs_start_transaction(root, 1);
	BUG_ON(!trans);

	ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
				&chunk_start, &chunk_size,
				BTRFS_BLOCK_GROUP_DATA);
	BUG_ON(ret);
	ret = btrfs_make_block_group(trans, root, 0,
				     BTRFS_BLOCK_GROUP_DATA,
				     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
				     chunk_start, chunk_size);
	BUG_ON(ret);

	ret = btrfs_make_root_dir(trans, root->fs_info->tree_root,
				  BTRFS_ROOT_TREE_DIR_OBJECTID);
	if (ret)
		goto err;
	ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
	if (ret)
		goto err;

	/* the "default" entry points at the fs root key with offset -1 */
	memcpy(&location, &root->fs_info->fs_root->root_key, sizeof(location));
	location.offset = (u64)-1;
	ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
			"default", 7,
			btrfs_super_root_dir(&root->fs_info->super_copy),
			&location, BTRFS_FT_DIR, 0);
	if (ret)
		goto err;

	ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
				     "default", 7, location.objectid,
				     BTRFS_ROOT_TREE_DIR_OBJECTID, 0);
	if (ret)
		goto err;

	btrfs_commit_transaction(trans, root);
err:
	return ret;
}
2007-03-21 00:35:03 +00:00
/*
 * Run __btrfs_cow_block() on the current root node of each tree hanging
 * off @root->fs_info, in this order: fs, tree, extent, chunk, dev, csum.
 *
 * The buffer handed back by each call is released right away.  Any
 * failure trips BUG_ON(); otherwise 0 is returned.
 */
static int recow_roots(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root)
{
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *targets[] = {
		info->fs_root,
		info->tree_root,
		info->extent_root,
		info->chunk_root,
		info->dev_root,
		info->csum_root,
	};
	unsigned int idx;

	for (idx = 0; idx < sizeof(targets) / sizeof(targets[0]); idx++) {
		struct extent_buffer *cowed;
		int err;

		/* ->node is read here, immediately before the call */
		err = __btrfs_cow_block(trans, targets[idx],
					targets[idx]->node,
					NULL, 0, &cowed, 0, 0);
		BUG_ON(err);
		free_extent_buffer(cowed);
	}
	return 0;
}
/*
 * Allocate one chunk with the block group flags in @type and make the
 * matching block group in the extent root.
 *
 * Either step failing trips BUG_ON(), so the value returned is the
 * (zero) result of btrfs_make_block_group().
 */
static int create_one_raid_group(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root, u64 type)
{
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	u64 start;
	u64 size;
	int err;

	err = btrfs_alloc_chunk(trans, extent_root, &start, &size, type);
	BUG_ON(err);

	err = btrfs_make_block_group(trans, extent_root, 0, type,
				     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
				     start, size);
	BUG_ON(err);

	return err;
}
/*
 * Create the block groups for the requested data and metadata profiles.
 *
 * The profile bits that may be used depend on the device count stored in
 * the super block:
 *   1 device    -> DUP
 *   4 or more   -> RAID0, RAID1, RAID10
 *   otherwise   -> RAID0, RAID1
 *
 * If any requested metadata bit is allowed, a SYSTEM and a METADATA
 * group are created with those bits and recow_roots() is run.  A DATA
 * group is created only when there is more than one device and a
 * requested data bit is allowed.
 *
 * Failures trip BUG_ON(); returns 0.
 */
static int create_raid_groups(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root, u64 data_profile,
			      u64 metadata_profile)
{
	u64 num_devices = btrfs_super_num_devices(&root->fs_info->super_copy);
	u64 allowed;
	u64 meta_bits;
	u64 data_bits;
	int ret;

	if (num_devices == 1) {
		allowed = BTRFS_BLOCK_GROUP_DUP;
	} else if (num_devices >= 4) {
		allowed = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
			  BTRFS_BLOCK_GROUP_RAID10;
	} else {
		allowed = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1;
	}

	/* requested bits that this device count actually permits */
	meta_bits = allowed & metadata_profile;
	data_bits = allowed & data_profile;

	if (meta_bits) {
		ret = create_one_raid_group(trans, root,
					    BTRFS_BLOCK_GROUP_SYSTEM |
					    meta_bits);
		BUG_ON(ret);

		ret = create_one_raid_group(trans, root,
					    BTRFS_BLOCK_GROUP_METADATA |
					    meta_bits);
		BUG_ON(ret);

		ret = recow_roots(trans, root);
		BUG_ON(ret);
	}

	if (num_devices > 1 && data_bits) {
		ret = create_one_raid_group(trans, root,
					    BTRFS_BLOCK_GROUP_DATA |
					    data_bits);
		BUG_ON(ret);
	}
	return 0;
}
static int create_data_reloc_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_key location;
struct btrfs_root_item root_item;
struct extent_buffer *tmp;
u64 objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
int ret;
ret = btrfs_copy_root(trans, root, root->node, &tmp, objectid);
BUG_ON(ret);
memcpy(&root_item, &root->root_item, sizeof(root_item));
btrfs_set_root_bytenr(&root_item, tmp->start);
btrfs_set_root_level(&root_item, btrfs_header_level(tmp));
btrfs_set_root_generation(&root_item, trans->transid);
free_extent_buffer(tmp);
location.objectid = objectid;
location.type = BTRFS_ROOT_ITEM_KEY;
location.offset = 0;
ret = btrfs_insert_root(trans, root->fs_info->tree_root,
&location, &root_item);
BUG_ON(ret);
return 0;
}
2007-10-15 20:25:14 +00:00
/*
 * Write the command line synopsis and the build version to stderr, then
 * terminate the process with exit status 1.  Never returns.
 */
static void print_usage(void)
{
	/* one write of adjacent literals; the text itself is unchanged */
	fprintf(stderr,
		"usage: mkfs.btrfs [options] dev [ dev ... ]\n"
		"options:\n"
		"\t -A --alloc-start the offset to start the FS\n"
		"\t -b --byte-count total number of bytes in the FS\n"
		"\t -d --data data profile, raid0, raid1, raid10 or single\n"
		"\t -l --leafsize size of btree leaves\n"
		"\t -L --label set a label\n"
		"\t -m --metadata metadata profile, values like data profile\n"
		"\t -n --nodesize size of btree nodes\n"
		"\t -s --sectorsize min block allocation\n");
	fprintf(stderr, "%s\n", BTRFS_BUILD_VERSION);
	exit(1);
}
2007-03-21 00:35:03 +00:00
/*
 * Report which build this mkfs.btrfs belongs to on stderr and leave with
 * a successful exit status.  Never returns.
 */
static void print_version(void)
{
	const char *build = BTRFS_BUILD_VERSION;

	fprintf(stderr, "mkfs.btrfs, part of %s\n", build);
	exit(EXIT_SUCCESS);
}
/*
 * Map a profile name given on the command line to block group flags.
 *
 *   "raid0"  -> BTRFS_BLOCK_GROUP_RAID0
 *   "raid1"  -> BTRFS_BLOCK_GROUP_RAID1  | BTRFS_BLOCK_GROUP_DUP
 *   "raid10" -> BTRFS_BLOCK_GROUP_RAID10 | BTRFS_BLOCK_GROUP_DUP
 *   "single" -> 0
 *
 * Any other name is reported on stderr and print_usage() is called,
 * which exits the process.
 */
static u64 parse_profile(char *s)
{
	if (strcmp(s, "raid0") == 0)
		return BTRFS_BLOCK_GROUP_RAID0;
	if (strcmp(s, "raid1") == 0)
		return BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP;
	if (strcmp(s, "raid10") == 0)
		return BTRFS_BLOCK_GROUP_RAID10 | BTRFS_BLOCK_GROUP_DUP;
	if (strcmp(s, "single") == 0)
		return 0;

	fprintf(stderr, "Unknown option %s\n", s);
	print_usage();
	/* print_usage() exits; this only satisfies the return type */
	return 0;
}
/*
 * Validate a filesystem label and return a heap copy of it.
 *
 * The label may be at most BTRFS_LABEL_SIZE characters long and must not
 * contain '/' or '\'.  On any violation, or if the copy cannot be
 * allocated, a message is printed to stderr and the process exits with
 * status 1.
 *
 * The caller owns the returned string and releases it with free().
 *
 * NOTE(review): if the on-disk label field is BTRFS_LABEL_SIZE bytes
 * including the terminating NUL, the limit here should be
 * BTRFS_LABEL_SIZE - 1 -- confirm against the code that stores the label.
 */
static char *parse_label(char *input)
{
	size_t len = strlen(input);
	char *copy;

	if (len > BTRFS_LABEL_SIZE) {
		fprintf(stderr, "Label %s is too long (max %d)\n", input,
			BTRFS_LABEL_SIZE);
		exit(1);
	}

	if (strpbrk(input, "/\\") != NULL) {
		fprintf(stderr, "invalid label %s\n", input);
		exit(1);
	}

	copy = strdup(input);
	if (!copy) {
		/* a NULL label would otherwise be treated as "no label" */
		fprintf(stderr, "not enough memory to copy label %s\n", input);
		exit(1);
	}
	return copy;
}
/*
 * Long option table handed to getopt_long().  Each entry maps to the
 * short option letter in its last field; every option except --version
 * takes an argument.  The all-zero entry terminates the table.
 */
static struct option long_options[] = {
	{ "alloc-start", required_argument, NULL, 'A' },
	{ "byte-count",  required_argument, NULL, 'b' },
	{ "leafsize",    required_argument, NULL, 'l' },
	{ "label",       required_argument, NULL, 'L' },
	{ "metadata",    required_argument, NULL, 'm' },
	{ "nodesize",    required_argument, NULL, 'n' },
	{ "sectorsize",  required_argument, NULL, 's' },
	{ "data",        required_argument, NULL, 'd' },
	{ "version",     no_argument,       NULL, 'V' },
	{ NULL, 0, NULL, 0 }
};
2007-03-21 00:35:03 +00:00
int main(int ac, char **av)
{
char *file;
struct btrfs_root *root;
struct btrfs_trans_handle *trans;
char *label = NULL;
char *first_file;
2007-03-21 00:35:03 +00:00
u64 block_count = 0;
2008-03-24 19:04:49 +00:00
u64 dev_block_count = 0;
Btrfs: move data checksumming into a dedicated tree Btrfs stores checksums for each data block. Until now, they have been stored in the subvolume trees, indexed by the inode that is referencing the data block. This means that when we read the inode, we've probably read in at least some checksums as well. But, this has a few problems: * The checksums are indexed by logical offset in the file. When compression is on, this means we have to do the expensive checksumming on the uncompressed data. It would be faster if we could checksum the compressed data instead. * If we implement encryption, we'll be checksumming the plain text and storing that on disk. This is significantly less secure. * For either compression or encryption, we have to get the plain text back before we can verify the checksum as correct. This makes the raid layer balancing and extent moving much more expensive. * It makes the front end caching code more complex, as we have touch the subvolume and inodes as we cache extents. * There is potentitally one copy of the checksum in each subvolume referencing an extent. The solution used here is to store the extent checksums in a dedicated tree. This allows us to index the checksums by phyiscal extent start and length. It means: * The checksum is against the data stored on disk, after any compression or encryption is done. * The checksum is stored in a central location, and can be verified without following back references, or reading inodes. This makes compression significantly faster by reducing the amount of data that needs to be checksummed. It will also allow much faster raid management code in general. The checksums are indexed by a key with a fixed objectid (a magic value in ctree.h) and offset set to the starting byte of the extent. This allows us to copy the checksum items into the fsync log tree directly (or any other tree), without having to invent a second format for them. Signed-off-by: Chris Mason <chris.mason@oracle.com>
2008-12-08 22:00:31 +00:00
u64 blocks[7];
u64 alloc_start = 0;
u64 metadata_profile = BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP;
u64 data_profile = BTRFS_BLOCK_GROUP_RAID0;
u32 leafsize = getpagesize();
2007-10-15 20:25:14 +00:00
u32 sectorsize = 4096;
u32 nodesize = leafsize;
2007-11-30 16:30:24 +00:00
u32 stripesize = 4096;
2008-03-24 19:04:49 +00:00
int zero_end = 1;
int option_index = 0;
int fd;
int first_fd;
int ret;
int i;
2007-03-21 15:13:29 +00:00
2007-10-15 20:25:14 +00:00
while(1) {
int c;
c = getopt_long(ac, av, "A:b:l:n:s:m:d:L:V", long_options,
&option_index);
2007-10-15 20:25:14 +00:00
if (c < 0)
break;
switch(c) {
case 'A':
alloc_start = parse_size(optarg);
break;
case 'd':
data_profile = parse_profile(optarg);
break;
2007-10-15 20:25:14 +00:00
case 'l':
leafsize = parse_size(optarg);
2007-10-15 20:25:14 +00:00
break;
case 'L':
label = parse_label(optarg);
break;
case 'm':
metadata_profile = parse_profile(optarg);
break;
2007-10-15 20:25:14 +00:00
case 'n':
nodesize = parse_size(optarg);
2007-10-15 20:25:14 +00:00
break;
2007-11-30 16:30:24 +00:00
case 's':
sectorsize = parse_size(optarg);
2007-11-30 16:30:24 +00:00
break;
2008-03-24 19:04:49 +00:00
case 'b':
block_count = parse_size(optarg);
if (block_count < 256*1024*1024) {
fprintf(stderr, "File system size "
"%llu bytes is too small, "
"256M is required at least\n",
(unsigned long long)block_count);
exit(1);
}
2008-03-24 19:04:49 +00:00
zero_end = 0;
break;
case 'V':
print_version();
break;
2007-10-15 20:25:14 +00:00
default:
print_usage();
}
}
sectorsize = max(sectorsize, (u32)getpagesize());
2007-10-15 20:25:14 +00:00
if (leafsize < sectorsize || (leafsize & (sectorsize - 1))) {
fprintf(stderr, "Illegal leafsize %u\n", leafsize);
exit(1);
}
if (nodesize < sectorsize || (nodesize & (sectorsize - 1))) {
fprintf(stderr, "Illegal nodesize %u\n", nodesize);
exit(1);
}
ac = ac - optind;
2008-03-24 19:04:49 +00:00
if (ac == 0)
2007-10-15 20:25:14 +00:00
print_usage();
2008-03-24 19:04:49 +00:00
printf("\nWARNING! - %s IS EXPERIMENTAL\n", BTRFS_BUILD_VERSION);
printf("WARNING! - see http://btrfs.wiki.kernel.org before using\n\n");
2008-03-24 19:04:49 +00:00
file = av[optind++];
ret = check_mounted(file);
if (ret < 0) {
fprintf(stderr, "error checking %s mount status\n", file);
exit(1);
}
if (ret == 1) {
fprintf(stderr, "%s is mounted\n", file);
exit(1);
}
2008-03-24 19:04:49 +00:00
ac--;
2007-03-21 00:35:03 +00:00
fd = open(file, O_RDWR);
if (fd < 0) {
fprintf(stderr, "unable to open %s\n", file);
exit(1);
}
2008-03-24 19:04:49 +00:00
first_fd = fd;
first_file = file;
2008-03-24 19:04:49 +00:00
ret = btrfs_prepare_device(fd, file, zero_end, &dev_block_count);
if (block_count == 0)
block_count = dev_block_count;
blocks[0] = BTRFS_SUPER_INFO_OFFSET;
for (i = 1; i < 7; i++) {
blocks[i] = BTRFS_SUPER_INFO_OFFSET + 1024 * 1024 +
leafsize * i;
}
ret = make_btrfs(fd, file, label, blocks, block_count,
nodesize, leafsize,
sectorsize, stripesize);
2007-03-21 00:35:03 +00:00
if (ret) {
fprintf(stderr, "error during mkfs %d\n", ret);
exit(1);
}
root = open_ctree(file, 0, O_RDWR);
root->fs_info->alloc_start = alloc_start;
ret = make_root_dir(root);
2007-03-21 15:13:29 +00:00
if (ret) {
fprintf(stderr, "failed to setup the root directory\n");
exit(1);
}
trans = btrfs_start_transaction(root, 1);
2008-03-24 19:04:49 +00:00
if (ac == 0)
goto raid_groups;
2008-03-24 19:04:49 +00:00
btrfs_register_one_device(file);
2008-03-24 19:04:49 +00:00
if (!root) {
fprintf(stderr, "ctree init failed\n");
return -1;
}
zero_end = 1;
while(ac-- > 0) {
file = av[optind++];
ret = check_mounted(file);
if (ret < 0) {
fprintf(stderr, "error checking %s mount status\n",
file);
exit(1);
}
if (ret == 1) {
fprintf(stderr, "%s is mounted\n", file);
exit(1);
}
2008-03-24 19:04:49 +00:00
fd = open(file, O_RDWR);
if (fd < 0) {
fprintf(stderr, "unable to open %s\n", file);
exit(1);
}
ret = btrfs_device_already_in_root(root, fd,
BTRFS_SUPER_INFO_OFFSET);
if (ret) {
fprintf(stderr, "skipping duplicate device %s in FS\n",
file);
close(fd);
continue;
}
2008-03-24 19:04:49 +00:00
ret = btrfs_prepare_device(fd, file, zero_end,
&dev_block_count);
BUG_ON(ret);
ret = btrfs_add_to_fsid(trans, root, fd, file, dev_block_count,
2008-03-24 19:04:49 +00:00
sectorsize, sectorsize, sectorsize);
BUG_ON(ret);
btrfs_register_one_device(file);
2008-03-24 19:04:49 +00:00
}
raid_groups:
ret = create_raid_groups(trans, root, data_profile,
metadata_profile);
BUG_ON(ret);
ret = create_data_reloc_tree(trans, root);
BUG_ON(ret);
printf("fs created label %s on %s\n\tnodesize %u leafsize %u "
"sectorsize %u size %s\n",
label, first_file, nodesize, leafsize, sectorsize,
pretty_sizes(btrfs_super_total_bytes(&root->fs_info->super_copy)));
printf("%s\n", BTRFS_BUILD_VERSION);
2008-03-24 19:04:49 +00:00
btrfs_commit_transaction(trans, root);
ret = close_ctree(root);
BUG_ON(ret);
free(label);
2007-03-21 00:35:03 +00:00
return 0;
}