diff --git a/btrfsck.c b/btrfsck.c index aa2256ea..9dc085f1 100644 --- a/btrfsck.c +++ b/btrfsck.c @@ -480,7 +480,6 @@ static int pick_next_pending(struct cache_tree *pending, } return ret; } -static struct extent_buffer reada_buf; static int run_next_block(struct btrfs_root *root, struct block_info *bits, @@ -503,7 +502,6 @@ static int run_next_block(struct btrfs_root *root, struct cache_extent *cache; int reada_bits; - u64 last_block = 0; ret = pick_next_pending(pending, reada, nodes, *last, bits, bits_nr, &reada_bits); if (ret == 0) { @@ -511,14 +509,9 @@ static int run_next_block(struct btrfs_root *root, } if (!reada_bits) { for(i = 0; i < ret; i++) { - u64 offset; insert_cache_extent(reada, bits[i].start, bits[i].size); - btrfs_map_bh_to_logical(root, &reada_buf, - bits[i].start); - offset = reada_buf.dev_bytenr; - last_block = bits[i].start; - readahead(reada_buf.fd, offset, bits[i].size); + readahead_tree_block(root, bits[i].start, bits[i].size); } } *last = bits[0].start; diff --git a/ctree.h b/ctree.h index 1e4c84a1..b90a567d 100644 --- a/ctree.h +++ b/ctree.h @@ -406,6 +406,7 @@ struct btrfs_csum_item { #define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1) #define BTRFS_BLOCK_GROUP_METADATA (1 << 2) #define BTRFS_BLOCK_GROUP_RAID0 (1 << 3) +#define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) struct btrfs_block_group_item { __le64 used; diff --git a/disk-io.c b/disk-io.c index 12153942..204abe08 100644 --- a/disk-io.c +++ b/disk-io.c @@ -18,6 +18,7 @@ #define _XOPEN_SOURCE 600 #define __USE_XOPEN2K +#define _GNU_SOURCE 1 #include #include #include @@ -33,22 +34,6 @@ #include "crc32c.h" #include "utils.h" -int btrfs_map_bh_to_logical(struct btrfs_root *root, struct extent_buffer *buf, - u64 logical) -{ - u64 physical; - u64 length; - struct btrfs_device *device; - int ret; - - ret = btrfs_map_block(&root->fs_info->mapping_tree, logical, &physical, - &length, &device); - BUG_ON(ret); - buf->fd = device->fd; - buf->dev_bytenr = physical; - return 0; -} - static int 
check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) { if (buf->start != btrfs_header_bytenr(buf)) @@ -110,6 +95,28 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { + int ret; + int total_devs = 1; + int dev_nr; + struct extent_buffer *eb; + u64 physical; + u64 length; + struct btrfs_device *device; + + eb = btrfs_find_tree_block(root, bytenr, blocksize); + if (eb && btrfs_buffer_uptodate(eb)) { + free_extent_buffer(eb); + return 0; + } + + dev_nr = 0; + ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, dev_nr, + bytenr, &physical, &length, &device, + &total_devs); + BUG_ON(ret); + device->total_ios++; + blocksize = min(blocksize, (u32)(64 * 1024)); + readahead(device->fd, physical, blocksize); return 0; } @@ -117,35 +124,69 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { int ret; + int total_devs = 1; + int dev_nr; struct extent_buffer *eb; + u64 physical; + u64 length; + struct btrfs_device *device; eb = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!eb) return NULL; - if (!btrfs_buffer_uptodate(eb)) { - btrfs_map_bh_to_logical(root, eb, eb->start); - ret = read_extent_from_disk(eb); - if (ret) { - free_extent_buffer(eb); - return NULL; - } - btrfs_set_buffer_uptodate(eb); + + if (btrfs_buffer_uptodate(eb)) + return eb; + + dev_nr = 0; + ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, dev_nr, + eb->start, &physical, &length, &device, + &total_devs); + BUG_ON(ret); + eb->fd = device->fd; + device->total_ios++; + eb->dev_bytenr = physical; + ret = read_extent_from_disk(eb); + if (ret) { + free_extent_buffer(eb); + return NULL; } + btrfs_set_buffer_uptodate(eb); return eb; } int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *eb) { + int ret; + int total_devs = 1; + int dev_nr; + u64 physical; + u64 length; + struct 
btrfs_device *device; + if (check_tree_block(root, eb)) BUG(); if (!btrfs_buffer_uptodate(eb)) BUG(); btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); - btrfs_map_bh_to_logical(root, eb, eb->start); csum_tree_block(root, eb, 0); - return write_extent_to_disk(eb); + + dev_nr = 0; + while(dev_nr < total_devs) { + ret = btrfs_map_block(&root->fs_info->mapping_tree, WRITE, + dev_nr, eb->start, &physical, &length, + &device, &total_devs); + BUG_ON(ret); + eb->fd = device->fd; + eb->dev_bytenr = physical; + dev_nr++; + device->total_ios++; + ret = write_extent_to_disk(eb); + BUG_ON(ret); + } + return 0; } static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, diff --git a/extent-tree.c b/extent-tree.c index b9cf92f6..d845adaa 100644 --- a/extent-tree.c +++ b/extent-tree.c @@ -1620,8 +1620,12 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_key keys[2]; int extra_alloc_flags = 0; - if (btrfs_super_num_devices(&info->super_copy) > 1) - extra_alloc_flags = BTRFS_BLOCK_GROUP_RAID0; + if (0 && btrfs_super_num_devices(&info->super_copy) > 1) { + if (data) + extra_alloc_flags = BTRFS_BLOCK_GROUP_RAID0; + else + extra_alloc_flags = BTRFS_BLOCK_GROUP_RAID1; + } if (data) { data = BTRFS_BLOCK_GROUP_DATA | extra_alloc_flags; diff --git a/kerncompat.h b/kerncompat.h index c7aee684..e6e9987b 100644 --- a/kerncompat.h +++ b/kerncompat.h @@ -25,6 +25,12 @@ #include #include +#ifndef READ +#define READ 0 +#define WRITE 1 +#define READA 2 +#endif + #define gfp_t int #define get_cpu_var(p) (p) #define __get_cpu_var(p) (p) diff --git a/mkfs.c b/mkfs.c index aa055367..0d2ca381 100644 --- a/mkfs.c +++ b/mkfs.c @@ -163,6 +163,8 @@ int main(int ac, char **av) char *file; u64 block_count = 0; u64 dev_block_count = 0; + u64 chunk_start; + u64 chunk_size; int fd; int first_fd; int ret; @@ -295,6 +297,29 @@ int main(int ac, char **av) close(fd); btrfs_register_one_device(file); } + + ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root, + 
&chunk_start, &chunk_size, + BTRFS_BLOCK_GROUP_METADATA | + BTRFS_BLOCK_GROUP_RAID1); + BUG_ON(ret); + ret = btrfs_make_block_group(trans, root->fs_info->extent_root, 0, + BTRFS_BLOCK_GROUP_METADATA | + BTRFS_BLOCK_GROUP_RAID1, + BTRFS_CHUNK_TREE_OBJECTID, + chunk_start, chunk_size); + BUG_ON(ret); + ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root, + &chunk_start, &chunk_size, + BTRFS_BLOCK_GROUP_DATA | + BTRFS_BLOCK_GROUP_RAID0); + BUG_ON(ret); + ret = btrfs_make_block_group(trans, root->fs_info->extent_root, 0, + BTRFS_BLOCK_GROUP_DATA | + BTRFS_BLOCK_GROUP_RAID0, + BTRFS_CHUNK_TREE_OBJECTID, + chunk_start, chunk_size); + BUG_ON(ret); btrfs_commit_transaction(trans, root); ret = close_ctree(root); BUG_ON(ret); diff --git a/utils.c b/utils.c index 323b108b..33610306 100644 --- a/utils.c +++ b/utils.c @@ -404,31 +404,40 @@ int btrfs_add_to_fsid(struct btrfs_trans_handle *trans, { struct btrfs_super_block *disk_super; struct btrfs_super_block *super = &root->fs_info->super_copy; - struct btrfs_device device; + struct btrfs_device *device; struct btrfs_dev_item *dev_item; char *buf; u64 total_bytes; u64 num_devs; int ret; - buf = malloc(sectorsize); + device = kmalloc(sizeof(*device), GFP_NOFS); + if (!device) + return -ENOMEM; + buf = kmalloc(sectorsize, GFP_NOFS); + if (!buf) { + kfree(device); + return -ENOMEM; + } BUG_ON(sizeof(*disk_super) > sectorsize); memset(buf, 0, sectorsize); disk_super = (struct btrfs_super_block *)buf; dev_item = &disk_super->dev_item; - uuid_generate(device.uuid); - device.devid = 0; - device.type = 0; - device.io_width = io_width; - device.io_align = io_align; - device.sector_size = sectorsize; - device.fd = 0; - device.total_bytes = block_count; - device.bytes_used = 0; + uuid_generate(device->uuid); + device->devid = 0; + device->type = 0; + device->io_width = io_width; + device->io_align = io_align; + device->sector_size = sectorsize; + device->fd = 0; + device->total_bytes = block_count; + device->bytes_used = 0; + 
device->total_ios = 0; + device->dev_root = root->fs_info->dev_root; - ret = btrfs_add_device(trans, root, &device); + ret = btrfs_add_device(trans, root, device); BUG_ON(ret); total_bytes = btrfs_super_total_bytes(super) + block_count; @@ -439,20 +448,21 @@ int btrfs_add_to_fsid(struct btrfs_trans_handle *trans, memcpy(disk_super, super, sizeof(*disk_super)); - printf("adding device id %llu\n", (unsigned long long)device.devid); - btrfs_set_stack_device_id(dev_item, device.devid); - btrfs_set_stack_device_type(dev_item, device.type); - btrfs_set_stack_device_io_align(dev_item, device.io_align); - btrfs_set_stack_device_io_width(dev_item, device.io_width); - btrfs_set_stack_device_sector_size(dev_item, device.sector_size); - btrfs_set_stack_device_total_bytes(dev_item, device.total_bytes); - btrfs_set_stack_device_bytes_used(dev_item, device.bytes_used); - memcpy(&dev_item->uuid, device.uuid, BTRFS_DEV_UUID_SIZE); + printf("adding device id %llu\n", (unsigned long long)device->devid); + btrfs_set_stack_device_id(dev_item, device->devid); + btrfs_set_stack_device_type(dev_item, device->type); + btrfs_set_stack_device_io_align(dev_item, device->io_align); + btrfs_set_stack_device_io_width(dev_item, device->io_width); + btrfs_set_stack_device_sector_size(dev_item, device->sector_size); + btrfs_set_stack_device_total_bytes(dev_item, device->total_bytes); + btrfs_set_stack_device_bytes_used(dev_item, device->bytes_used); + memcpy(&dev_item->uuid, device->uuid, BTRFS_DEV_UUID_SIZE); ret = pwrite(fd, buf, sectorsize, BTRFS_SUPER_INFO_OFFSET); BUG_ON(ret != sectorsize); - free(buf); + kfree(buf); + list_add(&device->dev_list, &root->fs_info->fs_devices->devices); return 0; } diff --git a/volumes.c b/volumes.c index d3ee38b8..ef908724 100644 --- a/volumes.c +++ b/volumes.c @@ -232,6 +232,10 @@ static int find_free_dev_extent(struct btrfs_trans_handle *trans, /* FIXME use last free of some kind */ + /* we don't want to overwrite the superblock on the drive, + * so we make 
sure to start at an offset of at least 1MB + */ + search_start = max((u64)1024 * 1024, search_start); key.objectid = device->devid; key.offset = search_start; key.type = BTRFS_DEV_EXTENT_KEY; @@ -578,12 +582,15 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (list_empty(dev_list)) return -ENOSPC; - if (type & BTRFS_BLOCK_GROUP_RAID0) + if (type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1)) { + calc_size = 1024 * 1024 * 1024; + } + if (type & (BTRFS_BLOCK_GROUP_RAID1)) { + num_stripes = min_t(u64, 2, + btrfs_super_num_devices(&info->super_copy)); + } + if (type & (BTRFS_BLOCK_GROUP_RAID0)) num_stripes = btrfs_super_num_devices(&info->super_copy); - if (type & BTRFS_BLOCK_GROUP_DATA) - stripe_len = 64 * 1024; - if (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) - stripe_len = 32 * 1024; again: INIT_LIST_HEAD(&private_devs); cur = dev_list->next; @@ -628,7 +635,11 @@ again: stripes = &chunk->stripe; - *num_bytes = calc_size * num_stripes; + if (type & BTRFS_BLOCK_GROUP_RAID1) + *num_bytes = calc_size; + else + *num_bytes = calc_size * num_stripes; + index = 0; while(index < num_stripes) { BUG_ON(list_empty(&private_devs)); @@ -695,9 +706,9 @@ void btrfs_mapping_init(struct btrfs_mapping_tree *tree) cache_tree_init(&tree->cache_tree); } -int btrfs_map_block(struct btrfs_mapping_tree *map_tree, - u64 logical, u64 *phys, u64 *length, - struct btrfs_device **dev) +int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, + int dev_nr, u64 logical, u64 *phys, u64 *length, + struct btrfs_device **dev, int *total_devs) { struct cache_extent *ce; struct map_lookup *map; @@ -725,20 +736,28 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, /* stripe_offset is the offset of this block in its stripe*/ stripe_offset = offset - stripe_offset; - /* - * after this do_div call, stripe_nr is the number of stripes - * on this device we have to walk to find the data, and - * stripe_index is the number of our device in the stripe 
array - */ - stripe_index = stripe_nr % map->num_stripes; - stripe_nr = stripe_nr / map->num_stripes; - + if (map->type & BTRFS_BLOCK_GROUP_RAID1) { + stripe_index = dev_nr; + if (rw == WRITE) + *total_devs = map->num_stripes; + else { + stripe_index = stripe_nr % map->num_stripes; + *total_devs = 1; + } + } else { + /* + * after the modulo and division below, stripe_nr is the number of stripes + * on this device we have to walk to find the data, and + * stripe_index is the number of our device in the stripe array + */ + stripe_index = stripe_nr % map->num_stripes; + stripe_nr = stripe_nr / map->num_stripes; + } BUG_ON(stripe_index >= map->num_stripes); - *phys = map->stripes[stripe_index].physical + stripe_offset + stripe_nr * map->stripe_len; - if (map->type & BTRFS_BLOCK_GROUP_RAID0) { + if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1)) { /* we limit the length of each bio to what fits in a stripe */ *length = min_t(u64, ce->size - offset, map->stripe_len - stripe_offset); @@ -846,6 +865,7 @@ static int read_one_dev(struct btrfs_root *root, device = kmalloc(sizeof(*device), GFP_NOFS); if (!device) return -ENOMEM; + device->total_ios = 0; list_add(&device->dev_list, &root->fs_info->fs_devices->devices); } diff --git a/volumes.h b/volumes.h index ae9e7bb7..a51cfe54 100644 --- a/volumes.h +++ b/volumes.h @@ -22,6 +22,8 @@ struct btrfs_device { struct list_head dev_list; struct btrfs_root *dev_root; + u64 total_ios; + int fd; char *name; @@ -67,9 +69,9 @@ struct btrfs_fs_devices { int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 owner, u64 num_bytes, u64 *start); -int btrfs_map_block(struct btrfs_mapping_tree *map_tree, +int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, int stripe_nr, u64 logical, u64 *phys, u64 *length, - struct btrfs_device **dev); + struct btrfs_device **dev, int *total_stripes); int btrfs_read_sys_array(struct btrfs_root *root); int btrfs_read_chunk_tree(struct btrfs_root *root); 
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,