diff --git a/btrfs-image.c b/btrfs-image.c index ebd283e5..82aa4f39 100644 --- a/btrfs-image.c +++ b/btrfs-image.c @@ -35,6 +35,7 @@ #include "utils.h" #include "version.h" #include "volumes.h" +#include "extent_io.h" #define HEADER_MAGIC 0xbd5c25e27295668bULL #define MAX_PENDING_SIZE (256 * 1024) @@ -136,6 +137,9 @@ struct mdrestore_struct { int done; int error; int old_restore; + int fixup_offset; /* when set, restore_worker() writes non-super blocks via write_data_to_disk() on info instead of pwrite64() on out */ + int multi_devices; /* when set, restore_worker() skips the super/chunk fixups and the logical_to_physical() translation */ + struct btrfs_fs_info *info; /* filesystem handle handed to write_data_to_disk(); NULL unless fixup_offset is set */ }; static int search_for_chunk_blocks(struct mdrestore_struct *mdres, @@ -1589,9 +1593,10 @@ static void *restore_worker(void *data) u8 *outbuf; int outfd; int ret; + int compress_size = MAX_PENDING_SIZE * 4; /* decompression buffer size, doubled from MAX_PENDING_SIZE * 2 */ outfd = fileno(mdres->out); - buffer = malloc(MAX_PENDING_SIZE * 2); + buffer = malloc(compress_size); if (!buffer) { fprintf(stderr, "Error allocing buffer\n"); pthread_mutex_lock(&mdres->mutex); @@ -1619,7 +1624,7 @@ static void *restore_worker(void *data) pthread_mutex_unlock(&mdres->mutex); if (mdres->compress_method == COMPRESS_ZLIB) { - size = MAX_PENDING_SIZE * 2; + size = compress_size; ret = uncompress(buffer, (unsigned long *)&size, async->buffer, async->bufsize); if (ret != Z_OK) { @@ -1633,44 +1638,60 @@ static void *restore_worker(void *data) size = async->bufsize; } - if (async->start == BTRFS_SUPER_INFO_OFFSET) { - if (mdres->old_restore) { - update_super_old(outbuf); - } else { - ret = update_super(outbuf); + if (!mdres->multi_devices) { + if (async->start == BTRFS_SUPER_INFO_OFFSET) { + if (mdres->old_restore) { + update_super_old(outbuf); + } else { + ret = update_super(outbuf); + if (ret) + err = ret; + } + } else if (!mdres->old_restore) { + ret = fixup_chunk_tree_block(mdres, async, outbuf, size); if (ret) err = ret; } - } else if (!mdres->old_restore) { - ret = fixup_chunk_tree_block(mdres, async, outbuf, size); - if (ret) - err = ret; } - while (size) { - u64 chunk_size = size; - bytenr = logical_to_physical(mdres, - async->start + offset, - &chunk_size); - ret = pwrite64(outfd, 
outbuf+offset, chunk_size, - bytenr); - if (ret < chunk_size) { - if (ret < 0) { - fprintf(stderr, "Error writing to " - "device %d\n", errno); - err = errno; - break; - } else { - fprintf(stderr, "Short write\n"); - err = -EIO; - break; + if (!mdres->fixup_offset) { + while (size) { + u64 chunk_size = size; + if (!mdres->multi_devices) + bytenr = logical_to_physical(mdres, + async->start + offset, + &chunk_size); + else + bytenr = async->start + offset; + + ret = pwrite64(outfd, outbuf+offset, chunk_size, + bytenr); + if (ret != chunk_size) { + if (ret < 0) { + fprintf(stderr, "Error writing to " + "device %d\n", errno); + err = errno; + break; + } else { + fprintf(stderr, "Short write\n"); + err = -EIO; + break; + } } + size -= chunk_size; + offset += chunk_size; + } + } else if (async->start != BTRFS_SUPER_INFO_OFFSET) { + ret = write_data_to_disk(mdres->info, outbuf, async->start, size, 0); /* fixup pass: the block is placed through btrfs_map_block() on the opened filesystem; the primary super block is skipped */ + if (ret) { + printk("Error write data\n"); + exit(1); } - size -= chunk_size; - offset += chunk_size; } - if (async->start == BTRFS_SUPER_INFO_OFFSET) + + /* backup super blocks are already there at fixup_offset stage */ + if (!mdres->multi_devices && async->start == BTRFS_SUPER_INFO_OFFSET) write_backup_supers(outfd, outbuf); pthread_mutex_lock(&mdres->mutex); @@ -1714,7 +1735,8 @@ static void mdrestore_destroy(struct mdrestore_struct *mdres) static int mdrestore_init(struct mdrestore_struct *mdres, FILE *in, FILE *out, int old_restore, - int num_threads) + int num_threads, int fixup_offset, + struct btrfs_fs_info *info, int multi_devices) { int i, ret = 0; @@ -1726,6 +1748,9 @@ static int mdrestore_init(struct mdrestore_struct *mdres, mdres->out = out; mdres->old_restore = old_restore; mdres->chunk_tree.rb_node = NULL; + mdres->fixup_offset = fixup_offset; + mdres->info = info; + mdres->multi_devices = multi_devices; if (!num_threads) return 0; @@ -2186,12 +2211,14 @@ static int build_chunk_tree(struct mdrestore_struct *mdres, return search_for_chunk_blocks(mdres, 
chunk_root_bytenr, 0); } -static int restore_metadump(const char *input, FILE *out, int old_restore, - int num_threads) +static int __restore_metadump(const char *input, FILE *out, int old_restore, + int num_threads, int fixup_offset, + const char *target, int multi_devices) /* shared body of restore_metadump() and fixup_metadump(); target is opened (writable) only when fixup_offset is set */ { struct meta_cluster *cluster = NULL; struct meta_cluster_header *header; struct mdrestore_struct mdrestore; + struct btrfs_fs_info *info = NULL; u64 bytenr = 0; FILE *in = NULL; int ret = 0; @@ -2206,26 +2233,36 @@ static int restore_metadump(const char *input, FILE *out, int old_restore, } } + /* NOTE: open with write mode */ + if (fixup_offset) { + BUG_ON(!target); + info = open_ctree_fs_info_restore(target, 0, 0, 1, 1); + if (!info) { + fprintf(stderr, "%s: open ctree failed\n", __func__); + ret = -EIO; + goto failed_open; + } + } + cluster = malloc(BLOCK_SIZE); if (!cluster) { fprintf(stderr, "Error allocating cluster\n"); - if (in != stdin) - fclose(in); - return -ENOMEM; + ret = -ENOMEM; + goto failed_info; } - ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads); + ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads, + fixup_offset, info, multi_devices); if (ret) { fprintf(stderr, "Error initing mdrestore %d\n", ret); - if (in != stdin) - fclose(in); - free(cluster); - return ret; + goto failed_cluster; } - ret = build_chunk_tree(&mdrestore, cluster); - if (ret) - goto out; + if (!multi_devices) { + ret = build_chunk_tree(&mdrestore, cluster); + if (ret) + goto out; + } if (in != stdin && fseek(in, 0, SEEK_SET)) { fprintf(stderr, "Error seeking %d\n", errno); @@ -2259,12 +2296,123 @@ static int restore_metadump(const char *input, FILE *out, int old_restore, } out: mdrestore_destroy(&mdrestore); +failed_cluster: free(cluster); +failed_info: + if (fixup_offset && info) + close_ctree(info->chunk_root); +failed_open: if (in != stdin) fclose(in); return ret; } +static int restore_metadump(const char *input, FILE *out, int old_restore, + int num_threads, int 
multi_devices) +{ + return __restore_metadump(input, out, old_restore, num_threads, 0, NULL, + multi_devices); +} + +static int fixup_metadump(const char *input, FILE *out, int num_threads, + const char *target) +{ + return __restore_metadump(input, out, 0, num_threads, 1, target, 1); /* second pass: fixup_offset = 1, multi_devices = 1 */ +} + +static int update_disk_super_on_device(struct btrfs_fs_info *info, + const char *other_dev, u64 cur_devid) +{ /* Looks up the dev item for cur_devid in the chunk tree, copies its fields into a copy of info->super_copy and writes that super block (plus backups) to other_dev. Returns 0 on success or a negative errno; lookup and open failures still exit(1). */ + struct btrfs_key key; + struct extent_buffer *leaf; + struct btrfs_path path; + struct btrfs_dev_item *dev_item; + struct btrfs_super_block *disk_super; + char dev_uuid[BTRFS_UUID_SIZE]; + char fs_uuid[BTRFS_UUID_SIZE]; + u64 devid, type, io_align, io_width; + u64 sector_size, total_bytes, bytes_used; + char *buf; + int fp; + int ret; + + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.type = BTRFS_DEV_ITEM_KEY; + key.offset = cur_devid; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, info->chunk_root, &key, &path, 0, 0); + if (ret) { + fprintf(stderr, "search key fails\n"); + exit(1); + } + + leaf = path.nodes[0]; + dev_item = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_dev_item); + + devid = btrfs_device_id(leaf, dev_item); + if (devid != cur_devid) { + printk("devid %llu mismatch with %llu\n", devid, cur_devid); + exit(1); + } + + type = btrfs_device_type(leaf, dev_item); + io_align = btrfs_device_io_align(leaf, dev_item); + io_width = btrfs_device_io_width(leaf, dev_item); + sector_size = btrfs_device_sector_size(leaf, dev_item); + total_bytes = btrfs_device_total_bytes(leaf, dev_item); + bytes_used = btrfs_device_bytes_used(leaf, dev_item); + read_extent_buffer(leaf, dev_uuid, (unsigned long)btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); + read_extent_buffer(leaf, fs_uuid, (unsigned long)btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE); + + btrfs_release_path(info->chunk_root, &path); + + printk("update disk super on %s devid=%llu\n", other_dev, devid); + + /* update other devices' super block */ + fp = open(other_dev, O_CREAT | O_RDWR, 0600); + if 
(fp < 0) { + fprintf(stderr, "could not open %s\n", other_dev); + exit(1); + } + + buf = malloc(BTRFS_SUPER_INFO_SIZE); + if (!buf) { + close(fp); /* do not leak the descriptor; let the caller report the failure */ + return -ENOMEM; + } + + memcpy(buf, info->super_copy, BTRFS_SUPER_INFO_SIZE); + + disk_super = (struct btrfs_super_block *)buf; + dev_item = &disk_super->dev_item; + + btrfs_set_stack_device_type(dev_item, type); + btrfs_set_stack_device_id(dev_item, devid); + btrfs_set_stack_device_total_bytes(dev_item, total_bytes); + btrfs_set_stack_device_bytes_used(dev_item, bytes_used); + btrfs_set_stack_device_io_align(dev_item, io_align); + btrfs_set_stack_device_io_width(dev_item, io_width); + btrfs_set_stack_device_sector_size(dev_item, sector_size); + memcpy(dev_item->uuid, dev_uuid, BTRFS_UUID_SIZE); + memcpy(dev_item->fsid, fs_uuid, BTRFS_UUID_SIZE); + csum_block((u8 *)buf, BTRFS_SUPER_INFO_SIZE); + + ret = pwrite64(fp, buf, BTRFS_SUPER_INFO_SIZE, BTRFS_SUPER_INFO_OFFSET); + if (ret != BTRFS_SUPER_INFO_SIZE) { + ret = -EIO; + goto out; + } + + write_backup_supers(fp, (u8 *)buf); + ret = 0; /* pwrite64() left the byte count in ret; success is reported as 0 */ +out: + free(buf); + close(fp); + return ret; /* was "return 0", which hid the -EIO set above from the caller */ +} + static void print_usage(void) { fprintf(stderr, "usage: btrfs-image [options] source target\n"); @@ -2286,12 +2434,14 @@ int main(int argc, char *argv[]) int create = 1; int old_restore = 0; int walk_trees = 0; + int multi_devices = 0; int ret; int sanitize = 0; + int dev_cnt = 0; FILE *out; while (1) { - int c = getopt(argc, argv, "rc:t:osw"); + int c = getopt(argc, argv, "rc:t:oswm"); if (c < 0) break; switch (c) { @@ -2317,17 +2467,26 @@ int main(int argc, char *argv[]) case 'w': walk_trees = 1; break; + case 'm': + create = 0; + multi_devices = 1; + break; default: print_usage(); } } - if (old_restore && create) + if ((old_restore) && create) print_usage(); argc = argc - optind; - if (argc != 2) + dev_cnt = argc - 1; + + if (multi_devices && dev_cnt < 2) print_usage(); + if (!multi_devices && dev_cnt != 1) + print_usage(); + source = argv[optind]; target = argv[optind + 1]; @@ -2351,8 +2510,61 @@ 
int main(int argc, char *argv[]) ret = create_metadump(source, out, num_threads, compress_level, sanitize, walk_trees); else - ret = restore_metadump(source, out, old_restore, 1); + ret = restore_metadump(source, out, old_restore, 1, + multi_devices); + if (ret) { + printk("%s failed (%s)\n", (create) ? "create" : "restore", + strerror(errno)); + goto out; + } + /* extended support for multiple devices */ + if (!create && multi_devices) { + struct btrfs_fs_info *info; + u64 total_devs; + int i; + + info = open_ctree_fs_info_restore(target, 0, 0, 0, 1); + if (!info) { + int e = errno; + fprintf(stderr, "unable to open %s error = %s\n", + target, strerror(e)); + return 1; + } + + total_devs = btrfs_super_num_devices(info->super_copy); + if (total_devs != dev_cnt) { + printk("it needs %llu devices but has only %d\n", + total_devs, dev_cnt); + close_ctree(info->chunk_root); + ret = -EINVAL; /* ret is still 0 here; mark the run as failed (assumes main's exit status is derived from ret - confirm) */ + goto out; + } + + /* update super block on other disks */ + for (i = 2; i <= dev_cnt; i++) { + ret = update_disk_super_on_device(info, + argv[optind + i], (u64)i); + if (ret) { + printk("update disk super failed devid=%d (error=%d)\n", + i, ret); + close_ctree(info->chunk_root); + exit(1); + } + } + + close_ctree(info->chunk_root); + + /* fix metadata block to map correct chunk */ + ret = fixup_metadump(source, out, 1, target); + if (ret) { + fprintf(stderr, "fix metadump failed (error=%d)\n", + ret); + exit(1); + } + } + +out: if (out == stdout) fflush(out); else diff --git a/ctree.h b/ctree.h index 3fe14b0e..6f086bf2 100644 --- a/ctree.h +++ b/ctree.h @@ -949,6 +949,7 @@ struct btrfs_fs_info { struct list_head space_info; int system_allocs; int readonly; + int on_restoring; /* set by __open_ctree_fd() when restore != 0; read_whole_eb() then reads from devid 1 at eb->start without chunk mapping */ int (*free_extent_hook)(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, diff --git a/disk-io.c b/disk-io.c index 30e2370a..151c1bd6 100644 --- a/disk-io.c +++ b/disk-io.c @@ -182,7 +182,7 @@ out: } -static int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, 
int mirror) +int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror) { unsigned long offset = 0; struct btrfs_multi_bio *multi = NULL; @@ -193,26 +193,40 @@ static int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, i while (bytes_left) { read_len = bytes_left; - ret = btrfs_map_block(&info->mapping_tree, READ, - eb->start + offset, &read_len, &multi, - mirror, NULL); - if (ret) { - printk("Couldn't map the block %Lu\n", eb->start + offset); - kfree(multi); - return -EIO; - } - device = multi->stripes[0].dev; + device = NULL; - if (device->fd == 0) { - kfree(multi); - return -EIO; - } + if (!info->on_restoring) { + ret = btrfs_map_block(&info->mapping_tree, READ, + eb->start + offset, &read_len, &multi, + mirror, NULL); + if (ret) { + printk("Couldn't map the block %Lu\n", eb->start + offset); + kfree(multi); + return -EIO; + } + device = multi->stripes[0].dev; - eb->fd = device->fd; - device->total_ios++; - eb->dev_bytenr = multi->stripes[0].physical; - kfree(multi); - multi = NULL; + if (device->fd == 0) { + kfree(multi); + return -EIO; + } + + eb->fd = device->fd; + device->total_ios++; + eb->dev_bytenr = multi->stripes[0].physical; + kfree(multi); + multi = NULL; + } else { + /* special case for restore metadump */ + list_for_each_entry(device, &info->fs_devices->devices, dev_list) { + if (device->devid == 1) + break; + } + if (&device->dev_list == &info->fs_devices->devices) return -EIO; /* loop ran off the end: no devid 1, so device is the list head, not a real device */ + eb->fd = device->fd; + eb->dev_bytenr = eb->start; + device->total_ios++; + } if (read_len > bytes_left) read_len = bytes_left; @@ -281,149 +295,6 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, return NULL; } -static int rmw_eb(struct btrfs_fs_info *info, - struct extent_buffer *eb, struct extent_buffer *orig_eb) -{ - int ret; - unsigned long orig_off = 0; - unsigned long dest_off = 0; - unsigned long copy_len = eb->len; - - ret = read_whole_eb(info, eb, 0); - if (ret) - return ret; - - if (eb->start + eb->len <= orig_eb->start || - eb->start >= orig_eb->start + 
orig_eb->len) - return 0; - /* - * | ----- orig_eb ------- | - * | ----- stripe ------- | - * | ----- orig_eb ------- | - * | ----- orig_eb ------- | - */ - if (eb->start > orig_eb->start) - orig_off = eb->start - orig_eb->start; - if (orig_eb->start > eb->start) - dest_off = orig_eb->start - eb->start; - - if (copy_len > orig_eb->len - orig_off) - copy_len = orig_eb->len - orig_off; - if (copy_len > eb->len - dest_off) - copy_len = eb->len - dest_off; - - memcpy(eb->data + dest_off, orig_eb->data + orig_off, copy_len); - return 0; -} - -static void split_eb_for_raid56(struct btrfs_fs_info *info, - struct extent_buffer *orig_eb, - struct extent_buffer **ebs, - u64 stripe_len, u64 *raid_map, - int num_stripes) -{ - struct extent_buffer *eb; - u64 start = orig_eb->start; - u64 this_eb_start; - int i; - int ret; - - for (i = 0; i < num_stripes; i++) { - if (raid_map[i] >= BTRFS_RAID5_P_STRIPE) - break; - - eb = malloc(sizeof(struct extent_buffer) + stripe_len); - if (!eb) - BUG(); - memset(eb, 0, sizeof(struct extent_buffer) + stripe_len); - - eb->start = raid_map[i]; - eb->len = stripe_len; - eb->refs = 1; - eb->flags = 0; - eb->fd = -1; - eb->dev_bytenr = (u64)-1; - - this_eb_start = raid_map[i]; - - if (start > this_eb_start || - start + orig_eb->len < this_eb_start + stripe_len) { - ret = rmw_eb(info, eb, orig_eb); - BUG_ON(ret); - } else { - memcpy(eb->data, orig_eb->data + eb->start - start, stripe_len); - } - ebs[i] = eb; - } -} - -static int write_raid56_with_parity(struct btrfs_fs_info *info, - struct extent_buffer *eb, - struct btrfs_multi_bio *multi, - u64 stripe_len, u64 *raid_map) -{ - struct extent_buffer *ebs[multi->num_stripes], *p_eb = NULL, *q_eb = NULL; - int i; - int j; - int ret; - int alloc_size = eb->len; - - if (stripe_len > alloc_size) - alloc_size = stripe_len; - - split_eb_for_raid56(info, eb, ebs, stripe_len, raid_map, - multi->num_stripes); - - for (i = 0; i < multi->num_stripes; i++) { - struct extent_buffer *new_eb; - if (raid_map[i] < 
BTRFS_RAID5_P_STRIPE) { - ebs[i]->dev_bytenr = multi->stripes[i].physical; - ebs[i]->fd = multi->stripes[i].dev->fd; - multi->stripes[i].dev->total_ios++; - BUG_ON(ebs[i]->start != raid_map[i]); - continue; - } - new_eb = kmalloc(sizeof(*eb) + alloc_size, GFP_NOFS); - BUG_ON(!new_eb); - new_eb->dev_bytenr = multi->stripes[i].physical; - new_eb->fd = multi->stripes[i].dev->fd; - multi->stripes[i].dev->total_ios++; - new_eb->len = stripe_len; - - if (raid_map[i] == BTRFS_RAID5_P_STRIPE) - p_eb = new_eb; - else if (raid_map[i] == BTRFS_RAID6_Q_STRIPE) - q_eb = new_eb; - } - if (q_eb) { - void *pointers[multi->num_stripes]; - ebs[multi->num_stripes - 2] = p_eb; - ebs[multi->num_stripes - 1] = q_eb; - - for (i = 0; i < multi->num_stripes; i++) - pointers[i] = ebs[i]->data; - - raid6_gen_syndrome(multi->num_stripes, stripe_len, pointers); - } else { - ebs[multi->num_stripes - 1] = p_eb; - memcpy(p_eb->data, ebs[0]->data, stripe_len); - for (j = 1; j < multi->num_stripes - 1; j++) { - for (i = 0; i < stripe_len; i += sizeof(unsigned long)) { - *(unsigned long *)(p_eb->data + i) ^= - *(unsigned long *)(ebs[j]->data + i); - } - } - } - - for (i = 0; i < multi->num_stripes; i++) { - ret = write_extent_to_disk(ebs[i]); - BUG_ON(ret); - if (ebs[i] != eb) - kfree(ebs[i]); - } - return 0; -} - int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *eb) { @@ -435,6 +306,7 @@ int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (check_tree_block(root, eb)) BUG(); + if (!btrfs_buffer_uptodate(eb, trans->transid)) BUG(); @@ -801,7 +673,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, static struct btrfs_fs_info *__open_ctree_fd(int fp, const char *path, u64 sb_bytenr, u64 root_tree_bytenr, int writes, - int partial) + int partial, int restore) { u32 sectorsize; u32 nodesize; @@ -822,6 +694,12 @@ static struct btrfs_fs_info *__open_ctree_fd(int fp, const char *path, u64 total_devs; 
u64 features; + memset(tree_root, 0, sizeof(struct btrfs_root)); + memset(extent_root, 0, sizeof(struct btrfs_root)); + memset(chunk_root, 0, sizeof(struct btrfs_root)); + memset(dev_root, 0, sizeof(struct btrfs_root)); + memset(csum_root, 0, sizeof(struct btrfs_root)); + if (sb_bytenr == 0) sb_bytenr = BTRFS_SUPER_INFO_OFFSET; @@ -853,6 +731,8 @@ static struct btrfs_fs_info *__open_ctree_fd(int fp, const char *path, if (!writes) fs_info->readonly = 1; + if (restore) + fs_info->on_restoring = 1; extent_io_tree_init(&fs_info->extent_cache); extent_io_tree_init(&fs_info->free_space_cache); @@ -1046,6 +926,29 @@ out: return NULL; } +struct btrfs_fs_info *open_ctree_fs_info_restore(const char *filename, + u64 sb_bytenr, u64 root_tree_bytenr, + int writes, int partial) +{ /* opens filename (O_RDONLY unless writes, else O_CREAT | O_RDWR) and calls __open_ctree_fd() with restore = 1; returns NULL if the open fails */ + int fp; + struct btrfs_fs_info *info; + int flags = O_CREAT | O_RDWR; + int restore = 1; + + if (!writes) + flags = O_RDONLY; + + fp = open(filename, flags, 0600); + if (fp < 0) { + fprintf (stderr, "Could not open %s\n", filename); + return NULL; + } + info = __open_ctree_fd(fp, filename, sb_bytenr, root_tree_bytenr, + writes, partial, restore); + close(fp); + return info; +} + struct btrfs_fs_info *open_ctree_fs_info(const char *filename, u64 sb_bytenr, u64 root_tree_bytenr, int writes, int partial) @@ -1063,7 +966,7 @@ struct btrfs_fs_info *open_ctree_fs_info(const char *filename, return NULL; } info = __open_ctree_fd(fp, filename, sb_bytenr, root_tree_bytenr, - writes, partial); + writes, partial, 0); close(fp); return info; } @@ -1082,7 +985,7 @@ struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr, int writes) { struct btrfs_fs_info *info; - info = __open_ctree_fd(fp, path, sb_bytenr, 0, writes, 0); + info = __open_ctree_fd(fp, path, sb_bytenr, 0, writes, 0, 0); if (!info) return NULL; return info->fs_root; diff --git a/disk-io.h b/disk-io.h index c29ee8e2..0158d17a 100644 --- a/disk-io.h +++ b/disk-io.h @@ -35,10 +35,13 @@ static inline u64 btrfs_sb_offset(int mirror) 
struct btrfs_device; +int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror); struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, u64 parent_transid); int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, u64 parent_transid); +int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct extent_buffer *eb); struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); @@ -50,6 +53,9 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, int writes); struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr, int writes); +struct btrfs_fs_info *open_ctree_fs_info_restore(const char *filename, + u64 sb_bytenr, u64 root_tree_bytenr, + int writes, int partial); struct btrfs_fs_info *open_ctree_fs_info(const char *filename, u64 sb_bytenr, u64 root_tree_bytenr, int writes, int partial); diff --git a/extent_io.c b/extent_io.c index 5093aeb2..592acd61 100644 --- a/extent_io.c +++ b/extent_io.c @@ -747,6 +747,93 @@ int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset, return 0; } +int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset, + u64 bytes, int mirror) +{ /* Writes bytes from buf at logical address offset, mapping each piece with btrfs_map_block(WRITE). Pieces that come back with a raid_map go through write_raid56_with_parity(); all others are written to every stripe of the mapping. Returns 0 on success or a negative errno. */ + struct btrfs_multi_bio *multi = NULL; + struct btrfs_device *device; + u64 bytes_left = bytes; + u64 this_len; + u64 total_write = 0; + u64 *raid_map = NULL; + u64 dev_bytenr; + int dev_nr; + int ret = 0; + + while (bytes_left > 0) { + this_len = bytes_left; + dev_nr = 0; + + ret = btrfs_map_block(&info->mapping_tree, WRITE, offset, + &this_len, &multi, mirror, &raid_map); + if (ret) { + fprintf(stderr, "Couldn't map the block %Lu\n", + offset); + return -EIO; + } + + if (raid_map) { + struct extent_buffer *eb; + u64 stripe_len = this_len; + + this_len = min(this_len, bytes_left); + this_len = min(this_len, 
(u64)info->tree_root->leafsize); + + eb = malloc(sizeof(struct extent_buffer) + this_len); + BUG_ON(!eb); + + memset(eb, 0, sizeof(struct extent_buffer) + this_len); + eb->start = offset; + eb->len = this_len; + + memcpy(eb->data, buf + total_write, this_len); + ret = write_raid56_with_parity(info, eb, multi, + stripe_len, raid_map); + BUG_ON(ret); + + free(eb); + kfree(raid_map); + raid_map = NULL; + } else while (dev_nr < multi->num_stripes) { + device = multi->stripes[dev_nr].dev; + if (device->fd == 0) { + kfree(multi); + return -EIO; + } + + dev_bytenr = multi->stripes[dev_nr].physical; + this_len = min(this_len, bytes_left); + dev_nr++; + + ret = pwrite(device->fd, buf + total_write, this_len, dev_bytenr); + if (ret != this_len) { + if (ret < 0) { + fprintf(stderr, "Error writing to " + "device %d\n", errno); + ret = -errno; /* negative, like the -EIO returned by the other failure paths */ + kfree(multi); + return ret; + } else { + fprintf(stderr, "Short write\n"); + kfree(multi); + return -EIO; + } + } + } + + BUG_ON(bytes_left < this_len); + + bytes_left -= this_len; + offset += this_len; + total_write += this_len; + + kfree(multi); + multi = NULL; + } + return 0; +} + + int set_extent_buffer_uptodate(struct extent_buffer *eb) { eb->flags |= EXTENT_UPTODATE; diff --git a/extent_io.h b/extent_io.h index a0308a90..bef7fe5e 100644 --- a/extent_io.h +++ b/extent_io.h @@ -126,4 +126,6 @@ int set_extent_buffer_dirty(struct extent_buffer *eb); int clear_extent_buffer_dirty(struct extent_buffer *eb); int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset, u64 bytes, int mirror); +int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset, + u64 bytes, int mirror); #endif diff --git a/volumes.c b/volumes.c index 013bfd5a..01127e79 100644 --- a/volumes.c +++ b/volumes.c @@ -1774,3 +1774,146 @@ struct list_head *btrfs_scanned_uuids(void) { return &fs_uuids; } + +static int rmw_eb(struct btrfs_fs_info *info, + struct extent_buffer *eb, struct extent_buffer *orig_eb) +{ + int ret; + unsigned long 
orig_off = 0; + unsigned long dest_off = 0; + unsigned long copy_len = eb->len; + + ret = read_whole_eb(info, eb, 0); + if (ret) + return ret; + + if (eb->start + eb->len <= orig_eb->start || + eb->start >= orig_eb->start + orig_eb->len) + return 0; + /* + * | ----- orig_eb ------- | + * | ----- stripe ------- | + * | ----- orig_eb ------- | + * | ----- orig_eb ------- | + */ + if (eb->start > orig_eb->start) + orig_off = eb->start - orig_eb->start; + if (orig_eb->start > eb->start) + dest_off = orig_eb->start - eb->start; + + if (copy_len > orig_eb->len - orig_off) + copy_len = orig_eb->len - orig_off; + if (copy_len > eb->len - dest_off) + copy_len = eb->len - dest_off; + + memcpy(eb->data + dest_off, orig_eb->data + orig_off, copy_len); + return 0; +} + +static void split_eb_for_raid56(struct btrfs_fs_info *info, + struct extent_buffer *orig_eb, + struct extent_buffer **ebs, + u64 stripe_len, u64 *raid_map, + int num_stripes) +{ + struct extent_buffer *eb; + u64 start = orig_eb->start; + u64 this_eb_start; + int i; + int ret; + + for (i = 0; i < num_stripes; i++) { + if (raid_map[i] >= BTRFS_RAID5_P_STRIPE) + break; + + eb = malloc(sizeof(struct extent_buffer) + stripe_len); + if (!eb) + BUG(); + memset(eb, 0, sizeof(struct extent_buffer) + stripe_len); + + eb->start = raid_map[i]; + eb->len = stripe_len; + eb->refs = 1; + eb->flags = 0; + eb->fd = -1; + eb->dev_bytenr = (u64)-1; + + this_eb_start = raid_map[i]; + + if (start > this_eb_start || + start + orig_eb->len < this_eb_start + stripe_len) { + ret = rmw_eb(info, eb, orig_eb); + BUG_ON(ret); + } else { + memcpy(eb->data, orig_eb->data + eb->start - start, stripe_len); + } + ebs[i] = eb; + } +} + +int write_raid56_with_parity(struct btrfs_fs_info *info, + struct extent_buffer *eb, + struct btrfs_multi_bio *multi, + u64 stripe_len, u64 *raid_map) +{ + struct extent_buffer *ebs[multi->num_stripes], *p_eb = NULL, *q_eb = NULL; + int i; + int j; + int ret; + int alloc_size = eb->len; + + if (stripe_len > 
alloc_size) + alloc_size = stripe_len; + + split_eb_for_raid56(info, eb, ebs, stripe_len, raid_map, + multi->num_stripes); + + for (i = 0; i < multi->num_stripes; i++) { + struct extent_buffer *new_eb; + if (raid_map[i] < BTRFS_RAID5_P_STRIPE) { + ebs[i]->dev_bytenr = multi->stripes[i].physical; + ebs[i]->fd = multi->stripes[i].dev->fd; + multi->stripes[i].dev->total_ios++; + BUG_ON(ebs[i]->start != raid_map[i]); + continue; + } + new_eb = kmalloc(sizeof(*eb) + alloc_size, GFP_NOFS); + BUG_ON(!new_eb); + new_eb->dev_bytenr = multi->stripes[i].physical; + new_eb->fd = multi->stripes[i].dev->fd; + multi->stripes[i].dev->total_ios++; + new_eb->len = stripe_len; + + if (raid_map[i] == BTRFS_RAID5_P_STRIPE) + p_eb = new_eb; + else if (raid_map[i] == BTRFS_RAID6_Q_STRIPE) + q_eb = new_eb; + } + if (q_eb) { + void *pointers[multi->num_stripes]; + ebs[multi->num_stripes - 2] = p_eb; + ebs[multi->num_stripes - 1] = q_eb; + + for (i = 0; i < multi->num_stripes; i++) + pointers[i] = ebs[i]->data; + + raid6_gen_syndrome(multi->num_stripes, stripe_len, pointers); + } else { + ebs[multi->num_stripes - 1] = p_eb; + memcpy(p_eb->data, ebs[0]->data, stripe_len); + for (j = 1; j < multi->num_stripes - 1; j++) { + for (i = 0; i < stripe_len; i += sizeof(unsigned long)) { + *(unsigned long *)(p_eb->data + i) ^= + *(unsigned long *)(ebs[j]->data + i); + } + } + } + + for (i = 0; i < multi->num_stripes; i++) { + ret = write_extent_to_disk(ebs[i]); + BUG_ON(ret); + if (ebs[i] != eb) + kfree(ebs[i]); + } + return 0; +} diff --git a/volumes.h b/volumes.h index 911f7881..105179fa 100644 --- a/volumes.h +++ b/volumes.h @@ -190,4 +190,9 @@ int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); struct btrfs_device *btrfs_find_device_by_devid(struct btrfs_root *root, u64 devid, int instance); + +int write_raid56_with_parity(struct btrfs_fs_info *info, + struct extent_buffer *eb, + struct btrfs_multi_bio *multi, + u64 
stripe_len, u64 *raid_map); #endif