From fd2d0af0bf80c127fbb3b84817a420fbc490da5d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 Apr 2008 16:28:12 -0400 Subject: [PATCH] Retry metadata reads in the face of checksum failures --- disk-io.c | 61 +++++++++++++++++++++++++++++++++++-------------------- disk-io.h | 2 ++ utils.c | 8 ++++++++ volumes.c | 26 +++++++++++++++++++++++- volumes.h | 3 ++- 5 files changed, 76 insertions(+), 24 deletions(-) diff --git a/disk-io.c b/disk-io.c index d49f2cea..8150d73b 100644 --- a/disk-io.c +++ b/disk-io.c @@ -38,12 +38,12 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) { if (buf->start != btrfs_header_bytenr(buf)) - BUG(); + return 1; if (memcmp_extent_buffer(buf, root->fs_info->fsid, (unsigned long)btrfs_header_fsid(buf), BTRFS_FSID_SIZE)) - BUG(); + return 1; return 0; } @@ -57,8 +57,8 @@ void btrfs_csum_final(u32 crc, char *result) *(__le32 *)result = ~cpu_to_le32(crc); } -static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, - int verify) +int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, + int verify) { char result[BTRFS_CRC32_SIZE]; u32 len; @@ -70,8 +70,9 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, if (verify) { if (memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) { - printk("checksum verify failed on %llu\n", - (unsigned long long)buf->start); + printk("checksum verify failed on %llu wanted %X " + "found %X\n", (unsigned long long)buf->start, + *((int *)result), *((int *)buf)); return 1; } } else { @@ -112,7 +113,7 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) dev_nr = 0; length = blocksize; ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, - bytenr, &length, &multi); + bytenr, &length, &multi, 0); BUG_ON(ret); device = multi->stripes[0].dev; device->total_ios++; @@ -131,6 +132,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u64 length; struct btrfs_multi_bio *multi = NULL; struct btrfs_device *device; + int mirror_num = 0; + int num_copies; eb = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!eb) @@ -141,21 +144,35 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, dev_nr = 0; length = blocksize; - ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, - eb->start, &length, &multi); - BUG_ON(ret); - device = multi->stripes[0].dev; - eb->fd = device->fd; - device->total_ios++; - eb->dev_bytenr = multi->stripes[0].physical; - ret = read_extent_from_disk(eb); - if (ret) { - free_extent_buffer(eb); - return NULL; + while (1) { + ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, + eb->start, &length, &multi, mirror_num); + BUG_ON(ret); + device = multi->stripes[0].dev; + eb->fd = device->fd; + device->total_ios++; + eb->dev_bytenr = multi->stripes[0].physical; + kfree(multi); + ret = read_extent_from_disk(eb); + if (ret == 0 && check_tree_block(root, eb) == 0 && + csum_tree_block(root, eb, 1) == 0) { + btrfs_set_buffer_uptodate(eb); + return eb; + } + num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, + eb->start, eb->len); + if (num_copies == 1) { +printk("reading %Lu failed only one copy\n", eb->start); + break; + } + mirror_num++; + if (mirror_num > num_copies) { +printk("bailing at mirror %d of %d\n", mirror_num, num_copies); + break; + } } - btrfs_set_buffer_uptodate(eb); - kfree(multi); - return eb; + free_extent_buffer(eb); + return NULL; } int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -177,7 +194,7 @@ int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, dev_nr = 0; length = eb->len; ret = btrfs_map_block(&root->fs_info->mapping_tree, WRITE, - eb->start, &length, &multi); + eb->start, &length, &multi, 0); while(dev_nr < multi->num_stripes) { BUG_ON(ret); eb->fd = multi->stripes[dev_nr].dev->fd; diff --git a/disk-io.h b/disk-io.h index f680721a..1b5cbdfb 100644 --- a/disk-io.h +++ b/disk-io.h @@ -52,4 +52,6 @@ void btrfs_csum_final(u32 crc, char *result); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_open_device(struct btrfs_device *dev); +int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, + int verify); #endif diff --git a/utils.c b/utils.c index 55d3f5f1..9d162da7 100644 --- a/utils.c +++ b/utils.c @@ -106,6 +106,7 @@ int make_btrfs(int fd, char *device_name, /* create the tree of root objects */ memset(buf->data, 0, leafsize); + buf->len = leafsize; btrfs_set_header_bytenr(buf, blocks[1]); btrfs_set_header_nritems(buf, 3); btrfs_set_header_generation(buf, 1); @@ -165,6 +166,7 @@ int make_btrfs(int fd, char *device_name, nritems++; + csum_tree_block(NULL, buf, 0); ret = pwrite(fd, buf->data, leafsize, blocks[1]); BUG_ON(ret != leafsize); @@ -229,6 +231,7 @@ int make_btrfs(int fd, char *device_name, btrfs_set_header_bytenr(buf, blocks[2]); btrfs_set_header_owner(buf, BTRFS_EXTENT_TREE_OBJECTID); btrfs_set_header_nritems(buf, nritems); + csum_tree_block(NULL, buf, 0); ret = pwrite(fd, buf->data, leafsize, blocks[2]); BUG_ON(ret != leafsize); @@ -302,6 +305,7 @@ int make_btrfs(int fd, char *device_name, btrfs_set_header_bytenr(buf, blocks[3]); btrfs_set_header_owner(buf, BTRFS_CHUNK_TREE_OBJECTID); btrfs_set_header_nritems(buf, nritems); + csum_tree_block(NULL, buf, 0); ret = pwrite(fd, buf->data, leafsize, blocks[3]); /* create the device tree */ @@ -325,12 +329,14 @@ int make_btrfs(int fd, char *device_name, btrfs_set_header_bytenr(buf, blocks[4]); btrfs_set_header_owner(buf, BTRFS_DEV_TREE_OBJECTID); btrfs_set_header_nritems(buf, nritems); + csum_tree_block(NULL, buf, 0); ret = pwrite(fd, buf->data, leafsize, blocks[4]); /* finally create the FS root */ btrfs_set_header_bytenr(buf, blocks[5]); btrfs_set_header_owner(buf, BTRFS_FS_TREE_OBJECTID); btrfs_set_header_nritems(buf, 0); + csum_tree_block(NULL, buf, 0); ret = pwrite(fd, buf->data, leafsize, blocks[5]); BUG_ON(ret != leafsize); @@ -338,6 +344,8 @@ int make_btrfs(int fd, char *device_name, BUG_ON(sizeof(super) > sectorsize); memset(buf->data, 0, sectorsize); memcpy(buf->data, &super, sizeof(super)); + buf->len = sectorsize; + csum_tree_block(NULL, buf, 0); ret = pwrite(fd, buf->data, sectorsize, blocks[0]); BUG_ON(ret != sectorsize); diff --git a/volumes.c b/volumes.c index ef2f59ca..44e67291 100644 --- a/volumes.c +++ b/volumes.c @@ -732,9 +732,29 @@ void btrfs_mapping_init(struct btrfs_mapping_tree *tree) cache_tree_init(&tree->cache_tree); } +int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) +{ + struct cache_extent *ce; + struct map_lookup *map; + int ret; + u64 offset; + + ce = find_first_cache_extent(&map_tree->cache_tree, logical); + BUG_ON(!ce); + BUG_ON(ce->start > logical || ce->start + ce->size < logical); + map = container_of(ce, struct map_lookup, ce); + + offset = logical - ce->start; + if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1)) + ret = map->num_stripes; + else + ret = 1; + return ret; +} + int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 logical, u64 *length, - struct btrfs_multi_bio **multi_ret) + struct btrfs_multi_bio **multi_ret, int mirror_num) { struct cache_extent *ce; struct map_lookup *map; @@ -802,11 +822,15 @@ again: if (map->type & BTRFS_BLOCK_GROUP_RAID1) { if (rw == WRITE) multi->num_stripes = map->num_stripes; + else if (mirror_num) + stripe_index = mirror_num - 1; else stripe_index = stripe_nr % map->num_stripes; } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { if (rw == WRITE) multi->num_stripes = map->num_stripes; + else if (mirror_num) + stripe_index = mirror_num - 1; } else { /* * after this do_div call, stripe_nr is the number of stripes diff --git a/volumes.h b/volumes.h index 9928c1f9..be543163 100644 --- a/volumes.h +++ b/volumes.h @@ -85,7 +85,7 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, u64 owner, u64 num_bytes, u64 *start); int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 logical, u64 *length, - struct btrfs_multi_bio **multi_ret); + struct btrfs_multi_bio **multi_ret, int mirror_num); int btrfs_read_sys_array(struct btrfs_root *root); int btrfs_read_chunk_tree(struct btrfs_root *root); int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, @@ -104,4 +104,5 @@ int btrfs_add_device(struct btrfs_trans_handle *trans, int btrfs_scan_one_device(int fd, const char *path, struct btrfs_fs_devices **fs_devices_ret, u64 *total_devs, u64 super_offset); +int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len); #endif