From e96b91078f17c7be39ec2053fd26b649b253ecf0 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 1 Sep 2021 23:04:11 +0200 Subject: [PATCH] btrfs-progs: wip raid10c34 Work-in-progress support for the raid10c3 and raid10c4 block group profiles (RAID10 with 3 or 4 copies). Working so far: - mkfs.btrfs -d raid10c3 -m raid10c4 ... - dump-super - dump-tree Signed-off-by: David Sterba --- cmds/rescue-chunk-recover.c | 2 +- common/fsfeatures.c | 9 +++++ ioctl.h | 6 ++++ kernel-shared/ctree.h | 14 +++++++- kernel-shared/extent-tree.c | 4 ++- kernel-shared/print-tree.c | 1 + kernel-shared/volumes.c | 66 +++++++++++++++++++++++++++++-------- kernel-shared/volumes.h | 6 ++-- libbtrfsutil/btrfs.h | 2 ++ mkfs/main.c | 5 +++ 10 files changed, 97 insertions(+), 18 deletions(-) diff --git a/cmds/rescue-chunk-recover.c b/cmds/rescue-chunk-recover.c index da24df4c..64bd9b80 100644 --- a/cmds/rescue-chunk-recover.c +++ b/cmds/rescue-chunk-recover.c @@ -1642,7 +1642,7 @@ static int btrfs_calc_stripe_index(struct chunk_record *chunk, u64 logical) stripe_nr = offset / chunk->stripe_len; if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID0) { index = stripe_nr % chunk->num_stripes; - } else if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID10) { + } else if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID10_MASK) { index = stripe_nr % (chunk->num_stripes / chunk->sub_stripes); index *= chunk->sub_stripes; } else if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID56_MASK) { diff --git a/common/fsfeatures.c b/common/fsfeatures.c index 23a92c21..9864146f 100644 --- a/common/fsfeatures.c +++ b/common/fsfeatures.c @@ -121,6 +121,15 @@ static const struct btrfs_feature mkfs_features[] = { VERSION_NULL(default), .desc = "RAID1 with 3 or 4 copies" }, + { + .name = "raid10c34", + .flag = BTRFS_FEATURE_INCOMPAT_RAID10C34, + .sysfs_name = "raid10c34", + VERSION_TO_STRING2(compat, 5,17), + VERSION_NULL(safe), + VERSION_NULL(default), + .desc = "RAID10 with 3 or 4 copies" + }, #ifdef BTRFS_ZONED { .name = "zoned", diff --git a/ioctl.h b/ioctl.h index 368a87b2..9fc23d1b 100644 --- a/ioctl.h +++ b/ioctl.h @@ -790,6
+790,8 @@ enum btrfs_err_code { BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS, BTRFS_ERROR_DEV_RAID1C3_MIN_NOT_MET, BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET, + BTRFS_ERROR_DEV_RAID10C3_MIN_NOT_MET, + BTRFS_ERROR_DEV_RAID10C4_MIN_NOT_MET, }; /* An error code to error string mapping for the kernel @@ -806,6 +808,10 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code) return "unable to go below four devices on raid1c4"; case BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET: return "unable to go below four/two devices on raid10"; + case BTRFS_ERROR_DEV_RAID10C3_MIN_NOT_MET: + return "unable to go below three devices on raid10c3"; + case BTRFS_ERROR_DEV_RAID10C4_MIN_NOT_MET: + return "unable to go below four devices on raid10c4"; case BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET: return "unable to go below two devices on raid5"; case BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET: diff --git a/kernel-shared/ctree.h b/kernel-shared/ctree.h index ab2aaed6..d52027d9 100644 --- a/kernel-shared/ctree.h +++ b/kernel-shared/ctree.h @@ -511,6 +511,7 @@ BUILD_ASSERT(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE); #define BTRFS_FEATURE_INCOMPAT_RAID1C34 (1ULL << 11) #define BTRFS_FEATURE_INCOMPAT_ZONED (1ULL << 12) #define BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2 (1ULL << 13) +#define BTRFS_FEATURE_INCOMPAT_RAID10C34 (1ULL << 14) #define BTRFS_FEATURE_COMPAT_SUPP 0ULL @@ -552,6 +553,7 @@ BUILD_ASSERT(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE); BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \ BTRFS_FEATURE_INCOMPAT_NO_HOLES | \ BTRFS_FEATURE_INCOMPAT_RAID1C34 | \ + BTRFS_FEATURE_INCOMPAT_RAID10C34 | \ BTRFS_FEATURE_INCOMPAT_METADATA_UUID | \ BTRFS_FEATURE_INCOMPAT_ZONED) #endif @@ -1016,6 +1018,8 @@ struct btrfs_csum_item { #define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8) #define BTRFS_BLOCK_GROUP_RAID1C3 (1ULL << 9) #define BTRFS_BLOCK_GROUP_RAID1C4 (1ULL << 10) +#define BTRFS_BLOCK_GROUP_RAID10C3 (1ULL << 11) +#define BTRFS_BLOCK_GROUP_RAID10C4 (1ULL << 12) #define BTRFS_BLOCK_GROUP_RESERVED 
(BTRFS_AVAIL_ALLOC_BIT_SINGLE | \ BTRFS_SPACE_INFO_GLOBAL_RSV) @@ -1029,6 +1033,8 @@ enum btrfs_raid_types { BTRFS_RAID_RAID6, BTRFS_RAID_RAID1C3, BTRFS_RAID_RAID1C4, + BTRFS_RAID_RAID10C3, + BTRFS_RAID_RAID10C4, BTRFS_NR_RAID_TYPES }; @@ -1043,7 +1049,9 @@ enum btrfs_raid_types { BTRFS_BLOCK_GROUP_RAID1C3 | \ BTRFS_BLOCK_GROUP_RAID1C4 | \ BTRFS_BLOCK_GROUP_DUP | \ - BTRFS_BLOCK_GROUP_RAID10) + BTRFS_BLOCK_GROUP_RAID10 | \ + BTRFS_BLOCK_GROUP_RAID10C3 | \ + BTRFS_BLOCK_GROUP_RAID10C4) #define BTRFS_BLOCK_GROUP_RAID56_MASK (BTRFS_BLOCK_GROUP_RAID5 | \ BTRFS_BLOCK_GROUP_RAID6) @@ -1052,6 +1060,10 @@ enum btrfs_raid_types { BTRFS_BLOCK_GROUP_RAID1C3 | \ BTRFS_BLOCK_GROUP_RAID1C4) +#define BTRFS_BLOCK_GROUP_RAID10_MASK (BTRFS_BLOCK_GROUP_RAID10 | \ + BTRFS_BLOCK_GROUP_RAID10C3 | \ + BTRFS_BLOCK_GROUP_RAID10C4) + /* used in struct btrfs_balance_args fields */ #define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48) diff --git a/kernel-shared/extent-tree.c b/kernel-shared/extent-tree.c index 3713452b..fec91cd7 100644 --- a/kernel-shared/extent-tree.c +++ b/kernel-shared/extent-tree.c @@ -1671,7 +1671,7 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) { u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1_MASK | - BTRFS_BLOCK_GROUP_RAID10 | + BTRFS_BLOCK_GROUP_RAID10_MASK | BTRFS_BLOCK_GROUP_RAID56_MASK | BTRFS_BLOCK_GROUP_DUP); if (extra_flags) { @@ -3129,6 +3129,8 @@ static u64 get_dev_extent_len(struct map_lookup *map) div = map->num_stripes - btrfs_bg_type_to_nparity(map->type); break; case BTRFS_BLOCK_GROUP_RAID10: + case BTRFS_BLOCK_GROUP_RAID10C3: + case BTRFS_BLOCK_GROUP_RAID10C4: div = (map->num_stripes / map->sub_stripes); break; default: diff --git a/kernel-shared/print-tree.c b/kernel-shared/print-tree.c index bd75ae51..8de1a727 100644 --- a/kernel-shared/print-tree.c +++ b/kernel-shared/print-tree.c @@ -1689,6 +1689,7 @@ static struct readable_flag_entry incompat_flags_array[] = { 
DEF_INCOMPAT_FLAG_ENTRY(METADATA_UUID), DEF_INCOMPAT_FLAG_ENTRY(RAID1C34), DEF_INCOMPAT_FLAG_ENTRY(ZONED), + DEF_INCOMPAT_FLAG_ENTRY(RAID10C34), }; static const int incompat_flags_num = sizeof(incompat_flags_array) / sizeof(struct readable_flag_entry); diff --git a/kernel-shared/volumes.c b/kernel-shared/volumes.c index 4274c378..7cbccb90 100644 --- a/kernel-shared/volumes.c +++ b/kernel-shared/volumes.c @@ -47,6 +47,34 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { .bg_flag = BTRFS_BLOCK_GROUP_RAID10, .mindev_error = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET, }, + [BTRFS_RAID_RAID10C3] = { + .sub_stripes = 3, + .dev_stripes = 1, + .devs_max = 0, /* 0 == as many as possible */ + .devs_min = 3, + .tolerated_failures = 2, + .devs_increment = 3, + .ncopies = 3, + .nparity = 0, + .lower_name = "raid10c3", + .upper_name = "RAID10C3", + .bg_flag = BTRFS_BLOCK_GROUP_RAID10C3, + .mindev_error = BTRFS_ERROR_DEV_RAID10C3_MIN_NOT_MET, + }, + [BTRFS_RAID_RAID10C4] = { + .sub_stripes = 4, + .dev_stripes = 1, + .devs_max = 0, /* 0 == as many as possible */ + .devs_min = 4, + .tolerated_failures = 3, + .devs_increment = 4, + .ncopies = 4, + .nparity = 0, + .lower_name = "raid10c4", + .upper_name = "RAID10C4", + .bg_flag = BTRFS_BLOCK_GROUP_RAID10C4, + .mindev_error = BTRFS_ERROR_DEV_RAID10C4_MIN_NOT_MET, + }, [BTRFS_RAID_RAID1] = { .sub_stripes = 1, .dev_stripes = 1, @@ -195,6 +223,10 @@ enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags) { if (flags & BTRFS_BLOCK_GROUP_RAID10) return BTRFS_RAID_RAID10; + else if (flags & BTRFS_BLOCK_GROUP_RAID10C3) + return BTRFS_RAID_RAID10C3; + else if (flags & BTRFS_BLOCK_GROUP_RAID10C4) + return BTRFS_RAID_RAID10C4; else if (flags & BTRFS_BLOCK_GROUP_RAID1) return BTRFS_RAID_RAID1; else if (flags & BTRFS_BLOCK_GROUP_RAID1C3) @@ -1093,7 +1125,7 @@ static u64 chunk_bytes_by_type(struct alloc_chunk_ctl *ctl) if (type & (BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_DUP)) return stripe_size; - else if (type & 
BTRFS_BLOCK_GROUP_RAID10) + else if (type & (BTRFS_BLOCK_GROUP_RAID10_MASK)) return stripe_size * (ctl->num_stripes / ctl->sub_stripes); else if (type & BTRFS_BLOCK_GROUP_RAID56_MASK) return stripe_size * (ctl->num_stripes - btrfs_bg_type_to_nparity(type)); @@ -1302,6 +1334,8 @@ static void init_alloc_chunk_ctl(struct btrfs_fs_info *info, break; case BTRFS_RAID_RAID0: case BTRFS_RAID_RAID10: + case BTRFS_RAID_RAID10C3: + case BTRFS_RAID_RAID10C4: case BTRFS_RAID_RAID5: case BTRFS_RAID_RAID6: ctl->num_stripes = min(ctl->max_stripes, ctl->total_devs); @@ -1549,11 +1583,9 @@ again: list_splice(&private_devs, dev_list); if (index >= ctl.min_stripes) { ctl.num_stripes = index; - if (type & (BTRFS_BLOCK_GROUP_RAID10)) { - /* We know this should be 2, but just in case */ - ASSERT(is_power_of_2(ctl.sub_stripes)); - ctl.num_stripes = round_down(ctl.num_stripes, - ctl.sub_stripes); + if (type & (BTRFS_BLOCK_GROUP_RAID10_MASK)) { + ctl.num_stripes /= ctl.sub_stripes; + ctl.num_stripes *= ctl.sub_stripes; } looped = 1; goto again; @@ -1643,7 +1675,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1_MASK)) ret = map->num_stripes; - else if (map->type & BTRFS_BLOCK_GROUP_RAID10) + else if (map->type & (BTRFS_BLOCK_GROUP_RAID10_MASK)) ret = map->sub_stripes; else if (map->type & BTRFS_BLOCK_GROUP_RAID5) ret = 2; @@ -1709,7 +1741,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start, length = ce->size; rmap_len = map->stripe_len; - if (map->type & BTRFS_BLOCK_GROUP_RAID10) + if (map->type & BTRFS_BLOCK_GROUP_RAID10_MASK) length = ce->size / (map->num_stripes / map->sub_stripes); else if (map->type & BTRFS_BLOCK_GROUP_RAID0) length = ce->size / map->num_stripes; @@ -1728,7 +1760,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start, stripe_nr = (physical - map->stripes[i].physical) / map->stripe_len; - if (map->type & BTRFS_BLOCK_GROUP_RAID10) { + if 
(map->type & BTRFS_BLOCK_GROUP_RAID10_MASK) { stripe_nr = (stripe_nr * map->num_stripes + i) / map->sub_stripes; } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) { @@ -1838,7 +1870,7 @@ again: if (map->type & (BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_DUP)) { stripes_required = map->num_stripes; - } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { + } else if (map->type & (BTRFS_BLOCK_GROUP_RAID10_MASK)) { stripes_required = map->sub_stripes; } } @@ -1879,7 +1911,7 @@ again: if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID56_MASK | - BTRFS_BLOCK_GROUP_RAID10 | + BTRFS_BLOCK_GROUP_RAID10_MASK | BTRFS_BLOCK_GROUP_DUP)) { /* we limit the length of each bio to what fits in a stripe */ *length = min_t(u64, ce->size - offset, @@ -1900,7 +1932,7 @@ again: stripe_index = mirror_num - 1; else stripe_index = stripe_nr % map->num_stripes; - } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { + } else if (map->type & BTRFS_BLOCK_GROUP_RAID10_MASK) { int factor = map->num_stripes / map->sub_stripes; stripe_index = stripe_nr % factor; @@ -2193,8 +2225,14 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, */ min_devs = btrfs_bg_type_to_devs_min(type); table_sub_stripes = btrfs_bg_type_to_sub_stripes(type); - if ((type & BTRFS_BLOCK_GROUP_RAID10 && (sub_stripes != table_sub_stripes || + printf("sub_stripes %d\ntable_sub_stripes %d\nnum_stripes %d\n", + sub_stripes, table_sub_stripes, num_stripes); +#if 0 + /* FIXME: kernel does not check the alignment */ + if ((type & BTRFS_BLOCK_GROUP_RAID10_MASK && (sub_stripes != table_sub_stripes || !IS_ALIGNED(num_stripes, sub_stripes))) || +#endif + if ((type & BTRFS_BLOCK_GROUP_RAID10_MASK && (sub_stripes != table_sub_stripes)) || (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < min_devs) || (type & BTRFS_BLOCK_GROUP_RAID1C3 && num_stripes < min_devs) || (type & BTRFS_BLOCK_GROUP_RAID1C4 && num_stripes < min_devs) || @@ -2773,6 +2811,8 @@ u64 btrfs_stripe_length(struct 
btrfs_fs_info *fs_info, stripe_len = chunk_len / (num_stripes - btrfs_bg_type_to_nparity(profile)); break; case BTRFS_BLOCK_GROUP_RAID10: + case BTRFS_BLOCK_GROUP_RAID10C3: + case BTRFS_BLOCK_GROUP_RAID10C4: stripe_len = chunk_len / (num_stripes / btrfs_chunk_sub_stripes(leaf, chunk)); break; diff --git a/kernel-shared/volumes.h b/kernel-shared/volumes.h index 5cfe7e39..0412e49d 100644 --- a/kernel-shared/volumes.h +++ b/kernel-shared/volumes.h @@ -213,6 +213,8 @@ static inline int check_crossing_stripes(struct btrfs_fs_info *fs_info, (bg_offset + len - 1) / BTRFS_STRIPE_LEN); } +/* FIXME: ad-hoc forward declaration, needed by calc_stripe_length() below */ +int btrfs_bg_type_to_sub_stripes(u64 flags); static inline u64 calc_stripe_length(u64 type, u64 length, int num_stripes) { u64 stripe_size; @@ -220,8 +222,8 @@ static inline u64 calc_stripe_length(u64 type, u64 length, int num_stripes) if (type & BTRFS_BLOCK_GROUP_RAID0) { stripe_size = length; stripe_size /= num_stripes; - } else if (type & BTRFS_BLOCK_GROUP_RAID10) { - stripe_size = length * 2; + } else if (type & BTRFS_BLOCK_GROUP_RAID10_MASK) { + stripe_size = length * btrfs_bg_type_to_sub_stripes(type); stripe_size /= num_stripes; } else if (type & BTRFS_BLOCK_GROUP_RAID56_MASK) { stripe_size = length; diff --git a/libbtrfsutil/btrfs.h b/libbtrfsutil/btrfs.h index 0d863d58..4d0c1847 100644 --- a/libbtrfsutil/btrfs.h +++ b/libbtrfsutil/btrfs.h @@ -838,6 +838,8 @@ enum btrfs_err_code { BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS, BTRFS_ERROR_DEV_RAID1C3_MIN_NOT_MET, BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET, + BTRFS_ERROR_DEV_RAID10C3_MIN_NOT_MET, + BTRFS_ERROR_DEV_RAID10C4_MIN_NOT_MET, }; #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ diff --git a/mkfs/main.c b/mkfs/main.c index 2c4b7b00..7f58b31e 100644 --- a/mkfs/main.c +++ b/mkfs/main.c @@ -1229,6 +1229,11 @@ int BOX_MAIN(mkfs)(int argc, char **argv) runtime_features |= BTRFS_RUNTIME_FEATURE_FREE_SPACE_TREE; } + if ((data_profile | metadata_profile) & + (BTRFS_BLOCK_GROUP_RAID10C3 | BTRFS_BLOCK_GROUP_RAID10C4)) { +
features |= BTRFS_FEATURE_INCOMPAT_RAID10C34; + } + if (zoned) { if (source_dir_set) { error("the option -r and zoned mode are incompatible");