diff --git a/kernel-shared/ctree.h b/kernel-shared/ctree.h
index 5023db47..a68c8bd3 100644
--- a/kernel-shared/ctree.h
+++ b/kernel-shared/ctree.h
@@ -1134,6 +1134,12 @@ struct btrfs_block_group {
 
 	/* For dirty block groups */
 	struct list_head dirty_list;
+
+	/*
+	 * Allocation offset for the block group to implement sequential
+	 * allocation. This is used only with ZONED mode enabled.
+	 */
+	u64 alloc_offset;
 };
 
 struct btrfs_device;
diff --git a/kernel-shared/extent-tree.c b/kernel-shared/extent-tree.c
index 5b1fbe10..ec5ea9a8 100644
--- a/kernel-shared/extent-tree.c
+++ b/kernel-shared/extent-tree.c
@@ -31,6 +31,7 @@
 #include "kernel-shared/volumes.h"
 #include "kernel-shared/free-space-cache.h"
 #include "kernel-shared/free-space-tree.h"
+#include "kernel-shared/zoned.h"
 #include "common/utils.h"
 
 #define PENDING_EXTENT_INSERT 0
@@ -2704,6 +2705,10 @@ static int read_one_block_group(struct btrfs_fs_info *fs_info,
 	}
 	cache->space_info = space_info;
 
+	ret = btrfs_load_block_group_zone_info(fs_info, cache);
+	if (ret)
+		return ret;
+
 	btrfs_add_block_group_cache(fs_info, cache);
 	return 0;
 }
@@ -2761,6 +2766,9 @@ btrfs_add_block_group(struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type,
 	cache->start = chunk_offset;
 	cache->length = size;
 
+	ret = btrfs_load_block_group_zone_info(fs_info, cache);
+	BUG_ON(ret);
+
 	cache->used = bytes_used;
 	cache->flags = type;
 	INIT_LIST_HEAD(&cache->dirty_list);
diff --git a/kernel-shared/zoned.c b/kernel-shared/zoned.c
index a0d8a7f6..6894d421 100644
--- a/kernel-shared/zoned.c
+++ b/kernel-shared/zoned.c
@@ -14,6 +14,10 @@
 
 /* Maximum number of zones to report per ioctl(BLKREPORTZONE) call */
 #define BTRFS_REPORT_NR_ZONES 4096
+/* Invalid allocation pointer value for missing devices */
+#define WP_MISSING_DEV ((u64)-1)
+/* Pseudo write pointer value for conventional zone */
+#define WP_CONVENTIONAL ((u64)-2)
 
 /*
  * Location of the first zone of superblock logging zone pairs.
@@ -644,6 +648,135 @@ u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start,
 	return pos;
 }
 
+int btrfs_load_block_group_zone_info(struct btrfs_fs_info *fs_info,
+				     struct btrfs_block_group *cache)
+{
+	struct btrfs_device *device;
+	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+	struct cache_extent *ce;
+	struct map_lookup *map;
+	u64 logical = cache->start;
+	u64 length = cache->length;
+	u64 physical = 0;
+	int ret = 0;
+	int i;
+	u64 *alloc_offsets = NULL;
+	u32 num_sequential = 0, num_conventional = 0;
+
+	if (!btrfs_is_zoned(fs_info))
+		return 0;
+
+	/* Sanity check */
+	if (logical == BTRFS_BLOCK_RESERVED_1M_FOR_SUPER) {
+		if (length + SZ_1M != fs_info->zone_size) {
+			error("zoned: unaligned initial system block group");
+			return -EIO;
+		}
+	} else if (!IS_ALIGNED(length, fs_info->zone_size)) {
+		error("zoned: unaligned block group at %llu + %llu", logical,
+		      length);
+		return -EIO;
+	}
+
+	/* Get the chunk mapping */
+	ce = search_cache_extent(&map_tree->cache_tree, logical);
+	if (!ce) {
+		error("zoned: failed to find block group at %llu", logical);
+		return -ENOENT;
+	}
+	map = container_of(ce, struct map_lookup, ce);
+
+	alloc_offsets = calloc(map->num_stripes, sizeof(*alloc_offsets));
+	if (!alloc_offsets) {
+		error("zoned: failed to allocate alloc_offsets");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < map->num_stripes; i++) {
+		bool is_sequential;
+		struct blk_zone zone;
+
+		device = map->stripes[i].dev;
+		physical = map->stripes[i].physical;
+
+		if (device->fd == -1) {
+			alloc_offsets[i] = WP_MISSING_DEV;
+			continue;
+		}
+
+		is_sequential = btrfs_dev_is_sequential(device, physical);
+		if (is_sequential)
+			num_sequential++;
+		else
+			num_conventional++;
+
+		if (!is_sequential) {
+			alloc_offsets[i] = WP_CONVENTIONAL;
+			continue;
+		}
+
+		/*
+		 * The group is mapped to a sequential zone. Get the zone write
+		 * pointer to determine the allocation offset within the zone.
+		 */
+		WARN_ON(!IS_ALIGNED(physical, fs_info->zone_size));
+		zone = device->zone_info->zones[physical / fs_info->zone_size];
+
+		switch (zone.cond) {
+		case BLK_ZONE_COND_OFFLINE:
+		case BLK_ZONE_COND_READONLY:
+			error(
+		"zoned: offline/readonly zone %llu on device %s (devid %llu)",
+			      physical / fs_info->zone_size, device->name,
+			      device->devid);
+			alloc_offsets[i] = WP_MISSING_DEV;
+			break;
+		case BLK_ZONE_COND_EMPTY:
+			alloc_offsets[i] = 0;
+			break;
+		case BLK_ZONE_COND_FULL:
+			alloc_offsets[i] = fs_info->zone_size;
+			break;
+		default:
+			/* Partially used zone */
+			alloc_offsets[i] =
+				((zone.wp - zone.start) << SECTOR_SHIFT);
+			break;
+		}
+	}
+
+	if (num_conventional > 0) {
+		/*
+		 * Since conventional zones do not have a write pointer, we
+		 * cannot determine alloc_offset from the pointer
+		 */
+		ret = -EINVAL;
+		goto out;
+	}
+
+	switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+	case 0: /* single */
+		cache->alloc_offset = alloc_offsets[0];
+		break;
+	case BTRFS_BLOCK_GROUP_DUP:
+	case BTRFS_BLOCK_GROUP_RAID1:
+	case BTRFS_BLOCK_GROUP_RAID0:
+	case BTRFS_BLOCK_GROUP_RAID10:
+	case BTRFS_BLOCK_GROUP_RAID5:
+	case BTRFS_BLOCK_GROUP_RAID6:
+		/* non-single profiles are not supported yet */
+	default:
+		error("zoned: profile %s not yet supported",
+		      btrfs_group_profile_str(map->type));
+		ret = -EINVAL;
+		goto out;
+	}
+
+out:
+	free(alloc_offsets);
+	return ret;
+}
+
 #endif
 
 int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info)
diff --git a/kernel-shared/zoned.h b/kernel-shared/zoned.h
index 853bb7c5..f9e30e16 100644
--- a/kernel-shared/zoned.h
+++ b/kernel-shared/zoned.h
@@ -88,6 +88,8 @@ static inline bool btrfs_dev_is_empty_zone(struct btrfs_device *device, u64 pos)
 int btrfs_reset_dev_zone(int fd, struct blk_zone *zone);
 u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start,
 				 u64 hole_end, u64 num_bytes);
+int btrfs_load_block_group_zone_info(struct btrfs_fs_info *fs_info,
+				     struct btrfs_block_group *cache);
 
 #else
 
@@ -119,6 +121,12 @@ static inline bool btrfs_dev_is_empty_zone(struct btrfs_device *device, u64 pos)
 	return true;
 }
 
+static inline int btrfs_load_block_group_zone_info(
+		struct btrfs_fs_info *fs_info, struct btrfs_block_group *cache)
+{
+	return 0;
+}
+
 #endif /* BTRFS_ZONED */
 
 static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
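
Note (not part of the patch): the "partially used zone" case is easy to get wrong
because BLKREPORTZONE reports zone.start and zone.wp in 512-byte sectors, so the
byte offset within the zone is (wp - start) << SECTOR_SHIFT. The same unit of
accounting explains the sanity check above: the first block group begins right
behind the 1 MiB reserved for superblocks, so its length plus SZ_1M must fill
exactly one zone. The standalone sketch below demonstrates the write pointer
arithmetic the patch performs; zone_alloc_offset() and the sample values are
illustrative only, and the offline/readonly handling is omitted.

/*
 * Standalone sketch (illustrative): derive the in-zone allocation offset
 * from a zone report, mirroring the switch on zone.cond in the patch.
 */
#include <linux/blkzoned.h>
#include <stdio.h>

#define SECTOR_SHIFT 9	/* struct blk_zone fields are in 512-byte sectors */

static unsigned long long zone_alloc_offset(const struct blk_zone *zone,
					    unsigned long long zone_size)
{
	switch (zone->cond) {
	case BLK_ZONE_COND_EMPTY:
		return 0;		/* nothing written yet */
	case BLK_ZONE_COND_FULL:
		return zone_size;	/* zone completely written */
	default:
		/* Partially used: byte offset of the write pointer. */
		return (zone->wp - zone->start) << SECTOR_SHIFT;
	}
}

int main(void)
{
	/* A 256 MiB zone starting at sector 524288 with 1 MiB written. */
	struct blk_zone zone = {
		.start = 524288,
		.wp = 524288 + 2048,	/* 2048 sectors = 1 MiB */
		.cond = BLK_ZONE_COND_IMP_OPEN,
	};

	/* Prints 1048576: allocation resumes 1 MiB into the zone. */
	printf("%llu\n", zone_alloc_offset(&zone, 256ULL << 20));
	return 0;
}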
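
Note (not part of the patch): this change only loads cache->alloc_offset; the
extent allocator changes that consume it come later in the series. The intent is
that a sequential allocator hands out extents at cache->start + cache->alloc_offset
and only ever moves the offset forward, in lockstep with the device write pointer.
A minimal sketch of that consumption is below; the helper name zoned_alloc_hint is
hypothetical, and locking, non-single profiles, and WP_MISSING_DEV are ignored.

/* Hypothetical sketch: sequential allocation within one block group. */
static u64 zoned_alloc_hint(struct btrfs_block_group *cache, u64 num_bytes)
{
	u64 start;

	/* Not enough sequential space left in this block group. */
	if (cache->alloc_offset + num_bytes > cache->length)
		return (u64)-1;

	start = cache->start + cache->alloc_offset;
	cache->alloc_offset += num_bytes;
	return start;
}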