diff --git a/include/kerncompat.h b/include/kerncompat.h index 4ba6b3dc..18b474f6 100644 --- a/include/kerncompat.h +++ b/include/kerncompat.h @@ -684,6 +684,11 @@ static inline bool refcount_dec_and_test(refcount_t *ref) return ref->refs == 0; } +static inline int refcount_read(const refcount_t *ref) +{ + return ref->refs; +} + typedef u32 blk_status_t; typedef u32 blk_opf_t; typedef int atomic_t; @@ -813,6 +818,8 @@ static inline void lockdep_assert_held_read(struct rw_semaphore *sem) { } +#define lockdep_assert_held(sem) + static inline bool cond_resched_lock(spinlock_t *lock) { return false; diff --git a/kernel-shared/delayed-ref.c b/kernel-shared/delayed-ref.c index a6b92ecf..9b24001a 100644 --- a/kernel-shared/delayed-ref.c +++ b/kernel-shared/delayed-ref.c @@ -22,6 +22,10 @@ #include "kernel-shared/transaction.h" #include "kernel-shared/messages.h" +struct kmem_cache *btrfs_delayed_ref_head_cachep; +struct kmem_cache *btrfs_delayed_tree_ref_cachep; +struct kmem_cache *btrfs_delayed_data_ref_cachep; +struct kmem_cache *btrfs_delayed_extent_op_cachep; /* * delayed back reference update tracking. For subvolume trees * we queue up extent allocations and backref maintenance for @@ -51,6 +55,34 @@ static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1, return 0; } +/* + * compare two delayed data backrefs with same bytenr and type + */ +static int comp_data_refs(struct btrfs_delayed_data_ref *ref1, + struct btrfs_delayed_data_ref *ref2) +{ + if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) { + if (ref1->root < ref2->root) + return -1; + if (ref1->root > ref2->root) + return 1; + if (ref1->objectid < ref2->objectid) + return -1; + if (ref1->objectid > ref2->objectid) + return 1; + if (ref1->offset < ref2->offset) + return -1; + if (ref1->offset > ref2->offset) + return 1; + } else { + if (ref1->parent < ref2->parent) + return -1; + if (ref1->parent > ref2->parent) + return 1; + } + return 0; +} + static int comp_refs(struct btrfs_delayed_ref_node *ref1, struct btrfs_delayed_ref_node *ref2, bool check_seq) @@ -66,8 +98,8 @@ static int comp_refs(struct btrfs_delayed_ref_node *ref1, ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1), btrfs_delayed_node_to_tree_ref(ref2)); else - BUG(); - + ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1), + btrfs_delayed_node_to_data_ref(ref2)); if (ret) return ret; if (check_seq) { @@ -299,7 +331,7 @@ again: href_node); } - head->processing = 1; + head->processing = true; WARN_ON(delayed_refs->num_heads_ready == 0); delayed_refs->num_heads_ready--; delayed_refs->run_delayed_start = head->bytenr + @@ -307,6 +339,20 @@ again: return head; } +void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head) +{ + lockdep_assert_held(&delayed_refs->lock); + lockdep_assert_held(&head->lock); + + rb_erase(&head->href_node, &delayed_refs->href_root); + RB_CLEAR_NODE(&head->href_node); + atomic_dec(&delayed_refs->num_entries); + delayed_refs->num_heads--; + if (!head->processing) + delayed_refs->num_heads_ready--; +} + /* * Helper to insert the ref_node to the tail or merge with tail. * @@ -431,7 +477,7 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref, bool is_system) { int count_mod = 1; - int must_insert_reserved = 0; + bool must_insert_reserved = false; /* If reserved is provided, it must be a data extent. */ BUG_ON(!is_data && reserved); @@ -456,11 +502,11 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref, * BTRFS_ADD_DELAYED_REF because other special casing is not required. */ if (action == BTRFS_ADD_DELAYED_EXTENT) - must_insert_reserved = 1; + must_insert_reserved = true; else - must_insert_reserved = 0; + must_insert_reserved = false; - head_ref->refs = 1; + refcount_set(&head_ref->refs, 1); head_ref->bytenr = bytenr; head_ref->num_bytes = num_bytes; head_ref->ref_mod = count_mod; @@ -470,7 +516,7 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref, head_ref->ref_tree = RB_ROOT; INIT_LIST_HEAD(&head_ref->ref_add_list); RB_CLEAR_NODE(&head_ref->href_node); - head_ref->processing = 0; + head_ref->processing = false; head_ref->total_ref_mod = count_mod; } @@ -546,7 +592,7 @@ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info, if (action == BTRFS_ADD_DELAYED_EXTENT) action = BTRFS_ADD_DELAYED_REF; - ref->refs = 1; + refcount_set(&ref->refs, 1); ref->bytenr = bytenr; ref->num_bytes = num_bytes; ref->ref_mod = 1; @@ -642,3 +688,47 @@ void btrfs_destroy_delayed_refs(struct btrfs_trans_handle *trans) ASSERT(cleanup_ref_head(trans, fs_info, head) == 0); } } + +void __cold btrfs_delayed_ref_exit(void) +{ + kmem_cache_destroy(btrfs_delayed_ref_head_cachep); + kmem_cache_destroy(btrfs_delayed_tree_ref_cachep); + kmem_cache_destroy(btrfs_delayed_data_ref_cachep); + kmem_cache_destroy(btrfs_delayed_extent_op_cachep); +} + +int __init btrfs_delayed_ref_init(void) +{ + btrfs_delayed_ref_head_cachep = kmem_cache_create( + "btrfs_delayed_ref_head", + sizeof(struct btrfs_delayed_ref_head), 0, + SLAB_MEM_SPREAD, NULL); + if (!btrfs_delayed_ref_head_cachep) + goto fail; + + btrfs_delayed_tree_ref_cachep = kmem_cache_create( + "btrfs_delayed_tree_ref", + sizeof(struct btrfs_delayed_tree_ref), 0, + SLAB_MEM_SPREAD, NULL); + if (!btrfs_delayed_tree_ref_cachep) + goto fail; + + btrfs_delayed_data_ref_cachep = kmem_cache_create( + "btrfs_delayed_data_ref", + sizeof(struct btrfs_delayed_data_ref), 0, + SLAB_MEM_SPREAD, NULL); + if (!btrfs_delayed_data_ref_cachep) + goto fail; + + btrfs_delayed_extent_op_cachep = kmem_cache_create( + "btrfs_delayed_extent_op", + sizeof(struct btrfs_delayed_extent_op), 0, + SLAB_MEM_SPREAD, NULL); + if (!btrfs_delayed_extent_op_cachep) + goto fail; + + return 0; +fail: + btrfs_delayed_ref_exit(); + return -ENOMEM; +} diff --git a/kernel-shared/delayed-ref.h b/kernel-shared/delayed-ref.h index 14e78704..6f817b1f 100644 --- a/kernel-shared/delayed-ref.h +++ b/kernel-shared/delayed-ref.h @@ -22,6 +22,8 @@ #include "kerncompat.h" +struct btrfs_block_rsv; + /* these are the possible values of struct btrfs_delayed_ref_node->action */ #define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */ #define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */ @@ -47,7 +49,7 @@ struct btrfs_delayed_ref_node { u64 seq; /* ref count on this data structure */ - u64 refs; + refcount_t refs; /* * how many refs is this entry adding or deleting. For @@ -85,14 +87,26 @@ struct btrfs_delayed_extent_op { struct btrfs_delayed_ref_head { u64 bytenr; u64 num_bytes; - u64 refs; + /* + * For insertion into struct btrfs_delayed_ref_root::href_root. + * Keep it in the same cache line as 'bytenr' for more efficient + * searches in the rbtree. + */ + struct rb_node href_node; + /* + * the mutex is held while running the refs, and it is also + * held when checking the sum of reference modifications. + */ + struct mutex mutex; + refcount_t refs; + + /* Protects 'ref_tree' and 'ref_add_list'. */ + spinlock_t lock; struct rb_root ref_tree; /* accumulate add BTRFS_ADD_DELAYED_REF nodes to this ref_add_list. */ struct list_head ref_add_list; - struct rb_node href_node; - struct btrfs_delayed_extent_op *extent_op; /* @@ -122,10 +136,10 @@ struct btrfs_delayed_ref_head { * we need to update the in ram accounting to properly reflect * the free has happened. */ - unsigned int must_insert_reserved:1; - unsigned int is_data:1; - unsigned int is_system:1; - unsigned int processing:1; + bool must_insert_reserved; + bool is_data; + bool is_system; + bool processing; }; struct btrfs_delayed_tree_ref { @@ -135,6 +149,19 @@ struct btrfs_delayed_tree_ref { int level; }; +struct btrfs_delayed_data_ref { + struct btrfs_delayed_ref_node node; + u64 root; + u64 parent; + u64 objectid; + u64 offset; +}; + +enum btrfs_delayed_ref_flags { + /* Indicate that we are flushing delayed refs for the commit */ + BTRFS_DELAYED_REFS_FLUSHING, +}; + struct btrfs_delayed_ref_root { /* head ref rbtree */ struct rb_root href_root; @@ -142,22 +169,158 @@ struct btrfs_delayed_ref_root { /* dirty extent records */ struct rb_root dirty_extent_root; + /* this spin lock protects the rbtree and the entries inside */ + spinlock_t lock; + + /* how many delayed ref updates we've queued, used by the + * throttling code + */ + atomic_t num_entries; + /* total number of head nodes in tree */ unsigned long num_heads; /* total number of head nodes ready for processing */ unsigned long num_heads_ready; - /* - * set when the tree is flushing before a transaction commit, - * used by the throttling code to decide if new updates need - * to be run right away - */ - int flushing; + u64 pending_csums; + + unsigned long flags; u64 run_delayed_start; + + /* + * To make qgroup to skip given root. + * This is for snapshot, as btrfs_qgroup_inherit() will manually + * modify counters for snapshot and its source, so we should skip + * the snapshot in new_root/old_roots or it will get calculated twice + */ + u64 qgroup_to_skip; }; +enum btrfs_ref_type { + BTRFS_REF_NOT_SET, + BTRFS_REF_DATA, + BTRFS_REF_METADATA, + BTRFS_REF_LAST, +}; + +struct btrfs_data_ref { + /* For EXTENT_DATA_REF */ + + /* Original root this data extent belongs to */ + u64 owning_root; + + /* Inode which refers to this data extent */ + u64 ino; + + /* + * file_offset - extent_offset + * + * file_offset is the key.offset of the EXTENT_DATA key. + * extent_offset is btrfs_file_extent_offset() of the EXTENT_DATA data. + */ + u64 offset; +}; + +struct btrfs_tree_ref { + /* + * Level of this tree block + * + * Shared for skinny (TREE_BLOCK_REF) and normal tree ref. + */ + int level; + + /* + * Root which owns this tree block. + * + * For TREE_BLOCK_REF (skinny metadata, either inline or keyed) + */ + u64 owning_root; + + /* For non-skinny metadata, no special member needed */ +}; + +struct btrfs_ref { + enum btrfs_ref_type type; + int action; + + /* + * Whether this extent should go through qgroup record. + * + * Normally false, but for certain cases like delayed subtree scan, + * setting this flag can hugely reduce qgroup overhead. + */ + bool skip_qgroup; + +#ifdef CONFIG_BTRFS_FS_REF_VERIFY + /* Through which root is this modification. */ + u64 real_root; +#endif + u64 bytenr; + u64 len; + + /* Bytenr of the parent tree block */ + u64 parent; + union { + struct btrfs_data_ref data_ref; + struct btrfs_tree_ref tree_ref; + }; +}; + +extern struct kmem_cache *btrfs_delayed_ref_head_cachep; +extern struct kmem_cache *btrfs_delayed_tree_ref_cachep; +extern struct kmem_cache *btrfs_delayed_data_ref_cachep; +extern struct kmem_cache *btrfs_delayed_extent_op_cachep; + +int __init btrfs_delayed_ref_init(void); +void __cold btrfs_delayed_ref_exit(void); + +static inline void btrfs_init_generic_ref(struct btrfs_ref *generic_ref, + int action, u64 bytenr, u64 len, u64 parent) +{ + generic_ref->action = action; + generic_ref->bytenr = bytenr; + generic_ref->len = len; + generic_ref->parent = parent; +} + +static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, + int level, u64 root, u64 mod_root, bool skip_qgroup) +{ +#ifdef CONFIG_BTRFS_FS_REF_VERIFY + /* If @real_root not set, use @root as fallback */ + generic_ref->real_root = mod_root ?: root; +#endif + generic_ref->tree_ref.level = level; + generic_ref->tree_ref.owning_root = root; + generic_ref->type = BTRFS_REF_METADATA; + if (skip_qgroup || !(is_fstree(root) && + (!mod_root || is_fstree(mod_root)))) + generic_ref->skip_qgroup = true; + else + generic_ref->skip_qgroup = false; + +} + +static inline void btrfs_init_data_ref(struct btrfs_ref *generic_ref, + u64 ref_root, u64 ino, u64 offset, u64 mod_root, + bool skip_qgroup) +{ +#ifdef CONFIG_BTRFS_FS_REF_VERIFY + /* If @real_root not set, use @root as fallback */ + generic_ref->real_root = mod_root ?: ref_root; +#endif + generic_ref->data_ref.owning_root = ref_root; + generic_ref->data_ref.ino = ino; + generic_ref->data_ref.offset = offset; + generic_ref->type = BTRFS_REF_DATA; + if (skip_qgroup || !(is_fstree(ref_root) && + (!mod_root || is_fstree(mod_root)))) + generic_ref->skip_qgroup = true; + else + generic_ref->skip_qgroup = false; +} static inline struct btrfs_delayed_extent_op * btrfs_alloc_delayed_extent_op(void) @@ -169,22 +332,22 @@ static inline void btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op) { if (op) - kfree(op); + kmem_cache_free(btrfs_delayed_extent_op_cachep, op); } static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) { - WARN_ON(ref->refs == 0); - if (--ref->refs == 0) { + WARN_ON(refcount_read(&ref->refs) == 0); + if (refcount_dec_and_test(&ref->refs)) { WARN_ON(ref->in_tree); switch (ref->type) { case BTRFS_TREE_BLOCK_REF_KEY: case BTRFS_SHARED_BLOCK_REF_KEY: - kfree(ref); + kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); break; case BTRFS_EXTENT_DATA_REF_KEY: case BTRFS_SHARED_DATA_REF_KEY: - kfree(ref); + kmem_cache_free(btrfs_delayed_data_ref_cachep, ref); break; default: BUG(); @@ -192,10 +355,20 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) } } +static inline u64 btrfs_ref_head_to_space_flags( + struct btrfs_delayed_ref_head *head_ref) +{ + if (head_ref->is_data) + return BTRFS_BLOCK_GROUP_DATA; + else if (head_ref->is_system) + return BTRFS_BLOCK_GROUP_SYSTEM; + return BTRFS_BLOCK_GROUP_METADATA; +} + static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *head) { - if (--head->refs == 0) - kfree(head); + if (refcount_dec_and_test(&head->refs)) + kmem_cache_free(btrfs_delayed_ref_head_cachep, head); } int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, @@ -204,13 +377,40 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, u64 ref_root, int level, int action, struct btrfs_delayed_extent_op *extent_op, int *old_ref_mod, int *new_ref_mod); +int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, + struct btrfs_ref *generic_ref, + u64 reserved); +int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, + u64 bytenr, u64 num_bytes, + struct btrfs_delayed_extent_op *extent_op); void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_delayed_ref_head *head); +struct btrfs_delayed_ref_head * +btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, + u64 bytenr); +int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head); +static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head) +{ + mutex_unlock(&head->mutex); +} +void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head); + struct btrfs_delayed_ref_head * btrfs_select_ref_head(struct btrfs_trans_handle *trans); +int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq); + +void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr); +void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans); +void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info, + struct btrfs_block_rsv *src, + u64 num_bytes); +bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info); + /* * helper functions to cast a node into its container */ @@ -220,6 +420,12 @@ btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node) return container_of(node, struct btrfs_delayed_tree_ref, node); } +static inline struct btrfs_delayed_data_ref * +btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node) +{ + return container_of(node, struct btrfs_delayed_data_ref, node); +} + int cleanup_ref_head(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_head *head); diff --git a/kernel-shared/extent-tree.c b/kernel-shared/extent-tree.c index f447498a..988bceed 100644 --- a/kernel-shared/extent-tree.c +++ b/kernel-shared/extent-tree.c @@ -3790,7 +3790,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, unsigned long nr) struct btrfs_delayed_ref_head *locked_ref = NULL; struct btrfs_delayed_extent_op *extent_op; int ret; - int must_insert_reserved = 0; + bool must_insert_reserved = false; delayed_refs = &trans->delayed_refs; while (1) { @@ -3854,7 +3854,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, unsigned long nr) * lock. */ must_insert_reserved = locked_ref->must_insert_reserved; - locked_ref->must_insert_reserved = 0; + locked_ref->must_insert_reserved = false; extent_op = locked_ref->extent_op; locked_ref->extent_op = NULL;