From 87d852afa26f7b5a7e21ac3a92d89f19c984cacd Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf
Date: Mon, 28 Apr 2014 10:29:33 -0500
Subject: [PATCH 1/4] kmod/core: fail more gracefully in kpatch_unregister

In kpatch_unregister(), if kpatch_remove_patch succeeds but one of the
subsequent ftrace unregistering calls fails, it returns an error and fails to
module_put() the patch module, even though the patch has been removed.  This
leaves the patch module stuck in a weird state: its patch has been
unregistered, but the module itself can never be removed.

These unregistering calls aren't expected to fail, and even if they did, the
failures wouldn't cause any real problems, so just WARN instead of returning
an error.
---
 kmod/core/core.c | 25 ++++++++-----------------
 1 file changed, 8 insertions(+), 17 deletions(-)

diff --git a/kmod/core/core.c b/kmod/core/core.c
index aef71c6..490778e 100644
--- a/kmod/core/core.c
+++ b/kmod/core/core.c
@@ -321,8 +321,8 @@ static struct ftrace_ops kpatch_ftrace_ops __read_mostly = {
 };
 
 /* Remove kpatch_funcs from ftrace filter */
-static int kpatch_remove_funcs_from_filter(struct kpatch_func *funcs,
-					   int num_funcs)
+static void kpatch_remove_funcs_from_filter(struct kpatch_func *funcs,
+					    int num_funcs)
 {
 	int i, ret = 0;
 
@@ -339,14 +339,8 @@ static int kpatch_remove_funcs_from_filter(struct kpatch_func *funcs,
 		/* Remove the ftrace handler for this function. */
 		ret = ftrace_set_filter_ip(&kpatch_ftrace_ops,
 					   func->old_addr, 1, 0);
-		if (ret) {
-			pr_err("can't remove ftrace filter at address 0x%lx\n",
-			       func->old_addr);
-			break;
-		}
+		WARN_ON(ret);
 	}
-
-	return ret;
 }
 
 int kpatch_register(struct kpatch_module *kpmod)
@@ -480,12 +474,11 @@ int kpatch_unregister(struct kpatch_module *kpmod)
 
 	if (kpatch_num_registered == 1) {
 		ret = unregister_ftrace_function(&kpatch_ftrace_ops);
-		if (ret) {
-			pr_err("can't unregister ftrace handler\n");
-			goto out;
-		}
+		if (ret)
+			WARN_ON(1);
+		else
+			kpatch_num_registered--;
 	}
-	kpatch_num_registered--;
 
 	/*
 	 * This synchronize_rcu is to ensure any other kpatch_get_func
@@ -494,9 +487,7 @@ int kpatch_unregister(struct kpatch_module *kpmod)
 	 */
 	synchronize_rcu();
 
-	ret = kpatch_remove_funcs_from_filter(funcs, num_funcs);
-	if (ret)
-		goto out;
+	kpatch_remove_funcs_from_filter(funcs, num_funcs);
 
 	pr_notice("unloaded patch module \"%s\"\n", kpmod->mod->name);
 

From 2f34cf9a895c411f2f89f6cb5ed65272c6c28b04 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf
Date: Mon, 28 Apr 2014 11:41:20 -0500
Subject: [PATCH 2/4] kmod/core: NMI synchronization improvements

This is an attempt to both simplify and improve the correctness of the NMI
synchronization code.

There's a race in kpatch_ftrace_handler() between the kpatch_get_func() and
kpatch_finish_status() calls which could result in func being NULL.  The
retry was supposed to fix this.  However, the race is still a problem in the
repatching case (when the function has already been patched before): func
would not be NULL, but could instead point to the previously patched version
of the function.  In that case the handler wouldn't retry, and the previous
version of the function could run.

The fix is to use a memory barrier between accesses of the func hash and the
status variable, and then call kpatch_get_func() *after* accessing the status
variable.  For OP_PATCH, if the status is SUCCESS, func is guaranteed to
point to the new function.  If the status is FAILURE, func might point to the
new function, in which case we can use get_prev_func to get the previous
version of the function.
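To make the ordering concrete, here is a condensed sketch of the resulting
logic, using the state/op naming introduced below.  It is illustrative only,
not the literal code in the diff:

    /* writer side (kpatch_apply_patch, in stop_machine), simplified */
    hash_add_rcu(kpatch_func_hash, &func->node, func->old_addr);
    smp_wmb();	/* order the hash add before the state change */
    if (kpatch_state_finish(KPATCH_STATE_SUCCESS) == KPATCH_STATE_FAILURE)
    	/* an NMI saw the update in progress; undo the hash add */
    	hash_del_rcu(&func->node);

    /* reader side (kpatch_ftrace_handler in NMI context), simplified */
    state = kpatch_state_finish(KPATCH_STATE_FAILURE);
    smp_rmb();	/* order the state read before the func hash lookup */
    func = kpatch_get_func(ip);
    if (state == KPATCH_STATE_FAILURE && func && func->op == KPATCH_OP_PATCH)
    	/* the update failed; use the previously committed version */
    	func = kpatch_get_prev_func(func, ip);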
I also made some pretty big changes to try to simplify the design so that
there are fewer moving parts and so that it's hopefully easier to understand.

I moved the OP field into the kpatch_func struct.  This allows us to merge
the two global state variables (status + op) into a single global state
variable (state), which helps make the code quite a bit simpler.  I turned it
into a proper state machine and documented the meaning of each state in the
comments.

Moving the OP field to the kpatch_func struct also paves the way for an
upcoming pull request which will allow patch modules to be atomically
replaced ("kpatch load --replace ").
---
 kmod/core/core.c   | 274 ++++++++++++++++++++++++++-------------------
 kmod/core/kpatch.h |   5 +-
 2 files changed, 161 insertions(+), 118 deletions(-)

diff --git a/kmod/core/core.c b/kmod/core/core.c
index 490778e..7fe8f54 100644
--- a/kmod/core/core.c
+++ b/kmod/core/core.c
@@ -62,32 +62,73 @@ struct kpatch_backtrace_args {
 	int ret;
 };
 
+/*
+ * The kpatch core module has a state machine which allows for proper
+ * synchronization with kpatch_ftrace_handler() when it runs in NMI context.
+ *
+ *         +-----------------------------------------------------+
+ *         |                                                     |
+ *         |                                                     +
+ *         v                                     +---> KPATCH_STATE_SUCCESS
+ * KPATCH_STATE_IDLE +---> KPATCH_STATE_UPDATING |
+ *         ^                                     +---> KPATCH_STATE_FAILURE
+ *         |                                                     +
+ *         |                                                     |
+ *         +-----------------------------------------------------+
+ *
+ * KPATCH_STATE_IDLE: No updates are pending.  The func hash is valid, and the
+ * reader doesn't need to check func->op.
+ *
+ * KPATCH_STATE_UPDATING: An update is in progress.  The reader must call
+ * kpatch_state_finish(KPATCH_STATE_FAILURE) before accessing the func hash.
+ *
+ * KPATCH_STATE_FAILURE: An update failed, and the func hash might be
+ * inconsistent (pending patched funcs might not have been removed yet).  If
+ * func->op is KPATCH_OP_PATCH, then roll back to the previous version of the
+ * func.
+ *
+ * KPATCH_STATE_SUCCESS: An update succeeded, but the func hash might be
+ * inconsistent (pending unpatched funcs might not have been removed yet).  If
+ * func->op is KPATCH_OP_UNPATCH, then roll back to the previous version of
+ * the func.
+ */
 enum {
-	KPATCH_STATUS_START,
-	KPATCH_STATUS_SUCCESS,
-	KPATCH_STATUS_FAILURE,
+	KPATCH_STATE_IDLE,
+	KPATCH_STATE_UPDATING,
+	KPATCH_STATE_SUCCESS,
+	KPATCH_STATE_FAILURE,
 };
-static atomic_t kpatch_status;
-
-static inline void kpatch_start_status(void)
-{
-	atomic_set(&kpatch_status, KPATCH_STATUS_START);
-}
-
-/* Try to set a finish status, and return the result status */
-static inline int kpatch_finish_status(int status)
-{
-	int result;
-	result = atomic_cmpxchg(&kpatch_status, KPATCH_STATUS_START, status);
-	return result == KPATCH_STATUS_START ?
status : result; -} +static atomic_t kpatch_state; +/* values for func->op */ enum { KPATCH_OP_NONE, KPATCH_OP_PATCH, KPATCH_OP_UNPATCH, }; -static atomic_t kpatch_operation; + + +static inline void kpatch_state_idle(void) +{ + int state = atomic_read(&kpatch_state); + WARN_ON(state != KPATCH_STATE_SUCCESS && state != KPATCH_STATE_FAILURE); + atomic_set(&kpatch_state, KPATCH_STATE_IDLE); +} + +static inline void kpatch_state_updating(void) +{ + WARN_ON(atomic_read(&kpatch_state) != KPATCH_STATE_IDLE); + atomic_set(&kpatch_state, KPATCH_STATE_UPDATING); +} + +/* If state is updating, change it to success or failure and return new state */ +static inline int kpatch_state_finish(int state) +{ + int result; + WARN_ON(state != KPATCH_STATE_SUCCESS && state != KPATCH_STATE_FAILURE); + result = atomic_cmpxchg(&kpatch_state, KPATCH_STATE_UPDATING, state); + return result == KPATCH_STATE_UPDATING ? state : result; +} static struct kpatch_func *kpatch_get_func(unsigned long ip) { @@ -100,15 +141,12 @@ static struct kpatch_func *kpatch_get_func(unsigned long ip) return NULL; } -static struct kpatch_func *kpatch_get_committed_func(struct kpatch_func *f, - unsigned long ip) +static struct kpatch_func *kpatch_get_prev_func(struct kpatch_func *f, + unsigned long ip) { - /* Continuing on the same hlist to find commited (!updating) func */ - if (f) { - hlist_for_each_entry_continue_rcu(f, node) - if (f->old_addr == ip && !f->updating) - return f; - } + hlist_for_each_entry_continue_rcu(f, node) + if (f->old_addr == ip) + return f; return NULL; } @@ -196,29 +234,34 @@ static int kpatch_apply_patch(void *data) int i, ret; ret = kpatch_verify_activeness_safety(kpmod); - if (ret) + if (ret) { + kpatch_state_finish(KPATCH_STATE_FAILURE); return ret; - - for (i = 0; i < num_funcs; i++) { - struct kpatch_func *func = &funcs[i]; - - /* update the global list and go live */ - hash_add_rcu(kpatch_func_hash, &func->node, func->old_addr); } - /* Check if any inconsistent NMI has happened while updating */ - ret = kpatch_finish_status(KPATCH_STATUS_SUCCESS); - if (ret == KPATCH_STATUS_FAILURE) { + /* tentatively add the new funcs to the global func hash */ + for (i = 0; i < num_funcs; i++) + hash_add_rcu(kpatch_func_hash, &funcs[i].node, + funcs[i].old_addr); + + /* memory barrier between func hash add and state change */ + smp_wmb(); + + /* + * Check if any inconsistent NMI has happened while updating. If not, + * move to success state. 
+ */ + ret = kpatch_state_finish(KPATCH_STATE_SUCCESS); + if (ret == KPATCH_STATE_FAILURE) { + pr_err("NMI activeness safety check failed\n"); + /* Failed, we have to rollback patching process */ for (i = 0; i < num_funcs; i++) hash_del_rcu(&funcs[i].node); + return -EBUSY; } - /* Succeeded, clear updating flags */ - for (i = 0; i < num_funcs; i++) - funcs[i].updating = false; - return 0; } @@ -231,17 +274,15 @@ static int kpatch_remove_patch(void *data) int ret, i; ret = kpatch_verify_activeness_safety(kpmod); - if (ret) + if (ret) { + kpatch_state_finish(KPATCH_STATE_FAILURE); return ret; + } /* Check if any inconsistent NMI has happened while updating */ - ret = kpatch_finish_status(KPATCH_STATUS_SUCCESS); - if (ret == KPATCH_STATUS_FAILURE) { - /* Failed, we must keep funcs on hash table */ - for (i = 0; i < num_funcs; i++) - funcs[i].updating = false; + ret = kpatch_state_finish(KPATCH_STATE_SUCCESS); + if (ret == KPATCH_STATE_FAILURE) return -EBUSY; - } /* Succeeded, remove all updating funcs from hash table */ for (i = 0; i < num_funcs; i++) @@ -263,49 +304,38 @@ void notrace kpatch_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct pt_regs *regs) { struct kpatch_func *func; - int ret, op; + int state; preempt_disable_notrace(); -retry: - func = kpatch_get_func(ip); - if (unlikely(in_nmi())) { - op = atomic_read(&kpatch_operation); - if (likely(op == KPATCH_OP_NONE)) - goto done; - /* - * Make sure no memory reordering between - * kpatch_operation and kpatch_status - */ + + if (likely(!in_nmi())) + func = kpatch_get_func(ip); + else { + /* Checking for NMI inconsistency */ + state = kpatch_state_finish(KPATCH_STATE_FAILURE); + + /* no memory reordering between state and func hash read */ smp_rmb(); - /* - * Checking for NMI inconsistency. - * If this can set the KPATCH_STATUS_FAILURE here, it means an - * NMI occures in updating process. In that case, we should - * rollback the process. - */ - ret = kpatch_finish_status(KPATCH_STATUS_FAILURE); - if (ret == KPATCH_STATUS_FAILURE) { + + func = kpatch_get_func(ip); + + if (likely(state == KPATCH_STATE_IDLE)) + goto done; + + if (state == KPATCH_STATE_SUCCESS) { /* - * Inconsistency happens here, Newly added funcs have - * to be ignored. + * Patching succeeded. If the function was being + * unpatched, roll back to the previous version. */ - if (op == KPATCH_OP_PATCH) - func = kpatch_get_committed_func(func, ip); + if (func && func->op == KPATCH_OP_UNPATCH) + func = kpatch_get_prev_func(func, ip); } else { /* - * Here, the updating process has been finished - * successfully. Unpatched funcs have to be ignored. + * Patching failed. If the function was being patched, + * roll back to the previous version. */ - if (op == KPATCH_OP_UNPATCH) - func = kpatch_get_committed_func(func, ip); - /* - * This is a very rare case but possible if the func - * is added in the hash table right after calling - * kpatch_get_func(ip) and before calling - * kpatch_finish_status(KPATCH_STATUS_FAILURE). 
- */ - else if (!func) - goto retry; + if (func && func->op == KPATCH_OP_PATCH) + func = kpatch_get_prev_func(func, ip); } } done: @@ -364,7 +394,7 @@ int kpatch_register(struct kpatch_module *kpmod) for (i = 0; i < num_funcs; i++) { struct kpatch_func *func = &funcs[i]; - func->updating = true; + func->op = KPATCH_OP_PATCH; /* * If any other modules have also patched this function, it @@ -394,27 +424,34 @@ int kpatch_register(struct kpatch_module *kpmod) } kpatch_num_registered++; - kpatch_start_status(); - /* - * Make sure no memory reordering between kpatch_operation and - * kpatch_status. kpatch_ftrace_handler() has corresponding smp_rmb(). - */ + /* memory barrier between func hash and state write */ smp_wmb(); - atomic_set(&kpatch_operation, KPATCH_OP_PATCH); + + kpatch_state_updating(); + /* * Idle the CPUs, verify activeness safety, and atomically make the new * functions visible to the trampoline. */ ret = stop_machine(kpatch_apply_patch, kpmod, NULL); - if (ret) { - /* - * This synchronize_rcu is to ensure any other kpatch_get_func - * user exits the rcu locked(preemt_disabled) critical section - * and hash_del_rcu() is correctly finished. - */ - synchronize_rcu(); + + /* NMI handlers can return to normal now */ + kpatch_state_idle(); + + /* + * Wait for all existing NMI handlers to complete so that they don't + * see any changes to funcs or funcs->op that might occur after this + * point. + * + * Any NMI handlers starting after this point will see the IDLE state. + */ + synchronize_rcu(); + + if (ret) goto err_unregister; - } + + for (i = 0; i < num_funcs; i++) + funcs[i].op = KPATCH_OP_NONE; /* TODO: need TAINT_KPATCH */ pr_notice_once("tainting kernel with TAINT_USER\n"); @@ -422,14 +459,12 @@ int kpatch_register(struct kpatch_module *kpmod) pr_notice("loaded patch module \"%s\"\n", kpmod->mod->name); - atomic_set(&kpatch_operation, KPATCH_OP_NONE); kpmod->enabled = true; up(&kpatch_mutex); return 0; err_unregister: - atomic_set(&kpatch_operation, KPATCH_OP_NONE); if (kpatch_num_registered == 1) { int ret2 = unregister_ftrace_function(&kpatch_ftrace_ops); if (ret2) { @@ -457,20 +492,33 @@ int kpatch_unregister(struct kpatch_module *kpmod) down(&kpatch_mutex); - /* Start unpatching operation */ - kpatch_start_status(); - /* - * Make sure no memory reordering between kpatch_operation and - * kpatch_status. kpatch_ftrace_handler() has corresponding smp_rmb(). - */ - smp_wmb(); - atomic_set(&kpatch_operation, KPATCH_OP_UNPATCH); for (i = 0; i < num_funcs; i++) - funcs[i].updating = true; + funcs[i].op = KPATCH_OP_UNPATCH; + + /* memory barrier between func hash and state write */ + smp_wmb(); + + kpatch_state_updating(); ret = stop_machine(kpatch_remove_patch, kpmod, NULL); - if (ret) + + /* NMI handlers can return to normal now */ + kpatch_state_idle(); + + /* + * Wait for all existing NMI handlers to complete so that they don't + * see any changes to funcs or funcs->op that might occur after this + * point. + * + * Any NMI handlers starting after this point will see the IDLE state. + */ + synchronize_rcu(); + + if (ret) { + for (i = 0; i < num_funcs; i++) + funcs[i].op = KPATCH_OP_NONE; goto out; + } if (kpatch_num_registered == 1) { ret = unregister_ftrace_function(&kpatch_ftrace_ops); @@ -480,13 +528,6 @@ int kpatch_unregister(struct kpatch_module *kpmod) kpatch_num_registered--; } - /* - * This synchronize_rcu is to ensure any other kpatch_get_func - * user exits the rcu locked(preemt_disabled) critical section - * and hash_del_rcu() is correctly finished. 
- */ - synchronize_rcu(); - kpatch_remove_funcs_from_filter(funcs, num_funcs); pr_notice("unloaded patch module \"%s\"\n", kpmod->mod->name); @@ -495,7 +536,6 @@ int kpatch_unregister(struct kpatch_module *kpmod) module_put(kpmod->mod); out: - atomic_set(&kpatch_operation, KPATCH_OP_NONE); up(&kpatch_mutex); return ret; } diff --git a/kmod/core/kpatch.h b/kmod/core/kpatch.h index 3586e6e..5affdb7 100644 --- a/kmod/core/kpatch.h +++ b/kmod/core/kpatch.h @@ -29,12 +29,15 @@ #include struct kpatch_func { + /* public */ unsigned long new_addr; unsigned long new_size; unsigned long old_addr; unsigned long old_size; + + /* private */ struct hlist_node node; - bool updating; + int op; }; struct kpatch_module { From ac2223076106ddf954c71586bbfba101764d3b8b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 30 Apr 2014 13:26:29 -0500 Subject: [PATCH 3/4] kmod/core: make func->op an enum --- kmod/core/core.c | 7 ------- kmod/core/kpatch.h | 8 +++++++- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/kmod/core/core.c b/kmod/core/core.c index 7fe8f54..1bfe029 100644 --- a/kmod/core/core.c +++ b/kmod/core/core.c @@ -100,13 +100,6 @@ enum { }; static atomic_t kpatch_state; -/* values for func->op */ -enum { - KPATCH_OP_NONE, - KPATCH_OP_PATCH, - KPATCH_OP_UNPATCH, -}; - static inline void kpatch_state_idle(void) { diff --git a/kmod/core/kpatch.h b/kmod/core/kpatch.h index 5affdb7..c5dc8b6 100644 --- a/kmod/core/kpatch.h +++ b/kmod/core/kpatch.h @@ -28,6 +28,12 @@ #include #include +enum kpatch_op { + KPATCH_OP_NONE, + KPATCH_OP_PATCH, + KPATCH_OP_UNPATCH, +}; + struct kpatch_func { /* public */ unsigned long new_addr; @@ -37,7 +43,7 @@ struct kpatch_func { /* private */ struct hlist_node node; - int op; + enum kpatch_op op; }; struct kpatch_module { From 968845f1bd1b21556552fa05c74c764d7e8a2d23 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 30 Apr 2014 13:42:22 -0500 Subject: [PATCH 4/4] kmod/core: make WARN messages more informative --- kmod/core/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kmod/core/core.c b/kmod/core/core.c index 1bfe029..15828c1 100644 --- a/kmod/core/core.c +++ b/kmod/core/core.c @@ -362,7 +362,9 @@ static void kpatch_remove_funcs_from_filter(struct kpatch_func *funcs, /* Remove the ftrace handler for this function. */ ret = ftrace_set_filter_ip(&kpatch_ftrace_ops, func->old_addr, 1, 0); - WARN_ON(ret); + + WARN(ret, "can't remove ftrace filter at address 0x%lx (rc=%d)", + func->old_addr, ret); } } @@ -516,7 +518,7 @@ int kpatch_unregister(struct kpatch_module *kpmod) if (kpatch_num_registered == 1) { ret = unregister_ftrace_function(&kpatch_ftrace_ops); if (ret) - WARN_ON(1); + WARN(1, "can't unregister ftrace handler"); else kpatch_num_registered--; }