From 07b2e84bce518cc6321e8c9014f75669993dac7c Mon Sep 17 00:00:00 2001 From: Aurelien DARRAGON Date: Tue, 12 Mar 2024 17:05:54 +0100 Subject: [PATCH] BUG/MEDIUM: hlua: streams don't support mixing lua-load with lua-load-per-thread (2nd try) While trying to reproduce another crash case involving lua filters reported by @bgrooot on GH #2467, we found out that mixing filters loaded from different contexts ('lua-load' vs 'lua-load-per-thread') for the same stream isn't supported and may even cause the process to crash. Historically, mixing lua-load and lua-load-per-threads for a stream wasn't supported, but this changed thanks to 0913386 ("BUG/MEDIUM: hlua: streams don't support mixing lua-load with lua-load-per-thread"). However, the above fix didn't consider lua filters's use-case properly: unlike lua fetches, actions or even services, lua filters don't simply use the stream hlua context as a "temporary" hlua running context to process some hlua code. For fetches, actions.. hlua executions are processed sequentially, so we simply reuse the hlua context from the previous action/fetch to run the next one (this allows to bypass memory allocations and initialization, thus it increases performance), unless we need to run on a different hlua state-id, in which case we perform a reset of the hlua context. But this cannot work with filters: indeed, once registered, a filter will last for the whole stream duration. It means that the filter will rely on the stream hlua context from ->attach() to ->detach(). And here is the catch, if for the same stream we register 2 lua filters from different contexts ('lua-load' + 'lua-load-per-thread'), then we have an issue, because the hlua stream will be re-created each time we switch between runtime contexts, which means each time we switch between the filters (may happen for each stream processing step), and since lua filters rely on the stream hlua to carry context between filtering steps, this context will be lost upon a switch. Given that lua filters code was not designed with that in mind, it would confuse the code and cause unexpected behaviors ranging from lua errors to crashing process. So here we take another approach: instead of re-creating the stream hlua context each time we switch between "global" and "per-thread" runtime context, let's have both of them inside the stream directly as initially suggested by Christopher back then when talked about the original issue. For this we leverage hlua_stream_ctx_prepare() and hlua_stream_ctx_get() helper functions which return the proper hlua context for a given stream and state_id combination. As for debugging infos reported after ha_panic(), we check for both hlua runtime contexts to check if one of them was active when the panic occured (only 1 runtime ctx per stream may be active at a given time). This should be backported to all stable versions with 0913386 ("BUG/MEDIUM: hlua: streams don't support mixing lua-load with lua-load-per-thread") This commit depends on: - "DEBUG: lua: precisely identify if stream is stuck inside lua or not" [for versions < 2.9 the ha_thread_dump_one() part should be skipped] - "MINOR: hlua: use accessors for stream hlua ctx" For 2.4, the filters API didn't exist. However it may be a good idea to backport it anyway because ->set_priv()/->get_priv() from tcp/http lua applets may also be affected by this bug, plus it will ease code maintenance. Of course, filters-related parts should be skipped in this case. --- include/haproxy/stream-t.h | 2 +- src/debug.c | 15 +++++++--- src/hlua.c | 56 ++++++++++++++++++-------------------- src/stream.c | 8 ++++-- 4 files changed, 43 insertions(+), 38 deletions(-) diff --git a/include/haproxy/stream-t.h b/include/haproxy/stream-t.h index 280929cbf..a5b719016 100644 --- a/include/haproxy/stream-t.h +++ b/include/haproxy/stream-t.h @@ -284,7 +284,7 @@ struct stream { int last_rule_line; /* last evaluated final rule's line (def: 0) */ unsigned int stream_epoch; /* copy of stream_epoch when the stream was created */ - struct hlua *hlua; /* lua runtime context */ + struct hlua *hlua[2]; /* lua runtime context (0: global, 1: per-thread) */ /* Context */ struct { diff --git a/src/debug.c b/src/debug.c index ff5480373..3ab5feb84 100644 --- a/src/debug.c +++ b/src/debug.c @@ -300,9 +300,15 @@ void ha_thread_dump_one(int thr, int from_signal) if (th_ctx->current && th_ctx->current->process == process_stream && th_ctx->current->context) { const struct stream *s = (const struct stream *)th_ctx->current->context; - struct hlua *hlua = s ? s->hlua : NULL; + struct hlua *hlua = NULL; - if (hlua && HLUA_IS_BUSY(hlua)) { + if (s) { + if (s->hlua[0] && HLUA_IS_BUSY(s->hlua[0])) + hlua = s->hlua[0]; + else if (s->hlua[1] && HLUA_IS_BUSY(s->hlua[1])) + hlua = s->hlua[1]; + } + if (hlua) { mark_tainted(TAINTED_LUA_STUCK); if (hlua->state_id == 0) mark_tainted(TAINTED_LUA_STUCK_SHARED); @@ -417,8 +423,9 @@ void ha_task_dump(struct buffer *buf, const struct task *task, const char *pfx) #ifdef USE_LUA hlua = NULL; - if (s && s->hlua && HLUA_IS_BUSY(s->hlua)) { - hlua = s->hlua; + if (s && ((s->hlua[0] && HLUA_IS_BUSY(s->hlua[0])) || + (s->hlua[1] && HLUA_IS_BUSY(s->hlua[1])))) { + hlua = (s->hlua[0] && HLUA_IS_BUSY(s->hlua[0])) ? s->hlua[0] : s->hlua[1]; chunk_appendf(buf, "%sCurrent executing Lua from a stream analyser -- ", pfx); } else if (task->process == hlua_process_task && (hlua = task->context)) { diff --git a/src/hlua.c b/src/hlua.c index f501bb07d..0ef050c89 100644 --- a/src/hlua.c +++ b/src/hlua.c @@ -330,7 +330,8 @@ struct hlua_flt_config { }; struct hlua_flt_ctx { - int ref; /* ref to the filter lua object */ + struct hlua *_hlua; /* main hlua context */ + int ref; /* ref to the filter lua object (in main hlua context) */ struct hlua *hlua[2]; /* lua runtime context (0: request, 1: response) */ unsigned int cur_off[2]; /* current offset (0: request, 1: response) */ unsigned int cur_len[2]; /* current forwardable length (0: request, 1: response) */ @@ -1678,19 +1679,19 @@ static int hlua_ctx_renew(struct hlua *lua, int keep_msg) return 1; } -/* Helper function to get the lua ctx for a given stream */ -static inline struct hlua *hlua_stream_ctx_get(struct stream *s) +/* Helper function to get the lua ctx for a given stream and state_id */ +static inline struct hlua *hlua_stream_ctx_get(struct stream *s, int state_id) { - return s->hlua; + /* state_id == 0 -> global runtime ctx + * state_id != 0 -> per-thread runtime ctx + */ + return s->hlua[!!state_id]; } -/* Helper function to prepare the lua ctx for a given stream +/* Helper function to prepare the lua ctx for a given stream and state id * - * ctx will be enforced in parent stack on initial creation. - * If s->hlua->state_id differs from , which may happen at - * runtime since existing stream hlua ctx will be reused for other - * "independent" (but stream-related) lua executions, hlua will be - * recreated with the expected state id. + * It uses the global or per-thread ctx depending on the expected + * . * * Returns hlua ctx on success and NULL on failure */ @@ -1701,8 +1702,7 @@ static struct hlua *hlua_stream_ctx_prepare(struct stream *s, int state_id) * permits to save performances because a systematic * Lua initialization cause 5% performances loss. */ - ctx_renew: - if (!s->hlua) { + if (!s->hlua[!!state_id]) { struct hlua *hlua; hlua = pool_alloc(pool_head_hlua); @@ -1713,20 +1713,9 @@ static struct hlua *hlua_stream_ctx_prepare(struct stream *s, int state_id) pool_free(pool_head_hlua, hlua); return NULL; } - s->hlua = hlua; + s->hlua[!!state_id] = hlua; } - else if (s->hlua->state_id != state_id) { - /* ctx already created, but not in proper state. - * It should only happen after the previous execution is - * finished, otherwise it's probably a bug since we don't - * want to abort unfinished job.. - */ - BUG_ON(HLUA_IS_RUNNING(s->hlua)); - hlua_ctx_destroy(s->hlua); - s->hlua = NULL; - goto ctx_renew; - } - return s->hlua; + return s->hlua[!!state_id]; } void hlua_hook(lua_State *L, lua_Debug *ar) @@ -4996,8 +4985,9 @@ __LJMP static int hlua_applet_tcp_get_var(lua_State *L) __LJMP static int hlua_applet_tcp_set_priv(lua_State *L) { struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1)); + struct hlua_cli_ctx *cli_ctx = luactx->appctx->svcctx; struct stream *s = luactx->htxn.s; - struct hlua *hlua = hlua_stream_ctx_get(s); + struct hlua *hlua = hlua_stream_ctx_get(s, cli_ctx->hlua->state_id); /* Note that this hlua struct is from the session and not from the applet. */ if (!hlua) @@ -5018,8 +5008,9 @@ __LJMP static int hlua_applet_tcp_set_priv(lua_State *L) __LJMP static int hlua_applet_tcp_get_priv(lua_State *L) { struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1)); + struct hlua_cli_ctx *cli_ctx = luactx->appctx->svcctx; struct stream *s = luactx->htxn.s; - struct hlua *hlua = hlua_stream_ctx_get(s); + struct hlua *hlua = hlua_stream_ctx_get(s, cli_ctx->hlua->state_id); /* Note that this hlua struct is from the session and not from the applet. */ if (!hlua) { @@ -5485,8 +5476,9 @@ __LJMP static int hlua_applet_http_get_var(lua_State *L) __LJMP static int hlua_applet_http_set_priv(lua_State *L) { struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1)); + struct hlua_http_ctx *http_ctx = luactx->appctx->svcctx; struct stream *s = luactx->htxn.s; - struct hlua *hlua = hlua_stream_ctx_get(s); + struct hlua *hlua = hlua_stream_ctx_get(s, http_ctx->hlua->state_id); /* Note that this hlua struct is from the session and not from the applet. */ if (!hlua) @@ -5507,8 +5499,9 @@ __LJMP static int hlua_applet_http_set_priv(lua_State *L) __LJMP static int hlua_applet_http_get_priv(lua_State *L) { struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1)); + struct hlua_http_ctx *http_ctx = luactx->appctx->svcctx; struct stream *s = luactx->htxn.s; - struct hlua *hlua = hlua_stream_ctx_get(s); + struct hlua *hlua = hlua_stream_ctx_get(s, http_ctx->hlua->state_id); /* Note that this hlua struct is from the session and not from the applet. */ if (!hlua) { @@ -11996,6 +11989,9 @@ static int hlua_filter_new(struct stream *s, struct filter *filter) /* At this point the execution is safe. */ RESET_SAFE_LJMP(hlua); + /* save main hlua ctx (from the stream) */ + flt_ctx->_hlua = hlua; + filter->ctx = flt_ctx; break; case HLUA_E_ERRMSG: @@ -12046,7 +12042,7 @@ static int hlua_filter_new(struct stream *s, struct filter *filter) static void hlua_filter_delete(struct stream *s, struct filter *filter) { struct hlua_flt_ctx *flt_ctx = filter->ctx; - struct hlua *hlua = hlua_stream_ctx_get(s); + struct hlua *hlua = hlua_stream_ctx_get(s, flt_ctx->_hlua->state_id); hlua_lock(hlua); hlua_unref(hlua->T, flt_ctx->ref); diff --git a/src/stream.c b/src/stream.c index 40756e6ae..1e2940974 100644 --- a/src/stream.c +++ b/src/stream.c @@ -537,7 +537,7 @@ struct stream *stream_new(struct session *sess, struct stconn *sc, struct buffer s->res.analyse_exp = TICK_ETERNITY; s->txn = NULL; - s->hlua = NULL; + s->hlua[0] = s->hlua[1] = NULL; s->resolv_ctx.requester = NULL; s->resolv_ctx.hostname_dn = NULL; @@ -649,8 +649,10 @@ void stream_free(struct stream *s) flt_stream_stop(s); flt_stream_release(s, 0); - hlua_ctx_destroy(s->hlua); - s->hlua = NULL; + hlua_ctx_destroy(s->hlua[0]); + hlua_ctx_destroy(s->hlua[1]); + s->hlua[0] = s->hlua[1] = NULL; + if (s->txn) http_destroy_txn(s);