BUG/MEDIUM: hlua: streams don't support mixing lua-load with lua-load-per-thread (2nd try)

While trying to reproduce another crash case involving lua filters
reported by @bgrooot on GH #2467, we found out that mixing filters loaded
from different contexts ('lua-load' vs 'lua-load-per-thread') for the same
stream isn't supported and may even cause the process to crash.

Historically, mixing lua-load and lua-load-per-threads for a stream wasn't
supported, but this changed thanks to 0913386 ("BUG/MEDIUM: hlua: streams
don't support mixing lua-load with lua-load-per-thread").

However, the above fix didn't consider lua filters's use-case properly:
unlike lua fetches, actions or even services, lua filters don't simply
use the stream hlua context as a "temporary" hlua running context to
process some hlua code. For fetches, actions.. hlua executions are
processed sequentially, so we simply reuse the hlua context from the
previous action/fetch to run the next one (this allows to bypass memory
allocations and initialization, thus it increases performance), unless
we need to run on a different hlua state-id, in which case we perform a
reset of the hlua context.

But this cannot work with filters: indeed, once registered, a filter will
last for the whole stream duration. It means that the filter will rely
on the stream hlua context from ->attach() to ->detach(). And here is the
catch, if for the same stream we register 2 lua filters from different
contexts ('lua-load' + 'lua-load-per-thread'), then we have an issue,
because the hlua stream will be re-created each time we switch between
runtime contexts, which means each time we switch between the filters (may
happen for each stream processing step), and since lua filters rely on the
stream hlua to carry context between filtering steps, this context will be
lost upon a switch. Given that lua filters code was not designed with that
in mind, it would confuse the code and cause unexpected behaviors ranging
from lua errors to crashing process.

So here we take another approach: instead of re-creating the stream hlua
context each time we switch between "global" and "per-thread" runtime
context, let's have both of them inside the stream directly as initially
suggested by Christopher back then when talked about the original issue.

For this we leverage hlua_stream_ctx_prepare() and hlua_stream_ctx_get()
helper functions which return the proper hlua context for a given stream
and state_id combination.

As for debugging infos reported after ha_panic(), we check for both hlua
runtime contexts to check if one of them was active when the panic occured
(only 1 runtime ctx per stream may be active at a given time).

This should be backported to all stable versions with 0913386
("BUG/MEDIUM: hlua: streams don't support mixing lua-load with lua-load-per-thread")

This commit depends on:
 - "DEBUG: lua: precisely identify if stream is stuck inside lua or not"
   [for versions < 2.9 the ha_thread_dump_one() part should be skipped]
 - "MINOR: hlua: use accessors for stream hlua ctx"

For 2.4, the filters API didn't exist. However it may be a good idea to
backport it anyway because ->set_priv()/->get_priv() from tcp/http lua
applets may also be affected by this bug, plus it will ease code
maintenance. Of course, filters-related parts should be skipped in this
case.
This commit is contained in:
Aurelien DARRAGON 2024-03-12 17:05:54 +01:00
parent aa554be69c
commit 07b2e84bce
4 changed files with 43 additions and 38 deletions

View File

@ -284,7 +284,7 @@ struct stream {
int last_rule_line; /* last evaluated final rule's line (def: 0) */
unsigned int stream_epoch; /* copy of stream_epoch when the stream was created */
struct hlua *hlua; /* lua runtime context */
struct hlua *hlua[2]; /* lua runtime context (0: global, 1: per-thread) */
/* Context */
struct {

View File

@ -300,9 +300,15 @@ void ha_thread_dump_one(int thr, int from_signal)
if (th_ctx->current &&
th_ctx->current->process == process_stream && th_ctx->current->context) {
const struct stream *s = (const struct stream *)th_ctx->current->context;
struct hlua *hlua = s ? s->hlua : NULL;
struct hlua *hlua = NULL;
if (hlua && HLUA_IS_BUSY(hlua)) {
if (s) {
if (s->hlua[0] && HLUA_IS_BUSY(s->hlua[0]))
hlua = s->hlua[0];
else if (s->hlua[1] && HLUA_IS_BUSY(s->hlua[1]))
hlua = s->hlua[1];
}
if (hlua) {
mark_tainted(TAINTED_LUA_STUCK);
if (hlua->state_id == 0)
mark_tainted(TAINTED_LUA_STUCK_SHARED);
@ -417,8 +423,9 @@ void ha_task_dump(struct buffer *buf, const struct task *task, const char *pfx)
#ifdef USE_LUA
hlua = NULL;
if (s && s->hlua && HLUA_IS_BUSY(s->hlua)) {
hlua = s->hlua;
if (s && ((s->hlua[0] && HLUA_IS_BUSY(s->hlua[0])) ||
(s->hlua[1] && HLUA_IS_BUSY(s->hlua[1])))) {
hlua = (s->hlua[0] && HLUA_IS_BUSY(s->hlua[0])) ? s->hlua[0] : s->hlua[1];
chunk_appendf(buf, "%sCurrent executing Lua from a stream analyser -- ", pfx);
}
else if (task->process == hlua_process_task && (hlua = task->context)) {

View File

@ -330,7 +330,8 @@ struct hlua_flt_config {
};
struct hlua_flt_ctx {
int ref; /* ref to the filter lua object */
struct hlua *_hlua; /* main hlua context */
int ref; /* ref to the filter lua object (in main hlua context) */
struct hlua *hlua[2]; /* lua runtime context (0: request, 1: response) */
unsigned int cur_off[2]; /* current offset (0: request, 1: response) */
unsigned int cur_len[2]; /* current forwardable length (0: request, 1: response) */
@ -1678,19 +1679,19 @@ static int hlua_ctx_renew(struct hlua *lua, int keep_msg)
return 1;
}
/* Helper function to get the lua ctx for a given stream */
static inline struct hlua *hlua_stream_ctx_get(struct stream *s)
/* Helper function to get the lua ctx for a given stream and state_id */
static inline struct hlua *hlua_stream_ctx_get(struct stream *s, int state_id)
{
return s->hlua;
/* state_id == 0 -> global runtime ctx
* state_id != 0 -> per-thread runtime ctx
*/
return s->hlua[!!state_id];
}
/* Helper function to prepare the lua ctx for a given stream
/* Helper function to prepare the lua ctx for a given stream and state id
*
* ctx will be enforced in <state_id> parent stack on initial creation.
* If s->hlua->state_id differs from <state_id>, which may happen at
* runtime since existing stream hlua ctx will be reused for other
* "independent" (but stream-related) lua executions, hlua will be
* recreated with the expected state id.
* It uses the global or per-thread ctx depending on the expected
* <state_id>.
*
* Returns hlua ctx on success and NULL on failure
*/
@ -1701,8 +1702,7 @@ static struct hlua *hlua_stream_ctx_prepare(struct stream *s, int state_id)
* permits to save performances because a systematic
* Lua initialization cause 5% performances loss.
*/
ctx_renew:
if (!s->hlua) {
if (!s->hlua[!!state_id]) {
struct hlua *hlua;
hlua = pool_alloc(pool_head_hlua);
@ -1713,20 +1713,9 @@ static struct hlua *hlua_stream_ctx_prepare(struct stream *s, int state_id)
pool_free(pool_head_hlua, hlua);
return NULL;
}
s->hlua = hlua;
s->hlua[!!state_id] = hlua;
}
else if (s->hlua->state_id != state_id) {
/* ctx already created, but not in proper state.
* It should only happen after the previous execution is
* finished, otherwise it's probably a bug since we don't
* want to abort unfinished job..
*/
BUG_ON(HLUA_IS_RUNNING(s->hlua));
hlua_ctx_destroy(s->hlua);
s->hlua = NULL;
goto ctx_renew;
}
return s->hlua;
return s->hlua[!!state_id];
}
void hlua_hook(lua_State *L, lua_Debug *ar)
@ -4996,8 +4985,9 @@ __LJMP static int hlua_applet_tcp_get_var(lua_State *L)
__LJMP static int hlua_applet_tcp_set_priv(lua_State *L)
{
struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1));
struct hlua_cli_ctx *cli_ctx = luactx->appctx->svcctx;
struct stream *s = luactx->htxn.s;
struct hlua *hlua = hlua_stream_ctx_get(s);
struct hlua *hlua = hlua_stream_ctx_get(s, cli_ctx->hlua->state_id);
/* Note that this hlua struct is from the session and not from the applet. */
if (!hlua)
@ -5018,8 +5008,9 @@ __LJMP static int hlua_applet_tcp_set_priv(lua_State *L)
__LJMP static int hlua_applet_tcp_get_priv(lua_State *L)
{
struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1));
struct hlua_cli_ctx *cli_ctx = luactx->appctx->svcctx;
struct stream *s = luactx->htxn.s;
struct hlua *hlua = hlua_stream_ctx_get(s);
struct hlua *hlua = hlua_stream_ctx_get(s, cli_ctx->hlua->state_id);
/* Note that this hlua struct is from the session and not from the applet. */
if (!hlua) {
@ -5485,8 +5476,9 @@ __LJMP static int hlua_applet_http_get_var(lua_State *L)
__LJMP static int hlua_applet_http_set_priv(lua_State *L)
{
struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
struct hlua_http_ctx *http_ctx = luactx->appctx->svcctx;
struct stream *s = luactx->htxn.s;
struct hlua *hlua = hlua_stream_ctx_get(s);
struct hlua *hlua = hlua_stream_ctx_get(s, http_ctx->hlua->state_id);
/* Note that this hlua struct is from the session and not from the applet. */
if (!hlua)
@ -5507,8 +5499,9 @@ __LJMP static int hlua_applet_http_set_priv(lua_State *L)
__LJMP static int hlua_applet_http_get_priv(lua_State *L)
{
struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
struct hlua_http_ctx *http_ctx = luactx->appctx->svcctx;
struct stream *s = luactx->htxn.s;
struct hlua *hlua = hlua_stream_ctx_get(s);
struct hlua *hlua = hlua_stream_ctx_get(s, http_ctx->hlua->state_id);
/* Note that this hlua struct is from the session and not from the applet. */
if (!hlua) {
@ -11996,6 +11989,9 @@ static int hlua_filter_new(struct stream *s, struct filter *filter)
/* At this point the execution is safe. */
RESET_SAFE_LJMP(hlua);
/* save main hlua ctx (from the stream) */
flt_ctx->_hlua = hlua;
filter->ctx = flt_ctx;
break;
case HLUA_E_ERRMSG:
@ -12046,7 +12042,7 @@ static int hlua_filter_new(struct stream *s, struct filter *filter)
static void hlua_filter_delete(struct stream *s, struct filter *filter)
{
struct hlua_flt_ctx *flt_ctx = filter->ctx;
struct hlua *hlua = hlua_stream_ctx_get(s);
struct hlua *hlua = hlua_stream_ctx_get(s, flt_ctx->_hlua->state_id);
hlua_lock(hlua);
hlua_unref(hlua->T, flt_ctx->ref);

View File

@ -537,7 +537,7 @@ struct stream *stream_new(struct session *sess, struct stconn *sc, struct buffer
s->res.analyse_exp = TICK_ETERNITY;
s->txn = NULL;
s->hlua = NULL;
s->hlua[0] = s->hlua[1] = NULL;
s->resolv_ctx.requester = NULL;
s->resolv_ctx.hostname_dn = NULL;
@ -649,8 +649,10 @@ void stream_free(struct stream *s)
flt_stream_stop(s);
flt_stream_release(s, 0);
hlua_ctx_destroy(s->hlua);
s->hlua = NULL;
hlua_ctx_destroy(s->hlua[0]);
hlua_ctx_destroy(s->hlua[1]);
s->hlua[0] = s->hlua[1] = NULL;
if (s->txn)
http_destroy_txn(s);