BUG/MAJOR: hlua: improper lock usage with hlua_ctx_resume()

hlua_ctx_resume() itself can safely be used as-is in a multithreading
context because it takes care of taking the lua lock.

However, when hlua_ctx_resume() returns, the lock is released and it is
thus the caller's responsibility to ensure it owns the lock prior to
performing additional manipulations on the Lua stack. Unfortunately, since
early haproxy lua implementation, we used to do it wrong:

The most common hlua_ctx_resume() pattern we can find in the code (because
it was duplicated over and over over time) is the following:

  |ret = hlua_ctx_resume()
  |switch (ret) {
  |        case HLUA_E_OK:
  |        break;
  |        case HLUA_E_ERRMSG:
  |        break;
  |        [...]
  |}

Problem is: for some of the switch cases, we still perform lua stack
manipulations. This is the case for the HLUA_E_ERRMSG for instance where
we often use lua_tostring() to retrieve last lua error message on the top
of the stack, or sometimes for the HLUA_E_OK case, when we need to perform
some lua cleanup logic once the resume ended. But all of this is done
WITHOUT the lua lock, so this means that the main lua stack could be
accessed simultaneously by concurrent threads when a script was loaded
using 'lua-load'.

While it is not critical for switch-cases dedicated to error handling,
(those are not supposed to happen very often), it can be very problematic
for stack manipulations occuring in the HLUA_E_OK case under heavy load
for instance. In this case, main lua stack corruptions will eventually
happen. This is especially true inside hlua_filter_new(), where this bug
was known to cause lua stack corruptions under load, leading to lua errors
and even crashing the process as reported by @bgrooot in GH #2467.

The fix is relatively simple, once hlua_ctx_resume() returns: we should
consider that ANY lua stack access should be lua-lock protected. If the
related lua calls may raise lua errors, then (RE)SET_SAFE_LJMP
combination should be used as usual (it allows to lock the lua stack and
catch lua exceptions at the same time), else hlua_{lock,unlock} may be
used if no exceptions are expected.

This patch should fix GH #2467.

It should be backported to all stable versions.

[ada: some ctx adj will be required for older versions as event_hdl
 doesn't exist prior to 2.8 and filters were implemented in 2.5, thus
 some chunks won't apply]
This commit is contained in:
Aurelien DARRAGON 2024-03-01 21:17:51 +01:00
parent 19b016f9f8
commit 8670db7a89

View File

@ -9036,7 +9036,9 @@ struct task *hlua_process_task(struct task *task, void *context, unsigned int st
SEND_ERR(NULL, "Lua task: execution timeout.\n");
goto err_task_abort;
case HLUA_E_ERRMSG:
hlua_lock(hlua);
SEND_ERR(NULL, "Lua task: %s.\n", hlua_tostring_safe(hlua->T, -1));
hlua_unlock(hlua);
goto err_task_abort;
case HLUA_E_ERR:
default:
@ -9311,7 +9313,9 @@ static void hlua_event_handler(struct hlua *hlua)
break;
case HLUA_E_ERRMSG:
hlua_lock(hlua);
SEND_ERR(NULL, "Lua event_hdl: %s.\n", hlua_tostring_safe(hlua->T, -1));
hlua_unlock(hlua);
break;
case HLUA_E_ERR:
@ -9999,9 +10003,12 @@ static int hlua_sample_conv_wrapper(const struct arg *arg_p, struct sample *smp,
switch (hlua_ctx_resume(stream->hlua, 0)) {
/* finished. */
case HLUA_E_OK:
hlua_lock(stream->hlua);
/* If the stack is empty, the function fails. */
if (lua_gettop(stream->hlua->T) <= 0)
if (lua_gettop(stream->hlua->T) <= 0) {
hlua_unlock(stream->hlua);
return 0;
}
/* Convert the returned value in sample. */
hlua_lua2smp(stream->hlua->T, -1, smp);
@ -10010,6 +10017,7 @@ static int hlua_sample_conv_wrapper(const struct arg *arg_p, struct sample *smp,
*/
smp_dup(smp);
lua_pop(stream->hlua->T, 1);
hlua_unlock(stream->hlua);
return 1;
/* yield. */
@ -10020,9 +10028,11 @@ static int hlua_sample_conv_wrapper(const struct arg *arg_p, struct sample *smp,
/* finished with error. */
case HLUA_E_ERRMSG:
/* Display log. */
hlua_lock(stream->hlua);
SEND_ERR(stream->be, "Lua converter '%s': %s.\n",
fcn->name, hlua_tostring_safe(stream->hlua->T, -1));
lua_pop(stream->hlua->T, 1);
hlua_unlock(stream->hlua);
return 0;
case HLUA_E_ETMOUT:
@ -10124,9 +10134,12 @@ static int hlua_sample_fetch_wrapper(const struct arg *arg_p, struct sample *smp
switch (hlua_ctx_resume(stream->hlua, 0)) {
/* finished. */
case HLUA_E_OK:
hlua_lock(stream->hlua);
/* If the stack is empty, the function fails. */
if (lua_gettop(stream->hlua->T) <= 0)
if (lua_gettop(stream->hlua->T) <= 0) {
hlua_unlock(stream->hlua);
return 0;
}
/* Convert the returned value in sample. */
hlua_lua2smp(stream->hlua->T, -1, smp);
@ -10135,6 +10148,7 @@ static int hlua_sample_fetch_wrapper(const struct arg *arg_p, struct sample *smp
*/
smp_dup(smp);
lua_pop(stream->hlua->T, 1);
hlua_unlock(stream->hlua);
/* Set the end of execution flag. */
smp->flags &= ~SMP_F_MAY_CHANGE;
@ -10148,9 +10162,11 @@ static int hlua_sample_fetch_wrapper(const struct arg *arg_p, struct sample *smp
/* finished with error. */
case HLUA_E_ERRMSG:
/* Display log. */
hlua_lock(stream->hlua);
SEND_ERR(smp->px, "Lua sample-fetch '%s': %s.\n",
fcn->name, hlua_tostring_safe(stream->hlua->T, -1));
lua_pop(stream->hlua->T, 1);
hlua_unlock(stream->hlua);
return 0;
case HLUA_E_ETMOUT:
@ -10460,8 +10476,10 @@ static enum act_return hlua_action(struct act_rule *rule, struct proxy *px,
/* finished. */
case HLUA_E_OK:
/* Catch the return value */
hlua_lock(s->hlua);
if (lua_gettop(s->hlua->T) > 0)
act_ret = lua_tointeger(s->hlua->T, -1);
hlua_unlock(s->hlua);
/* Set timeout in the required channel. */
if (act_ret == ACT_RET_YIELD) {
@ -10501,9 +10519,11 @@ static enum act_return hlua_action(struct act_rule *rule, struct proxy *px,
/* finished with error. */
case HLUA_E_ERRMSG:
/* Display log. */
hlua_lock(s->hlua);
SEND_ERR(px, "Lua function '%s': %s.\n",
rule->arg.hlua_rule->fcn->name, hlua_tostring_safe(s->hlua->T, -1));
lua_pop(s->hlua->T, 1);
hlua_unlock(s->hlua);
goto end;
case HLUA_E_ETMOUT:
@ -10678,9 +10698,11 @@ void hlua_applet_tcp_fct(struct appctx *ctx)
/* finished with error. */
case HLUA_E_ERRMSG:
/* Display log. */
hlua_lock(hlua);
SEND_ERR(px, "Lua applet tcp '%s': %s.\n",
rule->arg.hlua_rule->fcn->name, hlua_tostring_safe(hlua->T, -1));
lua_pop(hlua->T, 1);
hlua_unlock(hlua);
goto error;
case HLUA_E_ETMOUT:
@ -10889,9 +10911,11 @@ void hlua_applet_http_fct(struct appctx *ctx)
/* finished with error. */
case HLUA_E_ERRMSG:
/* Display log. */
hlua_lock(hlua);
SEND_ERR(px, "Lua applet http '%s': %s.\n",
rule->arg.hlua_rule->fcn->name, hlua_tostring_safe(hlua->T, -1));
lua_pop(hlua->T, 1);
hlua_unlock(hlua);
goto error;
case HLUA_E_ETMOUT:
@ -11515,9 +11539,11 @@ static int hlua_cli_io_handler_fct(struct appctx *appctx)
/* finished with error. */
case HLUA_E_ERRMSG:
/* Display log. */
hlua_lock(hlua);
SEND_ERR(NULL, "Lua cli '%s': %s.\n",
fcn->name, hlua_tostring_safe(hlua->T, -1));
lua_pop(hlua->T, 1);
hlua_unlock(hlua);
return 1;
case HLUA_E_ETMOUT:
@ -11928,9 +11954,25 @@ static int hlua_filter_new(struct stream *s, struct filter *filter)
switch (hlua_ctx_resume(s->hlua, 0)) {
case HLUA_E_OK:
/* The following Lua calls can fail. */
if (!SET_SAFE_LJMP(s->hlua)) {
const char *error;
hlua_lock(s->hlua);
if (lua_type(s->hlua->T, -1) == LUA_TSTRING)
error = hlua_tostring_safe(s->hlua->T, -1);
else
error = "critical error";
SEND_ERR(s->be, "Lua filter '%s': %s.\n", conf->reg->name, error);
hlua_unlock(s->hlua);
ret = 0;
goto end;
}
/* Nothing returned or not a table, ignore the filter for current stream */
if (!lua_gettop(s->hlua->T) || !lua_istable(s->hlua->T, 1)) {
ret = 0;
RESET_SAFE_LJMP(s->hlua);
goto end;
}
@ -11942,6 +11984,10 @@ static int hlua_filter_new(struct stream *s, struct filter *filter)
/* Save a ref on the filter ctx */
lua_pushvalue(s->hlua->T, 1);
flt_ctx->ref = hlua_ref(s->hlua->T);
/* At this point the execution is safe. */
RESET_SAFE_LJMP(s->hlua);
filter->ctx = flt_ctx;
break;
case HLUA_E_ERRMSG:
@ -12106,10 +12152,12 @@ static int hlua_filter_callback(struct stream *s, struct filter *filter, const c
switch (hlua_ctx_resume(flt_hlua, !(flags & HLUA_FLT_CB_FINAL))) {
case HLUA_E_OK:
/* Catch the return value if it required */
hlua_lock(flt_hlua);
if ((flags & HLUA_FLT_CB_RETVAL) && lua_gettop(flt_hlua->T) > 0) {
ret = lua_tointeger(flt_hlua->T, -1);
lua_settop(flt_hlua->T, 0); /* Empty the stack. */
}
hlua_unlock(flt_hlua);
/* Set timeout in the required channel. */
if (flt_hlua->wake_time != TICK_ETERNITY) {
@ -12138,7 +12186,9 @@ static int hlua_filter_callback(struct stream *s, struct filter *filter, const c
ret = 0;
goto end;
case HLUA_E_ERRMSG:
hlua_lock(flt_hlua);
SEND_ERR(s->be, "Lua filter '%s' : %s.\n", conf->reg->name, hlua_tostring_safe(flt_hlua->T, -1));
hlua_unlock(flt_hlua);
ret = -1;
goto end;
case HLUA_E_ETMOUT: