mirror of
http://git.haproxy.org/git/haproxy.git/
synced 2024-12-26 22:52:13 +00:00
MEDIUM: hlua: reliable timeout detection
For non-yieldable lua handlers (converters, fetches or yield-incompatible lua functions), the current timeout detection relies on the now_ms thread-local variable. But within non-yieldable contexts, now_ms won't be updated unless we do it ourselves (because we're momentarily stuck in lua context, so we won't re-enter the polling loop, which is responsible for clock updates). To circumvent this, clock_update_date(0, 1) was manually performed right before now_ms is read for the timeout checks. But this fails to work consistently: if no other concurrent thread periodically runs clock_update_global_date(), which does happen if we're the only active thread (nbthread=1 or low traffic), our clock_update_date() call won't reliably update our local now_ms variable. Moreover, clock_update_date() is not the right tool for this anyway, as it was initially meant to be used from the polling context. Using it could have a negative impact on other threads relying on now_ms to be stable (because clock_update_date() performs a global clock update from time to time). -> Introducing the hlua multipurpose timer, which is internally based on now_cpu_time_fast(), a function that provides per-thread consistent clock readings. Thanks to this new hlua timer API, the hlua timeout logic is less error-prone and more robust. This allows the timeout detection to work as expected for both yieldable and non-yieldable lua handlers. This patch depends on commit "MINOR: clock: add now_cpu_time_fast() function". While this could theoretically be backported to all stable versions, it is advisable to avoid backports unless we're confident enough, since it could cause slight behavior changes (timing related) in existing setups.
This commit is contained in:
parent
df188f145b
commit
da9503ca9a
@ -26,6 +26,7 @@
|
||||
|
||||
#include <lua.h>
|
||||
#include <lauxlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <import/ebtree-t.h>
|
||||
|
||||
@ -99,6 +100,13 @@ enum hlua_exec {
|
||||
HLUA_E_ERR, /* LUA stack execution failed without error message. */
|
||||
};
|
||||
|
||||
/* State of the hlua multipurpose timer. All four fields hold millisecond
 * values stored as uint32_t.
 */
struct hlua_timer {
	uint32_t start;      /* cpu time (ms) recorded when the timer was started */
	uint32_t burst;      /* time (ms) spent executing the current call */
	uint32_t cumulative; /* total time (ms) spent executing the coroutine */
	uint32_t max;        /* upper limit (ms) on the coroutine's cumulative execution time */
};
|
||||
|
||||
struct hlua {
|
||||
lua_State *T; /* The LUA stack. */
|
||||
int state_id; /* contains the lua state id. 0 is common state, 1 to n are per-thread states.*/
|
||||
@ -109,9 +117,7 @@ struct hlua {
|
||||
int nargs; /* The number of arguments in the stack at the start of execution. */
|
||||
unsigned int flags; /* The current execution flags. */
|
||||
int wake_time; /* The lua wants to be waked at this time, or before. */
|
||||
unsigned int max_time; /* The max amount of execution time for an Lua process, in ms. */
|
||||
unsigned int start_time; /* The ms time when the Lua starts the last execution. */
|
||||
unsigned int run_time; /* Lua total execution time in ms. */
|
||||
struct hlua_timer timer; /* lua multipurpose timer */
|
||||
struct task *task; /* The task associated with the lua stack execution.
|
||||
We must wake this task to continue the task execution */
|
||||
struct list com; /* The list head of the signals attached to this task. */
|
||||
|
155
src/hlua.c
155
src/hlua.c
@ -345,14 +345,96 @@ static int class_applet_tcp_ref;
|
||||
static int class_applet_http_ref;
|
||||
static int class_txn_reply_ref;
|
||||
|
||||
/* Global Lua execution timeout. By default Lua, execution linked
|
||||
* with stream (actions, sample-fetches and converters) have a
|
||||
* short timeout. Lua linked with tasks doesn't have a timeout
|
||||
* because a task may remain alive during all the haproxy execution.
|
||||
/* Lua max execution timeouts. By default, stream-related
|
||||
* lua coroutines (e.g.: actions) have a short timeout.
|
||||
* On the other hand tasks coroutines don't have a timeout because
|
||||
* a task may remain alive during all the haproxy execution.
|
||||
*
|
||||
* Timeouts are expressed in milliseconds, they are meant to be used
|
||||
* with hlua timer's API exclusively.
|
||||
* 0 means no timeout
|
||||
*/
|
||||
static unsigned int hlua_timeout_session = 4000; /* session timeout. */
|
||||
static unsigned int hlua_timeout_task = TICK_ETERNITY; /* task timeout. */
|
||||
static unsigned int hlua_timeout_applet = 4000; /* applet timeout. */
|
||||
static uint32_t hlua_timeout_session = 4000; /* session timeout. */
|
||||
static uint32_t hlua_timeout_task = 0; /* task timeout. */
|
||||
static uint32_t hlua_timeout_applet = 4000; /* applet timeout. */
|
||||
|
||||
/* hlua multipurpose timer:
|
||||
* used to compute burst lua time (within a single hlua_ctx_resume())
|
||||
* and cumulative lua time for a given coroutine, and to check
|
||||
* the lua coroutine against the configured timeouts
|
||||
*/
|
||||
|
||||
/* Fetch the per-thread cpu time with ms precision.
 *
 * Returns now_cpu_time_fast() converted to milliseconds and truncated to
 * 32 bits. The result may wrap, so it must only be combined with other
 * uint32_t values (see the note in the body).
 */
static inline uint32_t _hlua_time_ms(void)
{
	/* We're interested in the current cpu time in ms, which will be returned
	 * as a uint32_t to save some space.
	 * We must take the following into account:
	 *
	 * - now_cpu_time_fast() which returns the time in nanoseconds as a uint64_t
	 *   will wrap every 585 years.
	 * - uint32_t may only contain 4294967295ms (~=49.7 days), so _hlua_time_ms()
	 *   itself will also wrap every 49.7 days.
	 *
	 * While we can safely ignore the now_cpu_time_fast() wrap, we must
	 * take care of the uint32_t wrap by making sure to exclusively
	 * manipulate the time using uint32_t everywhere _hlua_time_ms()
	 * is involved.
	 */
	return (uint32_t)(now_cpu_time_fast() / 1000000ULL);
}
|
||||
|
||||
/* computes time spent in a single lua execution (in ms) */
|
||||
static inline uint32_t _hlua_time_burst(const struct hlua_timer *timer)
|
||||
{
|
||||
uint32_t burst_ms;
|
||||
|
||||
/* wrapping is expected and properly
|
||||
* handled thanks to _hlua_time_ms() and burst_ms
|
||||
* being of the same type
|
||||
*/
|
||||
burst_ms = _hlua_time_ms() - timer->start;
|
||||
return burst_ms;
|
||||
}
|
||||
|
||||
static inline void hlua_timer_init(struct hlua_timer *timer, unsigned int max)
|
||||
{
|
||||
timer->cumulative = 0;
|
||||
timer->burst = 0;
|
||||
timer->max = max;
|
||||
}
|
||||
|
||||
/* reset the timer ctx between 2 yields */
|
||||
static inline void hlua_timer_reset(struct hlua_timer *timer)
|
||||
{
|
||||
timer->cumulative += timer->burst;
|
||||
timer->burst = 0;
|
||||
}
|
||||
|
||||
/* start the timer right before a new execution */
|
||||
static inline void hlua_timer_start(struct hlua_timer *timer)
|
||||
{
|
||||
timer->start = _hlua_time_ms();
|
||||
}
|
||||
|
||||
/* update hlua timer when finishing an execution */
|
||||
static inline void hlua_timer_stop(struct hlua_timer *timer)
|
||||
{
|
||||
timer->burst += _hlua_time_burst(timer);
|
||||
}
|
||||
|
||||
/* check the timers for current hlua context
|
||||
* Returns 1 if the check succeeded and 0 if it failed
|
||||
* (ie: timeout exceeded)
|
||||
*/
|
||||
static inline int hlua_timer_check(const struct hlua_timer *timer)
|
||||
{
|
||||
uint32_t pburst = _hlua_time_burst(timer); /* pending burst time in ms */
|
||||
|
||||
if (timer->max && (timer->cumulative + timer->burst + pburst) > timer->max)
|
||||
return 0; /* cumulative timeout exceeded */
|
||||
return 1; /* ok */
|
||||
}
|
||||
|
||||
/* Interrupts the Lua processing each "hlua_nb_instruction" instructions.
|
||||
* it is used for preventing infinite loops.
|
||||
@ -1323,6 +1405,7 @@ int hlua_ctx_init(struct hlua *lua, int state_id, struct task *task)
|
||||
lua->gc_count = 0;
|
||||
lua->wake_time = TICK_ETERNITY;
|
||||
lua->state_id = state_id;
|
||||
hlua_timer_init(&lua->timer, 0); /* default value, no timeout */
|
||||
LIST_INIT(&lua->com);
|
||||
MT_LIST_INIT(&lua->hc_list);
|
||||
if (!SET_SAFE_LJMP_PARENT(lua)) {
|
||||
@ -1505,17 +1588,12 @@ void hlua_hook(lua_State *L, lua_Debug *ar)
|
||||
return;
|
||||
}
|
||||
|
||||
/* If we cannot yield, update the clock and check the timeout. */
|
||||
clock_update_date(0, 1);
|
||||
hlua->run_time += now_ms - hlua->start_time;
|
||||
if (hlua->max_time && hlua->run_time >= hlua->max_time) {
|
||||
/* If we cannot yield, check the timeout. */
|
||||
if (!hlua_timer_check(&hlua->timer)) {
|
||||
lua_pushfstring(L, "execution timeout");
|
||||
WILL_LJMP(lua_error(L));
|
||||
}
|
||||
|
||||
/* Update the start time. */
|
||||
hlua->start_time = now_ms;
|
||||
|
||||
/* Try to interrupt the process at the end of the current
|
||||
* unyieldable function.
|
||||
*/
|
||||
@ -1546,15 +1624,17 @@ static enum hlua_exec hlua_ctx_resume(struct hlua *lua, int yield_allowed)
|
||||
const char *msg;
|
||||
const char *trace;
|
||||
|
||||
/* Initialise run time counter. */
|
||||
if (!HLUA_IS_RUNNING(lua))
|
||||
lua->run_time = 0;
|
||||
|
||||
/* Lock the whole Lua execution. This lock must be before the
|
||||
* label "resume_execution".
|
||||
*/
|
||||
hlua_lock(lua);
|
||||
|
||||
/* reset the timer as we might be re-entering the function to
|
||||
* resume the coroutine after a successful yield
|
||||
* (cumulative time will be updated)
|
||||
*/
|
||||
hlua_timer_reset(&lua->timer);
|
||||
|
||||
resume_execution:
|
||||
|
||||
/* This hook interrupts the Lua processing each 'hlua_nb_instruction'
|
||||
@ -1571,16 +1651,22 @@ resume_execution:
|
||||
if (!yield_allowed)
|
||||
HLUA_SET_NOYIELD(lua);
|
||||
|
||||
/* Update the start time and reset wake_time. */
|
||||
lua->start_time = now_ms;
|
||||
/* reset wake_time. */
|
||||
lua->wake_time = TICK_ETERNITY;
|
||||
|
||||
/* start the timer as we're about to start lua processing */
|
||||
hlua_timer_start(&lua->timer);
|
||||
|
||||
/* Call the function. */
|
||||
#if defined(LUA_VERSION_NUM) && LUA_VERSION_NUM >= 504
|
||||
ret = lua_resume(lua->T, hlua_states[lua->state_id], lua->nargs, &nres);
|
||||
#else
|
||||
ret = lua_resume(lua->T, hlua_states[lua->state_id], lua->nargs);
|
||||
#endif
|
||||
|
||||
/* out of lua processing, stop the timer */
|
||||
hlua_timer_stop(&lua->timer);
|
||||
|
||||
switch (ret) {
|
||||
|
||||
case LUA_OK:
|
||||
@ -1588,12 +1674,10 @@ resume_execution:
|
||||
break;
|
||||
|
||||
case LUA_YIELD:
|
||||
/* Check if the execution timeout is expired. It it is the case, we
|
||||
/* Check if the execution timeout is expired. If it is the case, we
|
||||
* break the Lua execution.
|
||||
*/
|
||||
clock_update_date(0, 1);
|
||||
lua->run_time += now_ms - lua->start_time;
|
||||
if (lua->max_time && lua->run_time > lua->max_time) {
|
||||
if (!hlua_timer_check(&lua->timer)) {
|
||||
lua_settop(lua->T, 0); /* Empty the stack. */
|
||||
ret = HLUA_E_ETMOUT;
|
||||
break;
|
||||
@ -8626,7 +8710,7 @@ struct task *hlua_process_task(struct task *task, void *context, unsigned int st
|
||||
* execution timeouts.
|
||||
*/
|
||||
if (!HLUA_IS_RUNNING(hlua))
|
||||
hlua->max_time = hlua_timeout_task;
|
||||
hlua_timer_init(&hlua->timer, hlua_timeout_task);
|
||||
|
||||
/* Execute the Lua code. */
|
||||
status = hlua_ctx_resume(hlua, 1);
|
||||
@ -8849,9 +8933,8 @@ static void hlua_event_handler(struct hlua *hlua)
|
||||
/* If it is the first call to the task, we must initialize the
|
||||
* execution timeouts.
|
||||
*/
|
||||
if (!HLUA_IS_RUNNING(hlua)) {
|
||||
hlua->max_time = hlua_timeout_task;
|
||||
}
|
||||
if (!HLUA_IS_RUNNING(hlua))
|
||||
hlua_timer_init(&hlua->timer, hlua_timeout_task);
|
||||
|
||||
/* make sure to reset the task expiry before each hlua_ctx_resume()
|
||||
* since the task is re-used for multiple cb function calls
|
||||
@ -9353,7 +9436,7 @@ static int hlua_sample_conv_wrapper(const struct arg *arg_p, struct sample *smp,
|
||||
}
|
||||
|
||||
/* We must initialize the execution timeouts. */
|
||||
stream->hlua->max_time = hlua_timeout_session;
|
||||
hlua_timer_init(&stream->hlua->timer, hlua_timeout_session);
|
||||
|
||||
/* At this point the execution is safe. */
|
||||
RESET_SAFE_LJMP(stream->hlua);
|
||||
@ -9488,7 +9571,7 @@ static int hlua_sample_fetch_wrapper(const struct arg *arg_p, struct sample *smp
|
||||
}
|
||||
|
||||
/* We must initialize the execution timeouts. */
|
||||
stream->hlua->max_time = hlua_timeout_session;
|
||||
hlua_timer_init(&stream->hlua->timer, hlua_timeout_session);
|
||||
|
||||
/* At this point the execution is safe. */
|
||||
RESET_SAFE_LJMP(stream->hlua);
|
||||
@ -9837,7 +9920,7 @@ static enum act_return hlua_action(struct act_rule *rule, struct proxy *px,
|
||||
RESET_SAFE_LJMP(s->hlua);
|
||||
|
||||
/* We must initialize the execution timeouts. */
|
||||
s->hlua->max_time = hlua_timeout_session;
|
||||
hlua_timer_init(&s->hlua->timer, hlua_timeout_session);
|
||||
}
|
||||
|
||||
/* Execute the function. */
|
||||
@ -9974,7 +10057,7 @@ static int hlua_applet_tcp_init(struct appctx *ctx)
|
||||
}
|
||||
|
||||
/* Set timeout according with the applet configuration. */
|
||||
hlua->max_time = ctx->applet->timeout;
|
||||
hlua_timer_init(&hlua->timer, ctx->applet->timeout);
|
||||
|
||||
/* The following Lua calls can fail. */
|
||||
if (!SET_SAFE_LJMP(hlua)) {
|
||||
@ -10165,7 +10248,7 @@ static int hlua_applet_http_init(struct appctx *ctx)
|
||||
}
|
||||
|
||||
/* Set timeout according with the applet configuration. */
|
||||
hlua->max_time = ctx->applet->timeout;
|
||||
hlua_timer_init(&hlua->timer, ctx->applet->timeout);
|
||||
|
||||
/* The following Lua calls can fail. */
|
||||
if (!SET_SAFE_LJMP(hlua)) {
|
||||
@ -10842,7 +10925,7 @@ static int hlua_cli_parse_fct(char **args, char *payload, struct appctx *appctx,
|
||||
}
|
||||
|
||||
/* We must initialize the execution timeouts. */
|
||||
hlua->max_time = hlua_timeout_session;
|
||||
hlua_timer_init(&hlua->timer, hlua_timeout_session);
|
||||
|
||||
/* At this point the execution is safe. */
|
||||
RESET_SAFE_LJMP(hlua);
|
||||
@ -11308,7 +11391,7 @@ static int hlua_filter_new(struct stream *s, struct filter *filter)
|
||||
s->hlua->nargs = 1;
|
||||
|
||||
/* We must initialize the execution timeouts. */
|
||||
s->hlua->max_time = hlua_timeout_session;
|
||||
hlua_timer_init(&s->hlua->timer, hlua_timeout_session);
|
||||
|
||||
/* At this point the execution is safe. */
|
||||
RESET_SAFE_LJMP(s->hlua);
|
||||
@ -11478,7 +11561,7 @@ static int hlua_filter_callback(struct stream *s, struct filter *filter, const c
|
||||
}
|
||||
|
||||
/* We must initialize the execution timeouts. */
|
||||
flt_hlua->max_time = hlua_timeout_session;
|
||||
hlua_timer_init(&flt_hlua->timer, hlua_timeout_session);
|
||||
|
||||
/* At this point the execution is safe. */
|
||||
RESET_SAFE_LJMP(flt_hlua);
|
||||
|
Loading…
Reference in New Issue
Block a user