From 3a4bedccc6023aa7ef155e6f9823aef4efeced71 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Aug 2021 08:51:02 +0200 Subject: [PATCH] MEDIUM: vars: replace the global name index with a hash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The global table of known variables names can only grow and was designed for static names that are registered at boot. Nowadays it's possible to set dynamic variable names from Lua or from the CLI, which causes a real problem that was partially addressed in 2.2 with commit 4e172c93f ("MEDIUM: lua: Add `ifexist` parameter to `set_var`"). Please see github issue #624 for more context. This patch simplifies all this by removing the need for a central registry of known names, and storing 64-bit hashes instead. This is highly sufficient given the low number of variables in each context. The hash is calculated using XXH64() which is bijective over the 64-bit space thus is guaranteed collision-free for 1..8 chars. Above that the risk remains around 1/2^64 per extra 8 chars so in practice this is highly sufficient for our usage. A random seed is used at boot to seed the hash so that it's not attackable from Lua for example. There's one particular nit though. The "ifexist" hack mentioned above is now limited to variables of scope "proc" only, and will only match variables that were already created or declared, but will now verify the scope as well. This may affect some bogus Lua scripts and SPOE agents which used to accidentally work because a similarly named variable used to exist in a different scope. These ones may need to be fixed to comply with the doc. Now we can sum up the situation as this one: - ephemeral variables (scopes sess, txn, req, res) will always be usable, regardless of any prior declaration. This effectively addresses the most problematic change from the commit above that in order to work well could have required some script auditing ; - process-wide variables (scope proc) that are mentioned in the configuration, referenced in a "register-var-names" SPOE directive, or created via "set-var" in the global section or the CLI, are permanent and will always accept to be set, with or without the "ifexist" restriction (SPOE uses this internally as well). - process-wide variables (scope proc) that are only created via a set-var() tcp/http action, via Lua's set_var() calls, or via an SPOE with the "force-set-var" directive), will not be permanent but will always accept to be replaced once they are created, even if "ifexist" is present - process-wide variables (scope proc) that do not exist will only support being created via the set-var() tcp/http action, Lua's set_var() calls without "ifexist", or an SPOE declared with "force-set-var". This means that non-proc variables do not care about "ifexist" nor prior declaration, and that using "ifexist" should most often be reliable in Lua and that SPOE should most often work without any prior declaration. It may be doable to turn "ifexist" to 1 by default in Lua to further ease the transition. Note: regtests were adjusted. Cc: Tim Düsterhus --- include/haproxy/action-t.h | 2 +- include/haproxy/vars-t.h | 6 +- reg-tests/lua/set_var.vtc | 41 ++++++-- src/vars.c | 193 ++++++++++++++----------------------- 4 files changed, 111 insertions(+), 131 deletions(-) diff --git a/include/haproxy/action-t.h b/include/haproxy/action-t.h index bb243c13f..fd85d2707 100644 --- a/include/haproxy/action-t.h +++ b/include/haproxy/action-t.h @@ -164,7 +164,7 @@ struct act_rule { struct { struct list fmt; /* log-format compatible expression */ struct sample_expr *expr; - const char *name; + uint64_t name_hash; enum vars_scope scope; } vars; struct { diff --git a/include/haproxy/vars-t.h b/include/haproxy/vars-t.h index ab83b4ebe..9e4d8078c 100644 --- a/include/haproxy/vars-t.h +++ b/include/haproxy/vars-t.h @@ -46,15 +46,15 @@ struct vars { __decl_thread(HA_RWLOCK_T rwlock); }; -/* This struct describes a variable. */ +/* This struct describes a variable as found in an arg_data */ struct var_desc { - const char *name; /* Contains the normalized variable name. */ + uint64_t name_hash; enum vars_scope scope; }; struct var { struct list l; /* Used for chaining vars. */ - const char *name; /* Contains the variable name. */ + uint64_t name_hash; /* XXH3() of the variable's name */ uint flags; // VF_* /* 32-bit hole here */ struct sample_data data; /* data storage. */ diff --git a/reg-tests/lua/set_var.vtc b/reg-tests/lua/set_var.vtc index 5ca49b6bf..0c8a4b162 100644 --- a/reg-tests/lua/set_var.vtc +++ b/reg-tests/lua/set_var.vtc @@ -23,18 +23,35 @@ haproxy h1 -conf { frontend fe2 mode http bind "fd@${fe2}" - - http-request set-header Dummy %[var(txn.fe2_foo)] + # just make sure the variable exists + http-request set-header Dummy %[var(proc.fe2_foo)] http-request use-service lua.set_var_ifexist } -start client c0 -connect ${h1_fe1_sock} { + # create var txreq -url "/" \ -hdr "Var: txn.fe1_foo" rxresp expect resp.status == 202 expect resp.http.echo == "value" + + # rewrite var + txreq -url "/" \ + -hdr "Var: txn.fe1_foo" + rxresp + expect resp.status == 202 + expect resp.http.echo == "value" + + # create var under scope "proc" + txreq -url "/" \ + -hdr "Var: proc.fe1_foo" + rxresp + expect resp.status == 202 + expect resp.http.echo == "value" + + # fail to create bad scope txreq -url "/" \ -hdr "Var: invalid.var" rxresp @@ -43,14 +60,24 @@ client c0 -connect ${h1_fe1_sock} { } -run client c1 -connect ${h1_fe2_sock} { + # this one exists in the conf, it must succeed + txreq -url "/" \ + -hdr "Var: proc.fe2_foo" + rxresp + expect resp.status == 202 + expect resp.http.echo == "value" + + # this one does not exist in the conf, it must fail + txreq -url "/" \ + -hdr "Var: proc.fe2_bar" + rxresp + expect resp.status == 400 + expect resp.http.echo == "(nil)" + + # this one is under txn, it must succeed txreq -url "/" \ -hdr "Var: txn.fe2_foo" rxresp expect resp.status == 202 expect resp.http.echo == "value" - txreq -url "/" \ - -hdr "Var: txn.fe2_bar" - rxresp - expect resp.status == 400 - expect resp.http.echo == "(nil)" } -run diff --git a/src/vars.c b/src/vars.c index cefd02a03..98e1cfec8 100644 --- a/src/vars.c +++ b/src/vars.c @@ -214,29 +214,19 @@ void vars_init_head(struct vars *vars, enum vars_scope scope) HA_RWLOCK_INIT(&vars->rwlock); } -/* This function declares a new variable name. It returns a pointer - * on the string identifying the name. This function assures that - * the same name exists only once. - * - * This function check if the variable name is acceptable. - * - * The function returns NULL if an error occurs, and is filled. - * In this case, the HAProxy must be stopped because the structs are - * left inconsistent. Otherwise, it returns the pointer on the global - * name. +/* This function returns a hash value and a scope for a variable name of a + * specified length. It makes sure that the scope is valid. It returns non-zero + * on success, 0 on failure. Neither hash nor scope may be NULL. */ -static char *register_name(const char *name, int len, enum vars_scope *scope, - int alloc, char **err) +static int vars_hash_name(const char *name, int len, enum vars_scope *scope, + uint64_t *hash, char **err) { - int i; - char **var_names2; const char *tmp; - char *res = NULL; /* Check length. */ if (len == 0) { memprintf(err, "Empty variable name cannot be accepted"); - return res; + return 0; } /* Check scope. */ @@ -273,73 +263,32 @@ static char *register_name(const char *name, int len, enum vars_scope *scope, else { memprintf(err, "invalid variable name '%.*s'. A variable name must be start by its scope. " "The scope can be 'proc', 'sess', 'txn', 'req', 'res' or 'check'", len, name); - return res; + return 0; } - if (alloc) - HA_RWLOCK_WRLOCK(VARS_LOCK, &var_names_rwlock); - else - HA_RWLOCK_RDLOCK(VARS_LOCK, &var_names_rwlock); - - - /* Look for existing variable name. */ - for (i = 0; i < var_names_nb; i++) - if (strncmp(var_names[i], name, len) == 0 && var_names[i][len] == '\0') { - res = var_names[i]; - goto end; - } - - if (!alloc) { - res = NULL; - goto end; - } - - /* Store variable name. If realloc fails, var_names remains valid */ - var_names2 = realloc(var_names, (var_names_nb + 1) * sizeof(*var_names)); - if (!var_names2) { - memprintf(err, "out of memory error"); - res = NULL; - goto end; - } - var_names_nb++; - var_names = var_names2; - var_names[var_names_nb - 1] = malloc(len + 1); - if (!var_names[var_names_nb - 1]) { - memprintf(err, "out of memory error"); - res = NULL; - goto end; - } - memcpy(var_names[var_names_nb - 1], name, len); - var_names[var_names_nb - 1][len] = '\0'; - /* Check variable name syntax. */ - tmp = var_names[var_names_nb - 1]; - while (*tmp) { + for (tmp = name; tmp < name + len; tmp++) { if (!isalnum((unsigned char)*tmp) && *tmp != '_' && *tmp != '.') { memprintf(err, "invalid syntax at char '%s'", tmp); - res = NULL; - goto end; + return 0; } - tmp++; } - res = var_names[var_names_nb - 1]; - end: - if (alloc) - HA_RWLOCK_WRUNLOCK(VARS_LOCK, &var_names_rwlock); - else - HA_RWLOCK_RDUNLOCK(VARS_LOCK, &var_names_rwlock); - - return res; + *hash = XXH3(name, len, var_name_hash_seed); + return 1; } -/* This function returns an existing variable or returns NULL. */ -static inline struct var *var_get(struct vars *vars, const char *name) +/* This function returns the variable from the given list that matches + * or returns NULL if not found. It's only a linked list since it + * is not expected to have many variables per scope (a few tens at best). + * The caller is responsible for ensuring that is properly locked. + */ +static struct var *var_get(struct vars *vars, uint64_t name_hash) { struct var *var; list_for_each_entry(var, &vars->head, l) - if (var->name == name) + if (var->name_hash == name_hash) return var; return NULL; } @@ -356,11 +305,13 @@ static int smp_fetch_var(const struct arg *args, struct sample *smp, const char return vars_get_by_desc(var_desc, smp, def); } -/* This function tries to create variable in scope and store - * sample as its value. The stream and session are extracted from , - * and the stream may be NULL when scope is SCOPE_SESS. In case there wouldn't - * be enough memory to store the sample while the variable was already created, - * it would be changed to a bool (which is memory-less). +/* This function tries to create a variable whose name hash is in + * scope and store sample as its value. + * + * The stream and session are extracted from , whose stream may be NULL + * when scope is SCOPE_SESS. In case there wouldn't be enough memory to store + * the sample while the variable was already created, it would be changed to + * a bool (which is memory-less). * * Flags is a bitfield that may contain one of the following flags: * - VF_UPDATEONLY: if the scope is SCOPE_PROC, the variable may only be @@ -370,7 +321,7 @@ static int smp_fetch_var(const struct arg *args, struct sample *smp, const char * * It returns 0 on failure, non-zero on success. */ -static int var_set(const char *name, enum vars_scope scope, struct sample *smp, uint flags) +static int var_set(uint64_t name_hash, enum vars_scope scope, struct sample *smp, uint flags) { struct vars *vars; struct var *var; @@ -383,7 +334,7 @@ static int var_set(const char *name, enum vars_scope scope, struct sample *smp, HA_RWLOCK_WRLOCK(VARS_LOCK, &vars->rwlock); /* Look for existing variable name. */ - var = var_get(vars, name); + var = var_get(vars, name_hash); if (var) { if (flags & VF_CREATEONLY) { @@ -417,7 +368,7 @@ static int var_set(const char *name, enum vars_scope scope, struct sample *smp, if (!var) goto unlock; LIST_APPEND(&vars->head, &var->l); - var->name = name; + var->name_hash = name_hash; var->flags = flags & VF_PERMANENT; } @@ -485,11 +436,11 @@ static int var_set(const char *name, enum vars_scope scope, struct sample *smp, return ret; } -/* This unsets variable from scope , using the session and stream - * found in . Note that stream may be null for SCOPE_SESS. Returns 0 if - * the scope was not found otherwise 1. +/* Deletes a variable matching name hash and scope for the + * session and stream found in . Note that stream may be null for + * SCOPE_SESS. Returns 0 if the scope was not found otherwise 1. */ -static int var_unset(const char *name, enum vars_scope scope, struct sample *smp) +static int var_unset(uint64_t name_hash, enum vars_scope scope, struct sample *smp) { struct vars *vars; struct var *var; @@ -501,7 +452,7 @@ static int var_unset(const char *name, enum vars_scope scope, struct sample *smp /* Look for existing variable name. */ HA_RWLOCK_WRLOCK(VARS_LOCK, &vars->rwlock); - var = var_get(vars, name); + var = var_get(vars, name_hash); if (var) { size = var_clear(var, 0); var_accounting_diff(vars, smp->sess, smp->strm, -size); @@ -513,13 +464,19 @@ static int var_unset(const char *name, enum vars_scope scope, struct sample *smp /* Returns 0 if fails, else returns 1. */ static int smp_conv_store(const struct arg *args, struct sample *smp, void *private) { - return var_set(args[0].data.var.name, args[0].data.var.scope, smp, 0); + uint64_t seed = var_name_hash_seed; + uint64_t name_hash = XXH3(smp->data.u.str.area, smp->data.u.str.data, seed); + + return var_set(name_hash, args[0].data.var.scope, smp, 0); } /* Returns 0 if fails, else returns 1. */ static int smp_conv_clear(const struct arg *args, struct sample *smp, void *private) { - return var_unset(args[0].data.var.name, args[0].data.var.scope, smp); + uint64_t seed = var_name_hash_seed; + uint64_t name_hash = XXH3(smp->data.u.str.area, smp->data.u.str.data, seed); + + return var_unset(name_hash, args[0].data.var.scope, smp); } /* This functions check an argument entry and fill it with a variable @@ -528,9 +485,9 @@ static int smp_conv_clear(const struct arg *args, struct sample *smp, void *priv */ int vars_check_arg(struct arg *arg, char **err) { - char *name; enum vars_scope scope; struct sample empty_smp = { }; + uint64_t hash; /* Check arg type. */ if (arg->type != ARGT_STR) { @@ -539,13 +496,10 @@ int vars_check_arg(struct arg *arg, char **err) } /* Register new variable name. */ - name = register_name(arg->data.str.area, arg->data.str.data, &scope, - 1, - err); - if (!name) + if (!vars_hash_name(arg->data.str.area, arg->data.str.data, &scope, &hash, err)) return 0; - if (scope == SCOPE_PROC && !var_set(name, scope, &empty_smp, VF_CREATEONLY|VF_PERMANENT)) + if (scope == SCOPE_PROC && !var_set(hash, scope, &empty_smp, VF_CREATEONLY|VF_PERMANENT)) return 0; /* properly destroy the chunk */ @@ -553,75 +507,76 @@ int vars_check_arg(struct arg *arg, char **err) /* Use the global variable name pointer. */ arg->type = ARGT_VAR; - arg->data.var.name = name; + arg->data.var.name_hash = hash; arg->data.var.scope = scope; return 1; } -/* This function store a sample in a variable if it was already defined. +/* This function stores a sample in a variable if it was already defined. * Returns zero on failure and non-zero otherwise. The variable not being * defined is treated as a failure. */ int vars_set_by_name_ifexist(const char *name, size_t len, struct sample *smp) { enum vars_scope scope; + uint64_t hash; /* Resolve name and scope. */ - name = register_name(name, len, &scope, 0, NULL); - if (!name) + if (!vars_hash_name(name, len, &scope, &hash, NULL)) return 0; - return var_set(name, scope, smp, VF_UPDATEONLY); + return var_set(hash, scope, smp, VF_UPDATEONLY); } -/* This function store a sample in a variable. +/* This function stores a sample in a variable. * Returns zero on failure and non-zero otherwise. */ int vars_set_by_name(const char *name, size_t len, struct sample *smp) { enum vars_scope scope; + uint64_t hash; /* Resolve name and scope. */ - name = register_name(name, len, &scope, 1, NULL); - if (!name) + if (!vars_hash_name(name, len, &scope, &hash, NULL)) return 0; - return var_set(name, scope, smp, 0); + return var_set(hash, scope, smp, 0); } -/* This function unset a variable if it was already defined. +/* This function unsets a variable if it was already defined. * Returns zero on failure and non-zero otherwise. */ int vars_unset_by_name_ifexist(const char *name, size_t len, struct sample *smp) { enum vars_scope scope; + uint64_t hash; /* Resolve name and scope. */ - name = register_name(name, len, &scope, 0, NULL); - if (!name) + if (!vars_hash_name(name, len, &scope, &hash, NULL)) return 0; - return var_unset(name, scope, smp); + return var_unset(hash, scope, smp); } -/* This retrieves variable from variables , and if found and not - * empty, duplicates the result into sample . smp_dup() is used in order - * to release the variables lock ASAP (so a pre-allocated chunk is obtained - * via get_trash_shunk()). The variables' lock is used for reads. +/* This retrieves variable whose hash matches from variables , + * and if found and not empty, duplicates the result into sample . + * smp_dup() is used in order to release the variables lock ASAP (so a pre- + * allocated chunk is obtained via get_trash_shunk()). The variables' lock is + * used for reads. * * The function returns 0 if the variable was not found and no default * value was provided in , otherwise 1 with the sample filled. * Default values are always returned as strings. */ -static int var_to_smp(struct vars *vars, const char *name, struct sample *smp, const struct buffer *def) +static int var_to_smp(struct vars *vars, uint64_t name_hash, struct sample *smp, const struct buffer *def) { struct var *var; /* Get the variable entry. */ HA_RWLOCK_RDLOCK(VARS_LOCK, &vars->rwlock); - var = var_get(vars, name); + var = var_get(vars, name_hash); if (!var || !var->data.type) { if (!def) { HA_RWLOCK_RDUNLOCK(VARS_LOCK, &vars->rwlock); @@ -658,10 +613,10 @@ int vars_get_by_name(const char *name, size_t len, struct sample *smp, const str { struct vars *vars; enum vars_scope scope; + uint64_t hash; /* Resolve name and scope. */ - name = register_name(name, len, &scope, 0, NULL); - if (!name) + if (!vars_hash_name(name, len, &scope, &hash, NULL)) return 0; /* Select "vars" pool according with the scope. */ @@ -669,8 +624,7 @@ int vars_get_by_name(const char *name, size_t len, struct sample *smp, const str if (!vars || vars->scope != scope) return 0; - - return var_to_smp(vars, name, smp, def); + return var_to_smp(vars, hash, smp, def); } /* This function fills a sample with the content of the variable described @@ -697,7 +651,7 @@ int vars_get_by_desc(const struct var_desc *var_desc, struct sample *smp, const if (!vars || vars->scope != var_desc->scope) return 0; - return var_to_smp(vars, var_desc->name, smp, def); + return var_to_smp(vars, var_desc->name_hash, smp, def); } /* Always returns ACT_RET_CONT even if an error occurs. */ @@ -747,7 +701,7 @@ static enum act_return action_store(struct act_rule *rule, struct proxy *px, smp_set_owner(&smp, px, sess, s, 0); smp.data.type = SMP_T_STR; smp.data.u.str = *fmtstr; - var_set(rule->arg.vars.name, rule->arg.vars.scope, &smp, 0); + var_set(rule->arg.vars.name_hash, rule->arg.vars.scope, &smp, 0); } else { /* an expression is used */ @@ -757,7 +711,7 @@ static enum act_return action_store(struct act_rule *rule, struct proxy *px, } /* Store the sample, and ignore errors. */ - var_set(rule->arg.vars.name, rule->arg.vars.scope, &smp, 0); + var_set(rule->arg.vars.name_hash, rule->arg.vars.scope, &smp, 0); free_trash_chunk(fmtstr); return ACT_RET_CONT; } @@ -772,7 +726,7 @@ static enum act_return action_clear(struct act_rule *rule, struct proxy *px, smp_set_owner(&smp, px, sess, s, SMP_OPT_FINAL); /* Clear the variable using the sample context, and ignore errors. */ - var_unset(rule->arg.vars.name, rule->arg.vars.scope, &smp); + var_unset(rule->arg.vars.name_hash, rule->arg.vars.scope, &smp); return ACT_RET_CONT; } @@ -858,12 +812,11 @@ static enum act_parse_ret parse_store(const char **args, int *arg, struct proxy } LIST_INIT(&rule->arg.vars.fmt); - rule->arg.vars.name = register_name(var_name, var_len, &rule->arg.vars.scope, 1, err); - if (!rule->arg.vars.name) + if (!vars_hash_name(var_name, var_len, &rule->arg.vars.scope, &rule->arg.vars.name_hash, err)) return ACT_RET_PRS_ERR; if (rule->arg.vars.scope == SCOPE_PROC && - !var_set(rule->arg.vars.name, rule->arg.vars.scope, &empty_smp, VF_CREATEONLY|VF_PERMANENT)) + !var_set(rule->arg.vars.name_hash, rule->arg.vars.scope, &empty_smp, VF_CREATEONLY|VF_PERMANENT)) return 0; /* There is no fetch method when variable is unset. Just set the right