MEDIUM: log-format: make the LF parser aware of sample expressions' end

For a very long time it used to be impossible to pass a closing square
bracket as a valid character in argument to a sample fetch function or
to a converter because the LF parser used to stop on the first such
character found and to pass what was between the first '[' and the first
']' to sample_parse_expr().

This patch addresses this by passing the whole string to sample_parse_expr(),
which is the only authority able to indicate the first character that
does not belong to the expression. The LF parser then verifies that this
character is a ']' and fails otherwise. As a result it is finally possible to
write rules such as the following, which is totally valid and unambiguous:

    http-request redirect location %[url,regsub([.:/?-],!,g)]
                                                |-----| | |
                                                  arg1  | `---> arg3
                                                        `-----> arg2
                                         |-----------------|
                                              converter
                                     |---------------------|
                                        sample expression
                                   |------------------------|
                                         log-format tag
This commit is contained in:
Willy Tarreau 2020-02-14 17:33:06 +01:00
parent e3b57bf92f
commit cd0d2ed6ee
2 changed files with 30 additions and 9 deletions

View File

@ -14151,13 +14151,16 @@ regsub(<regex>,<subst>[,<flags>])
second level is usable for argument. It is recommended to use single quotes second level is usable for argument. It is recommended to use single quotes
outside since these ones do not try to resolve backslashes nor dollar signs. outside since these ones do not try to resolve backslashes nor dollar signs.
Example : Examples:
# de-duplicate "/" in header "x-path". # de-duplicate "/" in header "x-path".
# input: x-path: /////a///b/c/xzxyz/ # input: x-path: /////a///b/c/xzxyz/
# output: x-path: /a/b/c/xzxyz/ # output: x-path: /a/b/c/xzxyz/
http-request set-header x-path "%[hdr(x-path),regsub('/+','/','g')]" http-request set-header x-path "%[hdr(x-path),regsub('/+','/','g')]"
# copy query string to x-query and drop all leading '?', ';' and '&'
http-request set-header x-query "%[query,regsub([?;&]*,'')]"
capture-req(<id>) capture-req(<id>)
Capture the string entry in the request slot <id> and returns the entry as Capture the string entry in the request slot <id> and returns the entry as
is. If the slot doesn't exist, the capture fails silently. is. If the slot doesn't exist, the capture fails silently.

View File

@ -473,11 +473,13 @@ int add_to_logformat_list(char *start, char *end, int type, struct list *list_fo
/* /*
* Parse the sample fetch expression <text> and add a node to <list_format> upon * Parse the sample fetch expression <text> and add a node to <list_format> upon
* success. At the moment, sample converters are not yet supported but fetch arguments * success. At the moment, sample converters are not yet supported but fetch arguments
* should work. The curpx->conf.args.ctx must be set by the caller. * should work. The curpx->conf.args.ctx must be set by the caller. If an end pointer
* is passed in <endptr>, it will be updated with the pointer to the first character
* not part of the sample expression.
* *
* In error case, the function returns 0, otherwise it returns 1. * In error case, the function returns 0, otherwise it returns 1.
*/ */
int add_sample_to_logformat_list(char *text, char *arg, int arg_len, struct proxy *curpx, struct list *list_format, int options, int cap, char **err) int add_sample_to_logformat_list(char *text, char *arg, int arg_len, struct proxy *curpx, struct list *list_format, int options, int cap, char **err, char **endptr)
{ {
char *cmd[2]; char *cmd[2];
struct sample_expr *expr = NULL; struct sample_expr *expr = NULL;
@ -488,7 +490,7 @@ int add_sample_to_logformat_list(char *text, char *arg, int arg_len, struct prox
cmd[1] = ""; cmd[1] = "";
cmd_arg = 0; cmd_arg = 0;
expr = sample_parse_expr(cmd, &cmd_arg, curpx->conf.args.file, curpx->conf.args.line, err, &curpx->conf.args, NULL); expr = sample_parse_expr(cmd, &cmd_arg, curpx->conf.args.file, curpx->conf.args.line, err, &curpx->conf.args, endptr);
if (!expr) { if (!expr) {
memprintf(err, "failed to parse sample expression <%s> : %s", text, *err); memprintf(err, "failed to parse sample expression <%s> : %s", text, *err);
goto error_free; goto error_free;
@ -648,10 +650,26 @@ int parse_logformat_string(const char *fmt, struct proxy *curproxy, struct list
goto fail; goto fail;
case LF_STEXPR: // text immediately following '%[' case LF_STEXPR: // text immediately following '%['
if (*str == ']') { // end of arg /* the whole sample expression is parsed at once,
cformat = LF_EDEXPR; * returning the pointer to the first character not
var_len = str - var; * part of the expression, which MUST be the trailing
*str = 0; // needed for parsing the expression * square bracket.
*/
if (!add_sample_to_logformat_list(var, arg, arg_len, curproxy, list_format, options, cap, err, &str))
goto fail;
if (*str == ']') {
// end of arg, go on with next state
cformat = pformat = LF_EDEXPR;
sp = str;
}
else {
char c = *str;
*str = 0;
if (isprint(c))
memprintf(err, "expected ']' after '%s', but found '%c'", var, c);
else
memprintf(err, "missing ']' after '%s'", var);
} }
break; break;
@ -681,7 +699,7 @@ int parse_logformat_string(const char *fmt, struct proxy *curproxy, struct list
goto fail; goto fail;
break; break;
case LF_STEXPR: case LF_STEXPR:
if (!add_sample_to_logformat_list(var, arg, arg_len, curproxy, list_format, options, cap, err)) if (!add_sample_to_logformat_list(var, arg, arg_len, curproxy, list_format, options, cap, err, &sp))
goto fail; goto fail;
break; break;
case LF_TEXT: case LF_TEXT: