From 15a53a43846e25c99e37f210ec84349d3ea1c64d Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Wed, 21 Jan 2015 13:39:42 +0100 Subject: [PATCH] MEDIUM: regex: add support for passing regex flags to regex_exec_match() This function (and its sister regex_exec_match2()) abstract the regex execution but make it impossible to pass flags to the regex engine. Currently we don't use them but we'll need to support REG_NOTBOL soon (to indicate that we're not at the beginning of a line). So let's add support for this flag and update the API accordingly. --- include/common/regex.h | 4 ++-- src/proto_http.c | 12 ++++++------ src/regex.c | 30 +++++++++++++++++++++++------- 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/include/common/regex.h b/include/common/regex.h index 58b9dc14a..76ed29064 100644 --- a/include/common/regex.h +++ b/include/common/regex.h @@ -128,9 +128,9 @@ static inline int regex_exec2(const struct my_regex *preg, char *subject, int le } int regex_exec_match(const struct my_regex *preg, const char *subject, - size_t nmatch, regmatch_t pmatch[]); + size_t nmatch, regmatch_t pmatch[], int flags); int regex_exec_match2(const struct my_regex *preg, char *subject, int length, - size_t nmatch, regmatch_t pmatch[]); + size_t nmatch, regmatch_t pmatch[], int flags); static inline void regex_free(struct my_regex *preg) { #if defined(USE_PCRE) || defined(USE_PCRE_JIT) diff --git a/src/proto_http.c b/src/proto_http.c index 4d259df7c..ac1c60fbc 100644 --- a/src/proto_http.c +++ b/src/proto_http.c @@ -3185,7 +3185,7 @@ static inline void inet_set_tos(int fd, struct sockaddr_storage from, int tos) static int http_replace_header(struct my_regex *re, char *dst, uint dst_size, char *val, int len, const char *rep_str) { - if (!regex_exec_match2(re, val, len, MAX_MATCH, pmatch)) + if (!regex_exec_match2(re, val, len, MAX_MATCH, pmatch, 0)) return -2; return exp_replace(dst, dst_size, val, rep_str, pmatch); @@ -3209,7 +3209,7 @@ static int 
http_replace_value(struct my_regex *re, char *dst, uint dst_size, cha while (p_delim < p + len && *p_delim != delim) p_delim++; - if (regex_exec_match2(re, p, p_delim-p, MAX_MATCH, pmatch)) { + if (regex_exec_match2(re, p, p_delim-p, MAX_MATCH, pmatch, 0)) { int replace_n = exp_replace(dst_p, dst_end - dst_p, p, rep_str, pmatch); if (replace_n < 0) @@ -6877,7 +6877,7 @@ int apply_filter_to_req_headers(struct session *s, struct channel *req, struct h * and the next header starts at cur_next. */ - if (regex_exec_match2(exp->preg, cur_ptr, cur_end-cur_ptr, MAX_MATCH, pmatch)) { + if (regex_exec_match2(exp->preg, cur_ptr, cur_end-cur_ptr, MAX_MATCH, pmatch, 0)) { switch (exp->action) { case ACT_SETBE: /* It is not possible to jump a second time. @@ -6978,7 +6978,7 @@ int apply_filter_to_req_line(struct session *s, struct channel *req, struct hdr_ /* Now we have the request line between cur_ptr and cur_end */ - if (regex_exec_match2(exp->preg, cur_ptr, cur_end-cur_ptr, MAX_MATCH, pmatch)) { + if (regex_exec_match2(exp->preg, cur_ptr, cur_end-cur_ptr, MAX_MATCH, pmatch, 0)) { switch (exp->action) { case ACT_SETBE: /* It is not possible to jump a second time. @@ -7738,7 +7738,7 @@ int apply_filter_to_resp_headers(struct session *s, struct channel *rtr, struct * and the next header starts at cur_next. 
*/ - if (regex_exec_match2(exp->preg, cur_ptr, cur_end-cur_ptr, MAX_MATCH, pmatch)) { + if (regex_exec_match2(exp->preg, cur_ptr, cur_end-cur_ptr, MAX_MATCH, pmatch, 0)) { switch (exp->action) { case ACT_ALLOW: txn->flags |= TX_SVALLOW; @@ -7819,7 +7819,7 @@ int apply_filter_to_sts_line(struct session *s, struct channel *rtr, struct hdr_ /* Now we have the status line between cur_ptr and cur_end */ - if (regex_exec_match2(exp->preg, cur_ptr, cur_end-cur_ptr, MAX_MATCH, pmatch)) { + if (regex_exec_match2(exp->preg, cur_ptr, cur_end-cur_ptr, MAX_MATCH, pmatch, 0)) { switch (exp->action) { case ACT_ALLOW: txn->flags |= TX_SVALLOW; diff --git a/src/regex.c b/src/regex.c index 760a1eda4..c83e48268 100644 --- a/src/regex.c +++ b/src/regex.c @@ -153,14 +153,16 @@ const char *chain_regex(struct hdr_exp **head, struct my_regex *preg, /* This function apply regex. It take const null terminated char as input. * If the function doesn't match, it returns false, else it returns true. * When it is compiled with JIT, this function execute strlen on the subject. + * Currently the only supported flag is REG_NOTBOL. */ int regex_exec_match(const struct my_regex *preg, const char *subject, - size_t nmatch, regmatch_t pmatch[]) { + size_t nmatch, regmatch_t pmatch[], int flags) { #if defined(USE_PCRE) || defined(USE_PCRE_JIT) int ret; int matches[MAX_MATCH * 3]; int enmatch; int i; + int options; /* Silently limit the number of allowed matches. max * match i the maximum value for match, in fact this @@ -170,6 +172,10 @@ int regex_exec_match(const struct my_regex *preg, const char *subject, if (enmatch > MAX_MATCH) enmatch = MAX_MATCH; + options = 0; + if (flags & REG_NOTBOL) + options |= PCRE_NOTBOL; + /* The value returned by pcre_exec() is one more than the highest numbered * pair that has been set. For example, if two substrings have been captured, * the returned value is 3. 
If there are no capturing subpatterns, the return @@ -179,7 +185,7 @@ int regex_exec_match(const struct my_regex *preg, const char *subject, * It seems that this function returns 0 if it detect more matches than avalaible * space in the matches array. */ - ret = pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, 0, matches, enmatch * 3); + ret = pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, options, matches, enmatch * 3); if (ret < 0) return 0; @@ -200,7 +206,9 @@ int regex_exec_match(const struct my_regex *preg, const char *subject, return 1; #else int match; - match = regexec(&preg->regex, subject, nmatch, pmatch, 0); + + flags &= REG_NOTBOL; + match = regexec(&preg->regex, subject, nmatch, pmatch, flags); if (match == REG_NOMATCH) return 0; return 1; @@ -212,15 +220,17 @@ int regex_exec_match(const struct my_regex *preg, const char *subject, * match, it returns false, else it returns true. * When it is compiled with standard POSIX regex or PCRE, this function add * a temporary null chracters at the end of the <subject>. The <subject> must - * have a real length of <length> + 1. + * have a real length of <length> + 1. Currently the only supported flag is + * REG_NOTBOL. */ int regex_exec_match2(const struct my_regex *preg, char *subject, int length, - size_t nmatch, regmatch_t pmatch[]) { + size_t nmatch, regmatch_t pmatch[], int flags) { #if defined(USE_PCRE) || defined(USE_PCRE_JIT) int ret; int matches[MAX_MATCH * 3]; int enmatch; int i; + int options; /* Silently limit the number of allowed matches. max * match i the maximum value for match, in fact this @@ -230,6 +240,10 @@ int regex_exec_match2(const struct my_regex *preg, char *subject, int length, if (enmatch > MAX_MATCH) enmatch = MAX_MATCH; + options = 0; + if (flags & REG_NOTBOL) + options |= PCRE_NOTBOL; + /* The value returned by pcre_exec() is one more than the highest numbered * pair that has been set. For example, if two substrings have been captured, * the returned value is 3.
If there are no capturing subpatterns, the return @@ -239,7 +253,7 @@ int regex_exec_match2(const struct my_regex *preg, char *subject, int length, * It seems that this function returns 0 if it detect more matches than avalaible * space in the matches array. */ - ret = pcre_exec(preg->reg, preg->extra, subject, length, 0, 0, matches, enmatch * 3); + ret = pcre_exec(preg->reg, preg->extra, subject, length, 0, options, matches, enmatch * 3); if (ret < 0) return 0; @@ -261,8 +275,10 @@ int regex_exec_match2(const struct my_regex *preg, char *subject, int length, #else char old_char = subject[length]; int match; + + flags &= REG_NOTBOL; subject[length] = 0; - match = regexec(&preg->regex, subject, nmatch, pmatch, 0); + match = regexec(&preg->regex, subject, nmatch, pmatch, flags); subject[length] = old_char; if (match == REG_NOMATCH) return 0;