mirror of
http://git.haproxy.org/git/haproxy.git/
synced 2024-12-24 13:42:16 +00:00
f348ecd67a
The PCRE2 JIT support is buggy. If HAProxy is compiled with USE_PCRE2_JIT option while the PCRE2 library is compiled without the JIT support, any matching will fail because pcre2_jit_compile() return value is not properly handled. We must fall back on pcre2_match() if PCRE2_ERROR_JIT_BADOPTION error is returned. This patch should fix the issue #1848. It must be backported as far as 2.4.
460 lines
11 KiB
C
460 lines
11 KiB
C
/*
|
|
* Regex and string management functions.
|
|
*
|
|
* Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*
|
|
*/
|
|
|
|
#include <ctype.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include <haproxy/api.h>
|
|
#include <haproxy/errors.h>
|
|
#include <haproxy/global.h>
|
|
#include <haproxy/regex.h>
|
|
#include <haproxy/tools.h>
|
|
|
|
/* regex trash buffer used by various regex tests */
|
|
THREAD_LOCAL regmatch_t pmatch[MAX_MATCH]; /* shared scratch array of MAX_MATCH match slots, each holding the start (rm_so) and end (rm_eo) offsets of one regex group */
|
|
|
|
/*
 * Expand the replacement template <str> into <dst>, which can hold at most
 * <dst_size> bytes including the trailing NUL.
 *
 * Escapes recognised in <str>:
 *   \N   (one decimal digit) : the text of capture group N, copied from <src>
 *                              using the offsets found in <matches>. A group
 *                              whose rm_so or rm_eo is negative expands to
 *                              nothing.
 *   \xHH                     : the single byte built from the two characters
 *                              HH read as hexadecimal digits (they are not
 *                              validated here).
 *   \c   (anything else)     : the character c itself.
 * Any other character is copied verbatim.
 *
 * <matches> is indexed with values 0..9, so it must provide at least ten
 * entries.
 *
 * Returns the number of bytes written (not counting the trailing NUL), or -1
 * if the output does not fit in <dst>, if <str> ends in the middle of an
 * escape sequence, or if a capture group reports an end before its start.
 */
int exp_replace(char *dst, unsigned int dst_size, char *src, const char *str, const regmatch_t *matches)
{
	char *old_dst = dst;
	char *dst_end = dst + dst_size;

	while (*str) {
		if (*str != '\\') {
			/* ordinary character: plain copy */
			if (dst >= dst_end)
				return -1;

			*dst++ = *str++;
			continue;
		}

		/* skip the backslash; a lone trailing backslash is an error */
		str++;
		if (!*str)
			return -1;

		if (isdigit((unsigned char)*str)) {
			int num = *str - '0';
			int len;

			str++;

			if (matches[num].rm_eo > -1 && matches[num].rm_so > -1) {
				len = matches[num].rm_eo - matches[num].rm_so;

				/* Compare against the remaining room instead of
				 * computing <dst + len>, which could point far
				 * outside the buffer. One byte is always kept
				 * for the trailing NUL. A negative length must
				 * never reach memcpy().
				 */
				if (len < 0 || len >= dst_end - dst)
					return -1;

				memcpy(dst, src + matches[num].rm_so, len);
				dst += len;
			}
		} else if (*str == 'x') {
			unsigned char hex1, hex2;

			str++;

			if (!*str)
				return -1;

			hex1 = toupper((unsigned char)*str++) - '0';

			if (!*str)
				return -1;

			hex2 = toupper((unsigned char)*str++) - '0';

			/* 'A'..'F' land at 17..22 after subtracting '0';
			 * shift them down to 10..15.
			 */
			if (hex1 > 9)
				hex1 -= 'A' - '9' - 1;
			if (hex2 > 9)
				hex2 -= 'A' - '9' - 1;

			if (dst >= dst_end)
				return -1;

			*dst++ = (hex1 << 4) + hex2;
		} else {
			/* backslash before any other character: emit that character */
			if (dst >= dst_end)
				return -1;

			*dst++ = *str++;
		}
	}

	/* room for the terminator is mandatory */
	if (dst >= dst_end)
		return -1;

	*dst = '\0';
	return dst - old_dst;
}
|
|
|
|
/* returns NULL if the replacement string <str> is valid, or the pointer to the first error */
|
|
const char *check_replace_string(const char *str)
|
|
{
|
|
const char *err = NULL;
|
|
while (*str) {
|
|
if (*str == '\\') {
|
|
err = str; /* in case of a backslash, we return the pointer to it */
|
|
str++;
|
|
if (!*str)
|
|
return err;
|
|
else if (isdigit((unsigned char)*str))
|
|
err = NULL;
|
|
else if (*str == 'x') {
|
|
str++;
|
|
if (!ishex(*str))
|
|
return err;
|
|
str++;
|
|
if (!ishex(*str))
|
|
return err;
|
|
err = NULL;
|
|
}
|
|
else {
|
|
ha_warning("'\\%c' : deprecated use of a backslash before something not '\\','x' or a digit.\n", *str);
|
|
err = NULL;
|
|
}
|
|
}
|
|
str++;
|
|
}
|
|
return err;
|
|
}
|
|
|
|
|
|
/* This function apply regex. It take const null terminated char as input.
|
|
* If the function doesn't match, it returns false, else it returns true.
|
|
* When it is compiled with JIT, this function execute strlen on the subject.
|
|
* Currently the only supported flag is REG_NOTBOL.
|
|
*/
|
|
int regex_exec_match(const struct my_regex *preg, const char *subject,
|
|
size_t nmatch, regmatch_t pmatch[], int flags) {
|
|
#if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
|
|
int ret;
|
|
#ifdef USE_PCRE2
|
|
PCRE2_SIZE *matches;
|
|
pcre2_match_data *pm;
|
|
#else
|
|
int matches[MAX_MATCH * 3];
|
|
#endif
|
|
int enmatch;
|
|
int i;
|
|
int options;
|
|
|
|
/* Silently limit the number of allowed matches. max
|
|
* match i the maximum value for match, in fact this
|
|
* limit is not applied.
|
|
*/
|
|
|
|
enmatch = nmatch;
|
|
if (enmatch > MAX_MATCH)
|
|
enmatch = MAX_MATCH;
|
|
|
|
options = 0;
|
|
if (flags & REG_NOTBOL)
|
|
#ifdef USE_PCRE2
|
|
options |= PCRE2_NOTBOL;
|
|
#else
|
|
options |= PCRE_NOTBOL;
|
|
#endif
|
|
|
|
/* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered
|
|
* pair that has been set. For example, if two substrings have been captured,
|
|
* the returned value is 3. If there are no capturing subpatterns, the return
|
|
* value from a successful match is 1, indicating that just the first pair of
|
|
* offsets has been set.
|
|
*
|
|
* It seems that this function returns 0 if it detects more matches than available
|
|
* space in the matches array.
|
|
*/
|
|
#ifdef USE_PCRE2
|
|
pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
|
|
ret = preg->mfn(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject), 0, options, pm, NULL);
|
|
|
|
if (ret < 0) {
|
|
pcre2_match_data_free(pm);
|
|
return 0;
|
|
}
|
|
|
|
matches = pcre2_get_ovector_pointer(pm);
|
|
#else
|
|
ret = pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, options, matches, enmatch * 3);
|
|
|
|
if (ret < 0)
|
|
return 0;
|
|
#endif
|
|
|
|
if (ret == 0)
|
|
ret = enmatch;
|
|
|
|
for (i=0; i<nmatch; i++) {
|
|
/* Copy offset. */
|
|
if (i < ret) {
|
|
pmatch[i].rm_so = matches[(i*2)];
|
|
pmatch[i].rm_eo = matches[(i*2)+1];
|
|
continue;
|
|
}
|
|
/* Set the unmatvh flag (-1). */
|
|
pmatch[i].rm_so = -1;
|
|
pmatch[i].rm_eo = -1;
|
|
}
|
|
#ifdef USE_PCRE2
|
|
pcre2_match_data_free(pm);
|
|
#endif
|
|
return 1;
|
|
#else
|
|
int match;
|
|
|
|
flags &= REG_NOTBOL;
|
|
match = regexec(&preg->regex, subject, nmatch, pmatch, flags);
|
|
if (match == REG_NOMATCH)
|
|
return 0;
|
|
return 1;
|
|
#endif
|
|
}
|
|
|
|
/* This function apply regex. It take a "char *" ans length as input. The
|
|
* <subject> can be modified during the processing. If the function doesn't
|
|
* match, it returns false, else it returns true.
|
|
* When it is compiled with standard POSIX regex or PCRE, this function add
|
|
* a temporary null characters at the end of the <subject>. The <subject> must
|
|
* have a real length of <length> + 1. Currently the only supported flag is
|
|
* REG_NOTBOL.
|
|
*/
|
|
int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
|
|
size_t nmatch, regmatch_t pmatch[], int flags) {
|
|
#if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
|
|
int ret;
|
|
#ifdef USE_PCRE2
|
|
PCRE2_SIZE *matches;
|
|
pcre2_match_data *pm;
|
|
#else
|
|
int matches[MAX_MATCH * 3];
|
|
#endif
|
|
int enmatch;
|
|
int i;
|
|
int options;
|
|
|
|
/* Silently limit the number of allowed matches. max
|
|
* match i the maximum value for match, in fact this
|
|
* limit is not applied.
|
|
*/
|
|
enmatch = nmatch;
|
|
if (enmatch > MAX_MATCH)
|
|
enmatch = MAX_MATCH;
|
|
|
|
options = 0;
|
|
if (flags & REG_NOTBOL)
|
|
#ifdef USE_PCRE2
|
|
options |= PCRE2_NOTBOL;
|
|
#else
|
|
options |= PCRE_NOTBOL;
|
|
#endif
|
|
|
|
/* The value returned by pcre_exec()/pcre2_(jit)_match() is one more than the highest numbered
|
|
* pair that has been set. For example, if two substrings have been captured,
|
|
* the returned value is 3. If there are no capturing subpatterns, the return
|
|
* value from a successful match is 1, indicating that just the first pair of
|
|
* offsets has been set.
|
|
*
|
|
* It seems that this function returns 0 if it detects more matches than available
|
|
* space in the matches array.
|
|
*/
|
|
#ifdef USE_PCRE2
|
|
pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
|
|
ret = preg->mfn(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length, 0, options, pm, NULL);
|
|
|
|
if (ret < 0) {
|
|
pcre2_match_data_free(pm);
|
|
return 0;
|
|
}
|
|
|
|
matches = pcre2_get_ovector_pointer(pm);
|
|
#else
|
|
ret = pcre_exec(preg->reg, preg->extra, subject, length, 0, options, matches, enmatch * 3);
|
|
if (ret < 0)
|
|
return 0;
|
|
#endif
|
|
|
|
if (ret == 0)
|
|
ret = enmatch;
|
|
|
|
for (i=0; i<nmatch; i++) {
|
|
/* Copy offset. */
|
|
if (i < ret) {
|
|
pmatch[i].rm_so = matches[(i*2)];
|
|
pmatch[i].rm_eo = matches[(i*2)+1];
|
|
continue;
|
|
}
|
|
/* Set the unmatvh flag (-1). */
|
|
pmatch[i].rm_so = -1;
|
|
pmatch[i].rm_eo = -1;
|
|
}
|
|
#ifdef USE_PCRE2
|
|
pcre2_match_data_free(pm);
|
|
#endif
|
|
return 1;
|
|
#else
|
|
char old_char = subject[length];
|
|
int match;
|
|
|
|
flags &= REG_NOTBOL;
|
|
subject[length] = 0;
|
|
match = regexec(&preg->regex, subject, nmatch, pmatch, flags);
|
|
subject[length] = old_char;
|
|
if (match == REG_NOMATCH)
|
|
return 0;
|
|
return 1;
|
|
#endif
|
|
}
|
|
|
|
struct my_regex *regex_comp(const char *str, int cs, int cap, char **err)
|
|
{
|
|
struct my_regex *regex = NULL;
|
|
#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
|
|
int flags = 0;
|
|
const char *error;
|
|
int erroffset;
|
|
#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
|
|
int flags = 0;
|
|
int errn;
|
|
#if defined(USE_PCRE2_JIT)
|
|
int jit;
|
|
#endif
|
|
PCRE2_UCHAR error[256];
|
|
PCRE2_SIZE erroffset;
|
|
#else
|
|
int flags = REG_EXTENDED;
|
|
#endif
|
|
|
|
regex = calloc(1, sizeof(*regex));
|
|
if (!regex) {
|
|
memprintf(err, "not enough memory to build regex");
|
|
goto out_fail_alloc;
|
|
}
|
|
|
|
#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
|
|
if (!cs)
|
|
flags |= PCRE_CASELESS;
|
|
if (!cap)
|
|
flags |= PCRE_NO_AUTO_CAPTURE;
|
|
|
|
regex->reg = pcre_compile(str, flags, &error, &erroffset, NULL);
|
|
if (!regex->reg) {
|
|
memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%d)", str, error, erroffset);
|
|
goto out_fail_alloc;
|
|
}
|
|
|
|
regex->extra = pcre_study(regex->reg, PCRE_STUDY_JIT_COMPILE, &error);
|
|
if (!regex->extra && error != NULL) {
|
|
pcre_free(regex->reg);
|
|
memprintf(err, "failed to compile regex '%s' (error=%s)", str, error);
|
|
goto out_fail_alloc;
|
|
}
|
|
#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
|
|
if (!cs)
|
|
flags |= PCRE2_CASELESS;
|
|
if (!cap)
|
|
flags |= PCRE2_NO_AUTO_CAPTURE;
|
|
|
|
regex->reg = pcre2_compile((PCRE2_SPTR)str, PCRE2_ZERO_TERMINATED, flags, &errn, &erroffset, NULL);
|
|
if (!regex->reg) {
|
|
pcre2_get_error_message(errn, error, sizeof(error));
|
|
memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%zu)", str, error, erroffset);
|
|
goto out_fail_alloc;
|
|
}
|
|
|
|
regex->mfn = &pcre2_match;
|
|
#if defined(USE_PCRE2_JIT)
|
|
jit = pcre2_jit_compile(regex->reg, PCRE2_JIT_COMPLETE);
|
|
/*
|
|
* We end if it is an error not related to lack of JIT support
|
|
* in a case of JIT support missing pcre2_jit_compile is "no-op"
|
|
*/
|
|
if (!jit)
|
|
regex->mfn = &pcre2_jit_match;
|
|
else {
|
|
if (jit != PCRE2_ERROR_JIT_BADOPTION) {
|
|
pcre2_code_free(regex->reg);
|
|
memprintf(err, "regex '%s' jit compilation failed", str);
|
|
goto out_fail_alloc;
|
|
}
|
|
else
|
|
regex->mfn = &pcre2_match;
|
|
}
|
|
#endif
|
|
|
|
#else
|
|
if (!cs)
|
|
flags |= REG_ICASE;
|
|
if (!cap)
|
|
flags |= REG_NOSUB;
|
|
|
|
if (regcomp(®ex->regex, str, flags) != 0) {
|
|
memprintf(err, "regex '%s' is invalid", str);
|
|
goto out_fail_alloc;
|
|
}
|
|
#endif
|
|
return regex;
|
|
|
|
out_fail_alloc:
|
|
free(regex);
|
|
return NULL;
|
|
}
|
|
|
|
/*
 * Build the text describing the regex backend this binary was compiled with
 * (PCRE, PCRE2, or the libc regex) and whether JIT is available, then hand it
 * to hap_register_build_opts().
 */
static void regex_register_build_options(void)
{
	char *ptr = NULL;

#ifdef USE_PCRE
	{
		const char *jit_status;
#ifdef USE_PCRE_JIT
		int r = 0; /* stays 0 ("no") if pcre_config() does not fill it */

		pcre_config(PCRE_CONFIG_JIT, &r);
		jit_status = r ? "yes" : "no (libpcre build without JIT?)";
#else
		jit_status = "no (USE_PCRE_JIT not set)";
#endif
		/* NOTE(review): the [1] == 0 test presumably detects an empty
		 * PCRE_PRERELEASE (the stringified text is then just "Z") --
		 * confirm against the HAP_XSTRING definition.
		 */
		memprintf(&ptr, "Built with PCRE version : %s", (HAP_XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
		          HAP_XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
		          HAP_XSTRING(PCRE_MAJOR.PCRE_MINOR) HAP_XSTRING(PCRE_PRERELEASE PCRE_DATE));
		memprintf(&ptr, "%s\nRunning on PCRE version : %s", ptr, pcre_version());
		memprintf(&ptr, "%s\nPCRE library supports JIT : %s", ptr, jit_status);
	}
#endif /* USE_PCRE */

#ifdef USE_PCRE2
	{
		const char *jit_status;
#ifdef USE_PCRE2_JIT
		/* PCRE2_CONFIG_JIT stores a uint32_t; stays 0 ("no") if
		 * pcre2_config() does not fill it.
		 */
		uint32_t r = 0;

		pcre2_config(PCRE2_CONFIG_JIT, &r);
		jit_status = r ? "yes" : "no (libpcre2 build without JIT?)";
#else
		jit_status = "no (USE_PCRE2_JIT not set)";
#endif
		memprintf(&ptr, "Built with PCRE2 version : %s", (HAP_XSTRING(Z PCRE2_PRERELEASE)[1] == 0) ?
		          HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
		          HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR) HAP_XSTRING(PCRE2_PRERELEASE PCRE2_DATE));
		memprintf(&ptr, "%s\nPCRE2 library supports JIT : %s", ptr, jit_status);
	}
#endif /* USE_PCRE2 */

#if !defined(USE_PCRE) && !defined(USE_PCRE2)
	memprintf(&ptr, "Built without PCRE or PCRE2 support (using libc's regex instead)");
#endif
	/* NOTE(review): the meaning of the second argument (1) is not visible
	 * in this file -- confirm against hap_register_build_opts().
	 */
	hap_register_build_opts(ptr, 1);
}
|
|
|
|
/* Hook regex_register_build_options() into the INITCALL0 mechanism for the
 * STG_REGISTER stage (presumably run once during startup -- confirm against
 * the INITCALL0 definition).
 */
INITCALL0(STG_REGISTER, regex_register_build_options);
|
|
|
|
/*
|
|
* Local variables:
|
|
* c-indent-level: 8
|
|
* c-basic-offset: 8
|
|
* End:
|
|
*/
|