mirror of
http://git.haproxy.org/git/haproxy.git/
synced 2025-02-02 03:23:12 +00:00
MEDIUM: regex: pcre2 support
This adds support for the newer pcre2 library, which is more secure than its older sibling at the cost of a more complex API. It works pretty similarly to the pcre part to keep the overall change smooth, except: - we define the string class supported at compile time. - after matching, the ovec data is properly sized, although we do not take advantage of it here. - the lack of JIT support is treated less 'dramatically', as pcre2_jit_compile in this case is a 'no-op'.
This commit is contained in:
parent
01e0974b5a
commit
f2592b29f1
54
Makefile
54
Makefile
@ -14,11 +14,14 @@
|
||||
# USE_NETFILTER : enable netfilter on Linux. Automatic.
|
||||
# USE_PCRE : enable use of libpcre for regex. Recommended.
|
||||
# USE_PCRE_JIT : enable JIT for faster regex on libpcre >= 8.32
|
||||
# USE_PCRE2 : enable use of libpcre2 for regex.
|
||||
# USE_PCRE2_JIT : enable JIT for faster regex on libpcre2
|
||||
# USE_POLL : enable poll(). Automatic.
|
||||
# USE_PRIVATE_CACHE : disable shared memory cache of ssl sessions.
|
||||
# USE_PTHREAD_PSHARED : enable pthread process shared mutex on sslcache.
|
||||
# USE_REGPARM : enable regparm optimization. Recommended on x86.
|
||||
# USE_STATIC_PCRE : enable static libpcre. Recommended.
|
||||
# USE_STATIC_PCRE2 : enable static libpcre2.
|
||||
# USE_TPROXY : enable transparent proxy. Automatic.
|
||||
# USE_LINUX_TPROXY : enable full transparent proxy. Automatic.
|
||||
# USE_LINUX_SPLICE : enable kernel 2.6 splicing. Automatic.
|
||||
@ -671,6 +674,9 @@ OPTIONS_LDFLAGS += $(if $(WURFL_LIB),-L$(WURFL_LIB)) -lwurfl
|
||||
endif
|
||||
|
||||
ifneq ($(USE_PCRE)$(USE_STATIC_PCRE)$(USE_PCRE_JIT),)
|
||||
ifneq ($(USE_PCRE2)$(USE_STATIC_PCRE2)$(USE_PCRE2_JIT),)
|
||||
$(error cannot compile both PCRE and PCRE2 support)
|
||||
endif
|
||||
# PCREDIR is used to automatically construct the PCRE_INC and PCRE_LIB paths,
|
||||
# by appending /include and /lib respectively. If your system does not use the
|
||||
# same sub-directories, simply force these variables instead of PCREDIR. It is
|
||||
@ -702,6 +708,54 @@ BUILD_OPTIONS += $(call ignore_implicit,USE_PCRE_JIT)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifneq ($(USE_PCRE2)$(USE_STATIC_PCRE2)$(USE_PCRE2_JIT),)
|
||||
PCRE2DIR := $(shell pcre2-config --prefix 2>/dev/null || echo /usr/local)
|
||||
ifneq ($(PCRE2DIR),)
|
||||
PCRE2_INC := $(PCRE2DIR)/include
|
||||
PCRE2_LIB := $(PCRE2DIR)/lib
|
||||
|
||||
ifeq ($(PCRE2_WIDTH),)
|
||||
PCRE2_WIDTH = 8
|
||||
endif
|
||||
|
||||
ifneq ($(PCRE2_WIDTH),8)
|
||||
ifneq ($(PCRE2_WIDTH),16)
|
||||
ifneq ($(PCRE2_WIDTH),32)
|
||||
$(error PCRE2_WIDTH needs to be set to either 8,16 or 32)
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
PCRE2_LDFLAGS := $(shell pcre2-config --libs$(PCRE2_WIDTH) 2>/dev/null || echo -L/usr/local/lib -lpcre2-$(PCRE2_WIDTH))
|
||||
|
||||
ifeq ($(PCRE2_LDFLAGS),)
|
||||
$(error libpcre2-$(PCRE2_WIDTH) not found)
|
||||
else
|
||||
ifeq ($(PCRE2_WIDTH),8)
|
||||
PCRE2_LDFLAGS += -lpcre2-posix
|
||||
endif
|
||||
endif
|
||||
|
||||
OPTIONS_CFLAGS += -DUSE_PCRE2 -DPCRE2_CODE_UNIT_WIDTH=$(PCRE2_WIDTH)
|
||||
OPTIONS_CFLAGS += $(if $(PCRE2_INC), -I$(PCRE2_INC))
|
||||
|
||||
ifneq ($(USE_STATIC_PCRE2),)
|
||||
OPTIONS_LDFLAGS += $(if $(PCRE2_LIB),-L$(PCRE2_LIB)) -Wl,-Bstatic -L$(PCRE2_LIB) $(PCRE2_LDFLAGS) -Wl,-Bdynamic
|
||||
BUILD_OPTIONS += $(call ignore_implicit,USE_STATIC_PCRE2)
|
||||
else
|
||||
OPTIONS_LDFLAGS += $(if $(PCRE2_LIB),-L$(PCRE2_LIB)) -L$(PCRE2_LIB) $(PCRE2_LDFLAGS)
|
||||
BUILD_OPTIONS += $(call ignore_implicit,USE_PCRE2)
|
||||
endif
|
||||
|
||||
ifneq ($(USE_PCRE2_JIT),)
|
||||
OPTIONS_CFLAGS += -DUSE_PCRE2_JIT
|
||||
BUILD_OPTIONS += $(call ignore_implicit,USE_PCRE2_JIT)
|
||||
endif
|
||||
|
||||
endif
|
||||
endif
|
||||
|
||||
# TCP Fast Open
|
||||
ifneq ($(USE_TFO),)
|
||||
OPTIONS_CFLAGS += -DUSE_TFO
|
||||
|
@ -36,7 +36,11 @@
|
||||
#define PCRE_STUDY_JIT_COMPILE 0
|
||||
#endif
|
||||
|
||||
#else /* no PCRE */
|
||||
#elif USE_PCRE2
|
||||
#include <pcre2.h>
|
||||
#include <pcre2posix.h>
|
||||
|
||||
#else /* no PCRE, nor PCRE2 */
|
||||
#include <regex.h>
|
||||
#endif
|
||||
|
||||
@ -49,6 +53,8 @@ struct my_regex {
|
||||
#error "The PCRE lib doesn't support JIT. Change your lib, or remove the option USE_PCRE_JIT."
|
||||
#endif
|
||||
#endif
|
||||
#elif USE_PCRE2
|
||||
pcre2_code *reg;
|
||||
#else /* no PCRE */
|
||||
regex_t regex;
|
||||
#endif
|
||||
@ -95,6 +101,17 @@ static inline int regex_exec(const struct my_regex *preg, char *subject) {
|
||||
if (pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, 0, NULL, 0) < 0)
|
||||
return 0;
|
||||
return 1;
|
||||
#elif defined(USE_PCRE2)
|
||||
pcre2_match_data *pm;
|
||||
int ret;
|
||||
|
||||
pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
|
||||
ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject),
|
||||
0, 0, pm, NULL);
|
||||
pcre2_match_data_free(pm);
|
||||
if (ret < 0)
|
||||
return 0;
|
||||
return 1;
|
||||
#else
|
||||
int match;
|
||||
match = regexec(&preg->regex, subject, 0, NULL, 0);
|
||||
@ -115,6 +132,17 @@ static inline int regex_exec2(const struct my_regex *preg, char *subject, int le
|
||||
if (pcre_exec(preg->reg, preg->extra, subject, length, 0, 0, NULL, 0) < 0)
|
||||
return 0;
|
||||
return 1;
|
||||
#elif defined(USE_PCRE2)
|
||||
pcre2_match_data *pm;
|
||||
int ret;
|
||||
|
||||
pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
|
||||
ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length,
|
||||
0, 0, pm, NULL);
|
||||
pcre2_match_data_free(pm);
|
||||
if (ret < 0)
|
||||
return 0;
|
||||
return 1;
|
||||
#else
|
||||
int match;
|
||||
char old_char = subject[length];
|
||||
@ -143,6 +171,8 @@ static inline void regex_free(struct my_regex *preg) {
|
||||
#else /* PCRE_CONFIG_JIT */
|
||||
pcre_free(preg->extra);
|
||||
#endif /* PCRE_CONFIG_JIT */
|
||||
#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
|
||||
pcre2_code_free(preg->reg);
|
||||
#else
|
||||
regfree(&preg->regex);
|
||||
#endif
|
||||
|
113
src/regex.c
113
src/regex.c
@ -158,9 +158,14 @@ const char *chain_regex(struct hdr_exp **head, struct my_regex *preg,
|
||||
*/
|
||||
int regex_exec_match(const struct my_regex *preg, const char *subject,
|
||||
size_t nmatch, regmatch_t pmatch[], int flags) {
|
||||
#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
|
||||
#if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
|
||||
int ret;
|
||||
#ifdef USE_PCRE2
|
||||
PCRE2_SIZE *matches;
|
||||
pcre2_match_data *pm;
|
||||
#else
|
||||
int matches[MAX_MATCH * 3];
|
||||
#endif
|
||||
int enmatch;
|
||||
int i;
|
||||
int options;
|
||||
@ -169,15 +174,20 @@ int regex_exec_match(const struct my_regex *preg, const char *subject,
|
||||
* match i the maximum value for match, in fact this
|
||||
* limit is not applied.
|
||||
*/
|
||||
|
||||
enmatch = nmatch;
|
||||
if (enmatch > MAX_MATCH)
|
||||
enmatch = MAX_MATCH;
|
||||
|
||||
options = 0;
|
||||
if (flags & REG_NOTBOL)
|
||||
#ifdef USE_PCRE2
|
||||
options |= PCRE2_NOTBOL;
|
||||
#else
|
||||
options |= PCRE_NOTBOL;
|
||||
#endif
|
||||
|
||||
/* The value returned by pcre_exec() is one more than the highest numbered
|
||||
/* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered
|
||||
* pair that has been set. For example, if two substrings have been captured,
|
||||
* the returned value is 3. If there are no capturing subpatterns, the return
|
||||
* value from a successful match is 1, indicating that just the first pair of
|
||||
@ -186,9 +196,22 @@ int regex_exec_match(const struct my_regex *preg, const char *subject,
|
||||
* It seems that this function returns 0 if it detects more matches than available
|
||||
* space in the matches array.
|
||||
*/
|
||||
#ifdef USE_PCRE2
|
||||
pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
|
||||
ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject), 0, options, pm, NULL);
|
||||
|
||||
if (ret < 0) {
|
||||
pcre2_match_data_free(pm);
|
||||
return 0;
|
||||
}
|
||||
|
||||
matches = pcre2_get_ovector_pointer(pm);
|
||||
#else
|
||||
ret = pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, options, matches, enmatch * 3);
|
||||
|
||||
if (ret < 0)
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
if (ret == 0)
|
||||
ret = enmatch;
|
||||
@ -204,6 +227,9 @@ int regex_exec_match(const struct my_regex *preg, const char *subject,
|
||||
pmatch[i].rm_so = -1;
|
||||
pmatch[i].rm_eo = -1;
|
||||
}
|
||||
#ifdef USE_PCRE2
|
||||
pcre2_match_data_free(pm);
|
||||
#endif
|
||||
return 1;
|
||||
#else
|
||||
int match;
|
||||
@ -226,9 +252,14 @@ int regex_exec_match(const struct my_regex *preg, const char *subject,
|
||||
*/
|
||||
int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
|
||||
size_t nmatch, regmatch_t pmatch[], int flags) {
|
||||
#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
|
||||
#if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
|
||||
int ret;
|
||||
#ifdef USE_PCRE2
|
||||
PCRE2_SIZE *matches;
|
||||
pcre2_match_data *pm;
|
||||
#else
|
||||
int matches[MAX_MATCH * 3];
|
||||
#endif
|
||||
int enmatch;
|
||||
int i;
|
||||
int options;
|
||||
@ -243,9 +274,13 @@ int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
|
||||
|
||||
options = 0;
|
||||
if (flags & REG_NOTBOL)
|
||||
#ifdef USE_PCRE2
|
||||
options |= PCRE2_NOTBOL;
|
||||
#else
|
||||
options |= PCRE_NOTBOL;
|
||||
#endif
|
||||
|
||||
/* The value returned by pcre_exec() is one more than the highest numbered
|
||||
/* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered
|
||||
* pair that has been set. For example, if two substrings have been captured,
|
||||
* the returned value is 3. If there are no capturing subpatterns, the return
|
||||
* value from a successful match is 1, indicating that just the first pair of
|
||||
@ -254,9 +289,21 @@ int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
|
||||
* It seems that this function returns 0 if it detects more matches than available
|
||||
* space in the matches array.
|
||||
*/
|
||||
#ifdef USE_PCRE2
|
||||
pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
|
||||
ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length, 0, options, pm, NULL);
|
||||
|
||||
if (ret < 0) {
|
||||
pcre2_match_data_free(pm);
|
||||
return 0;
|
||||
}
|
||||
|
||||
matches = pcre2_get_ovector_pointer(pm);
|
||||
#else
|
||||
ret = pcre_exec(preg->reg, preg->extra, subject, length, 0, options, matches, enmatch * 3);
|
||||
if (ret < 0)
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
if (ret == 0)
|
||||
ret = enmatch;
|
||||
@ -272,6 +319,9 @@ int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
|
||||
pmatch[i].rm_so = -1;
|
||||
pmatch[i].rm_eo = -1;
|
||||
}
|
||||
#ifdef USE_PCRE2
|
||||
pcre2_match_data_free(pm);
|
||||
#endif
|
||||
return 1;
|
||||
#else
|
||||
char old_char = subject[length];
|
||||
@ -311,6 +361,40 @@ int regex_comp(const char *str, struct my_regex *regex, int cs, int cap, char **
|
||||
memprintf(err, "failed to compile regex '%s' (error=%s)", str, error);
|
||||
return 0;
|
||||
}
|
||||
#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
|
||||
int flags = 0;
|
||||
int errn;
|
||||
#if defined(USE_PCRE2_JIT)
|
||||
int jit;
|
||||
#endif
|
||||
PCRE2_UCHAR error[256];
|
||||
PCRE2_SIZE erroffset;
|
||||
|
||||
if (!cs)
|
||||
flags |= PCRE2_CASELESS;
|
||||
if (!cap)
|
||||
flags |= PCRE2_NO_AUTO_CAPTURE;
|
||||
|
||||
regex->reg = pcre2_compile((PCRE2_SPTR)str, PCRE2_ZERO_TERMINATED, flags, &errn, &erroffset, NULL);
|
||||
if (!regex->reg) {
|
||||
pcre2_get_error_message(errn, error, sizeof(error));
|
||||
memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%zu)", str, error, erroffset);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if defined(USE_PCRE2_JIT)
|
||||
jit = pcre2_jit_compile(regex->reg, PCRE2_JIT_COMPLETE);
|
||||
/*
|
||||
* We end if it is an error not related to lack of JIT support
|
||||
* in a case of JIT support missing pcre2_jit_compile is "no-op"
|
||||
*/
|
||||
if (jit < 0 && jit != PCRE2_ERROR_JIT_BADOPTION) {
|
||||
pcre2_code_free(regex->reg);
|
||||
memprintf(err, "regex '%s' jit compilation failed", str);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#else
|
||||
int flags = REG_EXTENDED;
|
||||
|
||||
@ -349,8 +433,27 @@ static void __regex_init(void)
|
||||
"no (USE_PCRE_JIT not set)"
|
||||
#endif
|
||||
);
|
||||
#endif /* USE_PCRE */
|
||||
|
||||
#ifdef USE_PCRE2
|
||||
memprintf(&ptr, "Built with PCRE2 version : %s", (HAP_XSTRING(Z PCRE2_PRERELEASE)[1] == 0) ?
|
||||
HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
|
||||
HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR) HAP_XSTRING(PCRE2_PRERELEASE PCRE2_DATE));
|
||||
memprintf(&ptr, "%s\nPCRE2 library supports JIT : %s", ptr,
|
||||
#ifdef USE_PCRE2_JIT
|
||||
({
|
||||
int r;
|
||||
pcre2_config(PCRE2_CONFIG_JIT, &r);
|
||||
r ? "yes" : "no (libpcre2 build without JIT?)";
|
||||
})
|
||||
#else
|
||||
memprintf(&ptr, "Built without PCRE support (using libc's regex instead)");
|
||||
"no (USE_PCRE2_JIT not set)"
|
||||
#endif
|
||||
);
|
||||
#endif /* USE_PCRE2 */
|
||||
|
||||
#if !defined(USE_PCRE) && !defined(USE_PCRE2)
|
||||
memprintf(&ptr, "Built without PCRE or PCRE2 support (using libc's regex instead)");
|
||||
#endif
|
||||
hap_register_build_opts(ptr, 1);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user