From f2592b29f13907ddf2bba42d00bc41cb8ee5b69b Mon Sep 17 00:00:00 2001 From: David Carlier Date: Mon, 21 Nov 2016 21:25:58 +0000 Subject: [PATCH] MEDIUM: regex: pcre2 support This adds support for the newest pcre2 library, more secure than its older sibling at the cost of a more complex API. It works pretty similarly to pcre's part to keep the overall change smooth, except: - we define the string class supported at compile time. - after matching, the ovec data is properly sized, although we do not take advantage of it here. - the lack of jit support is treated less 'dramatically' as pcre2_jit_compile in this case is 'no-op'. --- Makefile | 54 ++++++++++++++++++++ include/common/regex.h | 32 +++++++++++- src/regex.c | 113 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 193 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 9e3948501..a87e1e24b 100644 --- a/Makefile +++ b/Makefile @@ -14,11 +14,14 @@ # USE_NETFILTER : enable netfilter on Linux. Automatic. # USE_PCRE : enable use of libpcre for regex. Recommended. # USE_PCRE_JIT : enable JIT for faster regex on libpcre >= 8.32 +# USE_PCRE2 : enable use of libpcre2 for regex. +# USE_PCRE2_JIT : enable JIT for faster regex on libpcre2 # USE_POLL : enable poll(). Automatic. # USE_PRIVATE_CACHE : disable shared memory cache of ssl sessions. # USE_PTHREAD_PSHARED : enable pthread process shared mutex on sslcache. # USE_REGPARM : enable regparm optimization. Recommended on x86. # USE_STATIC_PCRE : enable static libpcre. Recommended. +# USE_STATIC_PCRE2 : enable static libpcre2. # USE_TPROXY : enable transparent proxy. Automatic. # USE_LINUX_TPROXY : enable full transparent proxy. Automatic. # USE_LINUX_SPLICE : enable kernel 2.6 splicing. Automatic. 
@@ -671,6 +674,9 @@ OPTIONS_LDFLAGS += $(if $(WURFL_LIB),-L$(WURFL_LIB)) -lwurfl endif ifneq ($(USE_PCRE)$(USE_STATIC_PCRE)$(USE_PCRE_JIT),) +ifneq ($(USE_PCRE2)$(USE_STATIC_PCRE2)$(USE_PCRE2_JIT),) +$(error cannot compile both PCRE and PCRE2 support) +endif # PCREDIR is used to automatically construct the PCRE_INC and PCRE_LIB paths, # by appending /include and /lib respectively. If your system does not use the # same sub-directories, simply force these variables instead of PCREDIR. It is @@ -702,6 +708,54 @@ BUILD_OPTIONS += $(call ignore_implicit,USE_PCRE_JIT) endif endif +ifneq ($(USE_PCRE2)$(USE_STATIC_PCRE2)$(USE_PCRE2_JIT),) +PCRE2DIR := $(shell pcre2-config --prefix 2>/dev/null || echo /usr/local) +ifneq ($(PCRE2DIR),) +PCRE2_INC := $(PCRE2DIR)/include +PCRE2_LIB := $(PCRE2DIR)/lib + +ifeq ($(PCRE2_WIDTH),) +PCRE2_WIDTH = 8 +endif + +ifneq ($(PCRE2_WIDTH),8) +ifneq ($(PCRE2_WIDTH),16) +ifneq ($(PCRE2_WIDTH),32) +$(error PCRE2_WIDTH needs to be set to either 8,16 or 32) +endif +endif +endif + + +PCRE2_LDFLAGS := $(shell pcre2-config --libs$(PCRE2_WIDTH) 2>/dev/null || echo -L/usr/local/lib -lpcre2-$(PCRE2_WIDTH)) + +ifeq ($(PCRE2_LDFLAGS),) +$(error libpcre2-$(PCRE2_WIDTH) not found) +else +ifeq ($(PCRE2_WIDTH),8) +PCRE2_LDFLAGS += -lpcre2-posix +endif +endif + +OPTIONS_CFLAGS += -DUSE_PCRE2 -DPCRE2_CODE_UNIT_WIDTH=$(PCRE2_WIDTH) +OPTIONS_CFLAGS += $(if $(PCRE2_INC), -I$(PCRE2_INC)) + +ifneq ($(USE_STATIC_PCRE2),) +OPTIONS_LDFLAGS += $(if $(PCRE2_LIB),-L$(PCRE2_LIB)) -Wl,-Bstatic -L$(PCRE2_LIB) $(PCRE2_LDFLAGS) -Wl,-Bdynamic +BUILD_OPTIONS += $(call ignore_implicit,USE_STATIC_PCRE2) +else +OPTIONS_LDFLAGS += $(if $(PCRE2_LIB),-L$(PCRE2_LIB)) -L$(PCRE2_LIB) $(PCRE2_LDFLAGS) +BUILD_OPTIONS += $(call ignore_implicit,USE_PCRE2) +endif + +ifneq ($(USE_PCRE2_JIT),) +OPTIONS_CFLAGS += -DUSE_PCRE2_JIT +BUILD_OPTIONS += $(call ignore_implicit,USE_PCRE2_JIT) +endif + +endif +endif + # TCP Fast Open ifneq ($(USE_TFO),) OPTIONS_CFLAGS += -DUSE_TFO diff --git 
a/include/common/regex.h b/include/common/regex.h index 8a1703f8c..2f171b3ba 100644 --- a/include/common/regex.h +++ b/include/common/regex.h @@ -36,7 +36,11 @@ #define PCRE_STUDY_JIT_COMPILE 0 #endif -#else /* no PCRE */ +#elif USE_PCRE2 +#include +#include + +#else /* no PCRE, nor PCRE2 */ #include #endif @@ -49,6 +53,8 @@ struct my_regex { #error "The PCRE lib doesn't support JIT. Change your lib, or remove the option USE_PCRE_JIT." #endif #endif +#elif USE_PCRE2 + pcre2_code *reg; #else /* no PCRE */ regex_t regex; #endif @@ -95,6 +101,17 @@ static inline int regex_exec(const struct my_regex *preg, char *subject) { if (pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, 0, NULL, 0) < 0) return 0; return 1; +#elif defined(USE_PCRE2) + pcre2_match_data *pm; + int ret; + + pm = pcre2_match_data_create_from_pattern(preg->reg, NULL); + ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject), + 0, 0, pm, NULL); + pcre2_match_data_free(pm); + if (ret < 0) + return 0; + return 1; #else int match; match = regexec(&preg->regex, subject, 0, NULL, 0); @@ -115,6 +132,17 @@ static inline int regex_exec2(const struct my_regex *preg, char *subject, int le if (pcre_exec(preg->reg, preg->extra, subject, length, 0, 0, NULL, 0) < 0) return 0; return 1; +#elif defined(USE_PCRE2) + pcre2_match_data *pm; + int ret; + + pm = pcre2_match_data_create_from_pattern(preg->reg, NULL); + ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length, + 0, 0, pm, NULL); + pcre2_match_data_free(pm); + if (ret < 0) + return 0; + return 1; #else int match; char old_char = subject[length]; @@ -143,6 +171,8 @@ static inline void regex_free(struct my_regex *preg) { #else /* PCRE_CONFIG_JIT */ pcre_free(preg->extra); #endif /* PCRE_CONFIG_JIT */ +#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT) + pcre2_code_free(preg->reg); #else regfree(&preg->regex); #endif diff --git a/src/regex.c b/src/regex.c index dd7719405..38d7132b3 100644 --- a/src/regex.c +++ 
b/src/regex.c @@ -158,9 +158,14 @@ const char *chain_regex(struct hdr_exp **head, struct my_regex *preg, */ int regex_exec_match(const struct my_regex *preg, const char *subject, size_t nmatch, regmatch_t pmatch[], int flags) { -#if defined(USE_PCRE) || defined(USE_PCRE_JIT) +#if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT) int ret; +#ifdef USE_PCRE2 + PCRE2_SIZE *matches; + pcre2_match_data *pm; +#else int matches[MAX_MATCH * 3]; +#endif int enmatch; int i; int options; @@ -169,15 +174,20 @@ int regex_exec_match(const struct my_regex *preg, const char *subject, * match i the maximum value for match, in fact this * limit is not applyied. */ + enmatch = nmatch; if (enmatch > MAX_MATCH) enmatch = MAX_MATCH; options = 0; if (flags & REG_NOTBOL) +#ifdef USE_PCRE2 + options |= PCRE2_NOTBOL; +#else options |= PCRE_NOTBOL; +#endif - /* The value returned by pcre_exec() is one more than the highest numbered + /* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered * pair that has been set. For example, if two substrings have been captured, * the returned value is 3. If there are no capturing subpatterns, the return * value from a successful match is 1, indicating that just the first pair of @@ -186,9 +196,22 @@ int regex_exec_match(const struct my_regex *preg, const char *subject, * It seems that this function returns 0 if it detect more matches than avalaible * space in the matches array. 
*/ +#ifdef USE_PCRE2 + pm = pcre2_match_data_create_from_pattern(preg->reg, NULL); + ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject), 0, options, pm, NULL); + + if (ret < 0) { + pcre2_match_data_free(pm); + return 0; + } + + matches = pcre2_get_ovector_pointer(pm); +#else ret = pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, options, matches, enmatch * 3); + if (ret < 0) return 0; +#endif if (ret == 0) ret = enmatch; @@ -204,6 +227,9 @@ int regex_exec_match(const struct my_regex *preg, const char *subject, pmatch[i].rm_so = -1; pmatch[i].rm_eo = -1; } +#ifdef USE_PCRE2 + pcre2_match_data_free(pm); +#endif return 1; #else int match; @@ -226,9 +252,14 @@ int regex_exec_match(const struct my_regex *preg, const char *subject, */ int regex_exec_match2(const struct my_regex *preg, char *subject, int length, size_t nmatch, regmatch_t pmatch[], int flags) { -#if defined(USE_PCRE) || defined(USE_PCRE_JIT) +#if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT) int ret; +#ifdef USE_PCRE2 + PCRE2_SIZE *matches; + pcre2_match_data *pm; +#else int matches[MAX_MATCH * 3]; +#endif int enmatch; int i; int options; @@ -243,9 +274,13 @@ int regex_exec_match2(const struct my_regex *preg, char *subject, int length, options = 0; if (flags & REG_NOTBOL) +#ifdef USE_PCRE2 + options |= PCRE2_NOTBOL; +#else options |= PCRE_NOTBOL; +#endif - /* The value returned by pcre_exec() is one more than the highest numbered + /* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered * pair that has been set. For example, if two substrings have been captured, * the returned value is 3. 
If there are no capturing subpatterns, the return * value from a successful match is 1, indicating that just the first pair of @@ -254,9 +289,21 @@ int regex_exec_match2(const struct my_regex *preg, char *subject, int length, * It seems that this function returns 0 if it detect more matches than avalaible * space in the matches array. */ +#ifdef USE_PCRE2 + pm = pcre2_match_data_create_from_pattern(preg->reg, NULL); + ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length, 0, options, pm, NULL); + + if (ret < 0) { + pcre2_match_data_free(pm); + return 0; + } + + matches = pcre2_get_ovector_pointer(pm); +#else ret = pcre_exec(preg->reg, preg->extra, subject, length, 0, options, matches, enmatch * 3); if (ret < 0) return 0; +#endif if (ret == 0) ret = enmatch; @@ -272,6 +319,9 @@ int regex_exec_match2(const struct my_regex *preg, char *subject, int length, pmatch[i].rm_so = -1; pmatch[i].rm_eo = -1; } +#ifdef USE_PCRE2 + pcre2_match_data_free(pm); +#endif return 1; #else char old_char = subject[length]; @@ -311,6 +361,40 @@ int regex_comp(const char *str, struct my_regex *regex, int cs, int cap, char ** memprintf(err, "failed to compile regex '%s' (error=%s)", str, error); return 0; } +#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT) + int flags = 0; + int errn; +#if defined(USE_PCRE2_JIT) + int jit; +#endif + PCRE2_UCHAR error[256]; + PCRE2_SIZE erroffset; + + if (!cs) + flags |= PCRE2_CASELESS; + if (!cap) + flags |= PCRE2_NO_AUTO_CAPTURE; + + regex->reg = pcre2_compile((PCRE2_SPTR)str, PCRE2_ZERO_TERMINATED, flags, &errn, &erroffset, NULL); + if (!regex->reg) { + pcre2_get_error_message(errn, error, sizeof(error)); + memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%zu)", str, error, erroffset); + return 0; + } + +#if defined(USE_PCRE2_JIT) + jit = pcre2_jit_compile(regex->reg, PCRE2_JIT_COMPLETE); + /* + * We end if it is an error not related to lack of JIT support + * in a case of JIT support missing pcre2_jit_compile is "no-op" + */ + 
if (jit < 0 && jit != PCRE2_ERROR_JIT_BADOPTION) { + pcre2_code_free(regex->reg); + memprintf(err, "regex '%s' jit compilation failed", str); + return 0; + } +#endif + #else int flags = REG_EXTENDED; @@ -349,8 +433,27 @@ static void __regex_init(void) "no (USE_PCRE_JIT not set)" #endif ); +#endif /* USE_PCRE */ + +#ifdef USE_PCRE2 + memprintf(&ptr, "Built with PCRE2 version : %s", (HAP_XSTRING(Z PCRE2_PRERELEASE)[1] == 0) ? + HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) : + HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR) HAP_XSTRING(PCRE2_PRERELEASE PCRE2_DATE)); + memprintf(&ptr, "%s\nPCRE2 library supports JIT : %s", ptr, +#ifdef USE_PCRE2_JIT + ({ + int r; + pcre2_config(PCRE2_CONFIG_JIT, &r); + r ? "yes" : "no (libpcre2 build without JIT?)"; + }) #else - memprintf(&ptr, "Built without PCRE support (using libc's regex instead)"); + "no (USE_PCRE2_JIT not set)" +#endif + ); +#endif /* USE_PCRE2 */ + +#if !defined(USE_PCRE) && !defined(USE_PCRE2) + memprintf(&ptr, "Built without PCRE or PCRE2 support (using libc's regex instead)"); #endif hap_register_build_opts(ptr, 1); }