MEDIUM: regex: pcre2 support

This adds support for the newer pcre2 library, which is
more secure than its older sibling at the cost of a
more complex API.
It works much like the existing pcre code to keep
the overall change smooth, except:

- the supported string class (code unit width) is defined at compile time.
- after matching, the ovec data is properly sized, although
we do not take advantage of it here.
- the lack of JIT support is treated less 'dramatically',
as pcre2_jit_compile is a 'no-op' in this case.
This commit is contained in:
David Carlier 2016-11-21 21:25:58 +00:00 committed by Willy Tarreau
parent 01e0974b5a
commit f2592b29f1
3 changed files with 193 additions and 6 deletions

View File

@ -14,11 +14,14 @@
# USE_NETFILTER : enable netfilter on Linux. Automatic.
# USE_PCRE : enable use of libpcre for regex. Recommended.
# USE_PCRE_JIT : enable JIT for faster regex on libpcre >= 8.32
# USE_PCRE2 : enable use of libpcre2 for regex.
# USE_PCRE2_JIT : enable JIT for faster regex on libpcre2
# USE_POLL : enable poll(). Automatic.
# USE_PRIVATE_CACHE : disable shared memory cache of ssl sessions.
# USE_PTHREAD_PSHARED : enable pthread process shared mutex on sslcache.
# USE_REGPARM : enable regparm optimization. Recommended on x86.
# USE_STATIC_PCRE : enable static libpcre. Recommended.
# USE_STATIC_PCRE2 : enable static libpcre2.
# USE_TPROXY : enable transparent proxy. Automatic.
# USE_LINUX_TPROXY : enable full transparent proxy. Automatic.
# USE_LINUX_SPLICE : enable kernel 2.6 splicing. Automatic.
@ -671,6 +674,9 @@ OPTIONS_LDFLAGS += $(if $(WURFL_LIB),-L$(WURFL_LIB)) -lwurfl
endif
ifneq ($(USE_PCRE)$(USE_STATIC_PCRE)$(USE_PCRE_JIT),)
ifneq ($(USE_PCRE2)$(USE_STATIC_PCRE2)$(USE_PCRE2_JIT),)
$(error cannot compile both PCRE and PCRE2 support)
endif
# PCREDIR is used to automatically construct the PCRE_INC and PCRE_LIB paths,
# by appending /include and /lib respectively. If your system does not use the
# same sub-directories, simply force these variables instead of PCREDIR. It is
@ -702,6 +708,54 @@ BUILD_OPTIONS += $(call ignore_implicit,USE_PCRE_JIT)
endif
endif
# PCRE2 support. This section is enabled as soon as any of USE_PCRE2,
# USE_STATIC_PCRE2 or USE_PCRE2_JIT is set. The installation prefix and the
# link flags are queried from pcre2-config when it can be run, and fall back
# to /usr/local otherwise.
ifneq ($(USE_PCRE2)$(USE_STATIC_PCRE2)$(USE_PCRE2_JIT),)
PCRE2DIR := $(shell pcre2-config --prefix 2>/dev/null || echo /usr/local)
ifneq ($(PCRE2DIR),)
PCRE2_INC := $(PCRE2DIR)/include
PCRE2_LIB := $(PCRE2DIR)/lib
# PCRE2_WIDTH selects which libpcre2-<width> is linked and is passed to the
# compiler as PCRE2_CODE_UNIT_WIDTH. It defaults to 8 when left unset.
ifeq ($(PCRE2_WIDTH),)
PCRE2_WIDTH = 8
endif
# Only 8, 16 and 32 are accepted; anything else aborts the build.
ifneq ($(PCRE2_WIDTH),8)
ifneq ($(PCRE2_WIDTH),16)
ifneq ($(PCRE2_WIDTH),32)
$(error PCRE2_WIDTH needs to be set to either 8,16 or 32)
endif
endif
endif
PCRE2_LDFLAGS := $(shell pcre2-config --libs$(PCRE2_WIDTH) 2>/dev/null || echo -L/usr/local/lib -lpcre2-$(PCRE2_WIDTH))
# Because of the "|| echo" fallback above, the flags can only be empty when
# pcre2-config itself succeeds but prints nothing.
ifeq ($(PCRE2_LDFLAGS),)
$(error libpcre2-$(PCRE2_WIDTH) not found)
else
# The POSIX wrapper library is only added for the 8-bit width.
ifeq ($(PCRE2_WIDTH),8)
PCRE2_LDFLAGS += -lpcre2-posix
endif
endif
OPTIONS_CFLAGS += -DUSE_PCRE2 -DPCRE2_CODE_UNIT_WIDTH=$(PCRE2_WIDTH)
OPTIONS_CFLAGS += $(if $(PCRE2_INC), -I$(PCRE2_INC))
# The library search path is emitted once; the PCRE2 libraries themselves are
# wrapped in -Bstatic/-Bdynamic when a static link is requested.
ifneq ($(USE_STATIC_PCRE2),)
OPTIONS_LDFLAGS += $(if $(PCRE2_LIB),-L$(PCRE2_LIB)) -Wl,-Bstatic $(PCRE2_LDFLAGS) -Wl,-Bdynamic
BUILD_OPTIONS += $(call ignore_implicit,USE_STATIC_PCRE2)
else
OPTIONS_LDFLAGS += $(if $(PCRE2_LIB),-L$(PCRE2_LIB)) $(PCRE2_LDFLAGS)
BUILD_OPTIONS += $(call ignore_implicit,USE_PCRE2)
endif
# JIT is an additional compile-time switch on top of the base PCRE2 support.
ifneq ($(USE_PCRE2_JIT),)
OPTIONS_CFLAGS += -DUSE_PCRE2_JIT
BUILD_OPTIONS += $(call ignore_implicit,USE_PCRE2_JIT)
endif
endif
endif
# TCP Fast Open
ifneq ($(USE_TFO),)
OPTIONS_CFLAGS += -DUSE_TFO

View File

@ -36,7 +36,11 @@
#define PCRE_STUDY_JIT_COMPILE 0
#endif
#else /* no PCRE */
#elif USE_PCRE2
#include <pcre2.h>
#include <pcre2posix.h>
#else /* no PCRE, nor PCRE2 */
#include <regex.h>
#endif
@ -49,6 +53,8 @@ struct my_regex {
#error "The PCRE lib doesn't support JIT. Change your lib, or remove the option USE_PCRE_JIT."
#endif
#endif
#elif USE_PCRE2
pcre2_code *reg;
#else /* no PCRE */
regex_t regex;
#endif
@ -95,6 +101,17 @@ static inline int regex_exec(const struct my_regex *preg, char *subject) {
if (pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, 0, NULL, 0) < 0)
return 0;
return 1;
#elif defined(USE_PCRE2)
pcre2_match_data *pm;
int ret;
pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject),
0, 0, pm, NULL);
pcre2_match_data_free(pm);
if (ret < 0)
return 0;
return 1;
#else
int match;
match = regexec(&preg->regex, subject, 0, NULL, 0);
@ -115,6 +132,17 @@ static inline int regex_exec2(const struct my_regex *preg, char *subject, int le
if (pcre_exec(preg->reg, preg->extra, subject, length, 0, 0, NULL, 0) < 0)
return 0;
return 1;
#elif defined(USE_PCRE2)
pcre2_match_data *pm;
int ret;
pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length,
0, 0, pm, NULL);
pcre2_match_data_free(pm);
if (ret < 0)
return 0;
return 1;
#else
int match;
char old_char = subject[length];
@ -143,6 +171,8 @@ static inline void regex_free(struct my_regex *preg) {
#else /* PCRE_CONFIG_JIT */
pcre_free(preg->extra);
#endif /* PCRE_CONFIG_JIT */
#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
pcre2_code_free(preg->reg);
#else
regfree(&preg->regex);
#endif

View File

@ -158,9 +158,14 @@ const char *chain_regex(struct hdr_exp **head, struct my_regex *preg,
*/
int regex_exec_match(const struct my_regex *preg, const char *subject,
size_t nmatch, regmatch_t pmatch[], int flags) {
#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
#if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
int ret;
#ifdef USE_PCRE2
PCRE2_SIZE *matches;
pcre2_match_data *pm;
#else
int matches[MAX_MATCH * 3];
#endif
int enmatch;
int i;
int options;
@ -169,15 +174,20 @@ int regex_exec_match(const struct my_regex *preg, const char *subject,
* match i the maximum value for match, in fact this
* limit is not applyied.
*/
enmatch = nmatch;
if (enmatch > MAX_MATCH)
enmatch = MAX_MATCH;
options = 0;
if (flags & REG_NOTBOL)
#ifdef USE_PCRE2
options |= PCRE2_NOTBOL;
#else
options |= PCRE_NOTBOL;
#endif
/* The value returned by pcre_exec() is one more than the highest numbered
/* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered
* pair that has been set. For example, if two substrings have been captured,
* the returned value is 3. If there are no capturing subpatterns, the return
* value from a successful match is 1, indicating that just the first pair of
@ -186,9 +196,22 @@ int regex_exec_match(const struct my_regex *preg, const char *subject,
* It seems that this function returns 0 if it detects more matches than available
* space in the matches array.
*/
#ifdef USE_PCRE2
pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject), 0, options, pm, NULL);
if (ret < 0) {
pcre2_match_data_free(pm);
return 0;
}
matches = pcre2_get_ovector_pointer(pm);
#else
ret = pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, options, matches, enmatch * 3);
if (ret < 0)
return 0;
#endif
if (ret == 0)
ret = enmatch;
@ -204,6 +227,9 @@ int regex_exec_match(const struct my_regex *preg, const char *subject,
pmatch[i].rm_so = -1;
pmatch[i].rm_eo = -1;
}
#ifdef USE_PCRE2
pcre2_match_data_free(pm);
#endif
return 1;
#else
int match;
@ -226,9 +252,14 @@ int regex_exec_match(const struct my_regex *preg, const char *subject,
*/
int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
size_t nmatch, regmatch_t pmatch[], int flags) {
#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
#if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
int ret;
#ifdef USE_PCRE2
PCRE2_SIZE *matches;
pcre2_match_data *pm;
#else
int matches[MAX_MATCH * 3];
#endif
int enmatch;
int i;
int options;
@ -243,9 +274,13 @@ int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
options = 0;
if (flags & REG_NOTBOL)
#ifdef USE_PCRE2
options |= PCRE2_NOTBOL;
#else
options |= PCRE_NOTBOL;
#endif
/* The value returned by pcre_exec() is one more than the highest numbered
/* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered
* pair that has been set. For example, if two substrings have been captured,
* the returned value is 3. If there are no capturing subpatterns, the return
* value from a successful match is 1, indicating that just the first pair of
@ -254,9 +289,21 @@ int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
* It seems that this function returns 0 if it detects more matches than available
* space in the matches array.
*/
#ifdef USE_PCRE2
pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length, 0, options, pm, NULL);
if (ret < 0) {
pcre2_match_data_free(pm);
return 0;
}
matches = pcre2_get_ovector_pointer(pm);
#else
ret = pcre_exec(preg->reg, preg->extra, subject, length, 0, options, matches, enmatch * 3);
if (ret < 0)
return 0;
#endif
if (ret == 0)
ret = enmatch;
@ -272,6 +319,9 @@ int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
pmatch[i].rm_so = -1;
pmatch[i].rm_eo = -1;
}
#ifdef USE_PCRE2
pcre2_match_data_free(pm);
#endif
return 1;
#else
char old_char = subject[length];
@ -311,6 +361,40 @@ int regex_comp(const char *str, struct my_regex *regex, int cs, int cap, char **
memprintf(err, "failed to compile regex '%s' (error=%s)", str, error);
return 0;
}
#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
int flags = 0;
int errn;
#if defined(USE_PCRE2_JIT)
int jit;
#endif
PCRE2_UCHAR error[256];
PCRE2_SIZE erroffset;
if (!cs)
flags |= PCRE2_CASELESS;
if (!cap)
flags |= PCRE2_NO_AUTO_CAPTURE;
regex->reg = pcre2_compile((PCRE2_SPTR)str, PCRE2_ZERO_TERMINATED, flags, &errn, &erroffset, NULL);
if (!regex->reg) {
pcre2_get_error_message(errn, error, sizeof(error));
memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%zu)", str, error, erroffset);
return 0;
}
#if defined(USE_PCRE2_JIT)
jit = pcre2_jit_compile(regex->reg, PCRE2_JIT_COMPLETE);
/*
* We end if it is an error not related to lack of JIT support
* in a case of JIT support missing pcre2_jit_compile is "no-op"
*/
if (jit < 0 && jit != PCRE2_ERROR_JIT_BADOPTION) {
pcre2_code_free(regex->reg);
memprintf(err, "regex '%s' jit compilation failed", str);
return 0;
}
#endif
#else
int flags = REG_EXTENDED;
@ -349,8 +433,27 @@ static void __regex_init(void)
"no (USE_PCRE_JIT not set)"
#endif
);
#endif /* USE_PCRE */
#ifdef USE_PCRE2
memprintf(&ptr, "Built with PCRE2 version : %s", (HAP_XSTRING(Z PCRE2_PRERELEASE)[1] == 0) ?
HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR) HAP_XSTRING(PCRE2_PRERELEASE PCRE2_DATE));
memprintf(&ptr, "%s\nPCRE2 library supports JIT : %s", ptr,
#ifdef USE_PCRE2_JIT
({
int r;
pcre2_config(PCRE2_CONFIG_JIT, &r);
r ? "yes" : "no (libpcre2 build without JIT?)";
})
#else
memprintf(&ptr, "Built without PCRE support (using libc's regex instead)");
"no (USE_PCRE2_JIT not set)"
#endif
);
#endif /* USE_PCRE2 */
#if !defined(USE_PCRE) && !defined(USE_PCRE2)
memprintf(&ptr, "Built without PCRE or PCRE2 support (using libc's regex instead)");
#endif
hap_register_build_opts(ptr, 1);
}