MEDIUM: regex: Use PCRE JIT in acl

This patch makes ACL regex matching use PCRE JIT.

I noticed that regexes are used in other places too, but those are more
complicated to convert to the PCRE APIs, so I focused on ACLs for this first attempt.

BTW, I made a simple benchmark program for PCRE JIT beforehand.
https://github.com/hnakamur/pcre-jit-benchmark

I read the manual for PCRE JIT
http://www.manpagez.com/man/3/pcrejit/

and wrote my benchmark program.
https://github.com/hnakamur/pcre-jit-benchmark/blob/master/test-pcre.c
This commit is contained in:
Hiroaki Nakamura 2013-01-13 15:00:42 +09:00 committed by Willy Tarreau
parent 0fbf016508
commit 7035132349
4 changed files with 61 additions and 7 deletions

View File

@ -14,6 +14,7 @@
# USE_MY_SPLICE : redefine the splice syscall if build fails without.
# USE_NETFILTER : enable netfilter on Linux. Automatic.
# USE_PCRE : enable use of libpcre for regex. Recommended.
# USE_PCRE_JIT : enable use of libpcre jit for regex. Recommended.
# USE_POLL : enable poll(). Automatic.
# USE_PRIVATE_CACHE : disable shared memory cache of ssl sessions.
# USE_REGPARM : enable regparm optimization. Recommended on x86.
@ -521,7 +522,7 @@ endif
endif
endif
ifneq ($(USE_PCRE)$(USE_STATIC_PCRE),)
ifneq ($(USE_PCRE)$(USE_STATIC_PCRE)$(USE_PCRE_JIT),)
# PCREDIR is used to automatically construct the PCRE_INC and PCRE_LIB paths,
# by appending /include and /lib respectively. If your system does not use the
# same sub-directories, simply force these variables instead of PCREDIR. It is
@ -546,6 +547,10 @@ OPTIONS_CFLAGS += -DUSE_PCRE $(if $(PCRE_INC),-I$(PCRE_INC))
OPTIONS_LDFLAGS += $(if $(PCRE_LIB),-L$(PCRE_LIB)) -Wl,-Bstatic -lpcreposix -lpcre -Wl,-Bdynamic
BUILD_OPTIONS += $(call ignore_implicit,USE_STATIC_PCRE)
endif
# JIT PCRE
ifneq ($(USE_PCRE_JIT),)
OPTIONS_CFLAGS += -DUSE_PCRE_JIT
endif
endif
# This one can be changed to look for ebtree files in an external directory

View File

@ -27,8 +27,20 @@
#ifdef USE_PCRE
#include <pcre.h>
#include <pcreposix.h>
#else
#ifdef USE_PCRE_JIT
/* A compiled PCRE pattern bundled with the study data produced for it. */
typedef struct jit_regex {
	pcre *reg;          /* compiled pattern, as returned by pcre_compile() */
	pcre_extra *extra;  /* study data, as returned by pcre_study() */
} regex;
#else /* no PCRE_JIT */
typedef regex_t regex;
#endif
#else /* no PCRE */
#include <regex.h>
typedef regex_t regex;
#endif
/* what to do when a header matches a regex */
@ -55,6 +67,24 @@ const char *check_replace_string(const char *str);
const char *chain_regex(struct hdr_exp **head, const regex_t *preg,
int action, const char *replace, void *cond);
/*
 * Match <subject> against the compiled pattern <preg>. <length> is the number
 * of bytes of <subject> to consider; only the PCRE JIT build passes it on to
 * the matcher, the POSIX build hands <subject> to regexec() as a C string.
 * The matcher's return value is passed through unchanged.
 */
static inline int regex_exec(const regex *preg, const char *subject, int length)
{
	int status;

#ifdef USE_PCRE_JIT
	/* no output vector is supplied: captured offsets are not requested */
	status = pcre_exec(preg->reg, preg->extra, subject, length, 0, 0, NULL, 0);
#else
	status = regexec(preg, subject, 0, NULL, 0);
#endif
	return status;
}
/*
 * Release the resources held inside the compiled regex <preg>.
 *
 * The <preg> container itself is deliberately NOT freed here, in either
 * build. The POSIX branch only ever called regfree(), which leaves the
 * regex_t storage to its owner, so freeing the container in the PCRE JIT
 * branch alone made ownership differ between builds and exposes JIT builds to
 * a double free if the owner releases the pointer after running the
 * destructor.
 * NOTE(review): this assumes the code that invokes the pattern's freeptrbuf
 * destructor also frees the pointer afterwards - confirm against the
 * pattern-release code.
 */
static inline void regex_free(regex *preg)
{
#ifdef USE_PCRE_JIT
	/* study data first, then the compiled pattern it was derived from */
	pcre_free_study(preg->extra);
	pcre_free(preg->reg);
#else
	regfree(preg);
#endif
}
#endif /* _COMMON_REGEX_H */
/*

View File

@ -213,7 +213,7 @@ struct acl_pattern {
union {
void *ptr; /* any data */
char *str; /* any string */
regex_t *reg; /* a compiled regex */
regex *reg; /* a compiled regex */
} ptr; /* indirect values, allocated */
void(*freeptrbuf)(void *ptr); /* a destructor able to free objects from the ptr */
int len; /* data length when required */

View File

@ -533,7 +533,7 @@ int acl_match_reg(struct sample *smp, struct acl_pattern *pattern)
old_char = smp->data.str.str[smp->data.str.len];
smp->data.str.str[smp->data.str.len] = 0;
if (regexec(pattern->ptr.reg, smp->data.str.str, 0, NULL, 0) == 0)
if (regex_exec(pattern->ptr.reg, smp->data.str.str, smp->data.str.len) == 0)
ret = ACL_PAT_PASS;
else
ret = ACL_PAT_FAIL;
@ -900,28 +900,47 @@ acl_parse_strcat(const char **text, struct acl_pattern *pattern, int *opaque, ch
/* Free data allocated by acl_parse_reg. <ptr> is the compiled regex that
 * acl_parse_reg stored in the pattern's ptr.reg field; acl_parse_reg installs
 * this function as the pattern's freeptrbuf destructor.
 */
static void acl_free_reg(void *ptr)
{
regfree((regex_t *)ptr);
regex_free(ptr);
}
/* Parse a regex. It is allocated. */
int acl_parse_reg(const char **text, struct acl_pattern *pattern, int *opaque, char **err)
{
regex_t *preg;
regex *preg;
int icase;
preg = calloc(1, sizeof(regex_t));
preg = calloc(1, sizeof(*preg));
if (!preg) {
memprintf(err, "out of memory while loading pattern");
return 0;
}
#ifdef USE_PCRE_JIT
icase = (pattern->flags & ACL_PAT_F_IGNORE_CASE) ? PCRE_CASELESS : 0;
preg->reg = pcre_compile(*text, PCRE_NO_AUTO_CAPTURE | icase, NULL, NULL,
NULL);
if (!preg->reg) {
free(preg);
memprintf(err, "regex '%s' is invalid", *text);
return 0;
}
preg->extra = pcre_study(preg->reg, PCRE_STUDY_JIT_COMPILE, NULL);
if (!preg->extra) {
pcre_free(preg->reg);
free(preg);
memprintf(err, "failed to compile regex '%s'", *text);
return 0;
}
#else
icase = (pattern->flags & ACL_PAT_F_IGNORE_CASE) ? REG_ICASE : 0;
if (regcomp(preg, *text, REG_EXTENDED | REG_NOSUB | icase) != 0) {
free(preg);
memprintf(err, "regex '%s' is invalid", *text);
return 0;
}
#endif
pattern->ptr.reg = preg;
pattern->freeptrbuf = &acl_free_reg;