MAJOR: pattern: add LRU-based cache on pattern matching
The principle of this cache is to have a global cache for all pattern matching operations which rely on lists (reg, sub, dir, dom, ...). The input data, the expression and a random seed are used as a hashing key. The cached entries contains a pointer to the expression and a revision number for that expression so that we don't accidently used obsolete data after a pattern update or a very unlikely hash collision. Regarding the risk of collisions, 10k entries at 10k req/s mean 1% risk of a collision after 60 years, that's already much less than the memory's reliability in most machines and more durable than most admin's life expectancy. A collision will result in a valid result to be returned for a different entry from the same list. If this is not acceptable, the cache can be disabled using tune.pattern.cache-size. A test on a file containing 10k small regex showed that the regex matching was limited to 6k/s instead of 70k with regular strings. When enabling the LRU cache, the performance was back to 70k/s.
This commit is contained in:
parent
72f073b6c7
commit
f3045d2a06
|
@ -493,6 +493,7 @@ The following keywords are supported in the "global" section :
|
|||
- tune.maxaccept
|
||||
- tune.maxpollevents
|
||||
- tune.maxrewrite
|
||||
- tune.pattern.cache-size
|
||||
- tune.pipesize
|
||||
- tune.rcvbuf.client
|
||||
- tune.rcvbuf.server
|
||||
|
@ -1050,6 +1051,25 @@ tune.maxrewrite <number>
|
|||
larger than that. This means you don't have to worry about it when changing
|
||||
bufsize.
|
||||
|
||||
tune.pattern.cache-size <number>
|
||||
Sets the size of the pattern lookup cache to <number> entries. This is an LRU
|
||||
cache which reminds previous lookups and their results. It is used by ACLs
|
||||
and maps on slow pattern lookups, namely the ones using the "sub", "reg",
|
||||
"dir", "dom", "end", "bin" match methods as well as the case-insensitive
|
||||
strings. It applies to pattern expressions which means that it will be able
|
||||
to memorize the result of a lookup among all the patterns specified on a
|
||||
configuration line (including all those loaded from files). It automatically
|
||||
invalidates entries which are updated using HTTP actions or on the CLI. The
|
||||
default cache size is set to 10000 entries, which limits its footprint to
|
||||
about 5 MB on 32-bit systems and 8 MB on 64-bit systems. There is a very low
|
||||
risk of collision in this cache, which is in the order of the size of the
|
||||
cache divided by 2^64. Typically, at 10000 requests per second with the
|
||||
default cache size of 10000 entries, there's 1% chance that a brute force
|
||||
attack could cause a single collision after 60 years, or 0.1% after 6 years.
|
||||
This is considered much lower than the risk of a memory corruption caused by
|
||||
aging components. If this is not acceptable, the cache can be disabled by
|
||||
setting this parameter to 0.
|
||||
|
||||
tune.pipesize <number>
|
||||
Sets the kernel pipe buffer size to this size (in bytes). By default, pipes
|
||||
are the default size for the system. But sometimes when using TCP splicing,
|
||||
|
|
|
@ -295,4 +295,15 @@
|
|||
#ifndef TLS_TICKETS_NO
|
||||
#define TLS_TICKETS_NO 3
|
||||
#endif
|
||||
|
||||
/* pattern lookup default cache size, in number of entries :
|
||||
* 10k entries at 10k req/s mean 1% risk of a collision after 60 years, that's
|
||||
* already much less than the memory's reliability in most machines and more
|
||||
* durable than most admin's life expectancy. A collision will result in a
|
||||
* valid result to be returned for a different entry from the same list.
|
||||
*/
|
||||
#ifndef DEFAULT_PAT_LRU_SIZE
|
||||
#define DEFAULT_PAT_LRU_SIZE 10000
|
||||
#endif
|
||||
|
||||
#endif /* _COMMON_DEFAULTS_H */
|
||||
|
|
|
@ -142,6 +142,7 @@ struct global {
|
|||
int pipesize; /* pipe size in bytes, system defaults if zero */
|
||||
int max_http_hdr; /* max number of HTTP headers, use MAX_HTTP_HDR if zero */
|
||||
int cookie_len; /* max length of cookie captures */
|
||||
int pattern_cache; /* max number of entries in the pattern cache. */
|
||||
int sslcachesize; /* SSL cache size in session, defaults to 20000 */
|
||||
#ifdef USE_OPENSSL
|
||||
int sslprivatecache; /* Force to use a private session cache even if nbproc > 1 */
|
||||
|
|
|
@ -907,6 +907,22 @@ int cfg_parse_global(const char *file, int linenum, char **args, int kwm)
|
|||
goto out;
|
||||
}
|
||||
}
|
||||
else if (!strcmp(args[0], "tune.pattern.cache-size")) {
|
||||
if (*args[1]) {
|
||||
global.tune.pattern_cache = atoi(args[1]);
|
||||
if (global.tune.pattern_cache < 0) {
|
||||
Alert("parsing [%s:%d] : '%s' expects a positive numeric value\n",
|
||||
file, linenum, args[0]);
|
||||
err_code |= ERR_ALERT | ERR_FATAL;
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
Alert("parsing [%s:%d] : '%s' expects a positive numeric value\n",
|
||||
file, linenum, args[0]);
|
||||
err_code |= ERR_ALERT | ERR_FATAL;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
else if (!strcmp(args[0], "uid")) {
|
||||
if (global.uid != 0) {
|
||||
Alert("parsing [%s:%d] : user/uid already specified. Continuing.\n", file, linenum);
|
||||
|
|
|
@ -146,6 +146,7 @@ struct global global = {
|
|||
.maxrewrite = MAXREWRITE,
|
||||
.chksize = BUFSIZE,
|
||||
.reserved_bufs = RESERVED_BUFS,
|
||||
.pattern_cache = DEFAULT_PAT_LRU_SIZE,
|
||||
#ifdef USE_OPENSSL
|
||||
.sslcachesize = SSLCACHESIZE,
|
||||
.ssl_default_dh_param = SSL_DEFAULT_DH_PARAM,
|
||||
|
|
147
src/pattern.c
147
src/pattern.c
|
@ -24,6 +24,8 @@
|
|||
#include <proto/sample.h>
|
||||
|
||||
#include <ebsttree.h>
|
||||
#include <import/lru.h>
|
||||
#include <import/xxhash.h>
|
||||
|
||||
char *pat_match_names[PAT_MATCH_NUM] = {
|
||||
[PAT_MATCH_FOUND] = "found",
|
||||
|
@ -144,6 +146,9 @@ static struct pattern static_pattern;
|
|||
/* This is the root of the list of all pattern_ref avalaibles. */
|
||||
struct list pattern_reference = LIST_HEAD_INIT(pattern_reference);
|
||||
|
||||
static struct lru64_head *pat_lru_tree;
|
||||
static unsigned long long pat_lru_seed;
|
||||
|
||||
/*
|
||||
*
|
||||
* The following functions are not exported and are used by internals process
|
||||
|
@ -443,6 +448,8 @@ struct pattern *pat_match_str(struct sample *smp, struct pattern_expr *expr, int
|
|||
struct pattern_tree *elt;
|
||||
struct pattern_list *lst;
|
||||
struct pattern *pattern;
|
||||
struct pattern *ret = NULL;
|
||||
struct lru64 *lru = NULL;
|
||||
|
||||
/* Lookup a string in the expression's pattern tree. */
|
||||
if (!eb_is_empty(&expr->pattern_tree)) {
|
||||
|
@ -468,6 +475,15 @@ struct pattern *pat_match_str(struct sample *smp, struct pattern_expr *expr, int
|
|||
}
|
||||
|
||||
/* look in the list */
|
||||
if (pat_lru_tree) {
|
||||
unsigned long long seed = pat_lru_seed ^ (unsigned long long)expr;
|
||||
|
||||
lru = lru64_get(XXH64(smp->data.str.str, smp->data.str.len, seed),
|
||||
pat_lru_tree, expr, expr->revision);
|
||||
if (lru && lru->domain)
|
||||
return lru->data;
|
||||
}
|
||||
|
||||
list_for_each_entry(lst, &expr->patterns, list) {
|
||||
pattern = &lst->pat;
|
||||
|
||||
|
@ -476,11 +492,16 @@ struct pattern *pat_match_str(struct sample *smp, struct pattern_expr *expr, int
|
|||
|
||||
icase = expr->mflags & PAT_MF_IGNORE_CASE;
|
||||
if ((icase && strncasecmp(pattern->ptr.str, smp->data.str.str, smp->data.str.len) == 0) ||
|
||||
(!icase && strncmp(pattern->ptr.str, smp->data.str.str, smp->data.str.len) == 0))
|
||||
return pattern;
|
||||
(!icase && strncmp(pattern->ptr.str, smp->data.str.str, smp->data.str.len) == 0)) {
|
||||
ret = pattern;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
if (lru)
|
||||
lru64_commit(lru, ret, expr, expr->revision);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* NB: For two binaries buf to be identical, it is required that their lengths match */
|
||||
|
@ -488,19 +509,34 @@ struct pattern *pat_match_bin(struct sample *smp, struct pattern_expr *expr, int
|
|||
{
|
||||
struct pattern_list *lst;
|
||||
struct pattern *pattern;
|
||||
struct pattern *ret = NULL;
|
||||
struct lru64 *lru = NULL;
|
||||
|
||||
if (pat_lru_tree) {
|
||||
unsigned long long seed = pat_lru_seed ^ (unsigned long long)expr;
|
||||
|
||||
lru = lru64_get(XXH64(smp->data.str.str, smp->data.str.len, seed),
|
||||
pat_lru_tree, expr, expr->revision);
|
||||
if (lru && lru->domain)
|
||||
return lru->data;
|
||||
}
|
||||
|
||||
/* Look in the list. */
|
||||
list_for_each_entry(lst, &expr->patterns, list) {
|
||||
pattern = &lst->pat;
|
||||
|
||||
if (pattern->len != smp->data.str.len)
|
||||
continue;
|
||||
|
||||
if (memcmp(pattern->ptr.str, smp->data.str.str, smp->data.str.len) == 0)
|
||||
return pattern;
|
||||
if (memcmp(pattern->ptr.str, smp->data.str.str, smp->data.str.len) == 0) {
|
||||
ret = pattern;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
if (lru)
|
||||
lru64_commit(lru, ret, expr, expr->revision);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Executes a regex. It temporarily changes the data to add a trailing zero,
|
||||
|
@ -510,15 +546,31 @@ struct pattern *pat_match_reg(struct sample *smp, struct pattern_expr *expr, int
|
|||
{
|
||||
struct pattern_list *lst;
|
||||
struct pattern *pattern;
|
||||
struct pattern *ret = NULL;
|
||||
struct lru64 *lru = NULL;
|
||||
|
||||
if (pat_lru_tree) {
|
||||
unsigned long long seed = pat_lru_seed ^ (unsigned long long)expr;
|
||||
|
||||
lru = lru64_get(XXH64(smp->data.str.str, smp->data.str.len, seed),
|
||||
pat_lru_tree, expr, expr->revision);
|
||||
if (lru && lru->domain)
|
||||
return lru->data;
|
||||
}
|
||||
|
||||
/* look in the list */
|
||||
list_for_each_entry(lst, &expr->patterns, list) {
|
||||
pattern = &lst->pat;
|
||||
|
||||
if (regex_exec2(pattern->ptr.reg, smp->data.str.str, smp->data.str.len))
|
||||
return pattern;
|
||||
if (regex_exec2(pattern->ptr.reg, smp->data.str.str, smp->data.str.len)) {
|
||||
ret = pattern;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
|
||||
if (lru)
|
||||
lru64_commit(lru, ret, expr, expr->revision);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Checks that the pattern matches the beginning of the tested string. */
|
||||
|
@ -530,6 +582,8 @@ struct pattern *pat_match_beg(struct sample *smp, struct pattern_expr *expr, int
|
|||
struct pattern_tree *elt;
|
||||
struct pattern_list *lst;
|
||||
struct pattern *pattern;
|
||||
struct pattern *ret = NULL;
|
||||
struct lru64 *lru = NULL;
|
||||
|
||||
/* Lookup a string in the expression's pattern tree. */
|
||||
if (!eb_is_empty(&expr->pattern_tree)) {
|
||||
|
@ -555,6 +609,15 @@ struct pattern *pat_match_beg(struct sample *smp, struct pattern_expr *expr, int
|
|||
}
|
||||
|
||||
/* look in the list */
|
||||
if (pat_lru_tree) {
|
||||
unsigned long long seed = pat_lru_seed ^ (unsigned long long)expr;
|
||||
|
||||
lru = lru64_get(XXH64(smp->data.str.str, smp->data.str.len, seed),
|
||||
pat_lru_tree, expr, expr->revision);
|
||||
if (lru && lru->domain)
|
||||
return lru->data;
|
||||
}
|
||||
|
||||
list_for_each_entry(lst, &expr->patterns, list) {
|
||||
pattern = &lst->pat;
|
||||
|
||||
|
@ -566,9 +629,14 @@ struct pattern *pat_match_beg(struct sample *smp, struct pattern_expr *expr, int
|
|||
(!icase && strncmp(pattern->ptr.str, smp->data.str.str, pattern->len) != 0))
|
||||
continue;
|
||||
|
||||
return pattern;
|
||||
ret = pattern;
|
||||
break;
|
||||
}
|
||||
return NULL;
|
||||
|
||||
if (lru)
|
||||
lru64_commit(lru, ret, expr, expr->revision);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Checks that the pattern matches the end of the tested string. */
|
||||
|
@ -577,6 +645,17 @@ struct pattern *pat_match_end(struct sample *smp, struct pattern_expr *expr, int
|
|||
int icase;
|
||||
struct pattern_list *lst;
|
||||
struct pattern *pattern;
|
||||
struct pattern *ret = NULL;
|
||||
struct lru64 *lru = NULL;
|
||||
|
||||
if (pat_lru_tree) {
|
||||
unsigned long long seed = pat_lru_seed ^ (unsigned long long)expr;
|
||||
|
||||
lru = lru64_get(XXH64(smp->data.str.str, smp->data.str.len, seed),
|
||||
pat_lru_tree, expr, expr->revision);
|
||||
if (lru && lru->domain)
|
||||
return lru->data;
|
||||
}
|
||||
|
||||
list_for_each_entry(lst, &expr->patterns, list) {
|
||||
pattern = &lst->pat;
|
||||
|
@ -589,9 +668,14 @@ struct pattern *pat_match_end(struct sample *smp, struct pattern_expr *expr, int
|
|||
(!icase && strncmp(pattern->ptr.str, smp->data.str.str + smp->data.str.len - pattern->len, pattern->len) != 0))
|
||||
continue;
|
||||
|
||||
return pattern;
|
||||
ret = pattern;
|
||||
break;
|
||||
}
|
||||
return NULL;
|
||||
|
||||
if (lru)
|
||||
lru64_commit(lru, ret, expr, expr->revision);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Checks that the pattern is included inside the tested string.
|
||||
|
@ -604,6 +688,17 @@ struct pattern *pat_match_sub(struct sample *smp, struct pattern_expr *expr, int
|
|||
char *c;
|
||||
struct pattern_list *lst;
|
||||
struct pattern *pattern;
|
||||
struct pattern *ret = NULL;
|
||||
struct lru64 *lru = NULL;
|
||||
|
||||
if (pat_lru_tree) {
|
||||
unsigned long long seed = pat_lru_seed ^ (unsigned long long)expr;
|
||||
|
||||
lru = lru64_get(XXH64(smp->data.str.str, smp->data.str.len, seed),
|
||||
pat_lru_tree, expr, expr->revision);
|
||||
if (lru && lru->domain)
|
||||
return lru->data;
|
||||
}
|
||||
|
||||
list_for_each_entry(lst, &expr->patterns, list) {
|
||||
pattern = &lst->pat;
|
||||
|
@ -617,19 +712,27 @@ struct pattern *pat_match_sub(struct sample *smp, struct pattern_expr *expr, int
|
|||
for (c = smp->data.str.str; c <= end; c++) {
|
||||
if (tolower(*c) != tolower(*pattern->ptr.str))
|
||||
continue;
|
||||
if (strncasecmp(pattern->ptr.str, c, pattern->len) == 0)
|
||||
return pattern;
|
||||
if (strncasecmp(pattern->ptr.str, c, pattern->len) == 0) {
|
||||
ret = pattern;
|
||||
goto leave;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (c = smp->data.str.str; c <= end; c++) {
|
||||
if (*c != *pattern->ptr.str)
|
||||
continue;
|
||||
if (strncmp(pattern->ptr.str, c, pattern->len) == 0)
|
||||
return pattern;
|
||||
if (strncmp(pattern->ptr.str, c, pattern->len) == 0) {
|
||||
ret = pattern;
|
||||
goto leave;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
leave:
|
||||
if (lru)
|
||||
lru64_commit(lru, ret, expr, expr->revision);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* This one is used by other real functions. It checks that the pattern is
|
||||
|
@ -2321,6 +2424,10 @@ void pattern_finalize_config(void)
|
|||
struct pat_ref *ref, *ref2, *ref3;
|
||||
struct list pr = LIST_HEAD_INIT(pr);
|
||||
|
||||
pat_lru_seed = random();
|
||||
if (global.tune.pattern_cache)
|
||||
pat_lru_tree = lru64_new(global.tune.pattern_cache);
|
||||
|
||||
list_for_each_entry(ref, &pattern_reference, list) {
|
||||
if (ref->unique_id == -1) {
|
||||
/* Look for the first free id. */
|
||||
|
|
Loading…
Reference in New Issue