MEDIUM: pattern: Extract the index process from the pat_parse_*() functions

Now the pat_parse_*() functions only parse the incoming data. The input
"pattern" struct can be preallocated. If the parser needs additional
buffers, it allocates the memory itself.

The function pattern_register() calls the parser, processes the key
indexation and associates the "sample_storage" used by maps.
This commit is contained in:
Thierry FOURNIER 2013-12-06 19:06:43 +01:00 committed by Willy Tarreau
parent f3489d2ccd
commit 7148ce6ef4
7 changed files with 180 additions and 135 deletions

View File

@ -28,7 +28,7 @@
#include <common/standard.h>
#include <types/pattern.h>
/* parse the <text> with <expr> compliant parser. <pattern> is a context for
/* parse the <args> with <expr> compliant parser. <pattern> is a context for
* the current parsed acl. It must be initialized to NULL:
*
* struct pattern *pattern = NULL
@ -37,11 +37,10 @@
* patflag are a lot of 'PAT_F_*' flags pattern compatible. see
* <types/acl.h>.
*
* The function returns 1 if the processing is ok, return -1 if the parser
* fails, with <err> message filled. It returns -2 in "out of memory"
* error case.
* The function returns 1 if the processing is ok, or 0
* if the parser fails, with the <err> message filled.
*/
int pattern_register(struct pattern_expr *expr, char *text, struct sample_storage *smp, struct pattern **pattern, int patflags, char **err);
int pattern_register(struct pattern_expr *expr, const char **args, struct sample_storage *smp, struct pattern **pattern, int patflags, char **err);
/* return the PAT_MATCH_* index for match name "name", or < 0 if not found */
static inline int pat_find_match_name(const char *name)
@ -69,7 +68,7 @@ enum pat_match_res pattern_exec_match(struct pattern_expr *expr, struct sample *
/* ignore the current line */
int pat_parse_nothing(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err);
int pat_parse_nothing(const char **text, struct pattern *pattern, int *opaque, char **err);
/* NB: For two strings to be identical, it is required that their lengths match */
enum pat_match_res pat_match_str(struct sample *smp, struct pattern *pattern);
@ -84,37 +83,37 @@ enum pat_match_res pat_match_len(struct sample *smp, struct pattern *pattern);
enum pat_match_res pat_match_int(struct sample *smp, struct pattern *pattern);
/* Parse an integer. It is put both in min and max. */
int pat_parse_int(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err);
int pat_parse_int(const char **text, struct pattern *pattern, int *opaque, char **err);
/* Parse len like an integer, but specify expected string type */
int pat_parse_len(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err);
int pat_parse_len(const char **text, struct pattern *pattern, int *opaque, char **err);
/* Parse an version. It is put both in min and max. */
int pat_parse_dotted_ver(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err);
int pat_parse_dotted_ver(const char **text, struct pattern *pattern, int *opaque, char **err);
/* Parse a range of integers delimited by either ':' or '-'. If only one
* integer is read, it is set as both min and max.
*/
int pat_parse_range(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err);
int pat_parse_range(const char **text, struct pattern *pattern, int *opaque, char **err);
/* Parse a string. It is allocated and duplicated. */
int pat_parse_str(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err);
int pat_parse_str(const char **text, struct pattern *pattern, int *opaque, char **err);
/* Parse a hexa binary definition. It is allocated and duplicated. */
int pat_parse_bin(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err);
int pat_parse_bin(const char **text, struct pattern *pattern, int *opaque, char **err);
/* Parse and concatenate strings into one. It is allocated and duplicated. */
int pat_parse_strcat(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err);
int pat_parse_strcat(const char **text, struct pattern *pattern, int *opaque, char **err);
/* Parse a regex. It is allocated. */
int pat_parse_reg(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err);
int pat_parse_reg(const char **text, struct pattern *pattern, int *opaque, char **err);
/* Parse an IP address and an optional mask in the form addr[/mask].
* The addr may either be an IPv4 address or a hostname. The mask
* may either be a dotted mask or a number of bits. Returns 1 if OK,
* otherwise 0.
*/
int pat_parse_ip(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err);
int pat_parse_ip(const char **text, struct pattern *pattern, int *opaque, char **err);
/* always return false */
enum pat_match_res pat_match_nothing(struct sample *smp, struct pattern *pattern);

View File

@ -92,7 +92,7 @@ struct acl_expr;
struct acl_keyword {
const char *kw;
char *fetch_kw;
int (*parse)(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err);
int (*parse)(const char **text, struct pattern *pattern, int *opaque, char **err);
enum pat_match_res (*match)(struct sample *smp, struct pattern *pattern);
/* must be after the config params */
struct sample_fetch *smp; /* the sample fetch we depend on */

View File

@ -65,8 +65,7 @@ enum pat_match_res {
enum {
PAT_F_IGNORE_CASE = 1 << 0, /* ignore case */
PAT_F_FROM_FILE = 1 << 1, /* pattern comes from a file */
PAT_F_TREE_OK = 1 << 2, /* the pattern parser is allowed to build a tree */
PAT_F_TREE = 1 << 3, /* some patterns are arranged in a tree */
PAT_F_TREE = 1 << 2, /* some patterns are arranged in a tree */
};
/* ACL match methods */
@ -152,14 +151,14 @@ struct pattern {
* are grouped together in order to optimize caching.
*/
struct pattern_expr {
int (*parse)(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err);
int (*parse)(const char **text, struct pattern *pattern, int *opaque, char **err);
enum pat_match_res (*match)(struct sample *smp, struct pattern *pattern);
struct list patterns; /* list of acl_patterns */
struct eb_root pattern_tree; /* may be used for lookup in large datasets */
};
extern char *pat_match_names[PAT_MATCH_NUM];
extern int (*pat_parse_fcts[PAT_MATCH_NUM])(const char **, struct pattern *, struct sample_storage *, int *, char **);
extern int (*pat_parse_fcts[PAT_MATCH_NUM])(const char **, struct pattern *, int *, char **);
extern enum pat_match_res (*pat_match_fcts[PAT_MATCH_NUM])(struct sample *, struct pattern *);
extern int pat_match_types[PAT_MATCH_NUM];

View File

@ -133,7 +133,7 @@ struct acl_expr *parse_acl_expr(const char **args, char **err, struct arg_list *
struct acl_expr *expr;
struct acl_keyword *aclkw;
struct pattern *pattern;
int opaque, patflags;
int patflags;
const char *arg;
struct sample_expr *smp = NULL;
const char *p;
@ -497,23 +497,9 @@ struct acl_expr *parse_acl_expr(const char **args, char **err, struct arg_list *
}
/* now parse all patterns */
opaque = 0;
while (**args) {
int ret;
pattern = (struct pattern *)calloc(1, sizeof(*pattern));
if (!pattern) {
memprintf(err, "out of memory when parsing ACL pattern");
goto out_free_expr;
}
pattern->flags = patflags;
ret = expr->pat.parse(args, pattern, NULL, &opaque, err);
if (!ret)
goto out_free_pattern;
LIST_ADDQ(&expr->pat.patterns, &pattern->list);
args += ret;
}
pattern = NULL;
if (!pattern_register(&expr->pat, args, NULL, &pattern, patflags, err))
goto out_free_pattern;
return expr;

View File

@ -306,6 +306,7 @@ static int map_parse_and_index(struct map_descriptor *desc,
char **err)
{
struct sample_storage *smp;
const char *args[2];
/* use new smp for storing value */
smp = calloc(1, sizeof(*smp));
@ -319,8 +320,10 @@ static int map_parse_and_index(struct map_descriptor *desc,
return 0;
}
/* read and convert key */
if (!pattern_register(desc->pat, ent->key, smp, pattern, patflags, err))
/* register key */
args[0] = ent->key;
args[1] = "";
if (!pattern_register(desc->pat, args, smp, pattern, patflags, err))
return 0;
return 1;

View File

@ -40,7 +40,7 @@ char *pat_match_names[PAT_MATCH_NUM] = {
[PAT_MATCH_REG] = "reg",
};
int (*pat_parse_fcts[PAT_MATCH_NUM])(const char **, struct pattern *, struct sample_storage *, int *, char **) = {
int (*pat_parse_fcts[PAT_MATCH_NUM])(const char **, struct pattern *, int *, char **) = {
[PAT_MATCH_FOUND] = pat_parse_nothing,
[PAT_MATCH_BOOL] = pat_parse_nothing,
[PAT_MATCH_INT] = pat_parse_int,
@ -94,7 +94,7 @@ int pat_match_types[PAT_MATCH_NUM] = {
*/
/* ignore the current line */
int pat_parse_nothing(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
int pat_parse_nothing(const char **text, struct pattern *pattern, int *opaque, char **err)
{
return 1;
}
@ -419,56 +419,31 @@ static void *pat_lookup_ip(struct sample *smp, struct pattern_expr *expr)
}
/* Parse a string. It is allocated and duplicated. */
int pat_parse_str(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
int pat_parse_str(const char **text, struct pattern *pattern, int *opaque, char **err)
{
int len;
len = strlen(*text);
pattern->type = SMP_T_CSTR;
pattern->expect_type = SMP_T_CSTR;
if (pattern->flags & PAT_F_TREE_OK) {
/* we're allowed to put the data in a tree whose root is pointed
* to by val.tree.
*/
struct pat_idx_elt *node;
node = calloc(1, sizeof(*node) + len + 1);
if (!node) {
memprintf(err, "out of memory while loading string pattern");
return 0;
}
node->smp = smp;
memcpy(node->node.key, *text, len + 1);
if (ebst_insert(pattern->val.tree, &node->node) != &node->node)
free(node); /* was a duplicate */
pattern->flags |= PAT_F_TREE; /* this pattern now contains a tree */
return 1;
}
pattern->ptr.str = strdup(*text);
pattern->smp = smp;
if (!pattern->ptr.str) {
memprintf(err, "out of memory while loading string pattern");
return 0;
}
pattern->len = len;
pattern->len = strlen(*text);
return 1;
}
/* Parse a binary written in hexa. It is allocated. */
int pat_parse_bin(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
int pat_parse_bin(const char **text, struct pattern *pattern, int *opaque, char **err)
{
pattern->type = SMP_T_CBIN;
pattern->expect_type = SMP_T_CBIN;
pattern->smp = smp;
return parse_binary(*text, &pattern->ptr.str, &pattern->len, err);
}
/* Parse and concatenate all further strings into one. */
int
pat_parse_strcat(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
pat_parse_strcat(const char **text, struct pattern *pattern, int *opaque, char **err)
{
int len = 0, i;
@ -479,7 +454,6 @@ pat_parse_strcat(const char **text, struct pattern *pattern, struct sample_stora
pattern->type = SMP_T_CSTR;
pattern->ptr.str = s = calloc(1, len);
pattern->smp = smp;
if (!pattern->ptr.str) {
memprintf(err, "out of memory while loading pattern");
return 0;
@ -500,7 +474,7 @@ static void pat_free_reg(void *ptr)
}
/* Parse a regex. It is allocated. */
int pat_parse_reg(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
int pat_parse_reg(const char **text, struct pattern *pattern, int *opaque, char **err)
{
regex *preg;
@ -518,7 +492,6 @@ int pat_parse_reg(const char **text, struct pattern *pattern, struct sample_stor
pattern->ptr.reg = preg;
pattern->freeptrbuf = &pat_free_reg;
pattern->smp = smp;
pattern->expect_type = SMP_T_CSTR;
return 1;
}
@ -537,7 +510,7 @@ int pat_parse_reg(const char **text, struct pattern *pattern, struct sample_stor
* the caller will have to free it.
*
*/
int pat_parse_int(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
int pat_parse_int(const char **text, struct pattern *pattern, int *opaque, char **err)
{
signed long long i;
unsigned int j, last, skip = 0;
@ -545,7 +518,7 @@ int pat_parse_int(const char **text, struct pattern *pattern, struct sample_stor
pattern->type = SMP_T_UINT;
pattern->expect_type = SMP_T_UINT;
pattern->smp = smp;
while (!isdigit((unsigned char)*ptr)) {
switch (get_std_op(ptr)) {
case STD_OP_EQ: *opaque = 0; break;
@ -610,11 +583,11 @@ int pat_parse_int(const char **text, struct pattern *pattern, struct sample_stor
return skip + 1;
}
int pat_parse_len(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
int pat_parse_len(const char **text, struct pattern *pattern, int *opaque, char **err)
{
int ret;
ret = pat_parse_int(text, pattern, smp, opaque, err);
ret = pat_parse_int(text, pattern, opaque, err);
pattern->expect_type = SMP_T_CSTR;
return ret;
}
@ -639,7 +612,7 @@ int pat_parse_len(const char **text, struct pattern *pattern, struct sample_stor
* acl valid_ssl ssl_req_proto 3.0-3.1
*
*/
int pat_parse_dotted_ver(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
int pat_parse_dotted_ver(const char **text, struct pattern *pattern, int *opaque, char **err)
{
signed long long i;
unsigned int j, last, skip = 0;
@ -698,7 +671,6 @@ int pat_parse_dotted_ver(const char **text, struct pattern *pattern, struct samp
return 0;
}
pattern->smp = smp;
pattern->expect_type = SMP_T_CSTR;
if (!last)
@ -731,38 +703,11 @@ int pat_parse_dotted_ver(const char **text, struct pattern *pattern, struct samp
* may either be a dotted mask or a number of bits. Returns 1 if OK,
* otherwise 0. NOTE: IP address patterns are typed (IPV4/IPV6).
*/
int pat_parse_ip(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
int pat_parse_ip(const char **text, struct pattern *pattern, int *opaque, char **err)
{
struct eb_root *tree = NULL;
if (pattern->flags & PAT_F_TREE_OK)
tree = pattern->val.tree;
pattern->expect_type = SMP_T_ADDR;
if (str2net(*text, &pattern->val.ipv4.addr, &pattern->val.ipv4.mask)) {
unsigned int mask = ntohl(pattern->val.ipv4.mask.s_addr);
struct pat_idx_elt *node;
/* check if the mask is contiguous so that we can insert the
* network into the tree. A continuous mask has only ones on
* the left. This means that this mask + its lower bit added
* once again is null.
*/
pattern->type = SMP_T_IPV4;
if (mask + (mask & -mask) == 0 && tree) {
mask = mask ? 33 - flsnz(mask & -mask) : 0; /* equals cidr value */
/* FIXME: insert <addr>/<mask> into the tree here */
node = calloc(1, sizeof(*node) + 4); /* reserve 4 bytes for IPv4 address */
if (!node) {
memprintf(err, "out of memory while loading IPv4 pattern");
return 0;
}
node->smp = smp;
memcpy(node->node.key, &pattern->val.ipv4.addr, 4); /* network byte order */
node->node.node.pfx = mask;
if (ebmb_insert_prefix(tree, &node->node, 4) != &node->node)
free(node); /* was a duplicate */
pattern->flags |= PAT_F_TREE;
return 1;
}
return 1;
}
else if (str62net(*text, &pattern->val.ipv6.addr, &pattern->val.ipv6.mask)) {
@ -831,42 +776,152 @@ void pattern_init_expr(struct pattern_expr *expr)
* return -1 if the parser fail. The err message is filled.
* return -2 if out of memory
*/
int pattern_register(struct pattern_expr *expr, char *text,
int pattern_register(struct pattern_expr *expr, const char **args,
struct sample_storage *smp,
struct pattern **pattern,
int patflags, char **err)
{
const char *args[2];
int opaque = 0;
unsigned int mask = 0;
struct pat_idx_elt *node;
int len;
int ret;
args[0] = text;
args[1] = "";
/* eat args */
while (**args) {
/* we keep the previous pattern along iterations as long as it's not used */
if (!*pattern)
*pattern = (struct pattern *)malloc(sizeof(**pattern));
if (!*pattern)
return -1;
/* we keep the previous pattern along iterations as long as it's not used */
if (!*pattern)
*pattern = (struct pattern *)malloc(sizeof(**pattern));
if (!*pattern) {
memprintf(err, "out of memory while loading pattern");
return 0;
}
memset(*pattern, 0, sizeof(**pattern));
(*pattern)->flags = patflags;
memset(*pattern, 0, sizeof(**pattern));
(*pattern)->flags = patflags;
if (!((*pattern)->flags & PAT_F_IGNORE_CASE) &&
(expr->match == pat_match_str || expr->match == pat_match_ip)) {
/* we pre-set the data pointer to the tree's head so that functions
* which are able to insert in a tree know where to do that.
ret = expr->parse(args, *pattern, &opaque, err);
if (!ret)
return 0;
/* each parser return the number of args eated */
args += ret;
/*
*
* SMP_T_CSTR tree indexation
*
* The match "pat_match_str()" can use tree.
*
*/
(*pattern)->flags |= PAT_F_TREE_OK;
(*pattern)->val.tree = &expr->pattern_tree;
}
if (expr->match == pat_match_str) {
if (!expr->parse(args, *pattern, smp, &opaque, err))
return -1;
/* If the flag PAT_F_IGNORE_CASE is set, we cannot use trees */
if ((*pattern)->flags & PAT_F_IGNORE_CASE)
goto just_chain_the_pattern;
/* if the parser did not feed the tree, let's chain the pattern to the list */
if (!((*pattern)->flags & PAT_F_TREE)) {
LIST_ADDQ(&expr->patterns, &(*pattern)->list);
*pattern = NULL; /* get a new one */
/* Process the key len */
len = strlen((*pattern)->ptr.str) + 1;
/* node memory allocation */
node = calloc(1, sizeof(*node) + len);
if (!node) {
memprintf(err, "out of memory while loading pattern");
return 0;
}
/* copy the pointer to sample associated to this node */
node->smp = smp;
/* copy the string */
memcpy(node->node.key, (*pattern)->ptr.str, len);
/* the "map_parser_str()" function always duplicate string information */
free((*pattern)->ptr.str);
/* we pre-set the data pointer to the tree's head so that functions
* which are able to insert in a tree know where to do that.
*
* because "val" is an "union", the previous data are crushed.
*/
(*pattern)->flags |= PAT_F_TREE;
(*pattern)->val.tree = &expr->pattern_tree;
/* index the new node */
if (ebst_insert((*pattern)->val.tree, &node->node) != &node->node)
free(node); /* was a duplicate */
}
/*
*
* SMP_T_IPV4 tree indexation
*
* The match "pat_match_ip()" can use tree.
*
*/
else if (expr->match == pat_match_ip) {
/* Only IPv4 can be indexed */
if ((*pattern)->type != SMP_T_IPV4)
goto just_chain_the_pattern;
/* in IPv4 case, check if the mask is contiguous so that we can
* insert the network into the tree. A continuous mask has only
* ones on the left. This means that this mask + its lower bit
* added once again is null.
*/
mask = ntohl((*pattern)->val.ipv4.mask.s_addr);
if (mask + (mask & -mask) != 0)
goto just_chain_the_pattern;
mask = mask ? 33 - flsnz(mask & -mask) : 0; /* equals cidr value */
/* node memory allocation */
node = calloc(1, sizeof(*node) + 4);
if (!node) {
memprintf(err, "out of memory while loading pattern");
return 0;
}
/* copy the pointer to sample associated to this node */
node->smp = smp;
/* FIXME: insert <addr>/<mask> into the tree here */
memcpy(node->node.key, &(*pattern)->val.ipv4.addr, 4); /* network byte order */
/* we pre-set the data pointer to the tree's head so that functions
* which are able to insert in a tree know where to do that.
*
* because "val" is an "union", the previous data are crushed.
*/
(*pattern)->flags |= PAT_F_TREE;
(*pattern)->val.tree = &expr->pattern_tree;
/* Index the new node
* FIXME: insert <addr>/<mask> into the tree here
*/
node->node.node.pfx = mask;
if (ebmb_insert_prefix((*pattern)->val.tree, &node->node, 4) != &node->node)
free(node); /* was a duplicate */
}
/*
*
* if the parser did not feed the tree, let's chain the pattern to the list
*
*/
else {
just_chain_the_pattern:
LIST_ADDQ(&expr->patterns, &(*pattern)->list);
/* copy the pointer to sample associated to this node */
(*pattern)->smp = smp;
/* get a new one */
*pattern = NULL;
}
}
return 1;
@ -886,6 +941,7 @@ int pattern_read_from_file(struct pattern_expr *expr,
int ret = 0;
int line = 0;
int code;
const char *args[2];
file = fopen(filename, "r");
if (!file) {
@ -920,7 +976,10 @@ int pattern_read_from_file(struct pattern_expr *expr,
if (c == arg)
continue;
code = pattern_register(expr, arg, NULL, &pattern, patflags, err);
args[0] = arg;
args[1] = "";
code = pattern_register(expr, args, NULL, &pattern, patflags, err);
if (code == -2) {
memprintf(err, "out of memory when loading patterns from file <%s>", filename);
goto out_close;

View File

@ -8735,14 +8735,13 @@ smp_prefetch_http(struct proxy *px, struct session *s, void *l7, unsigned int op
* We use the pre-parsed method if it is known, and store its number as an
* integer. If it is unknown, we use the pointer and the length.
*/
static int pat_parse_meth(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
static int pat_parse_meth(const char **text, struct pattern *pattern, int *opaque, char **err)
{
int len, meth;
len = strlen(*text);
meth = find_http_meth(*text, len);
pattern->smp = smp;
pattern->val.i = meth;
if (meth == HTTP_METH_OTHER) {
pattern->ptr.str = strdup(*text);