MINOR: map: Add regex matching replacement

This patch declares a new map, "map_regm", which returns the value string
found in the map with its back references replaced by the content matched
by the regex in the input sample.
This commit is contained in:
Thierry Fournier 2016-02-10 22:55:20 +01:00 committed by Willy Tarreau
parent 443ea1a242
commit 8feaa661b6
5 changed files with 68 additions and 4 deletions

View File

@ -11815,13 +11815,19 @@ map_<match_type>_<output_type>(<map_file>[,<default_value>])
-----------+--------------+-----------------+-----------------+--------------- -----------+--------------+-----------------+-----------------+---------------
str | end | map_end | map_end_int | map_end_ip str | end | map_end | map_end_int | map_end_ip
-----------+--------------+-----------------+-----------------+--------------- -----------+--------------+-----------------+-----------------+---------------
str | reg | map_reg | map_reg_int | map_reg_ip | | map_reg | |
str | reg +-----------------+ map_reg_int | map_reg_ip
| | map_regm | |
-----------+--------------+-----------------+-----------------+--------------- -----------+--------------+-----------------+-----------------+---------------
int | int | map_int | map_int_int | map_int_ip int | int | map_int | map_int_int | map_int_ip
-----------+--------------+-----------------+-----------------+--------------- -----------+--------------+-----------------+-----------------+---------------
ip | ip | map_ip | map_ip_int | map_ip_ip ip | ip | map_ip | map_ip_int | map_ip_ip
-----------+--------------+-----------------+-----------------+--------------- -----------+--------------+-----------------+-----------------+---------------
The special map called "map_regm" expects matching zones in the regular
expression and modifies the output by replacing each back reference (like
"\1") with the corresponding matched text.
The file contains one key + value per line. Lines which start with '#' are The file contains one key + value per line. Lines which start with '#' are
ignored, just like empty lines. Leading tabs and spaces are stripped. The key ignored, just like empty lines. Leading tabs and spaces are stripped. The key
is then the first "word" (series of non-space/tabs characters), and the value is then the first "word" (series of non-space/tabs characters), and the value

View File

@ -67,6 +67,7 @@ int pat_idx_list_val(struct pattern_expr *expr, struct pattern *pat, char **err)
int pat_idx_list_ptr(struct pattern_expr *expr, struct pattern *pat, char **err); int pat_idx_list_ptr(struct pattern_expr *expr, struct pattern *pat, char **err);
int pat_idx_list_str(struct pattern_expr *expr, struct pattern *pat, char **err); int pat_idx_list_str(struct pattern_expr *expr, struct pattern *pat, char **err);
int pat_idx_list_reg(struct pattern_expr *expr, struct pattern *pat, char **err); int pat_idx_list_reg(struct pattern_expr *expr, struct pattern *pat, char **err);
int pat_idx_list_regm(struct pattern_expr *expr, struct pattern *pat, char **err);
int pat_idx_tree_ip(struct pattern_expr *expr, struct pattern *pat, char **err); int pat_idx_tree_ip(struct pattern_expr *expr, struct pattern *pat, char **err);
int pat_idx_tree_str(struct pattern_expr *expr, struct pattern *pat, char **err); int pat_idx_tree_str(struct pattern_expr *expr, struct pattern *pat, char **err);
int pat_idx_tree_pfx(struct pattern_expr *expr, struct pattern *pat, char **err); int pat_idx_tree_pfx(struct pattern_expr *expr, struct pattern *pat, char **err);
@ -174,6 +175,7 @@ struct pattern *pat_match_ip(struct sample *smp, struct pattern_expr *expr, int
* and restores the previous character when leaving. * and restores the previous character when leaving.
*/ */
struct pattern *pat_match_reg(struct sample *smp, struct pattern_expr *expr, int fill); struct pattern *pat_match_reg(struct sample *smp, struct pattern_expr *expr, int fill);
struct pattern *pat_match_regm(struct sample *smp, struct pattern_expr *expr, int fill);
/* /*
* pattern_ref manipulation. * pattern_ref manipulation.

View File

@ -87,6 +87,7 @@ enum {
PAT_MATCH_DOM, /* domain-like sub-string (str) */ PAT_MATCH_DOM, /* domain-like sub-string (str) */
PAT_MATCH_END, /* end of string (str) */ PAT_MATCH_END, /* end of string (str) */
PAT_MATCH_REG, /* regex (str -> reg) */ PAT_MATCH_REG, /* regex (str -> reg) */
PAT_MATCH_REGM, /* regex (str -> reg) with match zones */
/* keep this one last */ /* keep this one last */
PAT_MATCH_NUM PAT_MATCH_NUM
}; };

View File

@ -163,6 +163,7 @@ static int sample_conv_map(const struct arg *arg_p, struct sample *smp, void *pr
{ {
struct map_descriptor *desc; struct map_descriptor *desc;
struct pattern *pat; struct pattern *pat;
struct chunk *str;
/* get config */ /* get config */
desc = arg_p[0].data.map; desc = arg_p[0].data.map;
@ -172,8 +173,19 @@ static int sample_conv_map(const struct arg *arg_p, struct sample *smp, void *pr
/* Match case. */ /* Match case. */
if (pat) { if (pat) {
/* Copy sample. */
if (pat->data) { if (pat->data) {
/* In the regm case, merge the sample with the input. */
if ((long)private == PAT_MATCH_REGM) {
str = get_trash_chunk();
str->len = exp_replace(str->str, str->size, smp->data.u.str.str,
pat->data->u.str.str,
(regmatch_t *)smp->ctx.a[0]);
if (str->len == -1)
return 0;
smp->data.u.str = *str;
return 1;
}
/* Copy sample. */
smp->data = *pat->data; smp->data = *pat->data;
smp->flags |= SMP_F_CONST; smp->flags |= SMP_F_CONST;
return 1; return 1;
@ -242,6 +254,7 @@ static struct sample_conv_kw_list sample_conv_kws = {ILH, {
{ "map_dom", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_DOM }, { "map_dom", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_DOM },
{ "map_end", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_END }, { "map_end", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_END },
{ "map_reg", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_REG }, { "map_reg", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_REG },
{ "map_regm", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_REGM},
{ "map_int", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_SINT, SMP_T_STR, (void *)PAT_MATCH_INT }, { "map_int", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_SINT, SMP_T_STR, (void *)PAT_MATCH_INT },
{ "map_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_ADDR, SMP_T_STR, (void *)PAT_MATCH_IP }, { "map_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_ADDR, SMP_T_STR, (void *)PAT_MATCH_IP },

View File

@ -41,6 +41,7 @@ char *pat_match_names[PAT_MATCH_NUM] = {
[PAT_MATCH_DOM] = "dom", [PAT_MATCH_DOM] = "dom",
[PAT_MATCH_END] = "end", [PAT_MATCH_END] = "end",
[PAT_MATCH_REG] = "reg", [PAT_MATCH_REG] = "reg",
[PAT_MATCH_REGM] = "regm",
}; };
int (*pat_parse_fcts[PAT_MATCH_NUM])(const char *, struct pattern *, int, char **) = { int (*pat_parse_fcts[PAT_MATCH_NUM])(const char *, struct pattern *, int, char **) = {
@ -57,6 +58,7 @@ int (*pat_parse_fcts[PAT_MATCH_NUM])(const char *, struct pattern *, int, char *
[PAT_MATCH_DOM] = pat_parse_str, [PAT_MATCH_DOM] = pat_parse_str,
[PAT_MATCH_END] = pat_parse_str, [PAT_MATCH_END] = pat_parse_str,
[PAT_MATCH_REG] = pat_parse_reg, [PAT_MATCH_REG] = pat_parse_reg,
[PAT_MATCH_REGM] = pat_parse_reg,
}; };
int (*pat_index_fcts[PAT_MATCH_NUM])(struct pattern_expr *, struct pattern *, char **) = { int (*pat_index_fcts[PAT_MATCH_NUM])(struct pattern_expr *, struct pattern *, char **) = {
@ -73,6 +75,7 @@ int (*pat_index_fcts[PAT_MATCH_NUM])(struct pattern_expr *, struct pattern *, ch
[PAT_MATCH_DOM] = pat_idx_list_str, [PAT_MATCH_DOM] = pat_idx_list_str,
[PAT_MATCH_END] = pat_idx_list_str, [PAT_MATCH_END] = pat_idx_list_str,
[PAT_MATCH_REG] = pat_idx_list_reg, [PAT_MATCH_REG] = pat_idx_list_reg,
[PAT_MATCH_REGM] = pat_idx_list_regm,
}; };
void (*pat_delete_fcts[PAT_MATCH_NUM])(struct pattern_expr *, struct pat_ref_elt *) = { void (*pat_delete_fcts[PAT_MATCH_NUM])(struct pattern_expr *, struct pat_ref_elt *) = {
@ -89,6 +92,7 @@ void (*pat_delete_fcts[PAT_MATCH_NUM])(struct pattern_expr *, struct pat_ref_elt
[PAT_MATCH_DOM] = pat_del_list_ptr, [PAT_MATCH_DOM] = pat_del_list_ptr,
[PAT_MATCH_END] = pat_del_list_ptr, [PAT_MATCH_END] = pat_del_list_ptr,
[PAT_MATCH_REG] = pat_del_list_reg, [PAT_MATCH_REG] = pat_del_list_reg,
[PAT_MATCH_REGM] = pat_del_list_reg,
}; };
void (*pat_prune_fcts[PAT_MATCH_NUM])(struct pattern_expr *) = { void (*pat_prune_fcts[PAT_MATCH_NUM])(struct pattern_expr *) = {
@ -105,6 +109,7 @@ void (*pat_prune_fcts[PAT_MATCH_NUM])(struct pattern_expr *) = {
[PAT_MATCH_DOM] = pat_prune_ptr, [PAT_MATCH_DOM] = pat_prune_ptr,
[PAT_MATCH_END] = pat_prune_ptr, [PAT_MATCH_END] = pat_prune_ptr,
[PAT_MATCH_REG] = pat_prune_reg, [PAT_MATCH_REG] = pat_prune_reg,
[PAT_MATCH_REGM] = pat_prune_reg,
}; };
struct pattern *(*pat_match_fcts[PAT_MATCH_NUM])(struct sample *, struct pattern_expr *, int) = { struct pattern *(*pat_match_fcts[PAT_MATCH_NUM])(struct sample *, struct pattern_expr *, int) = {
@ -121,6 +126,7 @@ struct pattern *(*pat_match_fcts[PAT_MATCH_NUM])(struct sample *, struct pattern
[PAT_MATCH_DOM] = pat_match_dom, [PAT_MATCH_DOM] = pat_match_dom,
[PAT_MATCH_END] = pat_match_end, [PAT_MATCH_END] = pat_match_end,
[PAT_MATCH_REG] = pat_match_reg, [PAT_MATCH_REG] = pat_match_reg,
[PAT_MATCH_REGM] = pat_match_regm,
}; };
/* Just used for checking configuration compatibility */ /* Just used for checking configuration compatibility */
@ -138,6 +144,7 @@ int pat_match_types[PAT_MATCH_NUM] = {
[PAT_MATCH_DOM] = SMP_T_STR, [PAT_MATCH_DOM] = SMP_T_STR,
[PAT_MATCH_END] = SMP_T_STR, [PAT_MATCH_END] = SMP_T_STR,
[PAT_MATCH_REG] = SMP_T_STR, [PAT_MATCH_REG] = SMP_T_STR,
[PAT_MATCH_REGM] = SMP_T_STR,
}; };
/* this struct is used to return information */ /* this struct is used to return information */
@ -539,6 +546,30 @@ struct pattern *pat_match_bin(struct sample *smp, struct pattern_expr *expr, int
return ret; return ret;
} }
/* Executes a regex. It temporarily changes the data to add a trailing zero,
* and restores the previous character when leaving. This function fills
* a matching array.
*/
struct pattern *pat_match_regm(struct sample *smp, struct pattern_expr *expr, int fill)
{
struct pattern_list *lst;
struct pattern *pattern;
struct pattern *ret = NULL;
list_for_each_entry(lst, &expr->patterns, list) {
pattern = &lst->pat;
if (regex_exec_match2(pattern->ptr.reg, smp->data.u.str.str, smp->data.u.str.len,
MAX_MATCH, pmatch, 0)) {
ret = pattern;
smp->ctx.a[0] = pmatch;
break;
}
}
return ret;
}
/* Executes a regex. It temporarily changes the data to add a trailing zero, /* Executes a regex. It temporarily changes the data to add a trailing zero,
* and restores the previous character when leaving. * and restores the previous character when leaving.
*/ */
@ -1146,7 +1177,7 @@ int pat_idx_list_str(struct pattern_expr *expr, struct pattern *pat, char **err)
return 1; return 1;
} }
int pat_idx_list_reg(struct pattern_expr *expr, struct pattern *pat, char **err) int pat_idx_list_reg_cap(struct pattern_expr *expr, struct pattern *pat, int cap, char **err)
{ {
struct pattern_list *patl; struct pattern_list *patl;
@ -1169,7 +1200,8 @@ int pat_idx_list_reg(struct pattern_expr *expr, struct pattern *pat, char **err)
} }
/* compile regex */ /* compile regex */
if (!regex_comp(pat->ptr.str, patl->pat.ptr.reg, !(expr->mflags & PAT_MF_IGNORE_CASE), 0, err)) { if (!regex_comp(pat->ptr.str, patl->pat.ptr.reg,
!(expr->mflags & PAT_MF_IGNORE_CASE), cap, err)) {
free(patl->pat.ptr.reg); free(patl->pat.ptr.reg);
free(patl); free(patl);
return 0; return 0;
@ -1183,6 +1215,16 @@ int pat_idx_list_reg(struct pattern_expr *expr, struct pattern *pat, char **err)
return 1; return 1;
} }
/* Thin wrapper around pat_idx_list_reg_cap() which passes 0 as <cap>; that
 * value is forwarded by pat_idx_list_reg_cap() to regex_comp(). The return
 * value is the one of pat_idx_list_reg_cap().
 * NOTE(review): presumably cap == 0 compiles the regex without capture
 * support - confirm against regex_comp().
 */
int pat_idx_list_reg(struct pattern_expr *expr, struct pattern *pat, char **err)
{
return pat_idx_list_reg_cap(expr, pat, 0, err);
}
/* Thin wrapper around pat_idx_list_reg_cap() which passes 1 as <cap>; that
 * value is forwarded by pat_idx_list_reg_cap() to regex_comp(). The return
 * value is the one of pat_idx_list_reg_cap(). This is the index function
 * registered for PAT_MATCH_REGM.
 * NOTE(review): presumably cap == 1 compiles the regex with capture support
 * so that match zones can be retrieved - confirm against regex_comp().
 */
int pat_idx_list_regm(struct pattern_expr *expr, struct pattern *pat, char **err)
{
return pat_idx_list_reg_cap(expr, pat, 1, err);
}
int pat_idx_tree_ip(struct pattern_expr *expr, struct pattern *pat, char **err) int pat_idx_tree_ip(struct pattern_expr *expr, struct pattern *pat, char **err)
{ {
unsigned int mask; unsigned int mask;