MEDIUM: sample: add the "map" converter

Add a new converter with the following prototype :

  map(<map_file>[,<default_value>])
  map_<match_type>(<map_file>[,<default_value>])
  map_<match_type>_<output_type>(<map_file>[,<default_value>])

It searches for the input value in <map_file> using the <match_type>
matching method, and returns the associated value converted to the type
<output_type>. If the input value cannot be found in the <map_file>,
the converter returns the <default_value>. If the <default_value> is
not set, the converter fails and acts as if no input value could be
fetched. If the <match_type> is not set, it defaults to "str".
Likewise, if the <output_type> is not set, it defaults to "str". For
convenience, the "map" keyword is an alias for "map_str" and maps a
string to another string. The following array contains the list of all
the map* converters.

                 +----+----------+---------+-------------+------------+
                 |     `-_   out |         |             |            |
                 | input  `-_    |   str   |     int     |     ip     |
                 | / match   `-_ |         |             |            |
                 +---------------+---------+-------------+------------+
                 | str   / str   | map_str | map_str_int | map_str_ip |
                 | str   / sub   | map_sub | map_sub_int | map_sub_ip |
                 | str   / dir   | map_dir | map_dir_int | map_dir_ip |
                 | str   / dom   | map_dom | map_dom_int | map_dom_ip |
                 | str   / end   | map_end | map_end_int | map_end_ip |
                 | str   / reg   | map_reg | map_reg_int | map_reg_ip |
                 | int   / int   | map_int | map_int_int | map_int_ip |
                 | ip    / ip    | map_ip  | map_ip_int  | map_ip_ip  |
                 +---------------+---------+-------------+------------+

The names are intentionally chosen to reflect the same match methods
as ACLs use.
This commit is contained in:
Thierry FOURNIER 2013-11-26 11:52:33 +01:00 committed by Willy Tarreau
parent 4b5e422759
commit d5f624dde7
4 changed files with 568 additions and 4 deletions

View File

@ -637,7 +637,7 @@ OBJS = src/haproxy.o src/sessionhash.o src/base64.o src/protocol.o \
src/stream_interface.o src/dumpstats.o src/proto_tcp.o \
src/session.o src/hdr_idx.o src/ev_select.o src/signal.o \
src/acl.o src/sample.o src/memory.o src/freq_ctr.o src/auth.o \
src/compression.o src/payload.o src/hash.o src/pattern.o
src/compression.o src/payload.o src/hash.o src/pattern.o src/map.o
EBTREE_OBJS = $(EBTREE_DIR)/ebtree.o \
$(EBTREE_DIR)/eb32tree.o $(EBTREE_DIR)/eb64tree.o \

View File

@ -8842,6 +8842,59 @@ The currently available list of transformation keywords include :
with a positive offset, or Last-Modified values when the
offset is negative.
map(<map_file>[,<default_value>])
map_<match_type>(<map_file>[,<default_value>])
map_<match_type>_<output_type>(<map_file>[,<default_value>])
Search the input value from <map_file> using the <match_type>
matching method, and return the associated value converted to
the type <output_type>. If the input value cannot be found in
the <map_file>, the converter returns the <default_value>. If
the <default_value> is not set, the converter fails and acts
as if no input value could be fetched. If the <match_type> is
not set, it defaults to "str". Likewise, if the <output_type>
is not set, it defaults to "str". For convenience, the "map"
keyword is an alias for "map_str" and maps a string to another
string. The following array contains the list of all
the map* converters.
It is important to avoid overlapping between the keys : IP
addresses and strings are stored in trees, so the first of the
finest match will be used. Other keys are stored in lists, so
the first matching occurrence will be used.
+----+----------+---------+-------------+------------+
| `-_ out | | | |
| input `-_ | str | int | ip |
| / match `-_ | | | |
+---------------+---------+-------------+------------+
| str / str | map_str | map_str_int | map_str_ip |
| str / sub | map_sub | map_sub_int | map_sub_ip |
| str / dir | map_dir | map_dir_int | map_dir_ip |
| str / dom | map_dom | map_dom_int | map_dom_ip |
| str / end | map_end | map_end_int | map_end_ip |
| str / reg | map_reg | map_reg_int | map_reg_ip |
| int / int | map_int | map_int_int | map_int_ip |
| ip / ip | map_ip | map_ip_int | map_ip_ip |
+---------------+---------+-------------+------------+
The file contains one key + value per line. Lines which start
with '#' are ignored, just like empty lines. Leading tabs and
spaces are stripped. The key is then the first "word" (series
of non-space/tabs characters), and the value is what follows
this series of space/tab till the end of the line excluding
trailing spaces/tabs.
Example :
# this is a comment and is ignored
2.22.246.0/23 United Kingdom \n
<-><-----------><--><------------><---->
| | | | `- trailing spaces ignored
| | | `----------- value
| | `--------------------- middle spaces ignored
| `---------------------------- key
`------------------------------------ leading spaces ignored
7.3.1. Fetching samples from internal states
--------------------------------------------
@ -9290,6 +9343,10 @@ src : ip
be the address of a client behind another PROXY-protocol compatible component
for all rule sets except "tcp-request connection" which sees the real address.
Example:
# add an HTTP header in requests with the originating address' country
http-request set-header X-Country %[src,map_ip(geoip.lst)]
src_bytes_in_rate([<table>]) : integer
Returns the average bytes rate from the incoming connection's source address
in the current proxy's stick-table or in the designated stick-table, measured

View File

@ -56,11 +56,10 @@ struct sample_storage;
struct map_descriptor {
struct list list; /* used for listing */
struct map_reference *ref; /* the reference used for unindexed entries */
struct sample_conv *conv; /* original convertissor descriptor */
struct sample_conv *conv; /* original converter descriptor */
int (*parse)(const char *text, /* The function that can parse the output value */
struct sample_storage *smp);
struct acl_expr acl; /* dummy acl expression. just for using the acl
match primitive and storage system */
struct pattern_expr pat; /* the pattern matching associated to the map */
char *default_value; /* a copy of default value. This copy is
useful if the type is str */
struct sample_storage *def; /* contain the default value */

508
src/map.c Normal file
View File

@ -0,0 +1,508 @@
/*
* MAP management functions.
*
* Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
*/
#include <limits.h>
#include <stdio.h>
#include <common/standard.h>
#include <types/global.h>
#include <types/map.h>
#include <proto/arg.h>
#include <proto/pattern.h>
#include <proto/sample.h>
/* This function return existing map reference or return NULL. */
static struct map_reference *map_get_reference(const char *reference)
{
struct map_reference *ref;
/* process the lookup */
list_for_each_entry(ref, &maps, list)
if (strcmp(ref->reference, reference) == 0)
return ref;
return NULL;
}
/* Converts the textual IPv4 address <text> and stores it into <smp>.
 * On success the sample type is set to SMP_T_IPV4 and 1 is returned.
 * When buf2ip() rejects the text, 0 is returned and the sample type is
 * left untouched.
 */
static int map_parse_ip(const char *text, struct sample_storage *smp)
{
	if (buf2ip(text, strlen(text), &smp->data.ipv4)) {
		smp->type = SMP_T_IPV4;
		return 1;
	}
	return 0;
}
/* Converts the textual IPv6 address <text> and stores it into <smp>.
 * On success the sample type is set to SMP_T_IPV6 and 1 is returned.
 * When buf2ip6() rejects the text, 0 is returned and the sample type is
 * left untouched.
 */
static int map_parse_ip6(const char *text, struct sample_storage *smp)
{
	if (buf2ip6(text, strlen(text), &smp->data.ipv6)) {
		smp->type = SMP_T_IPV6;
		return 1;
	}
	return 0;
}
/* Makes <smp> point directly to <text>; nothing is copied, so the caller
 * must keep <text> allocated for as long as the sample is in use. The
 * sample is typed SMP_T_CSTR, its length is the length of <text> and its
 * size also accounts for the trailing zero. Always returns 1.
 */
static int map_parse_str(const char *text, struct sample_storage *smp)
{
	smp->type = SMP_T_CSTR;

	/* the "const" qualifier is dropped here; the SMP_T_CSTR type is
	 * what tells users of the sample that the string is constant.
	 */
	smp->data.str.str = (char *)text;
	smp->data.str.len = strlen(text);
	smp->data.str.size = smp->data.str.len + 1;

	return 1;
}
/* Parse a base-10 integer from <text> and store it into <smp>. The output
 * type is SMP_T_SINT if the number is negative, or SMP_T_UINT if it is
 * positive or zero.
 *
 * Returns 1 on success. Returns 0 (error) when <text> is empty, when it
 * contains anything that is not part of the number, when a negative value
 * is lower than INT_MIN, or when a positive value is greater than UINT_MAX.
 */
static int map_parse_int(const char *text, struct sample_storage *smp)
{
	long long int value;
	char *error;

	/* parse the integer on 64 bits so that the range can be checked
	 * afterwards. On overflow strtoll() saturates to LLONG_MIN/LLONG_MAX,
	 * which are both outside of the accepted range and thus rejected by
	 * the checks below.
	 */
	value = strtoll(text, &error, 10);

	/* <error> == <text> means that no digit was consumed at all (e.g.
	 * empty string): this must not be silently turned into zero.
	 */
	if (error == text || *error != '\0')
		return 0;

	/* check sign and limits */
	if (value < 0) {
		if (value < INT_MIN)
			return 0;
		smp->type = SMP_T_SINT;
		smp->data.sint = value;
	}
	else {
		if (value > UINT_MAX)
			return 0;
		smp->type = SMP_T_UINT;
		smp->data.uint = value;
	}

	return 1;
}
/* This function creates and initializes a new map_reference entry and
 * appends it to the <maps> list. This function only fails in case of a
 * memory allocation issue, in which case it returns NULL and nothing is
 * left allocated. <reference> here is a unique identifier for the map's
 * contents, typically the name of the file used to build the map; it is
 * duplicated, so the caller keeps ownership of its own string.
 */
static struct map_reference *map_create_reference(const char *reference)
{
	struct map_reference *ref;

	/* create new entry */
	ref = calloc(1, sizeof(*ref));
	if (!ref)
		return NULL;

	ref->reference = strdup(reference);
	if (!ref->reference) {
		/* do not leak the half-built entry */
		free(ref);
		return NULL;
	}

	LIST_INIT(&ref->entries);
	LIST_INIT(&ref->maps);
	LIST_ADDQ(&maps, &ref->list);

	return ref;
}
/* Allocates a map entry for line number <line> holding private copies of
 * <key> and <value>. Returns the new entry, or NULL if any allocation
 * fails, in which case everything allocated so far is released.
 */
static struct map_entry *map_create_entry(int line, char *key, char *value)
{
	struct map_entry *entry = calloc(1, sizeof(*entry));

	if (entry == NULL)
		goto fail;

	entry->line = line;

	entry->key = strdup(key);
	if (entry->key == NULL)
		goto fail_entry;

	entry->value = strdup(value);
	if (entry->value == NULL)
		goto fail_key;

	return entry;

 fail_key:
	free(entry->key);
 fail_entry:
	free(entry);
 fail:
	return NULL;
}
/* Allocates a zeroed map descriptor bound to the converter <conv> and
 * appends it to the list of maps attached to <ref>.
 * Returns the descriptor, or NULL if the allocation fails.
 */
static struct map_descriptor *map_create_descriptor(struct map_reference *ref,
                                                    struct sample_conv *conv)
{
	struct map_descriptor *new_desc = calloc(1, sizeof(*new_desc));

	if (new_desc != NULL) {
		new_desc->conv = conv;
		LIST_ADDQ(&ref->maps, &new_desc->list);
	}
	return new_desc;
}
/* This function just add entry into the list of pattern.
* It can return false only in memory problem case
*/
static int map_add_entry(struct map_reference *map, int line, char *key, char *value)
{
struct map_entry *ent;
ent = map_create_entry(line, key, value);
if (!ent)
return 0;
LIST_ADDQ(&map->entries, &ent->list);
return 1;
}
/* Reads patterns from the file <filename> and appends one entry per
 * key/value line to <ref>. On error, an error message is reported through
 * <err> using memprintf().
 *
 * The file contains one key + value per line. Lines which start with '#' are
 * ignored, just like empty lines. Leading tabs/spaces are stripped. The key is
 * then the first "word" (series of non-space/tabs characters), and the value is
 * what follows this series of space/tab till the end of the line excluding
 * trailing spaces/tabs.
 *
 * Example :
 *
 *     # this is a comment and is ignored
 *        62.212.114.60     1wt.eu      \n
 *     <-><-----------><---><----><---->
 *      |       |        |     |    `--- trailing spaces ignored
 *      |       |        |     `-------- value
 *      |       |        `--------------- middle spaces ignored
 *      |       `------------------------ key
 *      `-------------------------------- leading spaces ignored
 *
 * Lines are read through the <trash> buffer, and the line counter is
 * incremented once per fgets() call.
 *
 * Return non-zero in case of success, otherwise 0.
 */
static int map_read_entries_from_file(const char *filename,
                                      struct map_reference *ref,
                                      char **err)
{
	FILE *file;
	char *c;
	int ret = 0;
	int line = 0;
	char *key_beg;
	char *key_end;
	char *value_beg;
	char *value_end;

	file = fopen(filename, "r");
	if (!file) {
		memprintf(err, "failed to open pattern file <%s>", filename);
		return 0;
	}

	/* now parse all patterns. Each line may contain only one pattern
	 * followed by one value. The leading spaces, separator spaces and
	 * trailing spaces are stripped. Comment lines start with '#'.
	 */
	while (fgets(trash.str, trash.size, file) != NULL) {
		line++;
		c = trash.str;

		/* ignore lines beginning with a hash sign */
		if (*c == '#')
			continue;

		/* strip leading spaces and tabs */
		while (*c == ' ' || *c == '\t')
			c++;

		/* empty lines are ignored too. fgets() keeps the end-of-line
		 * characters, so a blank line shows up here as "\n" or "\r\n"
		 * and not as an empty string.
		 */
		if (*c == '\0' || *c == '\n' || *c == '\r')
			continue;

		/* look for the end of the key */
		key_beg = c;
		while (*c && *c != ' ' && *c != '\t' && *c != '\n' && *c != '\r')
			c++;
		key_end = c;

		/* strip middle spaces and tabs */
		while (*c == ' ' || *c == '\t')
			c++;

		/* look for the end of the value, it is the end of the line */
		value_beg = c;
		while (*c && *c != '\n' && *c != '\r')
			c++;
		value_end = c;

		/* trim possibly trailing spaces and tabs */
		while (value_end > value_beg && (value_end[-1] == ' ' || value_end[-1] == '\t'))
			value_end--;

		/* set final \0 on both words. When the line holds no value,
		 * <value_beg> equals <key_end> or points past it to a byte
		 * which is zeroed here, so the value is an empty string.
		 */
		*key_end = '\0';
		*value_end = '\0';

		/* insert values */
		if (!map_add_entry(ref, line, key_beg, value_beg)) {
			memprintf(err, "out of memory");
			goto out_close;
		}
	}

	/* success */
	ret = 1;

 out_close:
	fclose(file);
	return ret;
}
/* This function reads the string entries of <ent>, parses the value with
 * the <desc> output parser, and registers the key together with the parsed
 * value into the pattern expression of <desc>.
 * Returns 1 in case of success, else returns 0 and <err> is filled.
 *
 * <pattern> is passed as-is to pattern_register(), which uses it either for
 * creating a new pattern (list of values case) or for reusing the same
 * pattern (tree index case).
 *
 * <patflags> must be PAT_F_*.
 */
static int map_parse_and_index(struct map_descriptor *desc,
                               struct pattern **pattern,
                               struct map_entry *ent,
                               int patflags,
                               char **err)
{
	struct sample_storage *smp;

	/* use new smp for storing value */
	smp = calloc(1, sizeof(*smp));
	if (!smp) {
		memprintf(err, "out of memory");
		return 0;
	}

	/* first read and convert value */
	if (!desc->parse(ent->value, smp)) {
		memprintf(err, "parse value failed at line %d of file <%s>",
		          ent->line, desc->ref->reference);
		/* the sample was not handed over to anyone yet */
		free(smp);
		return 0;
	}

	/* read and convert key.
	 * NOTE(review): <smp> is not released when pattern_register() fails
	 * because it is not visible from here whether a reference to it was
	 * already stored in <*pattern> -- to be confirmed against
	 * pattern_register().
	 */
	if (!pattern_register(&desc->pat, ent->key, smp, pattern, patflags, err))
		return 0;

	return 1;
}
/* This function loads the map file according to the data types declared in
 * the "struct sample_conv" <conv>.
 *
 * <arg>[0] holds the name of the map file and <arg>[1] optionally holds the
 * default value. The file is only read the first time a given name is met;
 * later calls with the same name reuse the entries already attached to the
 * map reference. A new map descriptor is always created, its match function
 * is taken from pat_match_fcts[] using <conv>->private, its key parser is
 * chosen from <conv>->in_type and its value parser from <conv>->out_type.
 * All the entries of the reference are then parsed and indexed through
 * pattern_register().
 *
 * On success, <arg>[0] is turned into an ARGT_MAP argument pointing to the
 * new descriptor and 1 is returned. On error, 0 is returned and <err> is
 * filled.
 */
static int sample_load_map(struct arg *arg, struct sample_conv *conv, char **err)
{
	struct map_reference *ref;
	struct map_descriptor *desc;
	struct pattern *pattern;
	struct map_entry *ent;

	/* look for existing map reference. The reference is the
	 * file encountered in the first argument. arg[0] with string
	 * type is guaranteed by the parser.
	 */
	ref = map_get_reference(arg[0].data.str.str);

	/* The reference doesn't exist */
	if (!ref) {

		/* create new reference entry */
		ref = map_create_reference(arg[0].data.str.str);
		if (!ref) {
			memprintf(err, "out of memory");
			return 0;
		}

		/* load the file */
		if (!map_read_entries_from_file(arg[0].data.str.str, ref, err))
			return 0;
	}

	/* create new map descriptor */
	desc = map_create_descriptor(ref, conv);
	if (!desc) {
		memprintf(err, "out of memory");
		return 0;
	}

	desc->ref = ref;
	pattern_init_expr(&desc->pat);

	/* set the match method */
	desc->pat.match = pat_match_fcts[conv->private];

	/* set the input parse method */
	switch (conv->in_type) {
	case SMP_T_STR:  desc->pat.parse = pat_parse_fcts[PAT_MATCH_STR]; break;
	case SMP_T_UINT: desc->pat.parse = pat_parse_fcts[PAT_MATCH_INT]; break;
	case SMP_T_ADDR: desc->pat.parse = pat_parse_fcts[PAT_MATCH_IP];  break;
	default:
		memprintf(err, "map: internal haproxy error: no default parse case for the input type <%d>.",
		          conv->in_type);
		return 0;
	}

	/* check the output parse method */
	switch (desc->conv->out_type) {
	case SMP_T_STR:  desc->parse = map_parse_str;  break;
	case SMP_T_UINT: desc->parse = map_parse_int;  break;
	case SMP_T_IPV4: desc->parse = map_parse_ip;   break;
	case SMP_T_IPV6: desc->parse = map_parse_ip6;  break;
	default:
		/* this one is about the output type, not the input type */
		memprintf(err, "map: internal haproxy error: no default parse case for the output type <%d>.",
		          conv->out_type);
		return 0;
	}

	/* The second argument is the default value */
	if (arg[1].type == ARGT_STR) {
		/* a private copy is kept because the string parser only
		 * stores a pointer to the text it is given.
		 */
		desc->default_value = strdup(arg[1].data.str.str);
		if (!desc->default_value) {
			memprintf(err, "out of memory");
			return 0;
		}
		desc->def = calloc(1, sizeof(*desc->def));
		if (!desc->def) {
			memprintf(err, "out of memory");
			return 0;
		}
		if (!desc->parse(desc->default_value, desc->def)) {
			memprintf(err, "Cannot parse default value");
			return 0;
		}
	}
	else
		desc->def = NULL;

	/* parse each line of the file */
	pattern = NULL;
	list_for_each_entry(ent, &ref->entries, list) {
		if (!map_parse_and_index(desc, &pattern, ent, 0, err))
			return 0;
	}

	/* replace the first argument by this definition */
	arg[0].type = ARGT_MAP;
	arg[0].data.map = desc;

	return 1;
}
/* Converter entry point: looks up the sample <smp> in the map descriptor
 * found in <arg_p>[0]. When the lookup reports PAT_MATCH, <smp> is
 * overwritten with the type and data of the value associated with the
 * matching key. Otherwise the descriptor's default value is used instead,
 * and if there is none the function returns 0 without touching <smp>.
 * Returns 1 whenever <smp> has been filled.
 */
static int sample_conv_map(const struct arg *arg_p, struct sample *smp)
{
	struct map_descriptor *map = arg_p[0].data.map;
	struct sample_storage *found;

	if (pattern_exec_match(&map->pat, smp, &found) != PAT_MATCH) {
		/* no match: fall back to the default value, if any */
		found = map->def;
		if (found == NULL)
			return 0;
	}

	/* hand the stored value over to the caller's sample */
	smp->type = found->type;
	memcpy(&smp->data, &found->data, sizeof(smp->data));

	return 1;
}
/* List of the "map*" converter keywords registered by this file.
 *
 * Note: must not be declared <const> as its list will be overwritten
 *
 * Each entry gives, in this order: the keyword, the converter function, the
 * argument mask, the function called to load the map (presumably at
 * configuration parsing time -- to be confirmed against the sample_conv
 * definition), the input type, the output type, and the PAT_MATCH_* method.
 * sample_load_map() reads them back as conv->in_type, conv->out_type and
 * conv->private.
 *
 * For the map_*_int keywords, the output is declared as SMP_T_UINT, but the
 * values stored by map_parse_int() are typed SMP_T_UINT or SMP_T_SINT
 * depending on the sign of the number found in the file.
 *
 * For the map_*_ip keywords, the output is declared as SMP_T_IPV4, which makes
 * sample_load_map() select map_parse_ip() as the value parser.
 * NOTE(review): the original comment stated that SMP_T_IPV6 may be provided as
 * well; no keyword below declares SMP_T_IPV6, so this depends on buf2ip() --
 * to be confirmed.
 *
 * The keywords without an output type suffix only emit strings.
 *
 * The output type is only used during the configuration parsing. It is used for detecting
 * compatibility problems.
 *
 * The arguments are: <file>[,<default value>]
 */
static struct sample_conv_kw_list sample_conv_kws = {ILH, {
/* string output */
{ "map", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, PAT_MATCH_STR },
{ "map_str", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, PAT_MATCH_STR },
{ "map_beg", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, PAT_MATCH_BEG },
{ "map_sub", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, PAT_MATCH_SUB },
{ "map_dir", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, PAT_MATCH_DIR },
{ "map_dom", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, PAT_MATCH_DOM },
{ "map_end", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, PAT_MATCH_END },
{ "map_reg", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, PAT_MATCH_REG },
{ "map_int", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_UINT, SMP_T_STR, PAT_MATCH_INT },
{ "map_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_ADDR, SMP_T_STR, PAT_MATCH_IP },
/* integer output */
{ "map_str_int", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_UINT, PAT_MATCH_STR },
{ "map_beg_int", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_UINT, PAT_MATCH_BEG },
{ "map_sub_int", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_UINT, PAT_MATCH_SUB },
{ "map_dir_int", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_UINT, PAT_MATCH_DIR },
{ "map_dom_int", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_UINT, PAT_MATCH_DOM },
{ "map_end_int", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_UINT, PAT_MATCH_END },
{ "map_reg_int", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_UINT, PAT_MATCH_REG },
{ "map_int_int", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_UINT, SMP_T_UINT, PAT_MATCH_INT },
{ "map_ip_int", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_ADDR, SMP_T_UINT, PAT_MATCH_IP },
/* IP address output */
{ "map_str_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_IPV4, PAT_MATCH_STR },
{ "map_beg_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_IPV4, PAT_MATCH_BEG },
{ "map_sub_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_IPV4, PAT_MATCH_SUB },
{ "map_dir_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_IPV4, PAT_MATCH_DIR },
{ "map_dom_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_IPV4, PAT_MATCH_DOM },
{ "map_end_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_IPV4, PAT_MATCH_END },
{ "map_reg_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_IPV4, PAT_MATCH_REG },
{ "map_int_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_UINT, SMP_T_IPV4, PAT_MATCH_INT },
{ "map_ip_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_ADDR, SMP_T_IPV4, PAT_MATCH_IP },
{ /* END */ },
}};
/* Registers the "map*" converter keywords of this file. The function carries
 * the "constructor" attribute, so no explicit call is needed.
 */
__attribute__((constructor))
static void __map_init(void)
{
/* register format conversion keywords */
sample_register_convs(&sample_conv_kws);
}