MINOR: sample: add new converters to hash input

From time to time it's useful to hash input data (scramble input, or
reduce the space needed in a stick table). This patch provides 3 simple
converters allowing use of the available hash functions to hash input
data. The output is an unsigned integer which can be passed into a header,
a log or used as an index for a stick table. One nice usage is to scramble
source IP addresses before logging when there are requirements to hide them.
This commit is contained in:
Willy Tarreau 2014-07-15 20:15:37 +02:00
parent 9700e5c914
commit 23ec4ca1bb
2 changed files with 70 additions and 1 deletions

View File

@ -9959,6 +9959,17 @@ base64
transfer binary content in a way that can be reliably transferred (eg:
an SSL ID can be copied in a header).
djb2([<avalanche>])
Hashes a binary input sample into an unsigned 32-bit quantity using the DJB2
hash function. Optionally, it is possible to apply a full avalanche hash
function to the output if the optional <avalanche> argument equals 1. This
converter uses the same functions as used by the various hash-based load
balancing algorithms, so it will provide exactly the same results. It is
mostly intended for debugging, but can be used as a stick-table entry to
collect rough statistics. It must not be used for security purposes as a
32-bit hash is trivial to break. See also "sdbm", "wt6" and the "hash-type"
directive.
hex
Converts a binary input sample to an hex string containing two hex digits per
input byte. It is used to log or transfer hex dumps of some binary input data
@ -10089,6 +10100,17 @@ map_<match_type>_<output_type>(<map_file>[,<default_value>])
| `---------------------------- key
`------------------------------------ leading spaces ignored
sdbm([<avalanche>])
Hashes a binary input sample into an unsigned 32-bit quantity using the SDBM
hash function. Optionally, it is possible to apply a full avalanche hash
function to the output if the optional <avalanche> argument equals 1. This
converter uses the same functions as used by the various hash-based load
balancing algorithms, so it will provide exactly the same results. It is
mostly intended for debugging, but can be used as a stick-table entry to
collect rough statistics. It must not be used for security purposes as a
32-bit hash is trivial to break. See also "djb2", "wt6" and the "hash-type"
directive.
table_bytes_in_rate(<table>)
Uses the string representation of the input sample to perform a look up in
the specified table. If the key is not found in the table, integer value zero
@ -10247,6 +10269,17 @@ utime(<format>[,<offset>])
# Eg: 20140710162350 127.0.0.1:57325
log-format %[date,utime(%Y%m%d%H%M%S)]\ %ci:%cp
wt6([<avalanche>])
Hashes a binary input sample into an unsigned 32-bit quantity using the WT6
hash function. Optionally, it is possible to apply a full avalanche hash
function to the output if the optional <avalanche> argument equals 1. This
converter uses the same functions as used by the various hash-based load
balancing algorithms, so it will provide exactly the same results. It is
mostly intended for debugging, but can be used as a stick-table entry to
collect rough statistics. It must not be used for security purposes as a
32-bit hash is trivial to break. See also "djb2", "sdbm", and the "hash-type"
directive.
7.3.2. Fetching samples from internal states
--------------------------------------------
@ -11408,7 +11441,9 @@ base32 : integer
This returns a 32-bit hash of the value returned by the "base" fetch method
above. This is useful to track per-URL activity on high traffic sites without
having to store all URLs. Instead a shorter hash is stored, saving a lot of
memory. The output type is an unsigned integer.
memory. The output type is an unsigned integer. The hash function used is
SDBM with full avalanche on the output. Technically, base32 is exactly equal
to "base,sdbm(1)".
base32+src : binary
This returns the concatenation of the base32 fetch above and the src fetch

View File

@ -18,6 +18,7 @@
#include <types/global.h>
#include <common/chunk.h>
#include <common/hash.h>
#include <common/standard.h>
#include <common/uri_auth.h>
#include <common/base64.h>
@ -1240,6 +1241,16 @@ static int sample_conv_bin2hex(const struct arg *arg_p, struct sample *smp)
return 1;
}
/* hashes the binary input into a 32-bit unsigned int */
static int sample_conv_djb2(const struct arg *arg_p, struct sample *smp)
{
smp->data.uint = hash_djb2(smp->data.str.str, smp->data.str.len);
if (arg_p && arg_p->data.uint)
smp->data.uint = full_hash(smp->data.uint);
smp->type = SMP_T_UINT;
return 1;
}
static int sample_conv_str2lower(const struct arg *arg_p, struct sample *smp)
{
int i;
@ -1302,6 +1313,16 @@ static int sample_conv_ltime(const struct arg *args, struct sample *smp)
return 1;
}
/* hashes the binary input into a 32-bit unsigned int */
static int sample_conv_sdbm(const struct arg *arg_p, struct sample *smp)
{
smp->data.uint = hash_sdbm(smp->data.str.str, smp->data.str.len);
if (arg_p && arg_p->data.uint)
smp->data.uint = full_hash(smp->data.uint);
smp->type = SMP_T_UINT;
return 1;
}
/* takes an UINT value on input supposed to represent the time since EPOCH,
* adds an optional offset found in args[1] and emits a string representing
* the UTC date in the format specified in args[1] using strftime().
@ -1322,6 +1343,16 @@ static int sample_conv_utime(const struct arg *args, struct sample *smp)
return 1;
}
/* hashes the binary input into a 32-bit unsigned int */
static int sample_conv_wt6(const struct arg *arg_p, struct sample *smp)
{
smp->data.uint = hash_wt6(smp->data.str.str, smp->data.str.len);
if (arg_p && arg_p->data.uint)
smp->data.uint = full_hash(smp->data.uint);
smp->type = SMP_T_UINT;
return 1;
}
/************************************************************************/
/* All supported sample fetch functions must be declared here */
/************************************************************************/
@ -1426,6 +1457,9 @@ static struct sample_conv_kw_list sample_conv_kws = {ILH, {
{ "ipmask", sample_conv_ipmask, ARG1(1,MSK4), NULL, SMP_T_IPV4, SMP_T_IPV4 },
{ "ltime", sample_conv_ltime, ARG2(1,STR,SINT), NULL, SMP_T_UINT, SMP_T_STR },
{ "utime", sample_conv_utime, ARG2(1,STR,SINT), NULL, SMP_T_UINT, SMP_T_STR },
{ "djb2", sample_conv_djb2, ARG1(0,UINT), NULL, SMP_T_BIN, SMP_T_UINT },
{ "sdbm", sample_conv_sdbm, ARG1(0,UINT), NULL, SMP_T_BIN, SMP_T_UINT },
{ "wt6", sample_conv_wt6, ARG1(0,UINT), NULL, SMP_T_BIN, SMP_T_UINT },
{ NULL, NULL, 0, 0, 0 },
}};