From 01e0974b5ae18c620d8aa8ca71f5965f90f41b63 Mon Sep 17 00:00:00 2001 From: Thierry FOURNIER Date: Mon, 26 Dec 2016 11:46:11 +0100 Subject: [PATCH] MINOR: samples: add xx-hash functions This patch adds the support of xx-hash 32 and 64-bits functions. --- doc/configuration.txt | 18 ++++++++++++++++++ src/sample.c | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/doc/configuration.txt b/doc/configuration.txt index c4efbce5e..2a5f7dc06 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -12806,6 +12806,24 @@ xor() This prefix is followed by a name. The separator is a '.'. The name may only contain characters 'a-z', 'A-Z', '0-9', '.' and '_'. +xxh32([<seed>]) + Hashes a binary input sample into an unsigned 32-bit quantity using the 32-bit + variant of the XXHash hash function. This hash supports a seed which defaults + to zero but a different value may be passed as the <seed> argument. This hash + is known to be very good and very fast so it can be used to hash URLs and/or + URL parameters for use as stick-table keys to collect statistics with a low + collision rate, though care must be taken as the algorithm is not considered + as cryptographically secure. + +xxh64([<seed>]) + Hashes a binary input sample into a signed 64-bit quantity using the 64-bit + variant of the XXHash hash function. This hash supports a seed which defaults + to zero but a different value may be passed as the <seed> argument. This hash + is known to be very good and very fast so it can be used to hash URLs and/or + URL parameters for use as stick-table keys to collect statistics with a low + collision rate, though care must be taken as the algorithm is not considered + as cryptographically secure. + 7.3.2. 
Fetching samples from internal states -------------------------------------------- diff --git a/src/sample.c b/src/sample.c index 81349cdde..3cbe76279 100644 --- a/src/sample.c +++ b/src/sample.c @@ -33,6 +33,8 @@ #include #include +#include + /* sample type names */ const char *smp_to_type[SMP_TYPES] = { [SMP_T_ANY] = "any", @@ -1617,6 +1619,41 @@ static int sample_conv_wt6(const struct arg *arg_p, struct sample *smp, void *pr return 1; } +/* hashes the binary input into a 32-bit unsigned int using xxh. + * The seed of the hash defaults to 0 but can be changed in argument 1. + */ +static int sample_conv_xxh32(const struct arg *arg_p, struct sample *smp, void *private) +{ + unsigned int seed; + + if (arg_p && arg_p->data.sint) + seed = arg_p->data.sint; + else + seed = 0; + smp->data.u.sint = XXH32(smp->data.u.str.str, smp->data.u.str.len, seed); + smp->data.type = SMP_T_SINT; + return 1; +} + +/* hashes the binary input into a 64-bit unsigned int using xxh. + * In fact, the function returns a 64-bit unsigned, but the sample + * storage of haproxy only proposes 64-bit signed, so the value is + * cast as signed. This cast doesn't impact the hash distribution. + * The seed of the hash defaults to 0 but can be changed in argument 1. 
+ */ +static int sample_conv_xxh64(const struct arg *arg_p, struct sample *smp, void *private) +{ + unsigned long long int seed; + + if (arg_p && arg_p->data.sint) + seed = (unsigned long long int)arg_p->data.sint; + else + seed = 0; + smp->data.u.sint = (long long int)XXH64(smp->data.u.str.str, smp->data.u.str.len, seed); + smp->data.type = SMP_T_SINT; + return 1; +} + /* hashes the binary input into a 32-bit unsigned int */ static int sample_conv_crc32(const struct arg *arg_p, struct sample *smp, void *private) { @@ -2680,6 +2717,8 @@ static struct sample_conv_kw_list sample_conv_kws = {ILH, { { "djb2", sample_conv_djb2, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT }, { "sdbm", sample_conv_sdbm, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT }, { "wt6", sample_conv_wt6, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT }, + { "xxh32", sample_conv_xxh32, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT }, + { "xxh64", sample_conv_xxh64, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT }, { "json", sample_conv_json, ARG1(1,STR), sample_conv_json_check, SMP_T_STR, SMP_T_STR }, { "bytes", sample_conv_bytes, ARG2(1,SINT,SINT), NULL, SMP_T_BIN, SMP_T_BIN }, { "field", sample_conv_field, ARG2(2,SINT,STR), sample_conv_field_check, SMP_T_STR, SMP_T_STR },