MEDIUM: resolvers: hash the records before inserting them into the tree

We're using XXH32() on the record to compute the key used to insert it into,
or look it up from, the tree. This way we don't change the rest of the code:
the comparisons are still made on all fields and the next node is visited on
mismatch. This also allows us to continue to use roundrobin between identical
nodes.

Just doing this is sufficient to see the CPU usage go down from ~60-70% to
4% at ~2k DNS requests per second for a farm with 300 servers. A larger
config with 12 backends of 2000 servers each shows ~8-9% CPU for 6000-10000
DNS requests per second.

It would probably be possible to go further with multiple levels of indexing
but it's not worth it, and it's important to remember that tree nodes take
space (the struct answer_list went back up from 576 to 600 bytes).
This commit is contained in:
Willy Tarreau 2021-10-21 08:18:01 +02:00
parent 7893ae117f
commit dcb696cd31

View File

@ -47,6 +47,7 @@
#include <haproxy/time.h>
#include <haproxy/tools.h>
#include <haproxy/vars.h>
#include <haproxy/xxhash.h>
struct list sec_resolvers = LIST_HEAD_INIT(sec_resolvers);
@ -882,6 +883,7 @@ static int resolv_validate_dns_response(unsigned char *resp, unsigned char *bufe
struct resolv_answer_item *answer_record, *tmp_record;
struct resolv_response *r_res;
struct eb32_node *eb32;
uint32_t key = 0;
int i, found = 0;
int cause = RSLV_RESP_ERROR;
@ -1116,6 +1118,7 @@ static int resolv_validate_dns_response(unsigned char *resp, unsigned char *bufe
answer_record->data.in4.sin_family = AF_INET;
memcpy(&answer_record->data.in4.sin_addr, reader, answer_record->data_len);
key = XXH32(reader, answer_record->data_len, answer_record->type);
break;
case DNS_RTYPE_CNAME:
@ -1139,6 +1142,7 @@ static int resolv_validate_dns_response(unsigned char *resp, unsigned char *bufe
memcpy(answer_record->data.target, tmpname, len);
answer_record->data.target[len] = 0;
key = XXH32(tmpname, len, answer_record->type);
previous_dname = answer_record->data.target;
break;
@ -1167,6 +1171,7 @@ static int resolv_validate_dns_response(unsigned char *resp, unsigned char *bufe
answer_record->data_len = len;
memcpy(answer_record->data.target, tmpname, len);
answer_record->data.target[len] = 0;
key = XXH32(tmpname, len, answer_record->type);
if (answer_record->ar_item != NULL) {
pool_free(resolv_answer_item_pool, answer_record->ar_item);
answer_record->ar_item = NULL;
@ -1180,6 +1185,7 @@ static int resolv_validate_dns_response(unsigned char *resp, unsigned char *bufe
answer_record->data.in6.sin6_family = AF_INET6;
memcpy(&answer_record->data.in6.sin6_addr, reader, answer_record->data_len);
key = XXH32(reader, answer_record->data_len, answer_record->type);
break;
} /* switch (record type) */
@ -1197,7 +1203,7 @@ static int resolv_validate_dns_response(unsigned char *resp, unsigned char *bufe
/* Lookup to see if we already had this entry */
found = 0;
for (eb32 = eb32_first(&r_res->answer_tree); eb32 != NULL; eb32 = eb32_next(eb32)) {
for (eb32 = eb32_lookup(&r_res->answer_tree, key); eb32 != NULL; eb32 = eb32_next(eb32)) {
tmp_record = eb32_entry(eb32, typeof(*tmp_record), link);
if (tmp_record->type != answer_record->type)
continue;
@ -1242,7 +1248,7 @@ static int resolv_validate_dns_response(unsigned char *resp, unsigned char *bufe
else {
answer_record->last_seen = now_ms;
answer_record->ar_item = NULL;
answer_record->link.key = 0; // will be set later
answer_record->link.key = key;
eb32_insert(&r_res->answer_tree, &answer_record->link);
answer_record = NULL;
}