BUG/MINOR: cache: Manage multiple headers in accept-encoding normalization

The accept-encoding part of the secondary key (vary) was only built out
of the first occurrence of the header. So if a client had two
accept-encoding headers, gzip and br for instance, the key would have
been built out of the gzip string. So another client that only managed
gzip would have been sent the cached resource, even if it was a br resource.
The http_find_header function is now called directly by the normalizers
so that they can manage multiple headers if needed.
A request that has more than 16 encodings will be considered as an
illegitimate request and its response will not be stored.

This fixes GitHub issue #987.

It does not need any backport.
This commit is contained in:
Remi Tricot-Le Breton 2020-12-23 18:13:46 +01:00 committed by Willy Tarreau
parent 2b5c5cbef6
commit e4421dec7e
2 changed files with 112 additions and 37 deletions

View File

@ -93,6 +93,29 @@ server s1 {
chunkedlen 19
chunkedlen 0
# Multiple Accept-Encoding headers
rxreq
expect req.url == "/multiple_headers"
txresp -hdr "Vary: accept-encoding" \
-hdr "Cache-Control: max-age=5" -bodylen 155
rxreq
expect req.url == "/multiple_headers"
txresp -hdr "Vary: accept-encoding" \
-hdr "Cache-Control: max-age=5" -bodylen 166
# Too many Accept-Encoding values (we will not cache responses with more than 16 encodings)
rxreq
expect req.url == "/too_many_encodings"
txresp -hdr "Vary: accept-encoding" \
-hdr "Cache-Control: max-age=5" -bodylen 177
rxreq
expect req.url == "/too_many_encodings"
txresp -hdr "Vary: accept-encoding" \
-hdr "Cache-Control: max-age=5" -bodylen 188
} -start
@ -277,6 +300,48 @@ client c1 -connect ${h1_fe_sock} {
expect resp.bodylen == 57
expect resp.http.X-Cache-Hit == 1
# Multiple Accept-encoding headers
txreq -url "/multiple_headers" \
-hdr "Accept-Encoding: first_encoding" \
-hdr "Accept-Encoding: second_encoding,third_encoding"
rxresp
expect resp.status == 200
expect resp.bodylen == 155
expect resp.http.X-Cache-Hit == 0
txreq -url "/multiple_headers" \
-hdr "Accept-Encoding: third_encoding" \
-hdr "Accept-Encoding: second_encoding,first_encoding"
rxresp
expect resp.status == 200
expect resp.bodylen == 155
expect resp.http.X-Cache-Hit == 1
# Should not match a cache entry
txreq -url "/multiple_headers" \
-hdr "Accept-Encoding: first_encoding"
rxresp
expect resp.status == 200
expect resp.bodylen == 166
expect resp.http.X-Cache-Hit == 0
# Too many accept encodings
txreq -url "/too_many_encodings" \
-hdr "Accept-Encoding: a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12,a13,a14,a15,a16,a17"
rxresp
expect resp.status == 200
expect resp.bodylen == 177
expect resp.http.X-Cache-Hit == 0
txreq -url "/too_many_encodings" \
-hdr "Accept-Encoding: a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12,a13,a14,a15,a16,a17"
rxresp
expect resp.status == 200
expect resp.bodylen == 188
expect resp.http.X-Cache-Hit == 0
# The following requests are treated by a backend that does not cache
# responses containing a Vary header
txreq -url "/no_vary_support"

View File

@ -73,17 +73,17 @@ enum vary_header_bit {
VARY_LAST /* should always be last */
};
typedef int(*http_header_normalizer)(struct ist value, char *buf, unsigned int *buf_len);
struct vary_hashing_information {
struct ist hdr_name; /* Header name */
enum vary_header_bit value; /* Bit representing the header in a vary signature */
unsigned int hash_length; /* Size of the sub hash for this header's value */
http_header_normalizer norm_fn; /* Normalization function */
int(*norm_fn)(struct htx*,struct ist hdr_name,char* buf,unsigned int* buf_len); /* Normalization function */
};
static int accept_encoding_normalizer(struct ist value, char *buf, unsigned int *buf_len);
static int default_normalizer(struct ist value, char *buf, unsigned int *buf_len);
static int accept_encoding_normalizer(struct htx *htx, struct ist hdr_name,
char *buf, unsigned int *buf_len);
static int default_normalizer(struct htx *htx, struct ist hdr_name,
char *buf, unsigned int *buf_len);
/* Warning : do not forget to update HTTP_CACHE_SEC_KEY_LEN when new items are
* added to this array. */
@ -2050,31 +2050,33 @@ int accept_encoding_cmp(const void *a, const void *b)
#define ACCEPT_ENCODING_MAX_ENTRIES 16
/*
* Build a hash of the accept-encoding header. The different parts of the
* header value are first sorted, appended and then a crc is calculated
* for the newly constructed buffer.
* Returns 0 in case of success.
* header value are converted to lower case, hashed, sorted and then all
* the unique sub-hashes are merged into a single hash that is copied into
* the buffer.
* Returns 0 in case of success, -1 in case of error.
*/
static int accept_encoding_normalizer(struct ist full_value, char *buf, unsigned int *buf_len)
static int accept_encoding_normalizer(struct htx *htx, struct ist hdr_name,
char *buf, unsigned int *buf_len)
{
unsigned int values[ACCEPT_ENCODING_MAX_ENTRIES] = {};
size_t count = 0;
char *comma = NULL;
unsigned int hash_value = 0;
unsigned int prev = 0, curr = 0;
struct http_hdr_ctx ctx = { .blk = NULL };
/* Turn accept-encoding value to lower case */
full_value = ist2bin_lc(istptr(full_value), full_value);
/* The hash will be built out of a sorted list of accepted encodings. */
while (count < (ACCEPT_ENCODING_MAX_ENTRIES - 1) && (comma = istchr(full_value, ',')) != NULL) {
size_t length = comma - istptr(full_value);
values[count++] = hash_crc32(istptr(full_value), length);
full_value = istadv(full_value, length + 1);
/* Iterate over all the ACCEPT_ENCODING_MAX_ENTRIES first accept-encoding
* values that might span acrosse multiple accept-encoding headers. */
while (http_find_header(htx, hdr_name, &ctx, 0) && count < ACCEPT_ENCODING_MAX_ENTRIES) {
/* Turn accept-encoding value to lower case */
ist2bin_lc(istptr(ctx.value), ctx.value);
values[count++] = hash_crc32(istptr(ctx.value), istlen(ctx.value));
}
values[count++] = hash_crc32(istptr(full_value), istlen(full_value));
/* A request with more than ACCEPT_ENCODING_MAX_ENTRIES accepted
* encodings might be illegitimate so we will not use it. */
if (count == ACCEPT_ENCODING_MAX_ENTRIES)
return -1;
/* Sort the values alphabetically. */
qsort(values, count, sizeof(*values), &accept_encoding_cmp);
@ -2087,31 +2089,38 @@ static int accept_encoding_normalizer(struct ist full_value, char *buf, unsigned
prev = curr;
}
memcpy(buf, &hash_value, sizeof(hash_value));
write_u32(buf, hash_value);
*buf_len = sizeof(hash_value);
/* This function fills the hash buffer correctly even if no header was
* found, hence the 0 return value (success). */
return 0;
}
#undef ACCEPT_ENCODING_MAX_ENTRIES
/*
* Normalizer used by default for User-Agent and Referer headers. It only
* Normalizer used by default for the Referer header. It only
* calculates a simple crc of the whole value.
* Returns 0 in case of success.
* Only the first occurrence of the header will be taken into account in the
* hash.
* Returns 0 in case of success, 1 if the hash buffer should be filled with 0s
* and -1 in case of error.
*/
static int default_normalizer(struct ist value, char *buf, unsigned int *buf_len)
static int default_normalizer(struct htx *htx, struct ist hdr_name,
char *buf, unsigned int *buf_len)
{
int hash_value = 0;
int retval = 1;
struct http_hdr_ctx ctx = { .blk = NULL };
hash_value = hash_crc32(istptr(value), istlen(value));
if (http_find_header(htx, hdr_name, &ctx, 1)) {
retval = 0;
write_u32(buf, hash_crc32(istptr(ctx.value), istlen(ctx.value)));
*buf_len = sizeof(int);
}
memcpy(buf, &hash_value, sizeof(hash_value));
*buf_len = sizeof(hash_value);
return 0;
return retval;
}
/*
* Pre-calculate the hashes of all the supported headers (in our Vary
* implementation) of a given request. We have to calculate all the hashes
@ -2145,7 +2154,6 @@ static int http_request_build_secondary_key(struct stream *s, int vary_signature
{
struct http_txn *txn = s->txn;
struct htx *htx = htxbuf(&s->req.buf);
struct http_hdr_ctx ctx = { .blk = NULL };
unsigned int idx;
const struct vary_hashing_information *info = NULL;
@ -2153,12 +2161,14 @@ static int http_request_build_secondary_key(struct stream *s, int vary_signature
int retval = 0;
int offset = 0;
for (idx = 0; idx < sizeof(vary_information)/sizeof(*vary_information) && !retval; ++idx) {
for (idx = 0; idx < sizeof(vary_information)/sizeof(*vary_information) && retval >= 0; ++idx) {
info = &vary_information[idx];
ctx.blk = NULL;
if (info->norm_fn != NULL && http_find_header(htx, info->hdr_name, &ctx, 1)) {
retval = info->norm_fn(ctx.value, &txn->cache_secondary_hash[offset], &hash_length);
/* The normalizing functions will be in charge of getting the
* header values from the htx. This way they can manage multiple
* occurrences of their processed header. */
if ((vary_signature & info->value) && info->norm_fn != NULL &&
!(retval = info->norm_fn(htx, info->hdr_name, &txn->cache_secondary_hash[offset], &hash_length))) {
offset += hash_length;
}
else {