MEDIUM: cache: Add the Vary header support

Calculate a preliminary secondary key for every request we see so that
we can have a real secondary key if the response is cacheable and
contains a manageable Vary header.
The cache's ebtree is now allowed to have multiple entries with the same
primary key. Two of those entries will be distinguished thanks to
secondary keys stored in the cache_entry (based on hashes of a subset of
their headers).
When looking for an entry in the cache (cache_use), we still use the
primary key (built the same way as before), but in case of match, we
also need to check if the entry has a vary signature. If it has one, we
need to perform an extra check based on the newly built secondary key.
We will only be able to forge a response out of the cache if both the
primary and secondary keys match with one of our entries. Otherwise the
request will be forwarded to the server.
This commit is contained in:
Remi Tricot-Le Breton 2020-11-16 15:56:09 +01:00 committed by William Lallemand
parent 3d08236cb3
commit 1785f3dd96
2 changed files with 313 additions and 13 deletions

234
reg-tests/cache/vary.vtc vendored Normal file
View File

@ -0,0 +1,234 @@
varnishtest "Vary support"
#REQUIRE_VERSION=2.3
feature ignore_unknown_macro
server s1 {
# Response varying on "accept-encoding"
rxreq
expect req.url == "/accept-encoding"
txresp -nolen -hdr "Transfer-Encoding: chunked" \
-hdr "Content-Type: gzip" \
-hdr "Vary: accept-encoding" \
-hdr "Cache-Control: max-age=5"
chunkedlen 15
chunkedlen 15
chunkedlen 15
chunkedlen 0
# Response varying on "accept-encoding"
rxreq
expect req.url == "/accept-encoding"
txresp -nolen -hdr "Transfer-Encoding: chunked" \
-hdr "Content-Type: text/plain" \
-hdr "Vary: accept-encoding" \
-hdr "Cache-Control: max-age=5"
chunkedlen 16
chunkedlen 16
chunkedlen 16
chunkedlen 0
# Response varying on "accept-encoding" but having two different encodings
rxreq
expect req.url == "/accept-encoding-multiple"
txresp -nolen -hdr "Transfer-Encoding: chunked" \
-hdr "Vary: accept-encoding" \
-hdr "Cache-Control: max-age=5"
chunkedlen 17
chunkedlen 17
chunkedlen 17
chunkedlen 0
# Unmanaged vary
rxreq
expect req.url == "/unmanaged"
txresp -nolen -hdr "Transfer-Encoding: chunked" \
-hdr "Vary: accept-encoding,unmanaged" \
-hdr "Cache-Control: max-age=5"
chunkedlen 17
chunkedlen 17
chunkedlen 17
chunkedlen 0
rxreq
expect req.url == "/unmanaged"
txresp -nolen -hdr "Transfer-Encoding: chunked" \
-hdr "Vary: accept-encoding,unmanaged" \
-hdr "Cache-Control: max-age=5"
chunkedlen 17
chunkedlen 17
chunkedlen 17
chunkedlen 0
# Mixed Vary (Accept-Encoding + Referer)
rxreq
expect req.url == "/referer-accept-encoding"
txresp -nolen -hdr "Transfer-Encoding: chunked" \
-hdr "Vary: accept-encoding,referer" \
-hdr "Cache-Control: max-age=5"
chunkedlen 17
chunkedlen 17
chunkedlen 17
chunkedlen 0
rxreq
expect req.url == "/referer-accept-encoding"
txresp -nolen -hdr "Transfer-Encoding: chunked" \
-hdr "Vary: referer,accept-encoding" \
-hdr "Cache-Control: max-age=5"
chunkedlen 18
chunkedlen 18
chunkedlen 18
chunkedlen 0
rxreq
expect req.url == "/referer-accept-encoding"
txresp -nolen -hdr "Transfer-Encoding: chunked" \
-hdr "Vary: referer,accept-encoding" \
-hdr "Cache-Control: max-age=5"
chunkedlen 19
chunkedlen 19
chunkedlen 19
chunkedlen 0
} -start
haproxy h1 -conf {
defaults
mode http
${no-htx} option http-use-htx
timeout connect 1s
timeout client 1s
timeout server 1s
frontend fe
bind "fd@${fe}"
default_backend test
backend test
http-request cache-use my_cache
server www ${s1_addr}:${s1_port}
http-response cache-store my_cache
http-response set-header X-Cache-Hit %[res.cache_hit]
cache my_cache
total-max-size 3
max-age 20
max-object-size 3072
} -start
client c1 -connect ${h1_fe_sock} {
# Accept-Encoding Vary
txreq -url "/accept-encoding" -hdr "Accept-Encoding: first_value"
rxresp
expect resp.status == 200
expect resp.http.content-type == "gzip"
expect resp.bodylen == 45
txreq -url "/accept-encoding" -hdr "Accept-Encoding: second_value"
rxresp
expect resp.status == 200
expect resp.bodylen == 48
expect resp.http.content-type == "text/plain"
expect resp.http.X-Cache-Hit == 0
txreq -url "/accept-encoding" -hdr "Accept-Encoding: first_value"
rxresp
expect resp.status == 200
expect resp.bodylen == 45
expect resp.http.content-type == "gzip"
expect resp.http.X-Cache-Hit == 1
txreq -url "/accept-encoding" -hdr "Accept-Encoding: second_value"
rxresp
expect resp.status == 200
expect resp.bodylen == 48
expect resp.http.content-type == "text/plain"
expect resp.http.X-Cache-Hit == 1
# The accept-encoding normalizer function sorts the values alphabetically
# before calculating the secondary key
txreq -url "/accept-encoding-multiple" -hdr "Accept-Encoding: first,second"
rxresp
expect resp.status == 200
expect resp.bodylen == 51
expect resp.http.X-Cache-Hit == 0
txreq -url "/accept-encoding-multiple" -hdr "Accept-Encoding: first,second"
rxresp
expect resp.status == 200
expect resp.bodylen == 51
expect resp.http.X-Cache-Hit == 1
txreq -url "/accept-encoding-multiple" -hdr "Accept-Encoding: second,first"
rxresp
expect resp.status == 200
expect resp.bodylen == 51
expect resp.http.X-Cache-Hit == 1
# Unmanaged vary
txreq -url "/unmanaged" -hdr "Accept-Encoding: first_value"
rxresp
expect resp.status == 200
expect resp.bodylen == 51
expect resp.http.X-Cache-Hit == 0
txreq -url "/unmanaged" -hdr "Accept-Encoding: first_value"
rxresp
expect resp.status == 200
expect resp.bodylen == 51
expect resp.http.X-Cache-Hit == 0
# Mixed Vary (Accept-Encoding + Referer)
txreq -url "/referer-accept-encoding" \
-hdr "Accept-Encoding: first_value,second_value" \
-hdr "Referer: referer"
rxresp
expect resp.status == 200
expect resp.bodylen == 51
expect resp.http.X-Cache-Hit == 0
txreq -url "/referer-accept-encoding" \
-hdr "Accept-Encoding: first_value" \
-hdr "Referer: other-referer"
rxresp
expect resp.status == 200
expect resp.bodylen == 54
expect resp.http.X-Cache-Hit == 0
txreq -url "/referer-accept-encoding" \
-hdr "Accept-Encoding: second_value" \
-hdr "Referer: other-referer"
rxresp
expect resp.status == 200
expect resp.bodylen == 57
expect resp.http.X-Cache-Hit == 0
txreq -url "/referer-accept-encoding" \
-hdr "Referer: referer" \
-hdr "Accept-Encoding: second_value,first_value"
rxresp
expect resp.status == 200
expect resp.bodylen == 51
expect resp.http.X-Cache-Hit == 1
txreq -url "/referer-accept-encoding" \
-hdr "Accept-Encoding: first_value" \
-hdr "Referer: other-referer"
rxresp
expect resp.status == 200
expect resp.bodylen == 54
expect resp.http.X-Cache-Hit == 1
txreq -url "/referer-accept-encoding" \
-hdr "Accept-Encoding: second_value" \
-hdr "Referer: other-referer"
rxresp
expect resp.status == 200
expect resp.bodylen == 57
expect resp.http.X-Cache-Hit == 1
} -run

View File

@ -111,6 +111,10 @@ struct cache_entry {
struct eb32_node eb; /* ebtree node used to hold the cache object */
char hash[20];
char secondary_key[HTTP_CACHE_SEC_KEY_LEN]; /* Optional secondary key. */
unsigned int secondary_key_signature; /* Bitfield of the HTTP headers that should be used
* to build secondary keys for this cache entry. */
unsigned int etag_length; /* Length of the ETag value (if one was found in the response). */
unsigned int etag_offset; /* Offset of the ETag value in the data buffer. */
@ -121,10 +125,6 @@ struct cache_entry {
* be used in case of an "If-Modified-Since"-based
* conditional request. */
unsigned int secondary_key_signature; /* Bitfield of the HTTP headers that should be used
* to build secondary keys for this cache entry. */
char secondary_key[HTTP_CACHE_SEC_KEY_LEN]; /* Optional secondary key. */
unsigned char data[0];
};
@ -162,6 +162,13 @@ struct cache_entry *entry_exist(struct cache *cache, char *hash)
}
/*
* There can be multiple entries with the same primary key in the ebtree so in
* order to get the proper one out of the list, we use a secondary_key.
* This function simply iterates over all the entries with the same primary_key
* until it finds the right one.
* Returns the cache_entry in case of success, NULL otherwise.
*/
struct cache_entry *secondary_entry_exist(struct cache *cache, struct cache_entry *entry,
char *secondary_key)
{
@ -711,6 +718,7 @@ enum act_return http_action_store_cache(struct act_rule *rule, struct proxy *px,
unsigned int etag_offset = 0;
struct ist header_name = IST_NULL;
time_t last_modified = 0;
unsigned int vary_signature = 0;
/* Don't cache if the response came from a cache */
if ((obj_type(s->target) == OBJ_TYPE_APPLET) &&
@ -753,10 +761,12 @@ enum act_return http_action_store_cache(struct act_rule *rule, struct proxy *px,
htx->data + htx->extra > shctx->max_obj_size)
goto out;
/* Does not manage Vary at the moment. We will need a secondary key later for that */
ctx.blk = NULL;
if (http_find_header(htx, ist("Vary"), &ctx, 0))
/* Only a subset of headers are supported in our Vary implementation. If
* any other header is present in the Vary header value, we won't be
* able to use the cache. */
if (!http_check_vary_header(htx, &vary_signature)) {
goto out;
}
http_check_response_for_cacheability(s, &s->res);
@ -823,6 +833,7 @@ enum act_return http_action_store_cache(struct act_rule *rule, struct proxy *px,
object->eb.key = 0;
object->age = age;
object->last_modified = last_modified;
object->secondary_key_signature = vary_signature;
/* reserve space for the cache_entry structure */
first->len = sizeof(struct cache_entry);
@ -849,15 +860,28 @@ enum act_return http_action_store_cache(struct act_rule *rule, struct proxy *px,
object->eb.key = key;
memcpy(object->hash, txn->cache_hash, sizeof(object->hash));
/* Add the current request's secondary key to the buffer if needed. */
if (vary_signature) {
http_request_reduce_secondary_key(vary_signature, txn->cache_secondary_hash);
memcpy(object->secondary_key, txn->cache_secondary_hash, HTTP_CACHE_SEC_KEY_LEN);
}
/* Insert the node later on caching success */
shctx_lock(shctx);
old = entry_exist(cconf->c.cache, txn->cache_hash);
if (old) {
if (vary_signature)
old = secondary_entry_exist(cconf->c.cache, old,
txn->cache_secondary_hash);
if (old) {
eb32_delete(&old->eb);
old->eb.key = 0;
}
}
shctx_unlock(shctx);
/* store latest value and expiration time */
@ -1391,9 +1415,11 @@ enum act_return http_action_req_cache_use(struct act_rule *rule, struct proxy *p
{
struct http_txn *txn = s->txn;
struct cache_entry *res;
struct cache_entry *res, *sec_entry = NULL;
struct cache_flt_conf *cconf = rule->arg.act.p[0];
struct cache *cache = cconf->c.cache;
struct shared_block *entry_block;
/* Ignore cache for HTTP/1.0 requests and for requests other than GET
* and HEAD */
@ -1421,8 +1447,40 @@ enum act_return http_action_req_cache_use(struct act_rule *rule, struct proxy *p
res = entry_exist(cache, s->txn->cache_hash);
if (res) {
struct appctx *appctx;
shctx_row_inc_hot(shctx_ptr(cache), block_ptr(res));
entry_block = block_ptr(res);
shctx_row_inc_hot(shctx_ptr(cache), entry_block);
shctx_unlock(shctx_ptr(cache));
/* In case of Vary, we could have multiple entries with the same
* primary hash. We need to calculate the secondary hash in order
* to find the actual entry we want (if it exists). */
if (res->secondary_key_signature) {
if (!http_request_build_secondary_key(s, res->secondary_key_signature)) {
shctx_lock(shctx_ptr(cache));
sec_entry = secondary_entry_exist(cache, res,
s->txn->cache_secondary_hash);
if (sec_entry && sec_entry != res) {
/* The wrong row was added to the hot list. */
shctx_row_dec_hot(shctx_ptr(cache), entry_block);
entry_block = block_ptr(sec_entry);
shctx_row_inc_hot(shctx_ptr(cache), entry_block);
}
res = sec_entry;
shctx_unlock(shctx_ptr(cache));
}
else
res = NULL;
}
/* We looked for a valid secondary entry and could not find one,
* the request must be forwarded to the server. */
if (!res) {
shctx_lock(shctx_ptr(cache));
shctx_row_dec_hot(shctx_ptr(cache), entry_block);
shctx_unlock(shctx_ptr(cache));
return ACT_RET_CONT;
}
s->target = &http_cache_applet.obj_type;
if ((appctx = si_register_handler(&s->si[1], objt_applet(s->target)))) {
appctx->st0 = HTX_CACHE_INIT;
@ -1440,12 +1498,20 @@ enum act_return http_action_req_cache_use(struct act_rule *rule, struct proxy *p
return ACT_RET_CONT;
} else {
shctx_lock(shctx_ptr(cache));
shctx_row_dec_hot(shctx_ptr(cache), block_ptr(res));
shctx_row_dec_hot(shctx_ptr(cache), entry_block);
shctx_unlock(shctx_ptr(cache));
return ACT_RET_YIELD;
}
}
shctx_unlock(shctx_ptr(cache));
/* Shared context does not need to be locked while we calculate the
* secondary hash. */
if (!res) {
/* Build a complete secondary hash until the server response
* tells us which fields should be kept (if any). */
http_request_prebuild_full_secondary_key(s);
}
return ACT_RET_CONT;
}
@ -1651,7 +1717,7 @@ int post_check_cache()
* list */
memcpy(shctx->data, cache_config, sizeof(struct cache));
cache = (struct cache *)shctx->data;
cache->entries = EB_ROOT_UNIQUE;
cache->entries = EB_ROOT;
LIST_ADDQ(&caches, &cache->list);
LIST_DEL(&cache_config->list);
free(cache_config);