From b229f018eedef4d18571ce6da23d8e153249a836 Mon Sep 17 00:00:00 2001 From: Tim Duesterhus Date: Tue, 29 Jan 2019 16:38:56 +0100 Subject: [PATCH] BUG/MEDIUM: compression: Rewrite strong ETags RFC 7232 section 2.3.3 states: > Note: Content codings are a property of the representation data, > so a strong entity-tag for a content-encoded representation has to > be distinct from the entity tag of an unencoded representation to > prevent potential conflicts during cache updates and range > requests. In contrast, transfer codings (Section 4 of [RFC7230]) > apply only during message transfer and do not result in distinct > entity-tags. Thus a strong ETag must be changed when compressing. Usually this is done by converting it into a weak ETag, which represents a semantically, but not byte-by-byte identical response. A conversion to a weak ETag still allows If-None-Match to work. This should be backported to 1.9 and might be backported to every supported branch with compression. --- doc/configuration.txt | 3 +- reg-tests/compression/h00001.vtc | 226 +++++++++++++++++++++++++++++++ src/flt_http_comp.c | 66 ++++++++- 3 files changed, 289 insertions(+), 6 deletions(-) create mode 100644 reg-tests/compression/h00001.vtc diff --git a/doc/configuration.txt b/doc/configuration.txt index 55e56be5f7..00532a895a 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -3114,8 +3114,7 @@ compression offload * The response contains a "Content-Encoding" header, indicating that the response is already compressed (see compression offload) - Note: The compression does not rewrite Etag headers, and does not emit the - Warning header. + Note: The compression does not emit the Warning header. Examples : compression algo gzip diff --git a/reg-tests/compression/h00001.vtc b/reg-tests/compression/h00001.vtc new file mode 100644 index 0000000000..0cae679592 --- /dev/null +++ b/reg-tests/compression/h00001.vtc @@ -0,0 +1,226 @@ +varnishtest "Compression converts strong ETags to weak ETags" + +#REQUIRE_VERSION=1.9 +#REQUIRE_OPTION=ZLIB|SLZ + +feature ignore_unknown_macro + +server s1 { + rxreq + expect req.url == "/strong" + expect req.http.accept-encoding == "gzip" + txresp \ + -hdr "Content-Type: text/plain" \ + -hdr "ETag: \"123\"" \ + -bodylen 100 + + rxreq + expect req.url == "/weak" + expect req.http.accept-encoding == "gzip" + txresp \ + -hdr "Content-Type: text/plain" \ + -hdr "ETag: W/\"456\"" \ + -bodylen 100 + + rxreq + expect req.url == "/weak-incorrect-quoting" + expect req.http.accept-encoding == "gzip" + txresp \ + -hdr "Content-Type: text/plain" \ + -hdr "ETag: \"W/789\"" \ + -bodylen 100 + + rxreq + expect req.url == "/empty-strong" + expect req.http.accept-encoding == "gzip" + txresp \ + -hdr "Content-Type: text/plain" \ + -hdr "ETag: \"\"" \ + -bodylen 100 + + rxreq + expect req.url == "/empty-weak" + expect req.http.accept-encoding == "gzip" + txresp \ + -hdr "Content-Type: text/plain" \ + -hdr "ETag: W/\"\"" \ + -bodylen 100 + + rxreq + expect req.url == "/invalid1" + expect req.http.accept-encoding == "gzip" + txresp \ + -hdr "Content-Type: text/plain" \ + -hdr "ETag: \"invalid" \ + -bodylen 100 + + rxreq + expect req.url == "/invalid2" + expect req.http.accept-encoding == "gzip" + txresp \ + -hdr "Content-Type: text/plain" \ + -hdr "ETag: invalid\"" \ + -bodylen 100 + + rxreq + expect req.url == "/invalid3" + expect req.http.accept-encoding == "gzip" + txresp \ + -hdr "Content-Type: text/plain" \ + -hdr "ETag: invalid" \ + -bodylen 100 + + rxreq + expect req.url == "/invalid4" + expect req.http.accept-encoding == "gzip" + txresp \ + -hdr "Content-Type: text/plain" \ + -hdr "ETag: W/\"invalid" \ + -bodylen 100 + + rxreq + expect req.url == "/invalid5" + expect req.http.accept-encoding == "gzip" + txresp \ + -hdr "Content-Type: text/plain" \ + -hdr "ETag: W/invalid\"" \ + -bodylen 100 + + rxreq + expect req.url == "/invalid6" + expect req.http.accept-encoding == "gzip" + txresp \ + -hdr "Content-Type: text/plain" \ + -hdr "ETag: W/invalid" \ + -bodylen 100 + + rxreq + expect req.url == "/multiple" + expect req.http.accept-encoding == "gzip" + txresp \ + -hdr "Content-Type: text/plain" \ + -hdr "ETag: \"one\"" \ + -hdr "ETag: \"two\"" \ + -bodylen 100 +} -start + + +haproxy h1 -conf { + defaults + mode http + ${no-htx} option http-use-htx + timeout connect 1s + timeout client 1s + timeout server 1s + + frontend fe-gzip + bind "fd@${fe_gzip}" + default_backend be-gzip + + backend be-gzip + compression algo gzip + compression type text/html text/plain + server www ${s1_addr}:${s1_port} +} -start + +client c1 -connect ${h1_fe_gzip_sock} { + txreq -url "/strong" \ + -hdr "Accept-Encoding: gzip" + rxresp + expect resp.status == 200 + expect resp.http.content-encoding == "gzip" + expect resp.http.etag == "W/\"123\"" + gunzip + expect resp.bodylen == 100 + + txreq -url "/weak" \ + -hdr "Accept-Encoding: gzip" + rxresp + expect resp.status == 200 + expect resp.http.content-encoding == "gzip" + expect resp.http.etag == "W/\"456\"" + gunzip + expect resp.bodylen == 100 + + txreq -url "/weak-incorrect-quoting" \ + -hdr "Accept-Encoding: gzip" + rxresp + expect resp.status == 200 + expect resp.http.content-encoding == "gzip" + expect resp.http.etag == "W/\"W/789\"" + gunzip + expect resp.bodylen == 100 + + txreq -url "/empty-strong" \ + -hdr "Accept-Encoding: gzip" + rxresp + expect resp.status == 200 + expect resp.http.content-encoding == "gzip" + expect resp.http.etag == "W/\"\"" + gunzip + expect resp.bodylen == 100 + + txreq -url "/empty-weak" \ + -hdr "Accept-Encoding: gzip" + rxresp + expect resp.status == 200 + expect resp.http.content-encoding == "gzip" + expect resp.http.etag == "W/\"\"" + gunzip + expect resp.bodylen == 100 + + txreq -url "/invalid1" \ + -hdr "Accept-Encoding: gzip" + rxresp + expect resp.status == 200 + expect resp.http.content-encoding == "" + expect resp.http.etag == "\"invalid" + expect resp.bodylen == 100 + + txreq -url "/invalid2" \ + -hdr "Accept-Encoding: gzip" + rxresp + expect resp.status == 200 + expect resp.http.content-encoding == "" + expect resp.http.etag == "invalid\"" + expect resp.bodylen == 100 + + txreq -url "/invalid3" \ + -hdr "Accept-Encoding: gzip" + rxresp + expect resp.status == 200 + expect resp.http.content-encoding == "" + expect resp.http.etag == "invalid" + expect resp.bodylen == 100 + + txreq -url "/invalid4" \ + -hdr "Accept-Encoding: gzip" + rxresp + expect resp.status == 200 + expect resp.http.content-encoding == "" + expect resp.http.etag == "W/\"invalid" + expect resp.bodylen == 100 + + txreq -url "/invalid5" \ + -hdr "Accept-Encoding: gzip" + rxresp + expect resp.status == 200 + expect resp.http.content-encoding == "" + expect resp.http.etag == "W/invalid\"" + expect resp.bodylen == 100 + + txreq -url "/invalid6" \ + -hdr "Accept-Encoding: gzip" + rxresp + expect resp.status == 200 + expect resp.http.content-encoding == "" + expect resp.http.etag == "W/invalid" + expect resp.bodylen == 100 + + txreq -url "/multiple" \ + -hdr "Accept-Encoding: gzip" + rxresp + expect resp.status == 200 + expect resp.http.content-encoding == "" + expect resp.bodylen == 100 +} -run diff --git a/src/flt_http_comp.c b/src/flt_http_comp.c index f8fbf4c9d2..cfe864e35a 100644 --- a/src/flt_http_comp.c +++ b/src/flt_http_comp.c @@ -466,6 +466,7 @@ static int http_set_comp_reshdr(struct comp_state *st, struct stream *s, struct http_msg *msg) { struct http_txn *txn = s->txn; + struct hdr_ctx ctx; /* * Add Content-Encoding header when it's not identity encoding. @@ -486,8 +487,6 @@ http_set_comp_reshdr(struct comp_state *st, struct stream *s, struct http_msg *m /* remove Content-Length header */ if (msg->flags & HTTP_MSGF_CNT_LEN) { - struct hdr_ctx ctx; - ctx.idx = 0; while (http_find_header2("Content-Length", 14, ci_head(&s->res), &txn->hdr_idx, &ctx)) http_remove_header2(msg, &txn->hdr_idx, &ctx); @@ -499,6 +498,22 @@ http_set_comp_reshdr(struct comp_state *st, struct stream *s, struct http_msg *m goto error; } + ctx.idx = 0; + if (http_find_full_header2("ETag", 4, ci_head(&s->res), &txn->hdr_idx, &ctx)) { + if (ctx.line[ctx.val] == '"') { + /* This a strong ETag. Convert it to a weak one. */ + trash.data = 8; + if (trash.data + ctx.vlen > trash.size) + goto error; + memcpy(trash.area, "ETag: W/", trash.data); + memcpy(trash.area + trash.data, ctx.line + ctx.val, ctx.vlen); + trash.data += ctx.vlen; + trash.area[trash.data] = '\0'; + http_remove_header2(msg, &txn->hdr_idx, &ctx); + if (http_header_add_tail2(msg, &txn->hdr_idx, trash.area, trash.data) < 0) + goto error; + } + } return 1; @@ -512,6 +527,7 @@ static int htx_set_comp_reshdr(struct comp_state *st, struct stream *s, struct http_msg *msg) { struct htx *htx = htxbuf(&msg->chn->buf); + struct http_hdr_ctx ctx; /* * Add Content-Encoding header when it's not identity encoding. @@ -528,8 +544,6 @@ htx_set_comp_reshdr(struct comp_state *st, struct stream *s, struct http_msg *ms /* remove Content-Length header */ if (msg->flags & HTTP_MSGF_CNT_LEN) { - struct http_hdr_ctx ctx; - ctx.blk = NULL; while (http_find_header(htx, ist("Content-Length"), &ctx, 1)) http_remove_header(htx, &ctx); @@ -541,6 +555,20 @@ htx_set_comp_reshdr(struct comp_state *st, struct stream *s, struct http_msg *ms goto error; } + /* convert "ETag" header to a weak ETag */ + ctx.blk = NULL; + if (http_find_header(htx, ist("ETag"), &ctx, 1)) { + if (ctx.value.ptr[0] == '"') { + /* This a strong ETag. Convert it to a weak one. */ + struct ist v = ist2(trash.area, 0); + if (istcat(&v, ist("W/"), trash.size) == -1 || istcat(&v, ctx.value, trash.size) == -1) + goto error; + + if (!http_replace_header_value(htx, &ctx, v)) + goto error; + } + } + return 1; error: @@ -843,6 +871,21 @@ http_select_comp_reshdr(struct comp_state *st, struct stream *s, struct http_msg goto fail; } + /* no compression when ETag is malformed */ + ctx.idx = 0; + if (http_find_full_header2("ETag", 4, ci_head(c), &txn->hdr_idx, &ctx)) { + if (!(((ctx.vlen >= 4 && memcmp(ctx.line + ctx.val, "W/\"", 3) == 0) || /* Either a weak ETag */ + (ctx.vlen >= 2 && ctx.line[ctx.val] == '"')) && /* or strong ETag */ + ctx.line[ctx.val + ctx.vlen - 1] == '"')) { + goto fail; + } + } + /* no compression when multiple ETags are present + * Note: Do not reset ctx.idx! + */ + if (http_find_full_header2("ETag", 4, ci_head(c), &txn->hdr_idx, &ctx)) + goto fail; + comp_type = NULL; /* we don't want to compress multipart content-types, nor content-types that are @@ -939,6 +982,21 @@ htx_select_comp_reshdr(struct comp_state *st, struct stream *s, struct http_msg goto fail; } + /* no compression when ETag is malformed */ + ctx.blk = NULL; + if (http_find_header(htx, ist("ETag"), &ctx, 1)) { + if (!(((ctx.value.len >= 4 && memcmp(ctx.value.ptr, "W/\"", 3) == 0) || /* Either a weak ETag */ + (ctx.value.len >= 2 && ctx.value.ptr[0] == '"')) && /* or strong ETag */ + ctx.value.ptr[ctx.value.len - 1] == '"')) { + goto fail; + } + } + /* no compression when multiple ETags are present + * Note: Do not reset ctx.blk! + */ + if (http_find_header(htx, ist("ETag"), &ctx, 1)) + goto fail; + comp_type = NULL; /* we don't want to compress multipart content-types, nor content-types that are