From 25bcdb1d950a1505e4169b1eca48f5af7a13243f Mon Sep 17 00:00:00 2001 From: Christopher Faulet Date: Wed, 15 May 2024 16:53:50 +0200 Subject: [PATCH] BUG/MAJOR: h1: Be stricter on request target validation during message parsing As stated in issue #2565, checks on the request target during H1 message parsing are not good enough. Invalid paths, not starting by a slash are in fact parsed as authorities. The same error is repeated at the sample fetch level. This last point is annoying because routing rules may be fooled. It is also an issue when the URI or the Host header are updated. Because the error is repeated at different places, it must be fixed. We cannot be lax by arguing it is the server's job to accept or reject invalid request targets. With this patch, we strengthen the checks performed on the request target during H1 parsing. Idea is to reject invalid requests at this step to be sure it is safe to manipulate the path or the authority at other places. So now, the asterisk-form is only allowed for OPTIONS and OTHER methods. This last point was added to not reject the H2 preface. In addition, we take care to have only one asterisk and nothing more. For the CONNECT method, we take care to have a valid authority-form. All other form are rejected. The authority-form is now only supported for CONNECT method. No specific check is performed on the origin-form (except for the CONNECT method). For the absolute-form, we take care to have a scheme and a valid authority. These checks are not perfect but should be good enough to properly identify each part of the request target for a relative small cost. But, it is a breaking change. Some requests are now be rejected while they was not on older versions. However, nowadays, it is most probably not an issue. If it turns out it's really an issue for legitimate use-cases, an option would be to supports these kinds of requests when the "accept-invalid-http-request" option is set, with the consequence of seeing some sample fetches having an unexpected behavior. This patch should fix the issue #2665. It MUST NOT be backported. First because it is a breaking change. And then because by avoiding backporting it, it remains possible to relax the parsing with the "accept-invalid-http-request" option. --- .../h1_request_target_validation.vtc | 111 ++++++++++++++++++ src/h1.c | 96 ++++++++++----- 2 files changed, 178 insertions(+), 29 deletions(-) create mode 100644 reg-tests/http-messaging/h1_request_target_validation.vtc diff --git a/reg-tests/http-messaging/h1_request_target_validation.vtc b/reg-tests/http-messaging/h1_request_target_validation.vtc new file mode 100644 index 0000000000..63e194ae3e --- /dev/null +++ b/reg-tests/http-messaging/h1_request_target_validation.vtc @@ -0,0 +1,111 @@ +varnishtest "HTTP request tests: H1 request target parsing" + +feature ignore_unknown_macro + +#REQUIRE_VERSION=3.0 + +haproxy h1 -conf { + global + # WT: limit false-positives causing "HTTP header incomplete" due to + # idle server connections being randomly used and randomly expiring + # under us. + tune.idle-pool.shared off + + defaults + mode http + timeout connect "${HAPROXY_TEST_TIMEOUT-5s}" + timeout client "${HAPROXY_TEST_TIMEOUT-5s}" + timeout server "${HAPROXY_TEST_TIMEOUT-5s}" + + listen li1 + bind "fd@${li1}" + http-request return status 200 +} -start + +client c1 -connect ${h1_li1_sock} { + txreq -req "OPTIONS" -url "*" + rxresp + expect resp.status == 200 + +} -run + +client c2 -connect ${h1_li1_sock} { + txreq -req "OPTIONS" -url "/" + rxresp + expect resp.status == 200 + +} -run + +client c3 -connect ${h1_li1_sock} { + txreq -req "OPTIONS" -url "http://haproxy.org" \ + -hdr "Host: haproxy.org" + rxresp + expect resp.status == 200 + +} -run + +client c4 -connect ${h1_li1_sock} { + txreq -req "OPTIONS" -url "*/test" + rxresp + expect resp.status == 400 +} -run + +client c5 -connect ${h1_li1_sock} { + txreq -req "GET" -url "*" + rxresp + expect resp.status == 400 +} -run + +client c6 -connect ${h1_li1_sock} { + txreq -req "CONNECT" -url "haproxy.org:80" \ + -hdr "Host: haproxy.org" + rxresp + expect resp.status == 200 + +} -run + +client c7 -connect ${h1_li1_sock} { + txreq -req "CONNECT" -url "haproxy.org" \ + -hdr "Host: haproxy.org" + rxresp + expect resp.status == 400 +} -run + +client c8 -connect ${h1_li1_sock} { + txreq -req "CONNECT" -url "/" + rxresp + expect resp.status == 400 +} -run + +client c9 -connect ${h1_li1_sock} { + txreq -req "CONNECT" -url "http://haproxy.org:80" \ + -hdr "Host: haproxy.org" + rxresp + expect resp.status == 400 +} -run + +client c11 -connect ${h1_li1_sock} { + txreq -req "GET" -url "/" \ + -hdr "Host: haproxy.org" + rxresp + expect resp.status == 200 +} -run + +client c12 -connect ${h1_li1_sock} { + txreq -req "GET" -url "haproxy.org:80" \ + -hdr "Host: haproxy.org" + rxresp + expect resp.status == 400 +} -run + +client c13 -connect ${h1_li1_sock} { + txreq -req "GET" -url "admin" + rxresp + expect resp.status == 400 +} -run + +client c14 -connect ${h1_li1_sock} { + txreq -req "GET" -url "admin/a/b" + rxresp + expect resp.status == 400 +} -run diff --git a/src/h1.c b/src/h1.c index b20327c10b..bbaa2f67ce 100644 --- a/src/h1.c +++ b/src/h1.c @@ -1103,47 +1103,85 @@ int h1_headers_to_hdr_list(char *start, const char *stop, struct ist scheme, authority = IST_NULL; int ret; - scheme = http_parse_scheme(&parser); - if (istlen(scheme) || sl.rq.meth == HTTP_METH_CONNECT) { - /* Expect an authority if for CONNECT method or if there is a scheme */ - authority = http_parse_authority(&parser, 1); - } + /* WT: gcc seems to see a path where sl.rq.u.ptr was used + * uninitialized, but it doesn't know that the function is + * called with initial states making this impossible. + */ + ALREADY_CHECKED(sl.rq.u.ptr); + switch (parser.format) { + case URI_PARSER_FORMAT_ASTERISK: + /* We must take care "PRI * HTTP/2.0" is supported here. check for OTHER methods here is enough */ + if ((sl.rq.meth != HTTP_METH_OTHER && sl.rq.meth != HTTP_METH_OPTIONS) || istlen(sl.rq.u) != 1) { + ptr = sl.rq.u.ptr; /* Set ptr on the error */ + goto http_msg_invalid; + } + break; - if (sl.rq.meth == HTTP_METH_CONNECT) { - struct ist *host = ((host_idx != -1) ? &hdr[host_idx].v : NULL); + case URI_PARSER_FORMAT_ABSPATH: + if (sl.rq.meth == HTTP_METH_CONNECT) { + ptr = sl.rq.u.ptr; /* Set ptr on the error */ + goto http_msg_invalid; + } + break; - ret = h1_validate_connect_authority(scheme, authority, host); - if (ret < 0) { - if (h1m->err_pos < -1) { - state = H1_MSG_LAST_LF; - /* WT: gcc seems to see a path where sl.rq.u.ptr was used - * uninitialized, but it doesn't know that the function is - * called with initial states making this impossible. - */ - ALREADY_CHECKED(sl.rq.u.ptr); - ptr = ((ret == -1) ? sl.rq.u.ptr : host->ptr); /* Set ptr on the error */ + case URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY: + scheme = http_parse_scheme(&parser); + if (!isttest(scheme)) { /* scheme not found: MUST be an authority */ + struct ist *host = NULL; + + if (sl.rq.meth != HTTP_METH_CONNECT) { + ptr = sl.rq.u.ptr; /* Set ptr on the error */ goto http_msg_invalid; } - if (h1m->err_pos == -1) /* capture the error pointer */ - h1m->err_pos = ((ret == -1) ? sl.rq.u.ptr : host->ptr) - start + skip; /* >= 0 now */ - } - } - else if (host_idx != -1 && istlen(authority)) { - struct ist host = hdr[host_idx].v; - - /* For non-CONNECT method, the authority must match the host header value */ - if (!isteqi(authority, host)) { - ret = h1_validate_mismatch_authority(scheme, authority, host); + if (host_idx != -1) + host = &hdr[host_idx].v; + authority = http_parse_authority(&parser, 1); + ret = h1_validate_connect_authority(scheme, authority, host); if (ret < 0) { if (h1m->err_pos < -1) { state = H1_MSG_LAST_LF; - ptr = host.ptr; /* Set ptr on the error */ + /* WT: gcc seems to see a path where sl.rq.u.ptr was used + * uninitialized, but it doesn't know that the function is + * called with initial states making this impossible. + */ + ALREADY_CHECKED(sl.rq.u.ptr); + ptr = ((ret == -1) ? sl.rq.u.ptr : host->ptr); /* Set ptr on the error */ goto http_msg_invalid; } if (h1m->err_pos == -1) /* capture the error pointer */ - h1m->err_pos = v.ptr - start + skip; /* >= 0 now */ + h1m->err_pos = ((ret == -1) ? sl.rq.u.ptr : host->ptr) - start + skip; /* >= 0 now */ } } + else { /* Scheme found: MUST be an absolute-URI */ + struct ist host = IST_NULL; + + if (sl.rq.meth == HTTP_METH_CONNECT) { + ptr = sl.rq.u.ptr; /* Set ptr on the error */ + goto http_msg_invalid; + } + + if (host_idx != -1) + host = hdr[host_idx].v; + authority = http_parse_authority(&parser, 1); + /* For non-CONNECT method, the authority must match the host header value */ + if (isttest(host) && !isteqi(authority, host)) { + ret = h1_validate_mismatch_authority(scheme, authority, host); + if (ret < 0) { + if (h1m->err_pos < -1) { + state = H1_MSG_LAST_LF; + ptr = host.ptr; /* Set ptr on the error */ + goto http_msg_invalid; + } + if (h1m->err_pos == -1) /* capture the error pointer */ + h1m->err_pos = v.ptr - start + skip; /* >= 0 now */ + } + } + } + break; + + default: + ptr = sl.rq.u.ptr; /* Set ptr on the error */ + goto http_msg_invalid; } }