BUG/MAJOR: h1: Be stricter on request target validation during message parsing

As stated in issue #2565, checks on the request target during H1 message
parsing are not good enough. Invalid paths, not starting by a slash are in
fact parsed as authorities. The same error is repeated at the sample fetch
level. This last point is annoying because routing rules may be fooled. It
is also an issue when the URI or the Host header are updated.

Because the error is repeated at different places, it must be fixed. We
cannot be lax by arguing it is the server's job to accept or reject invalid
request targets. With this patch, we strengthen the checks performed on the
request target during H1 parsing. Idea is to reject invalid requests at this
step to be sure it is safe to manipulate the path or the authority at other
places.

So now, the asterisk-form is only allowed for OPTIONS and OTHER methods.
This last point was added to not reject the H2 preface. In addition, we take
care to have only one asterisk and nothing more. For the CONNECT method, we
take care to have a valid authority-form. All other form are rejected. The
authority-form is now only supported for CONNECT method. No specific check
is performed on the origin-form (except for the CONNECT method). For the
absolute-form, we take care to have a scheme and a valid authority.

These checks are not perfect but should be good enough to properly identify
each part of the request target for a relative small cost. But, it is a
breaking change. Some requests are now be rejected while they was not on
older versions. However, nowadays, it is most probably not an issue.  If it
turns out it's really an issue for legitimate use-cases, an option would be
to supports these kinds of requests when the "accept-invalid-http-request"
option is set, with the consequence of seeing some sample fetches having an
unexpected behavior.

This patch should fix the issue #2665. It MUST NOT be backported. First
because it is a breaking change. And then because by avoiding backporting
it, it remains possible to relax the parsing with the
"accept-invalid-http-request" option.
This commit is contained in:
Christopher Faulet 2024-05-15 16:53:50 +02:00
parent d3d9d83f03
commit 25bcdb1d95
2 changed files with 178 additions and 29 deletions

View File

@ -0,0 +1,111 @@
varnishtest "HTTP request tests: H1 request target parsing"
feature ignore_unknown_macro
#REQUIRE_VERSION=3.0
haproxy h1 -conf {
global
# WT: limit false-positives causing "HTTP header incomplete" due to
# idle server connections being randomly used and randomly expiring
# under us.
tune.idle-pool.shared off
defaults
mode http
timeout connect "${HAPROXY_TEST_TIMEOUT-5s}"
timeout client "${HAPROXY_TEST_TIMEOUT-5s}"
timeout server "${HAPROXY_TEST_TIMEOUT-5s}"
listen li1
bind "fd@${li1}"
http-request return status 200
} -start
client c1 -connect ${h1_li1_sock} {
txreq -req "OPTIONS" -url "*"
rxresp
expect resp.status == 200
} -run
client c2 -connect ${h1_li1_sock} {
txreq -req "OPTIONS" -url "/"
rxresp
expect resp.status == 200
} -run
client c3 -connect ${h1_li1_sock} {
txreq -req "OPTIONS" -url "http://haproxy.org" \
-hdr "Host: haproxy.org"
rxresp
expect resp.status == 200
} -run
client c4 -connect ${h1_li1_sock} {
txreq -req "OPTIONS" -url "*/test"
rxresp
expect resp.status == 400
} -run
client c5 -connect ${h1_li1_sock} {
txreq -req "GET" -url "*"
rxresp
expect resp.status == 400
} -run
client c6 -connect ${h1_li1_sock} {
txreq -req "CONNECT" -url "haproxy.org:80" \
-hdr "Host: haproxy.org"
rxresp
expect resp.status == 200
} -run
client c7 -connect ${h1_li1_sock} {
txreq -req "CONNECT" -url "haproxy.org" \
-hdr "Host: haproxy.org"
rxresp
expect resp.status == 400
} -run
client c8 -connect ${h1_li1_sock} {
txreq -req "CONNECT" -url "/"
rxresp
expect resp.status == 400
} -run
client c9 -connect ${h1_li1_sock} {
txreq -req "CONNECT" -url "http://haproxy.org:80" \
-hdr "Host: haproxy.org"
rxresp
expect resp.status == 400
} -run
client c11 -connect ${h1_li1_sock} {
txreq -req "GET" -url "/" \
-hdr "Host: haproxy.org"
rxresp
expect resp.status == 200
} -run
client c12 -connect ${h1_li1_sock} {
txreq -req "GET" -url "haproxy.org:80" \
-hdr "Host: haproxy.org"
rxresp
expect resp.status == 400
} -run
client c13 -connect ${h1_li1_sock} {
txreq -req "GET" -url "admin"
rxresp
expect resp.status == 400
} -run
client c14 -connect ${h1_li1_sock} {
txreq -req "GET" -url "admin/a/b"
rxresp
expect resp.status == 400
} -run

View File

@ -1103,47 +1103,85 @@ int h1_headers_to_hdr_list(char *start, const char *stop,
struct ist scheme, authority = IST_NULL;
int ret;
scheme = http_parse_scheme(&parser);
if (istlen(scheme) || sl.rq.meth == HTTP_METH_CONNECT) {
/* Expect an authority if for CONNECT method or if there is a scheme */
authority = http_parse_authority(&parser, 1);
}
/* WT: gcc seems to see a path where sl.rq.u.ptr was used
* uninitialized, but it doesn't know that the function is
* called with initial states making this impossible.
*/
ALREADY_CHECKED(sl.rq.u.ptr);
switch (parser.format) {
case URI_PARSER_FORMAT_ASTERISK:
/* We must take care "PRI * HTTP/2.0" is supported here. check for OTHER methods here is enough */
if ((sl.rq.meth != HTTP_METH_OTHER && sl.rq.meth != HTTP_METH_OPTIONS) || istlen(sl.rq.u) != 1) {
ptr = sl.rq.u.ptr; /* Set ptr on the error */
goto http_msg_invalid;
}
break;
if (sl.rq.meth == HTTP_METH_CONNECT) {
struct ist *host = ((host_idx != -1) ? &hdr[host_idx].v : NULL);
case URI_PARSER_FORMAT_ABSPATH:
if (sl.rq.meth == HTTP_METH_CONNECT) {
ptr = sl.rq.u.ptr; /* Set ptr on the error */
goto http_msg_invalid;
}
break;
ret = h1_validate_connect_authority(scheme, authority, host);
if (ret < 0) {
if (h1m->err_pos < -1) {
state = H1_MSG_LAST_LF;
/* WT: gcc seems to see a path where sl.rq.u.ptr was used
* uninitialized, but it doesn't know that the function is
* called with initial states making this impossible.
*/
ALREADY_CHECKED(sl.rq.u.ptr);
ptr = ((ret == -1) ? sl.rq.u.ptr : host->ptr); /* Set ptr on the error */
case URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY:
scheme = http_parse_scheme(&parser);
if (!isttest(scheme)) { /* scheme not found: MUST be an authority */
struct ist *host = NULL;
if (sl.rq.meth != HTTP_METH_CONNECT) {
ptr = sl.rq.u.ptr; /* Set ptr on the error */
goto http_msg_invalid;
}
if (h1m->err_pos == -1) /* capture the error pointer */
h1m->err_pos = ((ret == -1) ? sl.rq.u.ptr : host->ptr) - start + skip; /* >= 0 now */
}
}
else if (host_idx != -1 && istlen(authority)) {
struct ist host = hdr[host_idx].v;
/* For non-CONNECT method, the authority must match the host header value */
if (!isteqi(authority, host)) {
ret = h1_validate_mismatch_authority(scheme, authority, host);
if (host_idx != -1)
host = &hdr[host_idx].v;
authority = http_parse_authority(&parser, 1);
ret = h1_validate_connect_authority(scheme, authority, host);
if (ret < 0) {
if (h1m->err_pos < -1) {
state = H1_MSG_LAST_LF;
ptr = host.ptr; /* Set ptr on the error */
/* WT: gcc seems to see a path where sl.rq.u.ptr was used
* uninitialized, but it doesn't know that the function is
* called with initial states making this impossible.
*/
ALREADY_CHECKED(sl.rq.u.ptr);
ptr = ((ret == -1) ? sl.rq.u.ptr : host->ptr); /* Set ptr on the error */
goto http_msg_invalid;
}
if (h1m->err_pos == -1) /* capture the error pointer */
h1m->err_pos = v.ptr - start + skip; /* >= 0 now */
h1m->err_pos = ((ret == -1) ? sl.rq.u.ptr : host->ptr) - start + skip; /* >= 0 now */
}
}
else { /* Scheme found: MUST be an absolute-URI */
struct ist host = IST_NULL;
if (sl.rq.meth == HTTP_METH_CONNECT) {
ptr = sl.rq.u.ptr; /* Set ptr on the error */
goto http_msg_invalid;
}
if (host_idx != -1)
host = hdr[host_idx].v;
authority = http_parse_authority(&parser, 1);
/* For non-CONNECT method, the authority must match the host header value */
if (isttest(host) && !isteqi(authority, host)) {
ret = h1_validate_mismatch_authority(scheme, authority, host);
if (ret < 0) {
if (h1m->err_pos < -1) {
state = H1_MSG_LAST_LF;
ptr = host.ptr; /* Set ptr on the error */
goto http_msg_invalid;
}
if (h1m->err_pos == -1) /* capture the error pointer */
h1m->err_pos = v.ptr - start + skip; /* >= 0 now */
}
}
}
break;
default:
ptr = sl.rq.u.ptr; /* Set ptr on the error */
goto http_msg_invalid;
}
}