From fcdfd857b3d2e7181e96c182cede96fc9aa3c099 Mon Sep 17 00:00:00 2001 From: Maciej Zdeb Date: Mon, 30 Nov 2020 18:27:47 +0000 Subject: [PATCH] MINOR: log: Logging HTTP path only with %HPO This patch adds a new logging variable '%HPO' for logging HTTP path only (without query string) from relative or absolute URI. For example: log-format "hpo=%HPO hp=%HP hu=%HU hq=%HQ" GET /r/1 HTTP/1.1 => hpo=/r/1 hp=/r/1 hu=/r/1 hq= GET /r/2?q=2 HTTP/1.1 => hpo=/r/2 hp=/r/2 hu=/r/2?q=2 hq=?q=2 GET http://host/r/3 HTTP/1.1 => hpo=/r/3 hp=http://host/r/3 hu=http://host/r/3 hq= GET http://host/r/4?q=4 HTTP/1.1 => hpo=/r/4 hp=http://host/r/4 hu=http://host/r/4?q=4 hq=?q=4 --- doc/configuration.txt | 1 + include/haproxy/log-t.h | 1 + reg-tests/log/log_uri.vtc | 59 +++++++++++++++++++++++++++++++++++++++ src/log.c | 50 ++++++++++++++++++++++++++++++++- 4 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 reg-tests/log/log_uri.vtc diff --git a/doc/configuration.txt b/doc/configuration.txt index d30048da0..bc2ad0168 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -19635,6 +19635,7 @@ Please refer to the table below for currently defined variables : | | %H | hostname | string | | H | %HM | HTTP method (ex: POST) | string | | H | %HP | HTTP request URI without query string | string | + | H | %HPO | HTTP path only (without host nor query string)| string | | H | %HQ | HTTP request URI query string (ex: ?bar=baz) | string | | H | %HU | HTTP request URI (ex: /foo?bar=baz) | string | | H | %HV | HTTP version (ex: HTTP/1.0) | string | diff --git a/include/haproxy/log-t.h b/include/haproxy/log-t.h index c8f52f554..9146b77ba 100644 --- a/include/haproxy/log-t.h +++ b/include/haproxy/log-t.h @@ -169,6 +169,7 @@ enum { LOG_FMT_HTTP_METHOD, LOG_FMT_HTTP_URI, LOG_FMT_HTTP_PATH, + LOG_FMT_HTTP_PATH_ONLY, LOG_FMT_HTTP_QUERY, LOG_FMT_HTTP_VERSION, LOG_FMT_HOSTNAME, diff --git a/reg-tests/log/log_uri.vtc b/reg-tests/log/log_uri.vtc new file mode 100644 index 
000000000..934a3ef10 --- /dev/null +++ b/reg-tests/log/log_uri.vtc @@ -0,0 +1,59 @@ +varnishtest "Verify logging of relative/absolute URI path" +feature ignore_unknown_macro + +server s1 { + rxreq + txresp +} -repeat 4 -start + +syslog Slg_1 -level info { + recv + expect ~ "[^:\\[ ]\\[${h1_pid}\\]: .* hpo=/r/1 hp=/r/1 hu=/r/1 hq=" + recv + expect ~ "[^:\\[ ]\\[${h1_pid}\\]: .* hpo=/r/2 hp=/r/2 hu=/r/2\\?q=2 hq=\\?q=2" + recv + expect ~ "[^:\\[ ]\\[${h1_pid}\\]: .* hpo=/r/3 hp=http://localhost/r/3 hu=http://localhost/r/3 hq=" + recv + expect ~ "[^:\\[ ]\\[${h1_pid}\\]: .* hpo=/r/4 hp=http://localhost/r/4 hu=http://localhost/r/4\\?q=4 hq=\\?q=4" +} -start + +haproxy h1 -conf { + global + nbthread 1 + + defaults + mode http + option httplog + timeout connect 1000 + timeout client 1000 + timeout server 1000 + + frontend fe1 + bind "fd@${fe_1}" + log ${Slg_1_addr}:${Slg_1_port} local0 + log-format "ci:%cp [%tr] hpo=%HPO hp=%HP hu=%HU hq=%HQ" + default_backend be + + backend be + server app1 ${s1_addr}:${s1_port} +} -start + +# The following client is started in the background and synchronized +client c1 -connect ${h1_fe_1_sock} { + txreq -url "/r/1" + rxresp + expect resp.status == 200 + txreq -url "/r/2?q=2" + rxresp + expect resp.status == 200 + txreq -url "http://localhost/r/3" -hdr "host: localhost" + rxresp + expect resp.status == 200 + txreq -url "http://localhost/r/4?q=4" -hdr "host: localhost" + rxresp + expect resp.status == 200 +} -start + +syslog Slg_1 -wait + +client c1 -wait diff --git a/src/log.c b/src/log.c index 6014bfc2d..4e6dc3008 100644 --- a/src/log.c +++ b/src/log.c @@ -169,7 +169,8 @@ static const struct logformat_type logformat_keywords[] = { { "hs", LOG_FMT_HDRRESPONS, PR_MODE_TCP, LW_RSPHDR, NULL }, /* header response */ { "hsl", LOG_FMT_HDRRESPONSLIST, PR_MODE_TCP, LW_RSPHDR, NULL }, /* header response list */ { "HM", LOG_FMT_HTTP_METHOD, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP method */ - { "HP", LOG_FMT_HTTP_PATH, PR_MODE_HTTP, LW_REQ, NULL }, /* 
HTTP path */ + { "HP", LOG_FMT_HTTP_PATH, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP relative or absolute path */ + { "HPO", LOG_FMT_HTTP_PATH_ONLY, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP path only (without host nor query string) */ { "HQ", LOG_FMT_HTTP_QUERY, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP query */ { "HU", LOG_FMT_HTTP_URI, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP full URI */ { "HV", LOG_FMT_HTTP_VERSION, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP version */ @@ -2102,6 +2103,7 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t struct logformat_node *tmp; struct timeval tv; struct strm_logs tmp_strm_log; + struct ist path; /* FIXME: let's limit ourselves to frontend logging for now. */ @@ -2855,6 +2857,52 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t last_isspace = 0; break; + case LOG_FMT_HTTP_PATH_ONLY: // %HPO + uri = txn && txn->uri ? txn->uri : ""; + + if (tmp->options & LOG_OPT_QUOTE) + LOGCHAR('"'); + + end = uri + strlen(uri); + + // look for the first whitespace character + while (uri < end && !HTTP_IS_SPHT(*uri)) + uri++; + + // keep advancing past multiple spaces + while (uri < end && HTTP_IS_SPHT(*uri)) { + uri++; nspaces++; + } + + // look for first space after url + spc = uri; + while (spc < end && !HTTP_IS_SPHT(*spc)) + spc++; + + path.ptr = uri; + path.len = spc - uri; + + // extract relative path without query params from url + path = iststop(http_get_path(path), '?'); + if (!txn || !txn->uri || nspaces == 0) { + chunk.area = ""; + chunk.data = strlen(""); + } else { + chunk.area = path.ptr; + chunk.data = path.len; + } + + ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp); + if (ret == NULL || *ret != '\0') + goto out; + + tmplog = ret; + if (tmp->options & LOG_OPT_QUOTE) + LOGCHAR('"'); + + last_isspace = 0; + break; + case LOG_FMT_HTTP_QUERY: // %HQ if (tmp->options & LOG_OPT_QUOTE) LOGCHAR('"');