MINOR: log: Logging HTTP path only with %HPO

This patch adds a new logging variable '%HPO' for logging HTTP path only
(without query string) from relative or absolute URI.

For example:
log-format "hpo=%HPO hp=%HP hu=%HU hq=%HQ"

GET /r/1 HTTP/1.1
=>
hpo=/r/1 hp=/r/1 hu=/r/1 hq=

GET /r/2?q=2 HTTP/1.1
=>
hpo=/r/2 hp=/r/2 hu=/r/2?q=2 hq=?q=2

GET http://host/r/3 HTTP/1.1
=>
hpo=/r/3 hp=http://host/r/3 hu=http://host/r/3 hq=

GET http://host/r/4?q=4 HTTP/1.1
=>
hpo=/r/4 hp=http://host/r/4 hu=http://host/r/4?q=4 hq=?q=4
This commit is contained in:
Maciej Zdeb 2020-11-30 18:27:47 +00:00 committed by Willy Tarreau
parent c94431b308
commit fcdfd857b3
4 changed files with 110 additions and 1 deletions

View File

@ -19635,6 +19635,7 @@ Please refer to the table below for currently defined variables :
| | %H | hostname | string |
| H | %HM | HTTP method (ex: POST) | string |
| H | %HP | HTTP request URI without query string | string |
| H | %HPO | HTTP path only (without host nor query string)| string |
| H | %HQ | HTTP request URI query string (ex: ?bar=baz) | string |
| H | %HU | HTTP request URI (ex: /foo?bar=baz) | string |
| H | %HV | HTTP version (ex: HTTP/1.0) | string |

View File

@ -169,6 +169,7 @@ enum {
LOG_FMT_HTTP_METHOD,
LOG_FMT_HTTP_URI,
LOG_FMT_HTTP_PATH,
LOG_FMT_HTTP_PATH_ONLY,
LOG_FMT_HTTP_QUERY,
LOG_FMT_HTTP_VERSION,
LOG_FMT_HOSTNAME,

59
reg-tests/log/log_uri.vtc Normal file
View File

@ -0,0 +1,59 @@
varnishtest "Verify logging of relative/absolute URI path"
# Checks the values logged by %HPO, %HP, %HU and %HQ for relative and
# absolute request URIs, each without and with a query string.
feature ignore_unknown_macro
# Backend server s1: reads one request (rxreq) and answers it (txresp).
# "-repeat 4" runs this exchange four times, matching the four requests
# that client c1 sends.
server s1 {
rxreq
txresp
} -repeat 4 -start
# Syslog receiver Slg_1: expects four log lines from haproxy h1, in order,
# one per client request. Each regex checks the hpo=/hp=/hu=/hq= fields
# produced by the log-format line in the haproxy configuration:
#   1. relative URI, no query string      (/r/1)
#   2. relative URI with query string     (/r/2?q=2)
#   3. absolute URI, no query string      (http://localhost/r/3)
#   4. absolute URI with query string     (http://localhost/r/4?q=4)
# In every case hpo= is expected to hold only the path, while hp= and hu=
# keep the scheme and host for absolute URIs.
syslog Slg_1 -level info {
recv
expect ~ "[^:\\[ ]\\[${h1_pid}\\]: .* hpo=/r/1 hp=/r/1 hu=/r/1 hq="
recv
expect ~ "[^:\\[ ]\\[${h1_pid}\\]: .* hpo=/r/2 hp=/r/2 hu=/r/2\\?q=2 hq=\\?q=2"
recv
expect ~ "[^:\\[ ]\\[${h1_pid}\\]: .* hpo=/r/3 hp=http://localhost/r/3 hu=http://localhost/r/3 hq="
recv
expect ~ "[^:\\[ ]\\[${h1_pid}\\]: .* hpo=/r/4 hp=http://localhost/r/4 hu=http://localhost/r/4\\?q=4 hq=\\?q=4"
} -start
# haproxy instance h1: HTTP-mode frontend fe1 forwards to backend "be", whose
# only server is s1. Logs are sent to Slg_1 using a custom log-format that
# emits %HPO, %HP, %HU and %HQ side by side so the syslog expectations can
# compare them for each request.
haproxy h1 -conf {
global
nbthread 1
defaults
mode http
option httplog
timeout connect 1000
timeout client 1000
timeout server 1000
frontend fe1
bind "fd@${fe_1}"
log ${Slg_1_addr}:${Slg_1_port} local0
log-format "ci:%cp [%tr] hpo=%HPO hp=%HP hu=%HU hq=%HQ"
default_backend be
backend be
server app1 ${s1_addr}:${s1_port}
} -start
# Client c1 is started in the background and synchronized with "-wait" at the
# end of the test. It sends four requests in sequence through frontend fe1:
# two with a relative URI and two with an absolute URI, each form once
# without and once with a query string. Every response must be a 200.
client c1 -connect ${h1_fe_1_sock} {
txreq -url "/r/1"
rxresp
expect resp.status == 200
txreq -url "/r/2?q=2"
rxresp
expect resp.status == 200
txreq -url "http://localhost/r/3" -hdr "host: localhost"
rxresp
expect resp.status == 200
txreq -url "http://localhost/r/4?q=4" -hdr "host: localhost"
rxresp
expect resp.status == 200
} -start
# Wait for the syslog receiver to finish checking its expected log lines and
# for the client to complete its request sequence.
syslog Slg_1 -wait
client c1 -wait

View File

@ -169,7 +169,8 @@ static const struct logformat_type logformat_keywords[] = {
{ "hs", LOG_FMT_HDRRESPONS, PR_MODE_TCP, LW_RSPHDR, NULL }, /* header response */
{ "hsl", LOG_FMT_HDRRESPONSLIST, PR_MODE_TCP, LW_RSPHDR, NULL }, /* header response list */
{ "HM", LOG_FMT_HTTP_METHOD, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP method */
{ "HP", LOG_FMT_HTTP_PATH, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP path */
{ "HP", LOG_FMT_HTTP_PATH, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP relative or absolute path */
{ "HPO", LOG_FMT_HTTP_PATH_ONLY, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP path only (without host nor query string) */
{ "HQ", LOG_FMT_HTTP_QUERY, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP query */
{ "HU", LOG_FMT_HTTP_URI, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP full URI */
{ "HV", LOG_FMT_HTTP_VERSION, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP version */
@ -2102,6 +2103,7 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t
struct logformat_node *tmp;
struct timeval tv;
struct strm_logs tmp_strm_log;
struct ist path;
/* FIXME: let's limit ourselves to frontend logging for now. */
@ -2855,6 +2857,52 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t
last_isspace = 0;
break;
case LOG_FMT_HTTP_PATH_ONLY: // %HPO
uri = txn && txn->uri ? txn->uri : "<BADREQ>";
if (tmp->options & LOG_OPT_QUOTE)
LOGCHAR('"');
end = uri + strlen(uri);
// look for the first whitespace character
while (uri < end && !HTTP_IS_SPHT(*uri))
uri++;
// keep advancing past multiple spaces
while (uri < end && HTTP_IS_SPHT(*uri)) {
uri++; nspaces++;
}
// look for first space after url
spc = uri;
while (spc < end && !HTTP_IS_SPHT(*spc))
spc++;
path.ptr = uri;
path.len = spc - uri;
// extract relative path without query params from url
path = iststop(http_get_path(path), '?');
if (!txn || !txn->uri || nspaces == 0) {
chunk.area = "<BADREQ>";
chunk.data = strlen("<BADREQ>");
} else {
chunk.area = path.ptr;
chunk.data = path.len;
}
ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp);
if (ret == NULL || *ret != '\0')
goto out;
tmplog = ret;
if (tmp->options & LOG_OPT_QUOTE)
LOGCHAR('"');
last_isspace = 0;
break;
case LOG_FMT_HTTP_QUERY: // %HQ
if (tmp->options & LOG_OPT_QUOTE)
LOGCHAR('"');