MEDIUM: pattern: add the "base" sample fetch method

This one returns the concatenation of the first Host header entry with
the path. It can make content-switching rules easier, help fight DDoS
attacks on certain URLs and improve shared cache efficiency.
This commit is contained in:
Willy Tarreau 2012-04-29 15:39:40 +02:00
parent 6812bcfc94
commit a7ad50cdb1
2 changed files with 107 additions and 3 deletions

View File

@ -7968,6 +7968,47 @@ application layer (layer 7). Those require that a full HTTP request has been
read, and are only evaluated then. They may require slightly more CPU resources
than the layer 4 ones, but not much since the request and response are indexed.
base <string>
Returns true when the concatenation of the first Host header and the path
part of the request, which starts at the first slash and ends before the
question mark, equals one of the strings. It may be used to match known
files in virtual hosting environments, such as "www.example.com/favicon.ico".
See also "path" and "uri".
base_beg <string>
Returns true when the base (see above) begins with one of the strings. This
can be used to send certain directory names to alternative backends. See also
"path_beg".
base_dir <string>
Returns true when one of the strings is found isolated or delimited with
slashes in the base (see above). Probably of little use, see "url_dir" and
"path_dir" instead.
base_dom <string>
Returns true when one of the strings is found isolated or delimited with dots
in the base (see above). Probably of little use, see "path_dom" and "url_dom"
instead.
base_end <string>
Returns true when the base (see above) ends with one of the strings. This may
be used to check the file name extension, though "path_end" is cheaper.
base_len <integer>
Returns true when the base (see above) length matches the values or ranges
specified. This may be used to detect abusive requests for instance.
base_reg <regex>
Returns true when the base (see above) matches one of the regular
expressions. It can be used any time, but it is important to remember that
regex matching is slower than other methods. See also "path_reg", "url_reg"
and all "base_" criteria.
base_sub <string>
Returns true when the base (see above) contains one of the strings. It can be
used to detect particular patterns in paths, such as "../" for example. See
also "base_dir".
cook(<name>) <string>
All "cook*" matching criteria inspect all "Cookie" headers to find a cookie
with the name between parenthesis. If multiple occurrences of the cookie are
@ -8209,11 +8250,12 @@ status <integer>
url <string>
Applies to the whole URL passed in the request. The only real use is to match
"*", for which there already is a predefined ACL.
"*", for which there already is a predefined ACL. See also "base".
url_beg <string>
Returns true when the URL begins with one of the strings. This can be used to
check whether a URL begins with a slash or with a protocol scheme.
check whether a URL begins with a slash or with a protocol scheme. See also
"base_beg".
url_dir <string>
Returns true when one of the strings is found isolated or delimited with
@ -8248,7 +8290,7 @@ url_port <integer>
url_reg <regex>
Returns true when the URL matches one of the regular expressions. It can be
used any time, but it is important to remember that regex matching is slower
than other methods. See also "path_reg" and all "url_" criteria.
than other methods. See also "base_reg", "path_reg" and all "url_" criteria.
url_sub <string>
Returns true when the URL contains one of the strings. It can be used to
@ -8421,6 +8463,14 @@ equivalent used in ACLs.
The list of currently supported pattern fetch functions is the following :
base This returns the concatenation of the first Host header and the
path part of the request, which starts at the first slash and
ends before the question mark. It can be useful in virtual
hosting environments to detect URL abuses as well as to improve
shared cache efficiency. Using this with a limited-size stick
table also allows one to collect statistics about the most commonly
requested objects by host/path.
src This is the source IPv4 address of the client of the session.
It is of type IPv4 and works on both IPv4 and IPv6 tables.
On IPv6 tables, IPv4 address is mapped to its IPv6 equivalent,

View File

@ -8041,6 +8041,50 @@ smp_fetch_path(struct proxy *px, struct session *l4, void *l7, unsigned int opt,
return 1;
}
/* This produces a concatenation of the first occurrence of the Host header
* followed by the path component if it begins with a slash ('/'). This means
* that '*' will not be added, resulting in exactly the first Host entry.
* If no Host header is found, then the path is returned as-is. The returned
* value is stored in the trash so it does not need to be marked constant.
*/
static int
smp_fetch_base(struct proxy *px, struct session *l4, void *l7, unsigned int opt,
const struct arg *args, struct sample *smp)
{
struct http_txn *txn = l7;
char *ptr, *end, *beg;
struct hdr_ctx ctx;
CHECK_HTTP_MESSAGE_FIRST();
ctx.idx = 0;
if (!http_find_header2("Host", 4, txn->req.buf->p + txn->req.sol, &txn->hdr_idx, &ctx) ||
!ctx.vlen)
return smp_fetch_path(px, l4, l7, opt, args, smp);
/* OK we have the header value in ctx.line+ctx.val for ctx.vlen bytes */
memcpy(trash, ctx.line + ctx.val, ctx.vlen);
smp->type = SMP_T_STR;
smp->data.str.str = trash;
smp->data.str.len = ctx.vlen;
/* now retrieve the path */
end = txn->req.buf->p + txn->req.sol + txn->req.sl.rq.u + txn->req.sl.rq.u_l;
beg = http_get_path(txn);
if (!beg)
beg = end;
for (ptr = beg; ptr < end && *ptr != '?'; ptr++);
if (beg < ptr && *beg == '/') {
memcpy(smp->data.str.str + smp->data.str.len, beg, ptr - beg);
smp->data.str.len += ptr - beg;
}
smp->flags = SMP_F_VOL_1ST;
return 1;
}
static int
acl_fetch_proto_http(struct proxy *px, struct session *l4, void *l7, unsigned int opt,
const struct arg *args, struct sample *smp)
@ -8530,6 +8574,15 @@ static int val_hdr(struct arg *arg, char **err_msg)
* Please take care of keeping this list alphabetically sorted.
*/
static struct acl_kw_list acl_kws = {{ },{
{ "base", acl_parse_str, smp_fetch_base, acl_match_str, ACL_USE_L7REQ_VOLATILE|ACL_MAY_LOOKUP, 0 },
{ "base_beg", acl_parse_str, smp_fetch_base, acl_match_beg, ACL_USE_L7REQ_VOLATILE, 0 },
{ "base_dir", acl_parse_str, smp_fetch_base, acl_match_dir, ACL_USE_L7REQ_VOLATILE, 0 },
{ "base_dom", acl_parse_str, smp_fetch_base, acl_match_dom, ACL_USE_L7REQ_VOLATILE, 0 },
{ "base_end", acl_parse_str, smp_fetch_base, acl_match_end, ACL_USE_L7REQ_VOLATILE, 0 },
{ "base_len", acl_parse_int, smp_fetch_base, acl_match_len, ACL_USE_L7REQ_VOLATILE, 0 },
{ "base_reg", acl_parse_reg, smp_fetch_base, acl_match_reg, ACL_USE_L7REQ_VOLATILE, 0 },
{ "base_sub", acl_parse_str, smp_fetch_base, acl_match_sub, ACL_USE_L7REQ_VOLATILE, 0 },
{ "cook", acl_parse_str, smp_fetch_cookie, acl_match_str, ACL_USE_L7REQ_VOLATILE|ACL_MAY_LOOKUP, ARG1(0,STR) },
{ "cook_beg", acl_parse_str, smp_fetch_cookie, acl_match_beg, ACL_USE_L7REQ_VOLATILE, ARG1(0,STR) },
{ "cook_cnt", acl_parse_int, acl_fetch_cookie_cnt, acl_match_int, ACL_USE_L7REQ_VOLATILE, ARG1(0,STR) },
@ -8627,6 +8680,7 @@ static struct acl_kw_list acl_kws = {{ },{
/* Note: must not be declared <const> as its list will be overwritten */
static struct sample_fetch_kw_list sample_fetch_keywords = {{ },{
{ "hdr", smp_fetch_hdr, ARG2(1,STR,SINT), val_hdr, SMP_T_CSTR, SMP_CAP_REQ },
{ "base", smp_fetch_base, 0, NULL, SMP_T_CSTR, SMP_CAP_REQ },
{ "path", smp_fetch_path, 0, NULL, SMP_T_CSTR, SMP_CAP_REQ },
{ "url", smp_fetch_url, 0, NULL, SMP_T_CSTR, SMP_CAP_REQ },
{ "url_ip", smp_fetch_url_ip, 0, NULL, SMP_T_IPV4, SMP_CAP_REQ },