From 5eb1a9033a94fc5dc1a47f05cac2e4c9c751024e Mon Sep 17 00:00:00 2001
From: Alexandre Cassen
Date: Thu, 29 Nov 2007 15:43:32 +0100
Subject: [PATCH] [MEDIUM] New option http_proxy

Hello,

You will find attached an updated release of the previously submitted patch.
It polishes some parts and extends the ACL engine to match the IP address and
port parsed from the HTTP request (and takes care of the comments made by
Willy ! ;)).

Best regards,
Alexandre
---
 doc/configuration.txt | 10 ++++
 examples/option-http_proxy.cfg | 53 +++++++++++++++++++
 include/common/standard.h | 5 ++
 include/types/proxy.h | 1 +
 src/backend.c | 8 +++
 src/cfgparse.c | 3 +-
 src/client.c | 13 ++---
 src/proto_http.c | 78 ++++++++++++++++++++++++----
 src/standard.c | 94 ++++++++++++++++++++++++++++++++++
 9 files changed, 248 insertions(+), 17 deletions(-)
 create mode 100644 examples/option-http_proxy.cfg

diff --git a/doc/configuration.txt b/doc/configuration.txt
index 149f330a4..5ebe3cc6d 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -275,6 +275,7 @@ option httpclose X X X X
 option httplog X X X X
 option logasap X X X -
 option nolinger X X X X
+option http_proxy X X X X
 option persist X - X X
 option redispatch X - X X
 option smtpchk X - X X
@@ -539,6 +540,15 @@ url_reg
 used any time, but it is important to remember that regex matching is slower than other methods. See also "path_reg" and all "url_" criteria.
+url_ip
+  Applies to the IP address parsed from the HTTP request URL. It can be used
+  to prevent access to certain resources such as the local network. It is
+  useful with option 'http_proxy'.
+
+url_port
+  Applies to the port parsed from the HTTP request URL. It can be used to
+  prevent access to certain resources. It is useful with option 'http_proxy'.
+
 hdr
 hdr(header)
 Note: all the "hdr*" matching criteria either apply to all headers, or to a
diff --git a/examples/option-http_proxy.cfg b/examples/option-http_proxy.cfg
new file mode 100644
index 000000000..8f73c3bb8
--- /dev/null
+++ b/examples/option-http_proxy.cfg
@@ -0,0 +1,53 @@
+#
+# demo config for Proxy mode
+#
+
+global
+	maxconn 20000
+	ulimit-n 16384
+	log 127.0.0.1 local0
+	uid 200
+	gid 200
+	chroot /var/empty
+	nbproc 4
+	daemon
+
+frontend test-proxy
+	bind 192.168.200.10:8080
+	mode http
+	log global
+	option httplog
+	option dontlognull
+	option httpclose
+	option nolinger
+	option http_proxy
+	maxconn 8000
+	clitimeout 30000
+
+	# layer3: Valid users
+	acl allow_host src 192.168.200.150/32
+	block if !allow_host
+
+	# layer7: prevent private network relaying (RFC 1918 ranges)
+	acl forbidden_dst url_ip 192.168.0.0/16
+	acl forbidden_dst url_ip 172.16.0.0/12
+	acl forbidden_dst url_ip 10.0.0.0/8
+	block if forbidden_dst
+
+	default_backend test-proxy-srv
+
+
+backend test-proxy-srv
+	mode http
+	contimeout 5000
+	srvtimeout 5000
+	retries 2
+	option nolinger
+	option http_proxy
+
+	# layer7: Only GET method is valid
+	acl valid_method method GET
+	block if !valid_method
+
+	# layer7: protect bad reply
+	rspdeny ^Content-Type:[\ ]*audio/mp3
diff --git a/include/common/standard.h b/include/common/standard.h
index 90ed618a9..80f6d2a28 100644
--- a/include/common/standard.h
+++ b/include/common/standard.h
@@ -132,6 +132,11 @@ struct sockaddr_in *str2sa(char *str);
  */
 int str2net(const char *str, struct in_addr *addr, struct in_addr *mask);
+/*
+ * Resolve destination server from URL. Convert to a sockaddr_in*.
+ */ +int url2sa(const char *url, int ulen, struct sockaddr_in *addr); + /* will try to encode the string replacing all characters tagged in * with the hexadecimal representation of their ASCII-code (2 digits) * prefixed by , and will store the result between (included) diff --git a/include/types/proxy.h b/include/types/proxy.h index 6ffc264cd..2a75fad9e 100644 --- a/include/types/proxy.h +++ b/include/types/proxy.h @@ -101,6 +101,7 @@ #define PR_O_TCPSPLICE 0x08000000 /* delegate data transfer to linux kernel's tcp_splice */ #define PR_O_CONTSTATS 0x10000000 /* continous counters */ +#define PR_O_HTTP_PROXY 0x20000000 /* Enable session to use HTTP proxy operations */ /* This structure is used to apply fast weighted round robin on a server group */ struct fwrr_group { diff --git a/src/backend.c b/src/backend.c index 91027e817..a8676b3fa 100644 --- a/src/backend.c +++ b/src/backend.c @@ -936,6 +936,10 @@ int assign_server(struct session *s) return SRV_STATUS_INTERNAL; } } + else if (s->be->options & PR_O_HTTP_PROXY) { + if (!s->srv_addr.sin_addr.s_addr) + return SRV_STATUS_NOSRV; + } else if (!*(int *)&s->be->dispatch_addr.sin_addr && !(s->fe->options & PR_O_TRANSP)) { return SRV_STATUS_NOSRV; @@ -999,6 +1003,10 @@ int assign_server_address(struct session *s) return SRV_STATUS_INTERNAL; } } + else if (s->be->options & PR_O_HTTP_PROXY) { + /* If HTTP PROXY option is set, then server is already assigned + * during incoming client request parsing. */ + } else { /* no server and no LB algorithm ! 
*/ return SRV_STATUS_INTERNAL; diff --git a/src/cfgparse.c b/src/cfgparse.c index 47530120d..affb9569a 100644 --- a/src/cfgparse.c +++ b/src/cfgparse.c @@ -97,6 +97,7 @@ static const struct { { "keepalive", PR_O_KEEPALIVE, PR_CAP_NONE, 0 }, { "httpclose", PR_O_HTTP_CLOSE, PR_CAP_FE | PR_CAP_BE, 0 }, { "nolinger", PR_O_TCP_NOLING, PR_CAP_FE | PR_CAP_BE, 0 }, + { "http_proxy", PR_O_HTTP_PROXY, PR_CAP_FE | PR_CAP_BE, 0 }, { "logasap", PR_O_LOGASAP, PR_CAP_FE, 0 }, { "contstats", PR_O_CONTSTATS, PR_CAP_FE, 0 }, { "abortonclose", PR_O_ABRT_CLOSE, PR_CAP_BE, 0 }, @@ -2473,7 +2474,7 @@ int readcfgfile(const char *file) } else if (curproxy->cap & PR_CAP_BE && ((curproxy->mode != PR_MODE_HEALTH) && - !(curproxy->options & PR_O_TRANSP) && + !(curproxy->options & (PR_O_TRANSP | PR_O_HTTP_PROXY)) && !(curproxy->lbprm.algo & BE_LB_ALGO) && (*(int *)&curproxy->dispatch_addr.sin_addr == 0))) { Alert("parsing %s : %s '%s' has no dispatch address and is not in transparent or balance mode.\n", diff --git a/src/client.c b/src/client.c index 2ee41a6f8..73c889555 100644 --- a/src/client.c +++ b/src/client.c @@ -522,6 +522,7 @@ acl_fetch_dport(struct proxy *px, struct session *l4, void *l7, int dir, return 1; } + /* set test->i to the number of connexions to the proxy */ static int acl_fetch_dconn(struct proxy *px, struct session *l4, void *l7, int dir, @@ -534,14 +535,14 @@ acl_fetch_dconn(struct proxy *px, struct session *l4, void *l7, int dir, /* Note: must not be declared as its list will be overwritten */ static struct acl_kw_list acl_kws = {{ },{ - { "src_port", acl_parse_int, acl_fetch_sport, acl_match_int }, - { "src", acl_parse_ip, acl_fetch_src, acl_match_ip }, - { "dst", acl_parse_ip, acl_fetch_dst, acl_match_ip }, - { "dst_port", acl_parse_int, acl_fetch_dport, acl_match_int }, + { "src_port", acl_parse_int, acl_fetch_sport, acl_match_int }, + { "src", acl_parse_ip, acl_fetch_src, acl_match_ip }, + { "dst", acl_parse_ip, acl_fetch_dst, acl_match_ip }, + { "dst_port", 
acl_parse_int, acl_fetch_dport, acl_match_int },
 #if 0
-	{ "src_limit", acl_parse_int, acl_fetch_sconn, acl_match_int },
+	{ "src_limit", acl_parse_int, acl_fetch_sconn, acl_match_int },
 #endif
-	{ "dst_conn", acl_parse_int, acl_fetch_dconn, acl_match_int },
+	{ "dst_conn", acl_parse_int, acl_fetch_dconn, acl_match_int },
 	{ NULL, NULL, NULL, NULL },
 }};
diff --git a/src/proto_http.c b/src/proto_http.c
index 372743146..59b9055f6 100644
--- a/src/proto_http.c
+++ b/src/proto_http.c
@@ -1476,7 +1476,7 @@ void http_msg_analyzer(struct buffer *buf, struct http_msg *msg, struct hdr_idx
 	msg->msg_state = HTTP_MSG_ERROR;
 	return;
 }
-
+
 /*
  * manages the client FSM and its socket. BTW, it also tries to handle the
  * cookie. It returns 1 if a state has changed (and a resync may be needed),
@@ -1908,8 +1908,13 @@ int process_cli(struct session *t)
 		 * may have separate values for ->fe, ->be.
 		 */
-
-
+		/*
+		 * If HTTP PROXY is set we simply get remote server address
+		 * parsing incoming request.
+		 */
+		if ((t->be->options & PR_O_HTTP_PROXY) && !(t->flags & SN_ADDR_SET)) {
+			url2sa(req->data + msg->sl.rq.u, msg->sl.rq.u_l, &t->srv_addr);
+		}
 		/*
 		 * 7: the appsession cookie was looked up very early in 1.2,
@@ -4950,6 +4955,57 @@ acl_fetch_url(struct proxy *px, struct session *l4, void *l7, int dir,
 	return 1;
 }
+/* Fetch the IP address found in an "http://ip[:port]" request URL. */
+static int
+acl_fetch_url_ip(struct proxy *px, struct session *l4, void *l7, int dir,
+		 struct acl_expr *expr, struct acl_test *test)
+{
+	struct http_txn *txn = l7;
+
+	if (txn->req.msg_state != HTTP_MSG_BODY ||
+	    txn->rsp.msg_state != HTTP_MSG_RPBEFORE)	/* ensure the indexes are not affected */
+		return 0;
+
+	/* url2sa() fills l4->srv_addr directly; without a usable address there is nothing to match */
+	if (url2sa(txn->req.sol + txn->req.sl.rq.u, txn->req.sl.rq.u_l, &l4->srv_addr) < 0 ||
+	    !l4->srv_addr.sin_addr.s_addr)
+		return 0;
+	test->ptr = (void *)&((struct sockaddr_in *)&l4->srv_addr)->sin_addr;
+	test->i = AF_INET;
+
+	/* If we are parsing the URL in frontend space, we prepare the backend
+	 * stage to not parse the same URL again (see SN_ADDR_SET).
+	 */
+	if (px->options & PR_O_HTTP_PROXY)
+		l4->flags |= SN_ADDR_SET;
+
+	test->flags = ACL_TEST_F_READ_ONLY;
+	return 1;
+}
+
+/* Fetch the port found in an "http://ip[:port]" request URL (80 if omitted). */
+static int
+acl_fetch_url_port(struct proxy *px, struct session *l4, void *l7, int dir,
+		   struct acl_expr *expr, struct acl_test *test)
+{
+	struct http_txn *txn = l7;
+
+	if (txn->req.msg_state != HTTP_MSG_BODY ||
+	    txn->rsp.msg_state != HTTP_MSG_RPBEFORE)	/* ensure the indexes are not affected */
+		return 0;
+
+	if (url2sa(txn->req.sol + txn->req.sl.rq.u, txn->req.sl.rq.u_l, &l4->srv_addr) < 0 ||
+	    !l4->srv_addr.sin_addr.s_addr)
+		return 0;	/* no usable address in the URL, hence no port either */
+	test->i = ntohs(((struct sockaddr_in *)&l4->srv_addr)->sin_port);
+
+	if (px->options & PR_O_HTTP_PROXY)	/* same optimization as url_ip */
+		l4->flags |= SN_ADDR_SET;
+
+	test->flags = ACL_TEST_F_READ_ONLY;
+	return 1;
+}
+
 /* 5. Check on HTTP header. A pointer to the beginning of the value is returned.
  * This generic function is used by both acl_fetch_chdr() and acl_fetch_shdr().
  */
@@ -5186,13 +5242,15 @@ static struct acl_kw_list acl_kws = {{ },{
 	{ "resp_ver", acl_parse_ver, acl_fetch_stver, acl_match_str },
 	{ "status", acl_parse_int, acl_fetch_stcode, acl_match_int },
-	{ "url", acl_parse_str, acl_fetch_url, acl_match_str },
-	{ "url_beg", acl_parse_str, acl_fetch_url, acl_match_beg },
-	{ "url_end", acl_parse_str, acl_fetch_url, acl_match_end },
-	{ "url_sub", acl_parse_str, acl_fetch_url, acl_match_sub },
-	{ "url_dir", acl_parse_str, acl_fetch_url, acl_match_dir },
-	{ "url_dom", acl_parse_str, acl_fetch_url, acl_match_dom },
-	{ "url_reg", acl_parse_reg, acl_fetch_url, acl_match_reg },
+	{ "url", acl_parse_str, acl_fetch_url, acl_match_str },
+	{ "url_beg", acl_parse_str, acl_fetch_url, acl_match_beg },
+	{ "url_end", acl_parse_str, acl_fetch_url, acl_match_end },
+	{ "url_sub", acl_parse_str, acl_fetch_url, acl_match_sub },
+	{ "url_dir", acl_parse_str, acl_fetch_url, acl_match_dir },
+	{ "url_dom", acl_parse_str, acl_fetch_url, acl_match_dom },
+	{ "url_reg", acl_parse_reg, acl_fetch_url, acl_match_reg },
+	{ "url_ip",
acl_parse_ip, acl_fetch_url_ip, acl_match_ip },
+	{ "url_port", acl_parse_int, acl_fetch_url_port, acl_match_int },
 	{ "hdr", acl_parse_str, acl_fetch_chdr, acl_match_str },
 	{ "hdr_reg", acl_parse_reg, acl_fetch_chdr, acl_match_reg },
diff --git a/src/standard.c b/src/standard.c
index 1e631301c..d245949a9 100644
--- a/src/standard.c
+++ b/src/standard.c
@@ -202,6 +202,100 @@ int str2net(const char *str, struct in_addr *addr, struct in_addr *mask)
 	goto out_free;
 }
+
+/*
+ * Parse a dotted-quad IPv4 address ("a.b.c.d", each number within 0..255) in
+ * the area starting at 'addr' and ending just before 'end'. Parsing stops at
+ * the first character which is neither a digit nor a dot. On success the
+ * address is stored into 'dst' in network byte order and the number of
+ * characters consumed is returned. 0 is returned if no valid address is found.
+ */
+static int url2ip(const char *addr, const char *end, struct in_addr *dst)
+{
+	int saw_digit = 0, octets = 0;
+	u_char tmp[4] = { 0 }, *tp = tmp;
+	const char *cp;
+
+	for (cp = addr; cp < end; cp++) {
+		unsigned char digit = *cp - '0';
+		if (digit <= 9) {
+			u_int new = *tp * 10 + digit;
+			if (new > 255)
+				return 0;
+			*tp = new;
+			if (!saw_digit) {
+				if (++octets > 4)
+					return 0;
+				saw_digit = 1;
+			}
+		} else if (*cp != '.') {
+			break;		/* end of the address */
+		} else if (saw_digit) {
+			if (octets == 4)
+				return 0;
+			*++tp = 0;
+			saw_digit = 0;
+		} else
+			return 0;	/* empty octet */
+	}
+
+	if (octets < 4)
+		return 0;
+
+	memcpy(&dst->s_addr, tmp, 4);
+	return cp - addr;
+}
+
+/* Fill 'addr' from an absolute "http://a.b.c.d[:port]" URL held in the first
+ * 'ulen' bytes of 'url' (port 80 when omitted). Returns 0 when a "://" was
+ * found, or -1 when none was found or no valid IP follows "http://". */
+int url2sa(const char *url, int ulen, struct sockaddr_in *addr)
+{
+	const char *curr = url, *cp = url, *end = url + ulen;
+	int ret, url_code = 0;
+	unsigned int http_code = 0;
+
+	/* Reset the result: the address stays null unless an IP is parsed below */
+	addr->sin_family = AF_INET;
+	addr->sin_addr.s_addr = 0;
+	addr->sin_port = 0;
+
+	/* Firstly, look for "://": url_code holds the last 3 bytes read */
+	while (curr < end && url_code != 0x3a2f2f) {
+		url_code = ((url_code & 0xffff) << 8);
+		url_code += (unsigned char)*curr++;
+	}
+
+	/* Secondly, if the "://" pattern was found, check that what precedes
+	 * it is exactly the "http" scheme (in any case), then parse the IP
+	 * address and the optional port which follow it. Other schemes are
+	 * reported as found (0) but leave the address null.
+	 * WARNING: host names are not resolved, only numeric IPv4 addresses.
+	 */
+	if (url_code == 0x3a2f2f) {
+		while (cp < curr - 3)
+			http_code = (http_code << 8) + (unsigned char)*cp++;
+		http_code |= 0x20202020; /* Turn everything to lower case */
+
+		/* "http://" is 7 bytes: rejects "xhttp://" or a "://" met later in the URL */
+		if (curr - url == 7 && http_code == 0x68747470) {
+			/* Only numeric addresses are handled. A host name could be
+			 * resolved using str2sa(), but be warned that this can slow
+			 * down the whole daemon while handling lagging DNS responses.
+			 * NOTE(review): str2uic() below is not bounded by 'end' - confirm.
+			 */
+			ret = url2ip(curr, end, &addr->sin_addr);
+			if (!ret)
+				return -1;
+			curr += ret;
+			addr->sin_port = (curr < end && *curr == ':') ? htons(str2uic(++curr)) : htons(80);
+		}
+		return 0;
+	}
+
+	return -1;
+}
+
 /* will try to encode the string replacing all characters tagged in
  * with the hexadecimal representation of their ASCII-code (2 digits)
  * prefixed by , and will store the result between (included)