[MEDIUM] implement the URI hash algorithm

Guillaume Dallaire contributed the URI hashing algorithm for
use with proxy-caches. It provides the advantage of optimizing
the cache hit rate.
This commit is contained in:
Willy Tarreau 2007-05-08 13:35:26 +02:00
parent 9cdde230a5
commit 2fcb500481
6 changed files with 100 additions and 9 deletions

View File

@ -861,8 +861,9 @@ described above. It has advantages such as server health monitoring, multiple
port binding and port mapping. To use this mode, the 'balance' keyword is used,
followed by the selected algorithm. Up to version 1.2.11, only 'roundrobin' was
available, which is also the default value if unspecified. Starting with
version 1.2.12, a new 'source' keyword appeared. In this mode, there will be no
dispatch address, but the proxy needs at least one server.
version 1.2.12, a new 'source' keyword appeared. A new 'uri' keyword was added
in version 1.3.10. In this mode, there will be no dispatch address, but the
proxy needs at least one server.
Example : same as the last one, with internal load balancer
---------
@ -959,6 +960,24 @@ Examples :
server web1 192.168.1.1 cookie server01
server web2 192.168.1.2 cookie server02
As indicated above, the 'uri' keyword was introduced in version 1.3.10. It is
useful when load-balancing between reverse proxy-caches, because it will hash
the URI and use the hash result to select a server, thus optimizing the hit
rate on the caches, because the same URI will always reach the same cache. This
keyword is only allowed in HTTP mode.
Example :
---------
# Always send a given URI to the same server
listen http_proxy
bind :3128
mode http
balance uri
server squid1 192.168.1.1
server squid2 192.168.1.2
3.1) Server monitoring
----------------------

View File

@ -863,9 +863,10 @@ serveurs d
cela, on précise le mot clé 'balance' dans la définition du service,
éventuellement suivi du nom d'un algorithme de répartition. Jusqu'à la version
1.2.11, seul 'roundrobin' était géré, et c'est aussi la valeur implicite par
défaut. Avec la version 1.2.12, le nouveau mot clé 'source' est apparu. Il est
évident qu'en cas d'utilisation du répartiteur interne, il ne faudra pas
spécifier d'adresse de dispatch, et qu'il faudra au moins un serveur.
défaut. Avec la version 1.2.12, le nouveau mot clé 'source' est apparu. La
version 1.3.10 a également apporté le mot clé 'uri'. Il est évident qu'en cas
d'utilisation du répartiteur interne, il ne faudra pas spécifier d'adresse de
dispatch, et qu'il faudra au moins un serveur.
Exemple : même que précédemment en répartition interne
---------
@ -965,6 +966,25 @@ Exemples :
server web1 192.168.1.1 cookie server01
server web2 192.168.1.2 cookie server02
De plus, tel qu'indiqué ci-dessus, la version 1.3.10 a introduit le mot clé
'uri'. Il est très pratique dans le cas de répartition de charge entre des
reverse-proxy-caches, parce qu'il utilisera le résultat d'un hachage de l'URI
pour choisir un serveur, ce qui aura pour effet d'optimiser le taux de cache
du fait que la même URI sera toujours envoyée au même cache. Ce mot-clé n'est
autorisé qu'en mode HTTP.
Exemple :
---------
# Envoie toujours une URI donnée au même serveur
listen http_proxy
bind :3128
mode http
balance uri
server squid1 192.168.1.1
server squid2 192.168.1.2
3.1) Surveillance des serveurs
------------------------------

View File

@ -2,7 +2,7 @@
include/proto/backend.h
Functions prototypes for the backend.
Copyright (C) 2000-2006 Willy Tarreau - w@1wt.eu
Copyright (C) 2000-2007 Willy Tarreau - w@1wt.eu
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@ -116,6 +116,36 @@ static inline struct server *get_server_sh(const struct proxy *px,
return px->srv_map[h];
}
/*
 * Select a server for proxy <px> by hashing the request URI.
 *
 * <uri> points to the first byte of the URI and <uri_len> is the number of
 * bytes available there. Bytes are folded into the hash one at a time, and
 * hashing stops early at the first question mark so that the query string
 * does not take part in the result: every request for the same path thus
 * lands on the same server, which is what improves cache hit rates.
 *
 * Each step computes hash = byte + (hash << 6) + (hash << 16) - hash, i.e.
 * hash * 65599 + byte in unsigned long arithmetic. The byte is read through
 * a plain 'char', so bytes above 0x7F contribute according to the platform's
 * char signedness.
 *
 * The result indexes px->srv_map modulo px->srv_map_sz. Which servers
 * (active or backup) populate that map is decided elsewhere; this function
 * only picks an entry from it.
 *
 * Returns the selected server, or NULL when the server map is empty.
 *
 * This code was contributed by Guillaume Dallaire, who also selected this
 * hash algorithm out of about ten candidates because it gave him the best
 * results.
 */
static inline struct server *get_server_uh(struct proxy *px, char *uri, int uri_len)
{
	unsigned long hash = 0;
	int ch;

	/* nothing to pick from: also avoids a modulo by zero below */
	if (px->srv_map_sz == 0)
		return NULL;

	for (; uri_len != 0; uri_len--) {
		ch = *uri++;

		/* the query string is excluded from the hash */
		if (ch == '?')
			break;

		hash = (hash << 6) + (hash << 16) - hash + ch;
	}

	return px->srv_map[hash % px->srv_map_sz];
}
#endif /* _PROTO_BACKEND_H */

View File

@ -49,7 +49,6 @@
#define PR_O_USE_ALL_BK 0x00100000 /* load-balance between backup servers */
#define PR_O_FORCE_CLO 0x00200000 /* enforce the connection close immediately after server response */
#define PR_O_BALANCE_SH 0x00400000 /* balance on source IP hash */
#define PR_O_BALANCE (PR_O_BALANCE_RR | PR_O_BALANCE_SH)
#define PR_O_ABRT_CLOSE 0x00800000 /* immediately abort request when client closes */
#define PR_O_SSL3_CHK 0x01000000 /* use SSLv3 CLIENT_HELLO packets for server health */
@ -58,6 +57,8 @@
#define PR_O_TPXY_CLI 0x06000000 /* bind to the client's IP+port when connect()ing */
#define PR_O_TPXY_MASK 0x06000000 /* bind to a non-local address when connect()ing */
#define PR_O_TCPSPLICE 0x08000000 /* delegate data transfer to linux kernel's tcp_splice */
#define PR_O_BALANCE_UH 0x10000000 /* balance on URI hash */
/* Mask of all load-balancing algorithm bits (round-robin, source hash, URI
 * hash). The configuration parser clears this mask from the proxy options
 * before setting the bit of the selected algorithm.
 */
#define PR_O_BALANCE (PR_O_BALANCE_RR | PR_O_BALANCE_SH | PR_O_BALANCE_UH)
#endif /* _TYPES_BACKEND_H */

View File

@ -187,6 +187,12 @@ int assign_server(struct session *s)
(void *)&((struct sockaddr_in *)&s->cli_addr)->sin_addr,
len);
}
else if (s->be->options & PR_O_BALANCE_UH) {
/* URI hashing */
s->srv = get_server_uh(s->be,
s->txn.req.sol + s->txn.req.sl.rq.u,
s->txn.req.sl.rq.u_l);
}
else /* unknown balancing algorithm */
return SRV_STATUS_INTERNAL;
}

View File

@ -1129,18 +1129,26 @@ int cfg_parse_listen(const char *file, int linenum, char **args)
if (*(args[1])) {
if (!strcmp(args[1], "roundrobin")) {
curproxy->options &= ~PR_O_BALANCE;
curproxy->options |= PR_O_BALANCE_RR;
}
else if (!strcmp(args[1], "source")) {
curproxy->options &= ~PR_O_BALANCE;
curproxy->options |= PR_O_BALANCE_SH;
}
else if (!strcmp(args[1], "uri")) {
curproxy->options &= ~PR_O_BALANCE;
curproxy->options |= PR_O_BALANCE_UH;
}
else {
Alert("parsing [%s:%d] : '%s' only supports 'roundrobin' and 'source' options.\n", file, linenum, args[0]);
Alert("parsing [%s:%d] : '%s' only supports 'roundrobin', 'source' and 'uri' options.\n", file, linenum, args[0]);
return -1;
}
}
else /* if no option is set, use round-robin by default */
else {/* if no option is set, use round-robin by default */
curproxy->options &= ~PR_O_BALANCE;
curproxy->options |= PR_O_BALANCE_RR;
}
}
else if (!strcmp(args[0], "server")) { /* server address */
int cur_arg;
@ -2236,6 +2244,13 @@ int readcfgfile(const char *file)
Warning("parsing %s : monitor-uri will be ignored for %s '%s'.\n",
file, proxy_type_str(curproxy), curproxy->id);
}
if (curproxy->options & PR_O_BALANCE_UH) {
curproxy->options &= ~PR_O_BALANCE;
curproxy->options |= PR_O_BALANCE_RR;
Warning("parsing %s : URI hash will be ignored for %s '%s'. Falling back to round robin.\n",
file, proxy_type_str(curproxy), curproxy->id);
}
}
else if (curproxy->mode == PR_MODE_HTTP) { /* HTTP PROXY */
if ((curproxy->cookie_name != NULL) && ((newsrv = curproxy->srv) == NULL)) {