mirror of
http://git.haproxy.org/git/haproxy.git/
synced 2025-04-01 22:48:25 +00:00
MEDIUM: resolvers: use a kill list to preserve the list consistency
When scanning resolution.curr it's possible to try to free some resolutions which will themselves result in freeing other ones. If one of these other ones is exactly the next one in the list, the list walk visits deleted nodes and causes memory corruption, double-frees and so on. The approach taken using the "safe" argument to some functions seems to work but it's extremely brittle as it is required to carefully check all call paths from process_ressolvers() and pass the argument to 1 there to refrain from deleting entries, so the bug is very likely to come back after some tiny changes to this code. A variant was tried, checking at various places that the current task corresponds to process_resolvers() but this is also quite brittle even though a bit less. This patch uses another approach which consists in carefully unlinking elements from the list and deferring their removal by placing it in a kill list instead of deleting them synchronously. The real benefit here is that the complexity only has to be placed where the complications are. A thread-local list is fed with elements to be deleted before scanning the resolutions, and it's flushed at the end by picking the first one until the list is empty. This way we never dereference the next element and do not care about its presence or not in the list. One function, resolv_unlink_resolution(), is exported and used outside, so it had to be modified to use this list as well. Internal code has to use _resolv_unlink_resolution() instead.
This commit is contained in:
parent
aae7320b0d
commit
f766ec6b53
126
src/resolvers.c
126
src/resolvers.c
@ -52,6 +52,7 @@
|
||||
struct list sec_resolvers = LIST_HEAD_INIT(sec_resolvers);
|
||||
struct list resolv_srvrq_list = LIST_HEAD_INIT(resolv_srvrq_list);
|
||||
|
||||
static THREAD_LOCAL struct list death_row; /* list of deferred resolutions to kill, local validity only */
|
||||
static THREAD_LOCAL uint64_t resolv_query_id_seed = 0; /* random seed */
|
||||
struct resolvers *curr_resolvers = NULL;
|
||||
|
||||
@ -61,6 +62,9 @@ DECLARE_POOL(resolv_requester_pool, "resolv_requester", sizeof(struct resolv_r
|
||||
|
||||
static unsigned int resolution_uuid = 1;
|
||||
unsigned int resolv_failed_resolutions = 0;
|
||||
static struct task *process_resolvers(struct task *t, void *context, unsigned int state);
|
||||
static void resolv_free_resolution(struct resolv_resolution *resolution);
|
||||
static void _resolv_unlink_resolution(struct resolv_requester *requester, int safe);
|
||||
|
||||
enum {
|
||||
DNS_STAT_ID,
|
||||
@ -558,15 +562,51 @@ int resolv_read_name(unsigned char *buffer, unsigned char *bufend,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Reinitialize the list of aborted resolutions before calling certain
|
||||
* functions relying on it. The list must be processed by calling
|
||||
* free_aborted_resolutions() after operations.
|
||||
*/
|
||||
static void init_aborted_resolutions()
|
||||
{
|
||||
LIST_INIT(&death_row);
|
||||
}
|
||||
|
||||
static void abort_resolution(struct resolv_resolution *res)
|
||||
{
|
||||
LIST_DEL_INIT(&res->list);
|
||||
LIST_APPEND(&death_row, &res->list);
|
||||
}
|
||||
|
||||
/* This releases any aborted resolution found in the death row. It is mandatory
|
||||
* to call init_aborted_resolutions() first before the function (or loop) that
|
||||
* needs to defer deletions. Note that some of them are in relation via internal
|
||||
* objects and might cause the deletion of other ones from the same list, so we
|
||||
* must absolutely not use a list_for_each_entry_safe() nor any such thing here,
|
||||
* and solely rely on each call to remove the first remaining list element.
|
||||
*/
|
||||
static void free_aborted_resolutions()
|
||||
{
|
||||
struct resolv_resolution *res;
|
||||
|
||||
while (!LIST_ISEMPTY(&death_row)) {
|
||||
res = LIST_NEXT(&death_row, struct resolv_resolution *, list);
|
||||
resolv_free_resolution(res);
|
||||
}
|
||||
|
||||
/* make sure nobody tries to add anything without having initialized it */
|
||||
death_row = (struct list){ };
|
||||
}
|
||||
|
||||
/* Cleanup fqdn/port and address of a server attached to a SRV resolution. This
|
||||
* happens when an SRV item is purged or when the server status is considered as
|
||||
* obsolete.
|
||||
*
|
||||
* Must be called with the DNS lock held.
|
||||
* Must be called with the DNS lock held, and with the death_row already
|
||||
* initialized via init_aborted_resolutions().
|
||||
*/
|
||||
static void resolv_srvrq_cleanup_srv(struct server *srv, int safe)
|
||||
static void resolv_srvrq_cleanup_srv(struct server *srv)
|
||||
{
|
||||
resolv_unlink_resolution(srv->resolv_requester, safe);
|
||||
_resolv_unlink_resolution(srv->resolv_requester, 0);
|
||||
HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
|
||||
srvrq_update_srv_status(srv, 1);
|
||||
ha_free(&srv->hostname);
|
||||
@ -597,7 +637,9 @@ static struct task *resolv_srvrq_expire_task(struct task *t, void *context, unsi
|
||||
goto end;
|
||||
|
||||
HA_SPIN_LOCK(DNS_LOCK, &srv->srvrq->resolvers->lock);
|
||||
resolv_srvrq_cleanup_srv(srv, 0);
|
||||
init_aborted_resolutions();
|
||||
resolv_srvrq_cleanup_srv(srv);
|
||||
free_aborted_resolutions();
|
||||
HA_SPIN_UNLOCK(DNS_LOCK, &srv->srvrq->resolvers->lock);
|
||||
|
||||
end:
|
||||
@ -605,8 +647,9 @@ static struct task *resolv_srvrq_expire_task(struct task *t, void *context, unsi
|
||||
}
|
||||
|
||||
/* Checks for any obsolete record, also identify any SRV request, and try to
|
||||
* find a corresponding server.
|
||||
*/
|
||||
* find a corresponding server. Must be called with the death_row already
|
||||
* initialized via init_aborted_resolutions().
|
||||
*/
|
||||
static void resolv_check_response(struct resolv_resolution *res)
|
||||
{
|
||||
struct resolvers *resolvers = res->resolvers;
|
||||
@ -638,7 +681,7 @@ static void resolv_check_response(struct resolv_resolution *res)
|
||||
else if (item->type == DNS_RTYPE_SRV) {
|
||||
/* Remove any associated server */
|
||||
list_for_each_entry_safe(srv, srvback, &item->attached_servers, srv_rec_item)
|
||||
resolv_srvrq_cleanup_srv(srv, 0);
|
||||
resolv_srvrq_cleanup_srv(srv);
|
||||
}
|
||||
|
||||
LIST_DEL_INIT(&item->list);
|
||||
@ -760,7 +803,7 @@ srv_found:
|
||||
/* Unlink A/AAAA resolution for this server if there is an AR item.
|
||||
* It is usless to perform an extra resolution
|
||||
*/
|
||||
resolv_unlink_resolution(srv->resolv_requester, 0);
|
||||
_resolv_unlink_resolution(srv->resolv_requester, 0);
|
||||
}
|
||||
|
||||
if (!srv->hostname_dn) {
|
||||
@ -1874,7 +1917,8 @@ resolv_get_requester(struct resolv_requester **req, enum obj_type *owner,
|
||||
}
|
||||
|
||||
/* Links a requester (a server or a resolv_srvrq) with a resolution. It returns 0
|
||||
* on success, -1 otherwise.
|
||||
* on success, -1 otherwise. Must be called with the death_row already initialized
|
||||
* via init_aborted_resolutions().
|
||||
*/
|
||||
int resolv_link_resolution(void *requester, int requester_type, int requester_locked)
|
||||
{
|
||||
@ -1967,7 +2011,8 @@ int resolv_link_resolution(void *requester, int requester_type, int requester_lo
|
||||
|
||||
/* This function removes all server/srvrq references on answer items
|
||||
* if <safe> is set to 1, in case of srvrq, sub server resquesters unlink
|
||||
* is called using safe == 1 to make it usable into callbacks
|
||||
* is called using safe == 1 to make it usable into callbacks. Must be called
|
||||
* with the death_row already initialized via init_aborted_resolutions().
|
||||
*/
|
||||
void resolv_detach_from_resolution_answer_items(struct resolv_resolution *res, struct resolv_requester *req, int safe)
|
||||
{
|
||||
@ -1983,7 +2028,7 @@ void resolv_detach_from_resolution_answer_items(struct resolv_resolution *res,
|
||||
if (item->type == DNS_RTYPE_SRV) {
|
||||
list_for_each_entry_safe(srv, srvback, &item->attached_servers, srv_rec_item) {
|
||||
if (srv->srvrq == srvrq)
|
||||
resolv_srvrq_cleanup_srv(srv, safe);
|
||||
resolv_srvrq_cleanup_srv(srv);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1992,9 +2037,10 @@ void resolv_detach_from_resolution_answer_items(struct resolv_resolution *res,
|
||||
|
||||
/* Removes a requester from a DNS resolution. It takes takes care of all the
|
||||
* consequences. It also cleans up some parameters from the requester.
|
||||
* if <safe> is set to 1, the corresponding resolution is not released.
|
||||
* if <safe> is set to 1, the corresponding resolution is not released. Must be
|
||||
* called with the death_row already initialized via init_aborted_resolutions().
|
||||
*/
|
||||
void resolv_unlink_resolution(struct resolv_requester *requester, int safe)
|
||||
static void _resolv_unlink_resolution(struct resolv_requester *requester, int safe)
|
||||
{
|
||||
struct resolv_resolution *res;
|
||||
struct resolv_requester *req;
|
||||
@ -2024,7 +2070,7 @@ void resolv_unlink_resolution(struct resolv_requester *requester, int safe)
|
||||
return;
|
||||
}
|
||||
|
||||
resolv_free_resolution(res);
|
||||
abort_resolution(res);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -2049,12 +2095,22 @@ void resolv_unlink_resolution(struct resolv_requester *requester, int safe)
|
||||
}
|
||||
}
|
||||
|
||||
/* The public version of the function above that deals with the death row. */
|
||||
void resolv_unlink_resolution(struct resolv_requester *requester, int safe)
|
||||
{
|
||||
init_aborted_resolutions();
|
||||
_resolv_unlink_resolution(requester, safe);
|
||||
free_aborted_resolutions();
|
||||
}
|
||||
|
||||
/* Called when a network IO is generated on a name server socket for an incoming
|
||||
* packet. It performs the following actions:
|
||||
* - check if the packet requires processing (not outdated resolution)
|
||||
* - ensure the DNS packet received is valid and call requester's callback
|
||||
* - call requester's error callback if invalid response
|
||||
* - check the dn_name in the packet against the one sent
|
||||
*
|
||||
* Must be called with the death_row already initialized via init_aborted_resolutions().
|
||||
*/
|
||||
static int resolv_process_responses(struct dns_nameserver *ns)
|
||||
{
|
||||
@ -2256,10 +2312,19 @@ static struct task *process_resolvers(struct task *t, void *context, unsigned in
|
||||
|
||||
HA_SPIN_LOCK(DNS_LOCK, &resolvers->lock);
|
||||
|
||||
/* Handle all expired resolutions from the active list */
|
||||
list_for_each_entry_safe(res, resback, &resolvers->resolutions.curr, list) {
|
||||
/* Handle all expired resolutions from the active list. Elements that
|
||||
* need to be removed will in fact be moved to the death_row. Other
|
||||
* ones will be handled normally.
|
||||
*/
|
||||
|
||||
init_aborted_resolutions();
|
||||
res = LIST_NEXT(&resolvers->resolutions.curr, struct resolv_resolution *, list);
|
||||
while (&res->list != &resolvers->resolutions.curr) {
|
||||
resback = LIST_NEXT(&res->list, struct resolv_resolution *, list);
|
||||
|
||||
if (LIST_ISEMPTY(&res->requesters)) {
|
||||
resolv_free_resolution(res);
|
||||
abort_resolution(res);
|
||||
res = resback;
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -2287,8 +2352,12 @@ static struct task *process_resolvers(struct task *t, void *context, unsigned in
|
||||
/* Clean up resolution info and remove it from the
|
||||
* current list */
|
||||
resolv_reset_resolution(res);
|
||||
|
||||
/* subsequent entries might have been deleted here */
|
||||
resback = LIST_NEXT(&res->list, struct resolv_resolution *, list);
|
||||
LIST_DEL_INIT(&res->list);
|
||||
LIST_APPEND(&resolvers->resolutions.wait, &res->list);
|
||||
res = resback;
|
||||
}
|
||||
else {
|
||||
/* Otherwise resend the DNS query and requeue the resolution */
|
||||
@ -2309,13 +2378,15 @@ static struct task *process_resolvers(struct task *t, void *context, unsigned in
|
||||
res->try--;
|
||||
}
|
||||
resolv_send_query(res);
|
||||
resback = LIST_NEXT(&res->list, struct resolv_resolution *, list);
|
||||
res = resback;
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle all resolutions in the wait list */
|
||||
list_for_each_entry_safe(res, resback, &resolvers->resolutions.wait, list) {
|
||||
if (LIST_ISEMPTY(&res->requesters)) {
|
||||
resolv_free_resolution(res);
|
||||
abort_resolution(res);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -2330,6 +2401,9 @@ static struct task *process_resolvers(struct task *t, void *context, unsigned in
|
||||
}
|
||||
}
|
||||
|
||||
/* now we can purge all queued deletions */
|
||||
free_aborted_resolutions();
|
||||
|
||||
resolv_update_resolvers_timeout(resolvers);
|
||||
HA_SPIN_UNLOCK(DNS_LOCK, &resolvers->lock);
|
||||
return t;
|
||||
@ -2344,6 +2418,7 @@ static void resolvers_deinit(void)
|
||||
struct resolv_requester *req, *reqback;
|
||||
struct resolv_srvrq *srvrq, *srvrqback;
|
||||
|
||||
init_aborted_resolutions();
|
||||
list_for_each_entry_safe(resolvers, resolversback, &sec_resolvers, list) {
|
||||
list_for_each_entry_safe(ns, nsback, &resolvers->nameservers, list) {
|
||||
free(ns->id);
|
||||
@ -2376,7 +2451,7 @@ static void resolvers_deinit(void)
|
||||
LIST_DEL_INIT(&req->list);
|
||||
pool_free(resolv_requester_pool, req);
|
||||
}
|
||||
resolv_free_resolution(res);
|
||||
abort_resolution(res);
|
||||
}
|
||||
|
||||
list_for_each_entry_safe(res, resback, &resolvers->resolutions.wait, list) {
|
||||
@ -2384,7 +2459,7 @@ static void resolvers_deinit(void)
|
||||
LIST_DEL_INIT(&req->list);
|
||||
pool_free(resolv_requester_pool, req);
|
||||
}
|
||||
resolv_free_resolution(res);
|
||||
abort_resolution(res);
|
||||
}
|
||||
|
||||
free(resolvers->id);
|
||||
@ -2400,6 +2475,8 @@ static void resolvers_deinit(void)
|
||||
LIST_DEL_INIT(&srvrq->list);
|
||||
free(srvrq);
|
||||
}
|
||||
|
||||
free_aborted_resolutions();
|
||||
}
|
||||
|
||||
/* Finalizes the DNS configuration by allocating required resources and checking
|
||||
@ -2412,6 +2489,8 @@ static int resolvers_finalize_config(void)
|
||||
struct proxy *px;
|
||||
int err_code = 0;
|
||||
|
||||
init_aborted_resolutions();
|
||||
|
||||
/* allocate pool of resolution per resolvers */
|
||||
list_for_each_entry(resolvers, &sec_resolvers, list) {
|
||||
struct dns_nameserver *ns;
|
||||
@ -2507,8 +2586,10 @@ static int resolvers_finalize_config(void)
|
||||
if (err_code & (ERR_ALERT|ERR_ABORT))
|
||||
goto err;
|
||||
|
||||
free_aborted_resolutions();
|
||||
return err_code;
|
||||
err:
|
||||
free_aborted_resolutions();
|
||||
resolvers_deinit();
|
||||
return err_code;
|
||||
|
||||
@ -2798,6 +2879,8 @@ enum act_return resolv_action_do_resolve(struct act_rule *rule, struct proxy *px
|
||||
|
||||
resolvers = rule->arg.resolv.resolvers;
|
||||
|
||||
init_aborted_resolutions();
|
||||
|
||||
/* we have a response to our DNS resolution */
|
||||
use_cache:
|
||||
if (s->resolv_ctx.requester && s->resolv_ctx.requester->resolution != NULL) {
|
||||
@ -2880,6 +2963,7 @@ enum act_return resolv_action_do_resolve(struct act_rule *rule, struct proxy *px
|
||||
ret = ACT_RET_YIELD;
|
||||
|
||||
end:
|
||||
free_aborted_resolutions();
|
||||
if (locked)
|
||||
HA_SPIN_UNLOCK(DNS_LOCK, &resolvers->lock);
|
||||
return ret;
|
||||
@ -2888,7 +2972,7 @@ enum act_return resolv_action_do_resolve(struct act_rule *rule, struct proxy *px
|
||||
ha_free(&s->resolv_ctx.hostname_dn);
|
||||
s->resolv_ctx.hostname_dn_len = 0;
|
||||
if (s->resolv_ctx.requester) {
|
||||
resolv_unlink_resolution(s->resolv_ctx.requester, 0);
|
||||
_resolv_unlink_resolution(s->resolv_ctx.requester, 0);
|
||||
pool_free(resolv_requester_pool, s->resolv_ctx.requester);
|
||||
s->resolv_ctx.requester = NULL;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user