BUG/MEDIUM: server: fix race on servers_list during server deletion

Each server is inserted in a global list named servers_list on
new_server(). This list is then only used to finalize servers
initialization after parsing.

On dynamic server creation, there is no issue as new_server() is under
thread isolation. However, when a server is deleted after its refcount
reached zero, srv_drop() removes it from servers_list without lock
protection. In the longterm, this can cause list corruption and crashes,
especially if multiple adjacent servers are removed in parallel.

To fix this, convert servers_list to a mt_list. This should not impact
performance as servers_list is not used during runtime outside of server
creation/deletion.

This should fix github issue #2733. Thanks to Chris Staite who first
found the issue here.

This must be backported up to 2.6.
This commit is contained in:
Amaury Denoyelle 2024-10-23 18:18:48 +02:00
parent 116178563c
commit 7a02fcaf20
4 changed files with 8 additions and 7 deletions

View File

@ -301,7 +301,7 @@ struct server {
signed char use_ssl; /* ssl enabled (1: on, 0: disabled, -1 forced off) */ signed char use_ssl; /* ssl enabled (1: on, 0: disabled, -1 forced off) */
unsigned int flags; /* server flags (SRV_F_*) */ unsigned int flags; /* server flags (SRV_F_*) */
unsigned int pp_opts; /* proxy protocol options (SRV_PP_*) */ unsigned int pp_opts; /* proxy protocol options (SRV_PP_*) */
struct list global_list; /* attach point in the global servers_list */ struct mt_list global_list; /* attach point in the global servers_list */
struct server *next; struct server *next;
struct mt_list prev_deleted; /* deleted servers with 'next' ptr pointing to us */ struct mt_list prev_deleted; /* deleted servers with 'next' ptr pointing to us */
int cklen; /* the len of the cookie, to speed up checks */ int cklen; /* the len of the cookie, to speed up checks */

View File

@ -41,7 +41,7 @@
__decl_thread(extern HA_SPINLOCK_T idle_conn_srv_lock); __decl_thread(extern HA_SPINLOCK_T idle_conn_srv_lock);
extern struct idle_conns idle_conns[MAX_THREADS]; extern struct idle_conns idle_conns[MAX_THREADS];
extern struct task *idle_conn_task; extern struct task *idle_conn_task;
extern struct list servers_list; extern struct mt_list servers_list;
extern struct dict server_key_dict; extern struct dict server_key_dict;
int srv_downtime(const struct server *s); int srv_downtime(const struct server *s);

View File

@ -2810,6 +2810,7 @@ int check_config_validity()
struct proxy *init_proxies_list = NULL; struct proxy *init_proxies_list = NULL;
struct stktable *t; struct stktable *t;
struct server *newsrv = NULL; struct server *newsrv = NULL;
struct mt_list back;
int err_code = 0; int err_code = 0;
unsigned int next_pxid = 1; unsigned int next_pxid = 1;
struct bind_conf *bind_conf; struct bind_conf *bind_conf;
@ -4250,7 +4251,7 @@ int check_config_validity()
/* we must finish to initialize certain things on the servers */ /* we must finish to initialize certain things on the servers */
list_for_each_entry(newsrv, &servers_list, global_list) { MT_LIST_FOR_EACH_ENTRY_LOCKED(newsrv, &servers_list, global_list, back) {
/* initialize idle conns lists */ /* initialize idle conns lists */
if (srv_init_per_thr(newsrv) == -1) { if (srv_init_per_thr(newsrv) == -1) {
ha_alert("parsing [%s:%d] : failed to allocate per-thread lists for server '%s'.\n", ha_alert("parsing [%s:%d] : failed to allocate per-thread lists for server '%s'.\n",

View File

@ -72,7 +72,7 @@ struct srv_kw_list srv_keywords = {
__decl_thread(HA_SPINLOCK_T idle_conn_srv_lock); __decl_thread(HA_SPINLOCK_T idle_conn_srv_lock);
struct eb_root idle_conn_srv = EB_ROOT; struct eb_root idle_conn_srv = EB_ROOT;
struct task *idle_conn_task __read_mostly = NULL; struct task *idle_conn_task __read_mostly = NULL;
struct list servers_list = LIST_HEAD_INIT(servers_list); struct mt_list servers_list = MT_LIST_HEAD_INIT(servers_list);
static struct task *server_atomic_sync_task = NULL; static struct task *server_atomic_sync_task = NULL;
static event_hdl_async_equeue server_atomic_sync_queue; static event_hdl_async_equeue server_atomic_sync_queue;
@ -2962,7 +2962,7 @@ struct server *new_server(struct proxy *proxy)
srv->obj_type = OBJ_TYPE_SERVER; srv->obj_type = OBJ_TYPE_SERVER;
srv->proxy = proxy; srv->proxy = proxy;
queue_init(&srv->queue, proxy, srv); queue_init(&srv->queue, proxy, srv);
LIST_APPEND(&servers_list, &srv->global_list); MT_LIST_APPEND(&servers_list, &srv->global_list);
LIST_INIT(&srv->srv_rec_item); LIST_INIT(&srv->srv_rec_item);
LIST_INIT(&srv->ip_rec_item); LIST_INIT(&srv->ip_rec_item);
LIST_INIT(&srv->pp_tlvs); LIST_INIT(&srv->pp_tlvs);
@ -3088,7 +3088,7 @@ struct server *srv_drop(struct server *srv)
HA_SPIN_DESTROY(&srv->lock); HA_SPIN_DESTROY(&srv->lock);
LIST_DELETE(&srv->global_list); MT_LIST_DELETE(&srv->global_list);
event_hdl_sub_list_destroy(&srv->e_subs); event_hdl_sub_list_destroy(&srv->e_subs);
EXTRA_COUNTERS_FREE(srv->extra_counters); EXTRA_COUNTERS_FREE(srv->extra_counters);
@ -3267,7 +3267,7 @@ static int _srv_parse_tmpl_init(struct server *srv, struct proxy *px)
release_sample_expr(newsrv->ssl_ctx.sni); release_sample_expr(newsrv->ssl_ctx.sni);
free_check(&newsrv->agent); free_check(&newsrv->agent);
free_check(&newsrv->check); free_check(&newsrv->check);
LIST_DELETE(&newsrv->global_list); MT_LIST_DELETE(&newsrv->global_list);
} }
free(newsrv); free(newsrv);
return i - srv->tmpl_info.nb_low; return i - srv->tmpl_info.nb_low;