From 34a150ccf5157a04558d5b67e3901045d99ccdea Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 11 Jun 2019 09:16:41 +0200 Subject: [PATCH] MEDIUM: init/threads: don't use spinlocks during the init phase PiBa-NL found some pathological cases where starting threads can hinder each other and cause a measurable slow down. This problem is reproducible with the following config (haproxy must be built with -DDEBUG_DEV) : global stats socket /tmp/sock1 mode 666 level admin nbthread 64 backend stopme timeout server 1s option tcp-check tcp-check send "debug dev exit\n" server cli unix@/tmp/sock1 check This will cause the process to be stopped once the checks are ready to start. Binding all these to just a few cores magnifies the problem. Starting them in loops shows a significant time difference among the commits : # before startup serialization $ time for i in {1..20}; do taskset -c 0,1,2,3 ./haproxy-e186161 -db -f slow-init.cfg >/dev/null 2>&1; done real 0m1.581s user 0m0.621s sys 0m5.339s # after startup serialization $ time for i in {1..20}; do taskset -c 0,1,2,3 ./haproxy-e4d7c9dd -db -f slow-init.cfg >/dev/null 2>&1; done real 0m2.366s user 0m0.894s sys 0m8.238s In order to address this, let's use plain mutexes and cond_wait during the init phase. With this done, waiting threads now sleep and the problem completely disappeared : $ time for i in {1..20}; do taskset -c 0,1,2,3 ./haproxy -db -f slow-init.cfg >/dev/null 2>&1; done real 0m0.161s user 0m0.079s sys 0m0.149s --- src/haproxy.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/src/haproxy.c b/src/haproxy.c index a8898b78d..c9370a323 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -2562,6 +2562,9 @@ static void *run_thread_poll_loop(void *data) struct per_thread_init_fct *ptif; struct per_thread_deinit_fct *ptdf; struct per_thread_free_fct *ptff; + static int init_left = 0; + __decl_hathreads(static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER); + __decl_hathreads(static pthread_cond_t init_cond = PTHREAD_COND_INITIALIZER); ha_set_tid((unsigned long)data); @@ -2577,7 +2580,13 @@ static void *run_thread_poll_loop(void *data) * after reallocating them locally. This will also ensure there is * no race on file descriptors allocation. */ - thread_isolate(); +#ifdef USE_THREAD + pthread_mutex_lock(&init_mutex); +#endif + /* The first thread must set the number of threads left */ + if (!init_left) + init_left = global.nbthread; + init_left--; tv_update_date(-1,-1); @@ -2606,16 +2615,21 @@ static void *run_thread_poll_loop(void *data) /* enabling protocols will result in fd_insert() calls to be performed, * we want all threads to have already allocated their local fd tables - * before doing so. + * before doing so, thus only the last thread does it. */ - thread_sync_release(); - thread_isolate(); - - if (tid == 0) + if (init_left == 0) protocol_enable_all(); - /* done initializing this thread, don't start before others are done */ - thread_sync_release(); +#ifdef USE_THREAD + pthread_cond_broadcast(&init_cond); + pthread_mutex_unlock(&init_mutex); + + /* now wait for other threads to finish starting */ + pthread_mutex_lock(&init_mutex); + while (init_left) + pthread_cond_wait(&init_cond, &init_mutex); + pthread_mutex_unlock(&init_mutex); +#endif run_poll_loop();