MEDIUM: init/threads: don't use spinlocks during the init phase

PiBa-NL found some pathological cases where starting threads can hinder
each other and cause a measurable slow down. This problem is reproducible
with the following config (haproxy must be built with -DDEBUG_DEV) :

    global
	stats socket /tmp/sock1 mode 666 level admin
	nbthread 64

    backend stopme
	timeout server  1s
	option tcp-check
	tcp-check send "debug dev exit\n"
	server cli unix@/tmp/sock1 check

This will cause the process to be stopped once the checks are ready to
start. Binding all these to just a few cores magnifies the problem.
Starting them in loops shows a significant time difference among the
commits :

  # before startup serialization
  $ time for i in {1..20}; do taskset -c 0,1,2,3 ./haproxy-e186161 -db -f slow-init.cfg >/dev/null 2>&1; done

  real    0m1.581s
  user    0m0.621s
  sys     0m5.339s

  # after startup serialization
  $ time for i in {1..20}; do taskset -c 0,1,2,3 ./haproxy-e4d7c9dd -db -f slow-init.cfg >/dev/null 2>&1; done

  real    0m2.366s
  user    0m0.894s
  sys     0m8.238s

In order to address this, let's use plain mutexes and cond_wait during
the init phase. With this done, waiting threads now sleep and the problem
completely disappeared :

  $ time for i in {1..20}; do taskset -c 0,1,2,3 ./haproxy -db -f slow-init.cfg >/dev/null 2>&1; done

  real    0m0.161s
  user    0m0.079s
  sys     0m0.149s
This commit is contained in:
Willy Tarreau 2019-06-11 09:16:41 +02:00
parent b5ecf0393c
commit 34a150ccf5

View File

@ -2562,6 +2562,9 @@ static void *run_thread_poll_loop(void *data)
struct per_thread_init_fct *ptif;
struct per_thread_deinit_fct *ptdf;
struct per_thread_free_fct *ptff;
static int init_left = 0;
__decl_hathreads(static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER);
__decl_hathreads(static pthread_cond_t init_cond = PTHREAD_COND_INITIALIZER);
ha_set_tid((unsigned long)data);
@ -2577,7 +2580,13 @@ static void *run_thread_poll_loop(void *data)
* after reallocating them locally. This will also ensure there is
* no race on file descriptors allocation.
*/
thread_isolate();
#ifdef USE_THREAD
pthread_mutex_lock(&init_mutex);
#endif
/* The first thread must set the number of threads left */
if (!init_left)
init_left = global.nbthread;
init_left--;
tv_update_date(-1,-1);
@ -2606,16 +2615,21 @@ static void *run_thread_poll_loop(void *data)
/* enabling protocols will result in fd_insert() calls to be performed,
* we want all threads to have already allocated their local fd tables
* before doing so.
* before doing so, thus only the last thread does it.
*/
thread_sync_release();
thread_isolate();
if (tid == 0)
if (init_left == 0)
protocol_enable_all();
/* done initializing this thread, don't start before others are done */
thread_sync_release();
#ifdef USE_THREAD
pthread_cond_broadcast(&init_cond);
pthread_mutex_unlock(&init_mutex);
/* now wait for other threads to finish starting */
pthread_mutex_lock(&init_mutex);
while (init_left)
pthread_cond_wait(&init_cond, &init_mutex);
pthread_mutex_unlock(&init_mutex);
#endif
run_poll_loop();