diff --git a/doc/configuration.txt b/doc/configuration.txt index 980de0b92..7387f4b53 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -1413,6 +1413,7 @@ The following keywords are supported in the "global" section : - tune.rcvbuf.frontend - tune.rcvbuf.server - tune.recv_enough + - tune.ring.queues - tune.runqueue-depth - tune.sched.low-latency - tune.sndbuf.backend @@ -3769,6 +3770,15 @@ tune.recv_enough may be changed by this setting to better deal with workloads involving lots of short messages such as telnet or SSH sessions. +tune.ring.queues + Sets the number of write queues in front of ring buffers. This can have an + effect on the CPU usage of traces during debugging sessions, and both too + low or too large a value can have an important effect. The good value was + determined experimentally by developers and there should be no reason to + try to change it unless instructed to do so in order to try to address + specific issues. Such a setting should not be left in the configuration + across version upgrades because its optimal value may evolve over time. + tune.runqueue-depth Sets the maximum amount of task that can be processed at once when running tasks. The default value depends on the number of threads but sits between 35 diff --git a/include/haproxy/defaults.h b/include/haproxy/defaults.h index 051ca8111..47db366d0 100644 --- a/include/haproxy/defaults.h +++ b/include/haproxy/defaults.h @@ -532,4 +532,24 @@ # endif #endif +/* number of ring wait queues depending on the number + * of threads. + */ +#ifndef RING_WAIT_QUEUES +# if defined(USE_THREAD) && MAX_THREADS >= 32 +# define RING_WAIT_QUEUES 16 +# elif defined(USE_THREAD) +# define RING_WAIT_QUEUES ((MAX_THREADS + 1) / 2) +# else +# define RING_WAIT_QUEUES 1 +# endif +#endif + +/* it has been found that 6 queues was optimal on various archs at various + * thread counts, so let's use that by default. + */ +#ifndef RING_DFLT_QUEUES +# define RING_DFLT_QUEUES 6 +#endif + #endif /* _HAPROXY_DEFAULTS_H */ diff --git a/include/haproxy/global-t.h b/include/haproxy/global-t.h index 25536df14..f26b13f21 100644 --- a/include/haproxy/global-t.h +++ b/include/haproxy/global-t.h @@ -190,6 +190,7 @@ struct global { int nb_stk_ctr; /* number of stick counters, defaults to MAX_SESS_STKCTR */ int default_shards; /* default shards for listeners, or -1 (by-thread) or -2 (by-group) */ uint max_checks_per_thread; /* if >0, no more than this concurrent checks per thread */ + uint ring_queues; /* if >0, #ring queues, otherwise equals #thread groups */ #ifdef USE_QUIC unsigned int quic_backend_max_idle_timeout; unsigned int quic_frontend_max_idle_timeout; diff --git a/include/haproxy/ring-t.h b/include/haproxy/ring-t.h index 3d699b503..4e091ee0a 100644 --- a/include/haproxy/ring-t.h +++ b/include/haproxy/ring-t.h @@ -148,10 +148,10 @@ struct ring { /* keep the queue in a separate cache line below */ THREAD_PAD(64 - 3*sizeof(void*) - 4*sizeof(int)); - struct ring_wait_cell *queue; // wait queue - - /* and leave a spacer after it to avoid false sharing */ - THREAD_PAD(64 - sizeof(void*)); + struct { + struct ring_wait_cell *ptr; + THREAD_PAD(64 - sizeof(void*)); + } queue[RING_WAIT_QUEUES + 1]; // wait queue + 1 spacer }; #endif /* _HAPROXY_RING_T_H */ diff --git a/include/haproxy/tinfo-t.h b/include/haproxy/tinfo-t.h index 357c4c0aa..8e7638e2b 100644 --- a/include/haproxy/tinfo-t.h +++ b/include/haproxy/tinfo-t.h @@ -110,7 +110,7 @@ struct thread_info { uint tid, ltid; /* process-wide and group-wide thread ID (start at 0) */ ulong ltid_bit; /* bit masks for the tid/ltid */ uint tgid; /* ID of the thread group this thread belongs to (starts at 1; 0=unset) */ - /* 32-bit hole here */ + uint ring_queue; /* queue number for the rings */ ullong pth_id; /* the pthread_t cast to a ullong */ void *stack_top; /* the top of the stack when entering the thread */ diff --git a/src/haproxy.c b/src/haproxy.c index 723335a6e..7b2a18a72 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -3150,6 +3150,18 @@ static void *run_thread_poll_loop(void *data) #endif ha_thread_info[tid].stack_top = __builtin_frame_address(0); + /* Assign the ring queue. Contrary to an intuitive thought, this does + * not benefit from locality and it's counter-productive to group + * threads from a same group or range number in the same queue. In some + * sense it arranges us because it means we can use a modulo and ensure + * that even small numbers of threads are well spread. + */ + ha_thread_info[tid].ring_queue = + (tid % MIN(global.nbthread, + (global.tune.ring_queues ? + global.tune.ring_queues : + RING_DFLT_QUEUES))) % RING_WAIT_QUEUES; + /* thread is started, from now on it is not idle nor harmless */ thread_harmless_end(); thread_idle_end(); diff --git a/src/ring.c b/src/ring.c index d45bc245d..4118a645d 100644 --- a/src/ring.c +++ b/src/ring.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -51,7 +52,7 @@ void ring_init(struct ring *ring, void *area, size_t size, int reset) ring->storage = area; ring->pending = 0; ring->waking = 0; - ring->queue = NULL; + memset(&ring->queue, 0, sizeof(ring->queue)); if (reset) { ring->storage->size = size - sizeof(*ring->storage); @@ -646,6 +647,34 @@ size_t ring_max_payload(const struct ring *ring) return max; } +/* config parser for global "tune.ring.queues", accepts a number from 0 to RING_WAIT_QUEUES */ +static int cfg_parse_tune_ring_queues(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + int queues; + + if (too_many_args(1, args, err, NULL)) + return -1; + + queues = atoi(args[1]); + if (queues < 0 || queues > RING_WAIT_QUEUES) { + memprintf(err, "'%s' expects a number between 0 and %d but got '%s'.", args[0], RING_WAIT_QUEUES, args[1]); + return -1; + } + + global.tune.ring_queues = queues; + return 0; +} + +/* config keyword parsers */ +static struct cfg_kw_list cfg_kws = {ILH, { + { CFG_GLOBAL, "tune.ring.queues", cfg_parse_tune_ring_queues }, + { 0, NULL, NULL } +}}; + +INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); + /* * Local variables: * c-indent-level: 8