diff --git a/doc/configuration.txt b/doc/configuration.txt
index 980de0b92..7387f4b53 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -1413,6 +1413,7 @@ The following keywords are supported in the "global" section :
    - tune.rcvbuf.frontend
    - tune.rcvbuf.server
    - tune.recv_enough
+   - tune.ring.queues
    - tune.runqueue-depth
    - tune.sched.low-latency
    - tune.sndbuf.backend
@@ -3769,6 +3770,15 @@ tune.recv_enough <number>
   may be changed by this setting to better deal with workloads involving lots
   of short messages such as telnet or SSH sessions.
 
+tune.ring.queues <number>
+  Sets the number of write queues in front of ring buffers. This can have an
+  effect on the CPU usage of traces during debugging sessions, and both too
+  low and too large a value can have a significant impact. The default value
+  was determined experimentally by developers and there should be no reason
+  to change it unless instructed to do so in order to try to address
+  specific issues. Such a setting should not be left in the configuration
+  across version upgrades because its optimal value may evolve over time.
+
 tune.runqueue-depth <number>
   Sets the maximum amount of task that can be processed at once when running
   tasks. The default value depends on the number of threads but sits between 35
diff --git a/include/haproxy/defaults.h b/include/haproxy/defaults.h
index 051ca8111..47db366d0 100644
--- a/include/haproxy/defaults.h
+++ b/include/haproxy/defaults.h
@@ -532,4 +532,24 @@
 # endif
 #endif
 
+/* Number of ring wait queues, derived from the build-time thread limit: a
+ * single one without threads, half of MAX_THREADS (rounded up) capped at 16.
+ */
+#if !defined(RING_WAIT_QUEUES)
+# if !defined(USE_THREAD)
+#  define RING_WAIT_QUEUES 1
+# elif MAX_THREADS < 32
+#  define RING_WAIT_QUEUES ((MAX_THREADS + 1) / 2)
+# else
+#  define RING_WAIT_QUEUES 16
+# endif
+#endif
+
+/* Default number of ring queues in use. Six queues were found to be optimal
+ * on various archs at various thread counts, so that is the default.
+ */
+#if !defined(RING_DFLT_QUEUES)
+# define RING_DFLT_QUEUES 6
+#endif
+
 #endif /* _HAPROXY_DEFAULTS_H */
diff --git a/include/haproxy/global-t.h b/include/haproxy/global-t.h
index 25536df14..f26b13f21 100644
--- a/include/haproxy/global-t.h
+++ b/include/haproxy/global-t.h
@@ -190,6 +190,7 @@ struct global {
 		int nb_stk_ctr;       /* number of stick counters, defaults to MAX_SESS_STKCTR */
 		int default_shards; /* default shards for listeners, or -1 (by-thread) or -2 (by-group) */
 		uint max_checks_per_thread; /* if >0, no more than this concurrent checks per thread */
+		uint ring_queues;   /* if >0, #ring queues, otherwise RING_DFLT_QUEUES is used */
 #ifdef USE_QUIC
 		unsigned int quic_backend_max_idle_timeout;
 		unsigned int quic_frontend_max_idle_timeout;
diff --git a/include/haproxy/ring-t.h b/include/haproxy/ring-t.h
index 3d699b503..4e091ee0a 100644
--- a/include/haproxy/ring-t.h
+++ b/include/haproxy/ring-t.h
@@ -148,10 +148,10 @@ struct ring {
 
 	/* keep the queue in a separate cache line below */
 	THREAD_PAD(64 - 3*sizeof(void*) - 4*sizeof(int));
-	struct ring_wait_cell *queue; // wait queue
-
-	/* and leave a spacer after it to avoid false sharing */
-	THREAD_PAD(64 - sizeof(void*));
+	struct {
+		struct ring_wait_cell *ptr;
+		THREAD_PAD(64 - sizeof(void*));
+	} queue[RING_WAIT_QUEUES + 1]; // wait queue + 1 spacer
 };
 
 #endif /* _HAPROXY_RING_T_H */
diff --git a/include/haproxy/tinfo-t.h b/include/haproxy/tinfo-t.h
index 357c4c0aa..8e7638e2b 100644
--- a/include/haproxy/tinfo-t.h
+++ b/include/haproxy/tinfo-t.h
@@ -110,7 +110,7 @@ struct thread_info {
 	uint tid, ltid;                   /* process-wide and group-wide thread ID (start at 0) */
 	ulong ltid_bit;                   /* bit masks for the tid/ltid */
 	uint tgid;                        /* ID of the thread group this thread belongs to (starts at 1; 0=unset) */
-	/* 32-bit hole here */
+	uint ring_queue;                  /* queue number for the rings */
 
 	ullong pth_id;                    /* the pthread_t cast to a ullong */
 	void *stack_top;                  /* the top of the stack when entering the thread */
diff --git a/src/haproxy.c b/src/haproxy.c
index 723335a6e..7b2a18a72 100644
--- a/src/haproxy.c
+++ b/src/haproxy.c
@@ -3150,6 +3150,18 @@ static void *run_thread_poll_loop(void *data)
 #endif
 	ha_thread_info[tid].stack_top = __builtin_frame_address(0);
 
+	/* Assign the ring queue. Counter-intuitively, this does not benefit
+	 * from locality, and grouping threads from the same group or range
+	 * of numbers in the same queue is counter-productive. This is in fact
+	 * convenient because it means we can use a simple modulo and ensure
+	 * that even small numbers of threads are well spread.
+	 */
+	ha_thread_info[tid].ring_queue =
+		(tid % MIN(global.nbthread,
+			   (global.tune.ring_queues ?
+			    global.tune.ring_queues :
+			    RING_DFLT_QUEUES))) % RING_WAIT_QUEUES;
+
 	/* thread is started, from now on it is not idle nor harmless */
 	thread_harmless_end();
 	thread_idle_end();
diff --git a/src/ring.c b/src/ring.c
index d45bc245d..4118a645d 100644
--- a/src/ring.c
+++ b/src/ring.c
@@ -22,6 +22,7 @@
 #include <haproxy/api.h>
 #include <haproxy/applet.h>
 #include <haproxy/buf.h>
+#include <haproxy/cfgparse.h>
 #include <haproxy/cli.h>
 #include <haproxy/ring.h>
 #include <haproxy/sc_strm.h>
@@ -51,7 +52,7 @@ void ring_init(struct ring *ring, void *area, size_t size, int reset)
 	ring->storage = area;
 	ring->pending = 0;
 	ring->waking = 0;
-	ring->queue = NULL;
+	memset(&ring->queue, 0, sizeof(ring->queue));
 
 	if (reset) {
 		ring->storage->size = size - sizeof(*ring->storage);
@@ -646,6 +647,34 @@ size_t ring_max_payload(const struct ring *ring)
 	return max;
 }
 
+/* config parser for global "tune.ring.queues": expects a number from 0 to RING_WAIT_QUEUES; returns 0 if OK, -1 on error */
+static int cfg_parse_tune_ring_queues(char **args, int section_type, struct proxy *curpx,
+                                       const struct proxy *defpx, const char *file, int line,
+                                       char **err)
+{
+	char *end;
+	long queues;
+
+	if (too_many_args(1, args, err, NULL))
+		return -1;
+
+	queues = strtol(args[1], &end, 10); /* strtol() so that "" or "6x" is rejected, unlike atoi() */
+	if (end == args[1] || *end || queues < 0 || queues > RING_WAIT_QUEUES) {
+		memprintf(err, "'%s' expects a number between 0 and %d but got '%s'.", args[0], RING_WAIT_QUEUES, args[1]);
+		return -1;
+	}
+	global.tune.ring_queues = queues;
+	return 0;
+}
+
+/* config keyword parsers: "tune.ring.queues" (global section) is handled by cfg_parse_tune_ring_queues() */
+static struct cfg_kw_list cfg_kws = {ILH, {
+	{ CFG_GLOBAL, "tune.ring.queues", cfg_parse_tune_ring_queues },
+	{ 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
 /*
  * Local variables:
  *  c-indent-level: 8