From 0a0a64ef029d026c7d818c497786bdf79089d58d Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Fri, 22 Mar 2024 16:47:17 +0100
Subject: [PATCH] OPTIM: ring: use relaxed stores to release the threads

We don't care in what order the waiting threads are released, so the
store to each cell's ->next pointer that releases its thread can be a
relaxed atomic store. This brings a 3-5% performance boost on ARM with
80 cores, reaching 7.25M/s, and changes nothing on x86, which keeps
using strict ordering.
---
 src/ring.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ring.c b/src/ring.c
index 9f2be10fd..8d46679cc 100644
--- a/src/ring.c
+++ b/src/ring.c
@@ -411,7 +411,7 @@ ssize_t ring_write(struct ring *ring, size_t maxlen, const struct ist pfx[], siz
 		/* now release */
 		for (curr_cell = &cell; curr_cell; curr_cell = next_cell) {
 			next_cell = HA_ATOMIC_LOAD(&curr_cell->next);
-			HA_ATOMIC_STORE(&curr_cell->next, curr_cell);
+			_HA_ATOMIC_STORE(&curr_cell->next, curr_cell);
 		}
 
 		/* unlock the message area */
@@ -421,7 +421,7 @@ ssize_t ring_write(struct ring *ring, size_t maxlen, const struct ist pfx[], siz
 		for (curr_cell = &cell; curr_cell; curr_cell = next_cell) {
 			next_cell = HA_ATOMIC_LOAD(&curr_cell->next);
 			HA_ATOMIC_STORE(&curr_cell->to_send_self, 0);
-			HA_ATOMIC_STORE(&curr_cell->next, curr_cell);
+			_HA_ATOMIC_STORE(&curr_cell->next, curr_cell);
 		}
 	}