OPTIM: ring: use relaxed stores to release the threads

We don't care in what order the threads are released, so we can write their sent value using relaxed atomic stores. This brings a 3-5% perf boost on ARM with 80 cores, reaching 7.25M/s, and doesn't change anything on x86, which keeps using strict ordering.
2024-12-26 14:42:21 +00:00 · 2024-03-22 16:47:17 +01:00 · 2024-03-22 16:47:17 +01:00 · 0a0a64ef02
commit 0a0a64ef02
parent cabe945876
1 changed files with 2 additions and 2 deletions
--- a/src/ring.c
+++ b/src/ring.c
@@ -411,7 +411,7 @@ ssize_t ring_write(struct ring *ring, size_t maxlen, const struct ist pfx[], siz
 		/* now release */
 		for (curr_cell = &cell; curr_cell; curr_cell = next_cell) {
 			next_cell = HA_ATOMIC_LOAD(&curr_cell->next);
-			HA_ATOMIC_STORE(&curr_cell->next, curr_cell);
+			_HA_ATOMIC_STORE(&curr_cell->next, curr_cell);
 		}

 		/* unlock the message area */
@@ -421,7 +421,7 @@ ssize_t ring_write(struct ring *ring, size_t maxlen, const struct ist pfx[], siz
 		for (curr_cell = &cell; curr_cell; curr_cell = next_cell) {
 			next_cell = HA_ATOMIC_LOAD(&curr_cell->next);
 			HA_ATOMIC_STORE(&curr_cell->to_send_self, 0);
-			HA_ATOMIC_STORE(&curr_cell->next, curr_cell);
+			_HA_ATOMIC_STORE(&curr_cell->next, curr_cell);
 		}
 	}