REORG: atomic: reimplement pl_cpu_relax() from atomic-ops.h

There is some confusion here as we need to place some cpu_relax statements
in some loops where it's not easily possible to condition them on the use
of threads. That's what atomic.h already does. So let's take the various
pl_cpu_relax() implementations from there and place them in atomic.h under
the name __ha_cpu_relax() and let them adapt to the presence or absence of
threads and to the architecture (currently only x86 and aarch64 use a barrier
instruction), though it's very likely that arm would work well with a cache
flushing ISB instruction as well.

This time they were implemented as expressions returning 1 rather than
statements, in order to ease their placement as the loop condition or the
continuation expression inside "for" loops. We should probably do the same
with barriers and a few other such macros.
This commit is contained in:
Willy Tarreau 2021-03-02 07:08:34 +01:00
parent 1568355afd
commit 958ae26c35

View File

@ -152,6 +152,7 @@
/* presumably the !USE_THREAD branch (see #else below): with a single thread
 * there is nothing to order or to wait for, so barriers expand to nothing.
 */
#define __ha_barrier_store() do { } while (0)
#define __ha_barrier_full() do { } while (0)
#define __ha_compiler_barrier() do { } while (0)
/* No CPU relaxation needed either. This is an expression evaluating to 1
 * (not a statement) so that it can be placed directly as a loop condition
 * or continuation expression, e.g. "while (!done && __ha_cpu_relax());".
 */
#define __ha_cpu_relax() ({ 1; })
#else /* !USE_THREAD */
@ -395,6 +396,9 @@ __ha_cas_dw(void *target, void *compare, const void *set)
return (ret);
}
/* short-lived CPU relaxation: "rep;nop" is the encoding of the x86 PAUSE
 * instruction, which hints to the CPU that this is a spin-wait loop (saves
 * power, reduces pipeline flushes on loop exit). Evaluates to 1 so it can
 * be used directly as a loop condition or continuation expression.
 */
#define __ha_cpu_relax() ({ asm volatile("rep;nop\n"); 1; })
#elif defined(__arm__) && (defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__))
static __inline void
@ -457,6 +461,9 @@ static __inline int __ha_cas_dw(void *target, void *compare, const void *set)
return (tmp);
}
/* short-lived CPU relaxation: no dedicated relax instruction is emitted on
 * ARMv7 here; the empty volatile asm only keeps the compiler from optimizing
 * the spin point away (note: no "memory" clobber, so it is not a full
 * compiler barrier). Evaluates to 1 for use as a loop condition.
 */
#define __ha_cpu_relax() ({ asm volatile(""); 1; })
#elif defined (__aarch64__)
static __inline void
@ -498,6 +505,11 @@ __ha_barrier_atomic_full(void)
__asm __volatile("dmb ish" ::: "memory");
}
/* short-lived CPU relaxation; "isb" flushes the instruction pipeline, and
 * this was shown to improve fairness on modern ARMv8 cores such as
 * Neoverse N1. The "memory" clobber additionally acts as a compiler
 * barrier. Evaluates to 1 so it can be used directly as a loop condition
 * or continuation expression.
 */
#define __ha_cpu_relax() ({ asm volatile("isb" ::: "memory"); 1; })
static __inline int __ha_cas_dw(void *target, void *compare, void *set)
{
void *value[2];
@ -534,6 +546,9 @@ static __inline int __ha_cas_dw(void *target, void *compare, void *set)
#define __ha_barrier_full __sync_synchronize
/* Note: there is no generic DWCAS */
/* short-lived CPU relaxation (generic fallback for architectures with no
 * specific implementation above): the empty volatile asm merely keeps the
 * spin point from being optimized out; it provides no pause hint and no
 * memory clobber. Evaluates to 1 for use as a loop condition.
 */
#define __ha_cpu_relax() ({ asm volatile(""); 1; })
#endif /* end of arch-specific barrier/dwcas */
static inline void __ha_compiler_barrier(void)