diff --git a/include/import/atomic-ops.h b/include/import/atomic-ops.h
index 9ee7da7767..f613a0ba16 100644
--- a/include/import/atomic-ops.h
+++ b/include/import/atomic-ops.h
@@ -8,14 +8,22 @@ static inline void pl_barrier()
 	asm volatile("" ::: "memory");
 }
 
-/* full memory barrier */
+#if defined(__i386__) || defined (__i486__) || defined (__i586__) || defined (__i686__) || defined (__x86_64__)
+
+/* full memory barrier using mfence when SSE2 is supported, falling back to
+ * "lock add %esp" (gcc uses "lock add" or "lock or").
+ */
 static inline void pl_mb()
 {
-	__sync_synchronize();
+#if defined(__SSE2__)
+	asm volatile("mfence" ::: "memory");
+#elif defined(__x86_64__)
+	asm volatile("lock addl $0,0 (%%rsp)" ::: "memory", "cc");
+#else
+	asm volatile("lock addl $0,0 (%%esp)" ::: "memory", "cc");
+#endif
 }
 
-#if defined(__i386__) || defined (__i486__) || defined (__i586__) || defined (__i686__) || defined (__x86_64__)
-
 /*
  * Generic functions common to the x86 family
  */
@@ -488,6 +496,12 @@ static inline void pl_cpu_relax()
 	asm volatile("");
 }
 
+/* full memory barrier */
+static inline void pl_mb()
+{
+	__sync_synchronize();
+}
+
 #define pl_inc_noret(ptr)     ({ __sync_add_and_fetch((ptr), 1);   })
 #define pl_dec_noret(ptr)     ({ __sync_sub_and_fetch((ptr), 1);   })
 #define pl_inc(ptr)           ({ __sync_add_and_fetch((ptr), 1);   })
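
For illustration only (not part of the patch): below is a minimal standalone sketch of how a full barrier such as pl_mb() is typically paired between two threads. The barrier replicates the x86 ladder introduced by the patch and falls back to __sync_synchronize() on other architectures, as the generic section does; the producer/consumer program around it is a hypothetical usage example. Build with "gcc -O2 -pthread".

/* Sketch: publish data, issue a full barrier, then raise a flag; the reader
 * spins on the flag, issues a barrier, then reads the data.
 */
#include <pthread.h>
#include <stdio.h>

static inline void pl_mb(void)
{
#if defined(__SSE2__)
	asm volatile("mfence" ::: "memory");
#elif defined(__x86_64__)
	asm volatile("lock addl $0,0 (%%rsp)" ::: "memory", "cc");
#elif defined(__i386__)
	asm volatile("lock addl $0,0 (%%esp)" ::: "memory", "cc");
#else
	__sync_synchronize(); /* generic fallback, as in the non-x86 path */
#endif
}

static int data;
static volatile int ready;

static void *producer(void *arg)
{
	(void)arg;
	data = 42;   /* write the payload ... */
	pl_mb();     /* ... and order it before the flag becomes visible */
	ready = 1;
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, producer, NULL);
	while (!ready)
		;        /* wait for the flag */
	pl_mb();     /* order the flag read before the data read */
	printf("data=%d\n", data);
	pthread_join(t, NULL);
	return 0;
}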