add membarrier syscall wrapper, refactor dynamic tls install to use it

the motivation for this change is twofold. first, it gets the fallback
logic out of the dynamic linker, improving code readability and
organization. second, it gives application code that wants to use the
membarrier syscall a symbol that, when linked, ensures that the
required registration happens. the syscall depends on preregistration
of intent before the process becomes multithreaded, unless unbounded
latency is acceptable.
This commit is contained in:
Rich Felker 2019-02-22 02:56:10 -05:00
parent 7865d569de
commit ba18c1ecc6
6 changed files with 110 additions and 34 deletions

17
include/sys/membarrier.h Normal file
View File

@ -0,0 +1,17 @@
#ifndef _SYS_MEMBARRIER_H
#define _SYS_MEMBARRIER_H
/* Command values for the first argument of membarrier(). QUERY is 0;
 * every other command is a distinct single bit. See membarrier(2) for
 * the kernel-level meaning of each command. */
#define MEMBARRIER_CMD_QUERY 0
#define MEMBARRIER_CMD_GLOBAL 1
#define MEMBARRIER_CMD_GLOBAL_EXPEDITED 2
#define MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED 4
#define MEMBARRIER_CMD_PRIVATE_EXPEDITED 8
#define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED 16
#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE 32
#define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE 64
/* SHARED is an alternate spelling of GLOBAL (same value). */
#define MEMBARRIER_CMD_SHARED MEMBARRIER_CMD_GLOBAL
/* Wrapper for the membarrier syscall; the arguments are the command
 * (one of the MEMBARRIER_CMD_* values above) and a flags word. */
int membarrier(int, int);
#endif

View File

@ -18,6 +18,7 @@
#include <ctype.h>
#include <dlfcn.h>
#include <semaphore.h>
#include <sys/membarrier.h>
#include "pthread_impl.h"
#include "libc.h"
#include "dynlink.h"
@ -1351,18 +1352,6 @@ static void update_tls_size()
tls_align);
}
void __dl_prepare_for_threads(void)
{
/* MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED */
__syscall(SYS_membarrier, 1<<4, 0);
}
static sem_t barrier_sem;
static void bcast_barrier(int s)
{
sem_post(&barrier_sem);
}
static void install_new_tls(void)
{
sigset_t set;
@ -1397,26 +1386,11 @@ static void install_new_tls(void)
}
/* Broadcast barrier to ensure contents of new dtv is visible
* if the new dtv pointer is. Use SYS_membarrier if it works,
* otherwise emulate with a signal. */
* if the new dtv pointer is. The __membarrier function has a
* fallback emulation using signals for kernels that lack the
* feature at the syscall level. */
/* MEMBARRIER_CMD_PRIVATE_EXPEDITED */
if (__syscall(SYS_membarrier, 1<<3, 0)) {
sem_init(&barrier_sem, 0, 0);
struct sigaction sa = {
.sa_flags = SA_RESTART,
.sa_handler = bcast_barrier
};
memset(&sa.sa_mask, -1, sizeof sa.sa_mask);
__libc_sigaction(SIGSYNCCALL, &sa, 0);
for (td=self->next; td!=self; td=td->next)
__syscall(SYS_tkill, td->tid, SIGSYNCCALL);
for (td=self->next; td!=self; td=td->next)
sem_wait(&barrier_sem);
sa.sa_handler = SIG_IGN;
__libc_sigaction(SIGSYNCCALL, &sa, 0);
sem_destroy(&barrier_sem);
}
__membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0);
/* Install new dtv for each thread. */
for (j=0, td=self; !j || td!=self; j++, td=td->next) {

View File

@ -0,0 +1,9 @@
#ifndef SYS_MEMBARRIER_H
#define SYS_MEMBARRIER_H
/* Internal wrapper header: pull in the public header first, then add
 * the declaration that is only visible inside libc. */
#include "../../../include/sys/membarrier.h"
/* NOTE(review): presumably provides the `hidden` visibility macro used
 * below -- confirm against features.h. */
#include <features.h>
/* Internal name of the membarrier wrapper; arguments are (cmd, flags). */
hidden int __membarrier(int, int);
#endif

View File

@ -130,7 +130,7 @@ hidden int __init_tp(void *);
hidden void *__copy_tls(unsigned char *);
hidden void __reset_tls();
hidden void __dl_prepare_for_threads(void);
hidden void __membarrier_init(void);
hidden void __dl_thread_cleanup(void);
hidden void __testcancel();
hidden void __do_cleanup_push(struct __ptcb *);

76
src/linux/membarrier.c Normal file
View File

@ -0,0 +1,76 @@
#include <sys/membarrier.h>
#include <semaphore.h>
#include <signal.h>
#include <string.h>
#include "pthread_impl.h"
#include "syscall.h"
/* Do-nothing stand-ins bound below to the thread-list hooks as weak
 * aliases. NOTE(review): presumably the real __tl_lock/__tl_unlock/
 * __tl_sync definitions take precedence when they are linked -- confirm
 * against the weak_alias macro. */
static void dummy_0(void) {}

static void dummy_1(pthread_t t) { (void)t; }

weak_alias(dummy_0, __tl_lock);
weak_alias(dummy_0, __tl_unlock);
weak_alias(dummy_1, __tl_sync);
/* Semaphore on which the signal-based fallback in __membarrier collects
 * one post per signalled thread. */
static sem_t barrier_sem;

/* Signal handler installed by __membarrier for SIGSYNCCALL: acknowledge
 * receipt by posting barrier_sem once. The signal number is unused. */
static void bcast_barrier(int s)
{
	(void)s;
	sem_post(&barrier_sem);
}
/* Wrapper for the membarrier syscall with a userspace fallback.
 *
 * cmd:   one of the MEMBARRIER_CMD_* values.
 * flags: flags word passed through to the kernel.
 *
 * The syscall is always attempted first. If it fails and the request
 * was MEMBARRIER_CMD_PRIVATE_EXPEDITED with no flags, the barrier is
 * emulated by signalling every other thread on the thread list and
 * waiting for each one to acknowledge.
 *
 * Returns 0 when the emulation completes; otherwise the raw syscall
 * result converted by __syscall_ret(). */
int __membarrier(int cmd, int flags)
{
	int r = __syscall(SYS_membarrier, cmd, flags);
	/* Emulate the private expedited command, which is needed by the
	 * dynamic linker for installation of dynamic TLS, for older
	 * kernels that lack the syscall. Unlike the syscall, this only
	 * synchronizes with threads of the process, not other processes
	 * sharing the VM, but such sharing is not a supported usage
	 * anyway. */
	if (r && cmd == MEMBARRIER_CMD_PRIVATE_EXPEDITED && !flags) {
		pthread_t self=__pthread_self(), td;
		sigset_t set;
		int installed;
		__block_app_sigs(&set);
		__tl_lock();
		sem_init(&barrier_sem, 0, 0);
		struct sigaction sa = {
			.sa_flags = SA_RESTART,
			.sa_handler = bcast_barrier
		};
		/* Block every signal while the handler runs. */
		memset(&sa.sa_mask, -1, sizeof sa.sa_mask);
		/* Only broadcast if the handler is really in place. Without
		 * it no thread would post the semaphore and the wait loop
		 * below could never finish. */
		installed = !__libc_sigaction(SIGSYNCCALL, &sa, 0);
		if (installed) {
			/* Signal every other thread first, then collect
			 * one acknowledgement per thread. */
			for (td=self->next; td!=self; td=td->next)
				__syscall(SYS_tkill, td->tid, SIGSYNCCALL);
			for (td=self->next; td!=self; td=td->next)
				sem_wait(&barrier_sem);
			/* Go back to ignoring the internal signal. */
			sa.sa_handler = SIG_IGN;
			__libc_sigaction(SIGSYNCCALL, &sa, 0);
		}
		sem_destroy(&barrier_sem);
		__tl_unlock();
		__restore_sigs(&set);
		if (installed) return 0;
		/* Emulation was not possible; fall through and report the
		 * original syscall failure. */
	}
	return __syscall_ret(r);
}
/* Pre-register for the private expedited membarrier command.
 *
 * Called while the process is still single-threaded, and only takes
 * effect when membarrier is linked. Registering after threads exist
 * has bad, potentially unbounded latency, whereas registering now
 * should be essentially free; arguably the API should never have
 * required registration in the first place.
 *
 * Only the private expedited command is registered here. Registration
 * for other commands may impose some cost, so it is left to the
 * application. The unfortunate consequence is that library code
 * initialized after the process becomes multi-threaded cannot use
 * those features without accepting the registration latency. */
void __membarrier_init(void)
{
	const int register_cmd = MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED;

	/* Best effort: the result is deliberately not examined. */
	__syscall(SYS_membarrier, register_cmd, 0);
}

/* Public name for the wrapper. */
weak_alias(__membarrier, membarrier);

View File

@ -15,7 +15,7 @@ weak_alias(dummy_0, __release_ptc);
weak_alias(dummy_0, __pthread_tsd_run_dtors);
weak_alias(dummy_0, __do_orphaned_stdio_locks);
weak_alias(dummy_0, __dl_thread_cleanup);
weak_alias(dummy_0, __dl_prepare_for_threads);
weak_alias(dummy_0, __membarrier_init);
static int tl_lock_count;
static int tl_lock_waiters;
@ -246,7 +246,7 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
init_file_lock(__stderr_used);
__syscall(SYS_rt_sigprocmask, SIG_UNBLOCK, SIGPT_SET, 0, _NSIG/8);
self->tsd = (void **)__pthread_tsd_main;
__dl_prepare_for_threads();
__membarrier_init();
libc.threaded = 1;
}
if (attrp && !c11) attr = *attrp;