move rsyscall out of pthread_create module

this is something of a tradeoff, as now set*id() functions, rather
than pthread_create, are what pull in the code overhead for dealing
with linux's refusal to implement proper POSIX thread-vs-process
semantics. my motivations are:

1. it's cleaner this way, especially cleaner to optimize out the
rsyscall locking overhead from pthread_create when it's not needed.
2. it's expected that only a tiny number of core system programs will
ever use set*id() functions, whereas many programs may want to use
threads, and making thread overhead tiny is an incentive for "light"
programs to try threads.
Rich Felker 2011-04-06 20:27:07 -04:00
parent 74950b336d
commit b2486a8922
10 changed files with 133 additions and 109 deletions
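
The "optimize out" in point 1 is done with link-time weak aliases instead of the old libc.rsyscall function pointer: pthread_create.c now calls __rsyscall_lock/__rsyscall_unlock, which are weak aliases for an empty function, so the real locking code is only linked in when some set*id() function pulls in __rsyscall.c. A minimal stand-alone sketch of that pattern (demo names, not musl source):

#include <stdio.h>

/* weak_alias macro in the style musl uses internally */
#define weak_alias(old, new) \
	extern __typeof(old) new __attribute__((weak, alias(#old)))

static void dummy_0(void) {}
weak_alias(dummy_0, demo_rsyscall_lock);   /* hypothetical demo names */
weak_alias(dummy_0, demo_rsyscall_unlock);

int main(void)
{
	/* With no strong definitions linked in, both calls are no-ops, so
	 * the thread-creation path pays nothing for rsyscall support. */
	demo_rsyscall_lock();
	puts("created a thread without any rsyscall locking overhead");
	demo_rsyscall_unlock();
	return 0;
}

Linking another translation unit that provides strong definitions of demo_rsyscall_lock/demo_rsyscall_unlock silently replaces the no-ops, which is exactly how __rsyscall.c overrides the dummies referenced from pthread_create.c.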

@@ -10,7 +10,6 @@ struct __libc {
void (*lock)(volatile int *);
void (*lockfile)(FILE *);
void (*fork_handler)(int);
int (*rsyscall)(int, long, long, long, long, long, long);
int (*atexit)(void (*)(void));
void (*fini)(void);
void (*ldso_fini)(void);
@@ -48,6 +47,8 @@ void __lockfile(FILE *);
#define CANCELPT_INHIBIT CANCELPT(2)
#define CANCELPT_RESUME CANCELPT(-2)
int __rsyscall(int, long, long, long, long, long, long);
extern char **__environ;
#define environ __environ

@@ -86,6 +86,9 @@ int __timedwait(volatile int *, int, clockid_t, const struct timespec *, int);
void __wait(volatile int *, volatile int *, int, int);
void __wake(volatile int *, int, int);
void __rsyscall_lock();
void __rsyscall_unlock();
#define DEFAULT_STACK_SIZE (16384-PAGE_SIZE)
#define DEFAULT_GUARD_SIZE PAGE_SIZE

src/thread/__rsyscall.c (new file)
@@ -0,0 +1,113 @@
#include "pthread_impl.h"
/* "rsyscall" is a mechanism by which a thread can synchronously force all
* other threads to perform an arbitrary syscall. It is necessary to work
* around the non-conformant implementation of setuid() et al on Linux,
* which affect only the calling thread and not the whole process. This
* implementation performs some tricks with signal delivery to work around
* the fact that it does not keep any list of threads in userspace. */
static struct {
volatile int lock, hold, blocks, cnt;
unsigned long arg[6];
int nr;
int err;
int init;
} rs;
static void rsyscall_handler(int sig, siginfo_t *si, void *ctx)
{
struct pthread *self = __pthread_self();
long r;
if (!rs.hold || rs.cnt == libc.threads_minus_1) return;
/* Threads which have already decremented themselves from the
* thread count must not increment rs.cnt or otherwise act. */
if (self->dead) {
sigfillset(&((ucontext_t *)ctx)->uc_sigmask);
return;
}
r = __syscall(rs.nr, rs.arg[0], rs.arg[1],
rs.arg[2], rs.arg[3], rs.arg[4], rs.arg[5]);
if (r < 0) rs.err=-r;
a_inc(&rs.cnt);
__wake(&rs.cnt, 1, 1);
while(rs.hold)
__wait(&rs.hold, 0, 1, 1);
a_dec(&rs.cnt);
if (!rs.cnt) __wake(&rs.cnt, 1, 1);
}
int __rsyscall(int nr, long a, long b, long c, long d, long e, long f)
{
int i, ret;
sigset_t set = { 0 };
struct pthread *self;
if (!libc.threads_minus_1)
return syscall(nr, a, b, c, d, e, f);
self = __pthread_self();
LOCK(&rs.lock);
while ((i=rs.blocks))
__wait(&rs.blocks, 0, i, 1);
sigfillset(&set);
__libc_sigprocmask(SIG_BLOCK, &set, &set);
if (!rs.init) {
struct sigaction sa = {
.sa_flags = SA_SIGINFO | SA_RESTART,
.sa_sigaction = rsyscall_handler
};
sigfillset(&sa.sa_mask);
__libc_sigaction(SIGSYSCALL, &sa, 0);
rs.init = 1;
}
rs.nr = nr;
rs.arg[0] = a; rs.arg[1] = b;
rs.arg[2] = c; rs.arg[3] = d;
rs.arg[4] = e; rs.arg[5] = f;
rs.err = 0;
rs.cnt = 0;
rs.hold = 1;
/* Dispatch signals until all threads respond */
for (i=libc.threads_minus_1; i; i--)
sigqueue(self->pid, SIGSYSCALL, (union sigval){0});
while ((i=rs.cnt) < libc.threads_minus_1) {
sigqueue(self->pid, SIGSYSCALL, (union sigval){0});
__wait(&rs.cnt, 0, i, 1);
}
/* Restore the old signal mask; lingering signals are handled as no-ops */
__libc_sigprocmask(SIG_SETMASK, &set, 0);
/* Resume other threads' signal handlers and wait for them */
rs.hold = 0;
__wake(&rs.hold, -1, 0);
while((i=rs.cnt)) __wait(&rs.cnt, 0, i, 1);
if (rs.err) errno = rs.err, ret = -1;
else ret = syscall(nr, a, b, c, d, e, f);
UNLOCK(&rs.lock);
return ret;
}
void __rsyscall_lock()
{
a_inc(&rs.blocks);
while (rs.lock) __wait(&rs.lock, 0, 1, 1);
}
void __rsyscall_unlock()
{
a_dec(&rs.blocks);
if (rs.lock) __wake(&rs.blocks, 1, 1);
}
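
To make the protocol above easier to follow outside libc internals, here is a rough stand-alone sketch of the same idea, under loudly different assumptions: SIGUSR1 stands in for SIGSYSCALL, C11 atomics and busy-waiting stand in for a_inc/__wait futex operations, and the "broadcast action" is just counting. The broadcaster blocks the signal for itself, re-queues a process-directed signal until every worker has acknowledged from inside the handler, and only then releases them, so no userspace list of threads is needed.

#define _GNU_SOURCE
#include <pthread.h>
#include <signal.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

#define NTHREADS 4

static atomic_int acks;
static atomic_int hold = 1;

static void handler(int sig, siginfo_t *si, void *ctx)
{
	/* Count this thread once, then park here; SIGUSR1 stays blocked
	 * while the handler runs, so the thread cannot be counted twice. */
	atomic_fetch_add(&acks, 1);
	while (atomic_load(&hold)) ; /* spin; the real code futex-waits on rs.hold */
}

static void *worker(void *arg)
{
	sigset_t s;
	sigemptyset(&s);
	sigaddset(&s, SIGUSR1);
	pthread_sigmask(SIG_UNBLOCK, &s, 0); /* workers accept the broadcast */
	while (atomic_load(&hold)) usleep(1000); /* stand-in for real work */
	return 0;
}

int main(void)
{
	struct sigaction sa = { .sa_sigaction = handler, .sa_flags = SA_SIGINFO };
	sigset_t s;
	pthread_t t[NTHREADS];
	int i;

	sigemptyset(&sa.sa_mask);
	sigaction(SIGUSR1, &sa, 0);

	/* The broadcasting thread must not consume its own signal; workers
	 * are created with it blocked and unblock it for themselves. */
	sigemptyset(&s);
	sigaddset(&s, SIGUSR1);
	pthread_sigmask(SIG_BLOCK, &s, 0);

	for (i = 0; i < NTHREADS; i++)
		pthread_create(&t[i], 0, worker, 0);

	/* Re-queue the signal until every worker has run the handler. */
	while (atomic_load(&acks) < NTHREADS) {
		sigqueue(getpid(), SIGUSR1, (union sigval){0});
		usleep(1000);
	}
	printf("all %d workers responded\n", NTHREADS);

	atomic_store(&hold, 0); /* release parked handlers and workers */
	for (i = 0; i < NTHREADS; i++)
		pthread_join(t[i], 0);
	return 0;
}

The hold flag plays the role of rs.hold: a worker parked in its handler keeps the signal blocked and so cannot be counted twice, and any signal still queued when hold is cleared is delivered later as a harmless extra handler run, mirroring the "lingering signals" concern in the real code.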

@@ -1,5 +1,11 @@
#include "pthread_impl.h"
static void dummy_0()
{
}
weak_alias(dummy_0, __rsyscall_lock);
weak_alias(dummy_0, __rsyscall_unlock);
static void dummy_1(pthread_t self)
{
}
@@ -72,101 +78,12 @@ static void cancelpt(int x)
}
}
/* "rsyscall" is a mechanism by which a thread can synchronously force all
* other threads to perform an arbitrary syscall. It is necessary to work
* around the non-conformant implementation of setuid() et al on Linux,
* which affect only the calling thread and not the whole process. This
* implementation performs some tricks with signal delivery to work around
* the fact that it does not keep any list of threads in userspace. */
static struct {
volatile int lock, hold, blocks, cnt;
unsigned long arg[6];
int nr;
int err;
} rs;
static void rsyscall_handler(int sig, siginfo_t *si, void *ctx)
{
struct pthread *self = __pthread_self();
long r;
if (!rs.hold || rs.cnt == libc.threads_minus_1) return;
/* Threads which have already decremented themselves from the
* thread count must not increment rs.cnt or otherwise act. */
if (self->dead) {
sigfillset(&((ucontext_t *)ctx)->uc_sigmask);
return;
}
r = __syscall(rs.nr, rs.arg[0], rs.arg[1],
rs.arg[2], rs.arg[3], rs.arg[4], rs.arg[5]);
if (r < 0) rs.err=-r;
a_inc(&rs.cnt);
__wake(&rs.cnt, 1, 1);
while(rs.hold)
__wait(&rs.hold, 0, 1, 1);
a_dec(&rs.cnt);
if (!rs.cnt) __wake(&rs.cnt, 1, 1);
}
static int rsyscall(int nr, long a, long b, long c, long d, long e, long f)
{
int i, ret;
sigset_t set = { 0 };
struct pthread *self = __pthread_self();
sigaddset(&set, SIGSYSCALL);
LOCK(&rs.lock);
while ((i=rs.blocks))
__wait(&rs.blocks, 0, i, 1);
__libc_sigprocmask(SIG_BLOCK, &set, 0);
rs.nr = nr;
rs.arg[0] = a; rs.arg[1] = b;
rs.arg[2] = c; rs.arg[3] = d;
rs.arg[4] = e; rs.arg[5] = f;
rs.err = 0;
rs.cnt = 0;
rs.hold = 1;
/* Dispatch signals until all threads respond */
for (i=libc.threads_minus_1; i; i--)
sigqueue(self->pid, SIGSYSCALL, (union sigval){0});
while ((i=rs.cnt) < libc.threads_minus_1) {
sigqueue(self->pid, SIGSYSCALL, (union sigval){0});
__wait(&rs.cnt, 0, i, 1);
}
/* Handle any lingering signals with no-op */
__libc_sigprocmask(SIG_UNBLOCK, &set, 0);
/* Resume other threads' signal handlers and wait for them */
rs.hold = 0;
__wake(&rs.hold, -1, 0);
while((i=rs.cnt)) __wait(&rs.cnt, 0, i, 1);
if (rs.err) errno = rs.err, ret = -1;
else ret = syscall(nr, a, b, c, d, e, f);
UNLOCK(&rs.lock);
return ret;
}
static void init_threads()
{
struct sigaction sa = { .sa_flags = SA_SIGINFO | SA_RESTART };
libc.lock = __lock;
libc.lockfile = __lockfile;
libc.cancelpt = cancelpt;
libc.rsyscall = rsyscall;
sigfillset(&sa.sa_mask);
sa.sa_sigaction = rsyscall_handler;
__libc_sigaction(SIGSYSCALL, &sa, 0);
sigemptyset(&sa.sa_mask);
sa.sa_sigaction = cancel_handler;
@@ -205,7 +122,7 @@ int pthread_create(pthread_t *res, const pthread_attr_t *attr, void *(*entry)(vo
size_t size, guard;
struct pthread *self = pthread_self(), *new;
unsigned char *map, *stack, *tsd;
static const pthread_attr_t default_attr;
const pthread_attr_t default_attr = { 0 };
if (!self) return ENOSYS;
if (!init && ++init) init_threads();
@@ -236,16 +153,12 @@ int pthread_create(pthread_t *res, const pthread_attr_t *attr, void *(*entry)(vo
new->tlsdesc[1] = (uintptr_t)new;
stack = (void *)((uintptr_t)new-1 & ~(uintptr_t)15);
/* We must synchronize new thread creation with rsyscall
* delivery. This looks to be the least expensive way: */
a_inc(&rs.blocks);
while (rs.lock) __wait(&rs.lock, 0, 1, 1);
__rsyscall_lock();
a_inc(&libc.threads_minus_1);
ret = __uniclone(stack, start, new);
a_dec(&rs.blocks);
if (rs.lock) __wake(&rs.blocks, 1, 1);
__rsyscall_unlock();
if (ret < 0) {
a_dec(&libc.threads_minus_1);

@@ -4,6 +4,5 @@
int setgid(gid_t gid)
{
if (libc.rsyscall) return libc.rsyscall(__NR_setgid, gid, 0, 0, 0, 0, 0);
return syscall(SYS_setgid, gid);
return __rsyscall(__NR_setgid, gid, 0, 0, 0, 0, 0);
}

@@ -4,6 +4,5 @@
int setregid(gid_t rgid, gid_t egid)
{
if (libc.rsyscall) return libc.rsyscall(__NR_setregid, rgid, egid, 0, 0, 0, 0);
return syscall(SYS_setregid, rgid, egid);
return __rsyscall(__NR_setregid, rgid, egid, 0, 0, 0, 0);
}

@@ -5,6 +5,5 @@
int setresgid(gid_t rgid, gid_t egid, gid_t sgid)
{
if (libc.rsyscall) return libc.rsyscall(__NR_setresgid, rgid, egid, sgid, 0, 0, 0);
return syscall(SYS_setresgid, rgid, egid, sgid);
return __rsyscall(__NR_setresgid, rgid, egid, sgid, 0, 0, 0);
}

@@ -5,6 +5,5 @@
int setresuid(uid_t ruid, uid_t euid, uid_t suid)
{
if (libc.rsyscall) return libc.rsyscall(__NR_setresuid, ruid, euid, suid, 0, 0, 0);
return syscall(SYS_setresuid, ruid, euid, suid);
return __rsyscall(__NR_setresuid, ruid, euid, suid, 0, 0, 0);
}

@@ -4,6 +4,5 @@
int setreuid(uid_t ruid, uid_t euid)
{
if (libc.rsyscall) return libc.rsyscall(__NR_setreuid, ruid, euid, 0, 0, 0, 0);
return syscall(SYS_setreuid, ruid, euid);
return __rsyscall(__NR_setreuid, ruid, euid, 0, 0, 0, 0);
}

@@ -4,6 +4,5 @@
int setuid(uid_t uid)
{
if (libc.rsyscall) return libc.rsyscall(__NR_setuid, uid, 0, 0, 0, 0, 0);
return syscall(SYS_setuid, uid);
return __rsyscall(__NR_setuid, uid, 0, 0, 0, 0, 0);
}
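
With the function pointer gone, each of these wrappers now funnels straight into __rsyscall, which falls back to a plain syscall in single-threaded processes. A small hypothetical test of the observable behavior (not part of the commit; it uses setuid(getuid()) so it also runs unprivileged — only a real uid change, run as root, makes the cross-thread effect visible):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_barrier_t barrier;

static void *report(void *arg)
{
	/* Wait until the main thread has called setuid(). */
	pthread_barrier_wait(&barrier);
	/* Per-process semantics require this to reflect the uid main set;
	 * the raw Linux syscall would have changed only main's credentials. */
	printf("worker sees uid %d\n", (int)getuid());
	return 0;
}

int main(void)
{
	pthread_t t;
	pthread_barrier_init(&barrier, 0, 2);
	pthread_create(&t, 0, report, 0);

	/* setuid(getuid()) changes nothing but still exercises the same
	 * path; as root, substitute a real target uid to see the effect. */
	if (setuid(getuid()))
		perror("setuid");
	pthread_barrier_wait(&barrier);

	pthread_join(t, 0);
	return 0;
}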