haproxy/src/i386-linux-vsys.c
Willy Tarreau 1bc4aab290 MEDIUM: listener: add support for linux's accept4() syscall
On Linux, accept4() does the same as accept() except that it allows
the caller to specify some flags to set on the resulting socket. We
use this to set the O_NONBLOCK flag and thus to save one fcntl()
call in each connection. The effect is a small performance gain of
around 1%.

The option is automatically enabled when target linux2628 is set, or
when the USE_ACCEPT4 Makefile variable is set. If the libc is too old
to provide the equivalent function, this is automatically detected and
our own function is used instead. In any case it is possible to force
the use of our implementation with USE_MY_ACCEPT4.
2012-10-08 20:11:03 +02:00

213 lines
5.8 KiB
C

/*
* Fast system call support for x86 on Linux
*
* Copyright 2010 Willy Tarreau <w@1wt.eu>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Recent kernels support a faster syscall ABI on x86 using the VDSO page, but
* some libcs that are built for CPUs earlier than i686 do not implement it.
* This code bypasses the libc when the VDSO is detected. It should only be
* used when it is certain that the libc really does not support the VDSO, but
* fixing the libc is preferred. Using the VDSO can improve the overall
* performance by about 10%.
*/
#if defined(__linux__) && defined(__i386__)
/* Silently ignore other platforms to be friendly with distro packagers */
#include <dlfcn.h>
#include <sys/mman.h>
/* int80 is a label defined in the assembler code of this file (an
 * "int $0x80" followed by "ret"). It is declared as a C function only so
 * that its address can be taken below.
 */
void int80(void); /* declared in the assembler code */
/* Address that the assembler stubs call ("call *vsyscall") to enter the
 * kernel. It starts out pointing to int80 and may be overwritten by the
 * constructor at the end of this file.
 */
static void *vsyscall = &int80; /* initialize vsyscall to use int80 by default */
/* Slot in which every assembler stub stores the caller's %ebx before loading
 * its own value into that register; do_syscall reloads %ebx from it. In the
 * visible code it is referenced only from the assembler strings, hence the
 * "used" attribute so that the compiler keeps it.
 */
static __attribute__((used)) unsigned int back_ebx;
/* now we redefine some frequently used syscalls. Epoll_create is defined too
* in order to replace old disabled implementations.
*/
/*
 * Assembler stubs exported under the names of the libc wrappers they replace
 * (each label is made global with .GLOBL). Every stub follows the same
 * pattern:
 *   - load a call number into %eax;
 *   - save the caller's %ebx into back_ebx, since %ebx is about to be
 *     overwritten with the first argument;
 *   - copy the arguments found on the stack into %ebx, %ecx, %edx, %esi,
 *     %edi, %ebp, in that order;
 *   - hand over to do_syscall, which calls through the vsyscall pointer,
 *     restores %ebx and converts an error result into errno / -1.
 *
 * Stubs that push nothing on the stack reach do_syscall with "jmp", so that
 * do_syscall's "ret" returns straight to the original caller. Stubs that had
 * to push %esi/%edi/%ebp use "call" instead so that they can pop those
 * registers before returning themselves.
 *
 * NOTE(review): back_ebx is a single file-scope slot shared by all stubs, so
 * two threads inside a stub at the same time would overwrite each other's
 * saved %ebx -- confirm that this file is only built into single-threaded
 * programs.
 */
asm
(
/* epoll_create: one argument, read from 4(%esp) into %ebx. */
"epoll_create: .GLOBL epoll_create\n"
" mov $0xfe, %eax\n"
" mov %ebx, back_ebx\n"
" mov 4(%esp), %ebx\n"
" jmp do_syscall\n"
/* epoll_ctl: four arguments. %esi is pushed first, which shifts the first
 * argument from 4(%esp) to 8(%esp).
 */
"epoll_ctl: .GLOBL epoll_ctl\n"
" push %esi\n"
" mov $0xff, %eax\n"
" mov %ebx, back_ebx\n"
" mov 20(%esp), %esi\n"
" mov 16(%esp), %edx\n"
" mov 12(%esp), %ecx\n"
" mov 8(%esp), %ebx\n"
" call do_syscall\n"
" pop %esi\n"
" ret\n"
/* epoll_wait: four arguments, same register and stack layout as epoll_ctl. */
"epoll_wait: .GLOBL epoll_wait\n"
" push %esi\n"
" mov $0x100, %eax\n"
" mov %ebx, back_ebx\n"
" mov 20(%esp), %esi\n"
" mov 16(%esp), %edx\n"
" mov 12(%esp), %ecx\n"
" mov 8(%esp), %ebx\n"
" call do_syscall\n"
" pop %esi\n"
" ret\n"
/* splice: six arguments. Three registers are pushed (%ebp, %edi, %esi), so
 * the first argument is found at 16(%esp). They are popped in reverse order
 * after the call.
 */
"splice: .GLOBL splice\n"
" push %ebp\n"
" push %edi\n"
" push %esi\n"
" mov $0x139, %eax\n"
" mov %ebx, back_ebx\n"
" mov 36(%esp), %ebp\n"
" mov 32(%esp), %edi\n"
" mov 28(%esp), %esi\n"
" mov 24(%esp), %edx\n"
" mov 20(%esp), %ecx\n"
" mov 16(%esp), %ebx\n"
" call do_syscall\n"
" pop %esi\n"
" pop %edi\n"
" pop %ebp\n"
" ret\n"
/* close: one argument in %ebx. */
"close: .GLOBL close\n"
" mov $0x06, %eax\n"
" mov %ebx, back_ebx\n"
" mov 4(%esp), %ebx\n"
" jmp do_syscall\n"
/* gettimeofday: two arguments in %ebx and %ecx. */
"gettimeofday: .GLOBL gettimeofday\n"
" mov $0x4e, %eax\n"
" mov %ebx, back_ebx\n"
" mov 8(%esp), %ecx\n"
" mov 4(%esp), %ebx\n"
" jmp do_syscall\n"
/* fcntl: three arguments in %ebx, %ecx and %edx. */
"fcntl: .GLOBL fcntl\n"
" mov $0xdd, %eax\n"
" mov %ebx, back_ebx\n"
" mov 12(%esp), %edx\n"
" mov 8(%esp), %ecx\n"
" mov 4(%esp), %ebx\n"
" jmp do_syscall\n"
/* Socket functions: each stub only loads its own sub-call number into %eax
 * and jumps to the common "socketcall" code below. The caller's stack is
 * left untouched so that socketcall can pass the address of the arguments.
 */
"socket: .GLOBL socket\n"
" mov $0x01, %eax\n"
" jmp socketcall\n"
"bind: .GLOBL bind\n"
" mov $0x02, %eax\n"
" jmp socketcall\n"
"connect: .GLOBL connect\n"
" mov $0x03, %eax\n"
" jmp socketcall\n"
"listen: .GLOBL listen\n"
" mov $0x04, %eax\n"
" jmp socketcall\n"
"accept: .GLOBL accept\n"
" mov $0x05, %eax\n"
" jmp socketcall\n"
"accept4: .GLOBL accept4\n"
" mov $0x12, %eax\n"
" jmp socketcall\n"
"getsockname: .GLOBL getsockname\n"
" mov $0x06, %eax\n"
" jmp socketcall\n"
"send: .GLOBL send\n"
" mov $0x09, %eax\n"
" jmp socketcall\n"
"recv: .GLOBL recv\n"
" mov $0x0a, %eax\n"
" jmp socketcall\n"
"shutdown: .GLOBL shutdown\n"
" mov $0x0d, %eax\n"
" jmp socketcall\n"
"setsockopt: .GLOBL setsockopt\n"
" mov $0x0e, %eax\n"
" jmp socketcall\n"
"getsockopt: .GLOBL getsockopt\n"
" mov $0x0f, %eax\n"
" jmp socketcall\n"
/* Common tail of the socket stubs: the sub-call number received in %eax is
 * moved to %ebx (after saving the caller's %ebx), %eax is set to 0x66, and
 * %ecx receives the address of the caller's first stack argument.
 */
"socketcall:\n"
" mov %ebx, back_ebx\n"
" mov %eax, %ebx\n"
" mov $0x66, %eax\n"
" lea 4(%esp), %ecx\n"
/* fall through */
/* Common syscall entry: call through the vsyscall pointer, put the caller's
 * %ebx back, then return %eax unchanged unless it falls in the error range.
 */
"do_syscall:\n"
" call *vsyscall\n" // always valid, may be int80 or vsyscall
" mov back_ebx, %ebx\n"
" cmpl $0xfffff000, %eax\n" // consider -4096..-1 for errno
" jae 0f\n" // unsigned comparison: taken for 0xfffff000..0xffffffff
" ret\n"
"0:\n" // error handling
" neg %eax\n" // get errno value
" push %eax\n" // save it
" call __errno_location\n" // returns the address of errno in %eax
" popl (%eax)\n" // store the pushed errno into the proper location
" mov $-1, %eax\n" // and return -1
" ret\n"
/* Fallback entry point used while vsyscall still holds its initial value. */
"int80:\n" // default compatible calling convention
" int $0x80\n"
" ret\n"
);
__attribute__((constructor))
static void __i386_linux_vsyscall_init(void)
{
	/* Runs as a constructor and tries to replace the default int80 entry
	 * point stored in <vsyscall> with the kernel's fast entry point. Two
	 * methods exist:
	 *   - USE_VSYSCALL_DLSYM: resolve the __kernel_vsyscall symbol from
	 *     the "linux-gate.so.1" virtual shared object. This requires
	 *     libdl but also works when /proc/sys/abi/vsyscall32 is 1, which
	 *     randomizes the VDSO address.
	 *   - otherwise: rely on the fact that the vsyscall pointer is always
	 *     located at 0xFFFFE018 when /proc/sys/abi/vsyscall32 contains
	 *     the default value 2, once we have checked that this address
	 *     can be accessed without faulting.
	 * When neither method yields a pointer, <vsyscall> is left untouched.
	 */
#ifdef USE_VSYSCALL_DLSYM
	void *gate = dlopen("linux-gate.so.1", RTLD_NOW);
	void *entry;

	if (!gate)
		return;

	/* try the "_kml" variant first, then the regular symbol */
	entry = dlsym(gate, "__kernel_vsyscall_kml");
	if (!entry)
		entry = dlsym(gate, "__kernel_vsyscall");

	if (entry)
		vsyscall = entry;

	dlclose(gate);
#else
	unsigned long entry;

	/* Heuristic: mprotect() on the VDSO area only succeeds when that
	 * area is mapped, so a failure means there is nothing to read.
	 */
	if (mprotect((void *)0xffffe000, 4096, PROT_READ|PROT_EXEC) != 0)
		return;

	/* VDSO is mapped: fetch the pointer and only accept it when it
	 * points into the expected high address range.
	 */
	entry = *(unsigned long *)0xFFFFE018;
	if ((entry & 0xFFFFE000) == 0xFFFFE000)
		vsyscall = (void *)entry;
#endif
}
#endif /* defined(__linux__) && defined(__i386__) */