mirror of
http://git.haproxy.org/git/haproxy.git/
synced 2025-02-19 20:27:01 +00:00
MEDIUM: cpu-map: replace the process number with the thread group number
The principle remains the same, but instead of having a single process and ignoring extra ones, now we set the affinity masks for the respective threads of all groups. The doc was updated with a few extra examples.
This commit is contained in:
parent
1b2b59bfa7
commit
5b09341c02
@ -1210,58 +1210,57 @@ close-spread-time <time>
|
||||
|
||||
See also: grace, hard-stop-after, idle-close-on-response
|
||||
|
||||
cpu-map [auto:]<process-set>[/<thread-set>] <cpu-set>...
|
||||
On some operating systems, it is possible to bind a process or a thread to a
|
||||
specific CPU set. This means that the process or the thread will never run on
|
||||
other CPUs. The "cpu-map" directive specifies CPU sets for process or thread
|
||||
sets. The first argument is a process set, eventually followed by a thread
|
||||
set. These sets have the format
|
||||
cpu-map [auto:]<thread-group>[/<thread-set>] <cpu-set>...
|
||||
On some operating systems, it is possible to bind a thread group or a thread
|
||||
to a specific CPU set. This means that the designated threads will never run
|
||||
on other CPUs. The "cpu-map" directive specifies CPU sets for individual
|
||||
threads or thread groups. The first argument is a thread group range,
|
||||
optionally followed by a thread set. These ranges have the following format:
|
||||
|
||||
all | odd | even | number[-[number]]
|
||||
|
||||
<number> must be a number between 1 and 32 or 64, depending on the machine's
|
||||
word size. Any process IDs above 1 and any thread IDs above nbthread are
|
||||
ignored. It is possible to specify a range with two such number delimited by
|
||||
a dash ('-'). It also is possible to specify all threads at once using
|
||||
"all", only odd numbers using "odd" or even numbers using "even", just like
|
||||
with the bind "thread" directive. The second and forthcoming arguments are
|
||||
CPU sets. Each CPU set is either a unique number starting at 0 for the first
|
||||
CPU or a range with two such numbers delimited by a dash ('-'). Outside of
|
||||
Linux and BSDs, there may be a limitation on the maximum CPU index to either
|
||||
31 or 63. Multiple CPU numbers or ranges may be specified, and the processes
|
||||
or threads will be allowed to bind to all of them. Obviously, multiple
|
||||
"cpu-map" directives may be specified. Each "cpu-map" directive will replace
|
||||
the previous ones when they overlap. A thread will be bound on the
|
||||
intersection of its mapping and the one of the process on which it is
|
||||
attached. If the intersection is null, no specific binding will be set for
|
||||
the thread.
|
||||
word size. Any group IDs above 'thread-groups' and any thread IDs above the
|
||||
machine's word size are ignored. All thread numbers are relative to the group
|
||||
they belong to. It is possible to specify a range with two such numbers
|
||||
delimited by a dash ('-'). It also is possible to specify all threads at once
|
||||
using "all", only odd numbers using "odd" or even numbers using "even", just
|
||||
like with the "thread" bind directive. The second and forthcoming arguments
|
||||
are CPU sets. Each CPU set is either a unique number starting at 0 for the
|
||||
first CPU or a range with two such numbers delimited by a dash ('-'). Outside
|
||||
of Linux and BSDs, there may be a limitation on the maximum CPU index to
|
||||
either 31 or 63. Multiple CPU numbers or ranges may be specified, and the
|
||||
processes or threads will be allowed to bind to all of them. Obviously,
|
||||
multiple "cpu-map" directives may be specified. Each "cpu-map" directive will
|
||||
replace the previous ones when they overlap.
|
||||
|
||||
Ranges can be partially defined. The higher bound can be omitted. In such
|
||||
case, it is replaced by the corresponding maximum value, 32 or 64 depending
|
||||
on the machine's word size.
|
||||
|
||||
The prefix "auto:" can be added before the process set to let HAProxy
|
||||
automatically bind a process or a thread to a CPU by incrementing threads and
|
||||
The prefix "auto:" can be added before the thread set to let HAProxy
|
||||
automatically bind a set of threads to a CPU by incrementing threads and
|
||||
CPU sets. To be valid, both sets must have the same size. No matter the
|
||||
declaration order of the CPU sets, it will be bound from the lowest to the
|
||||
highest bound. Having both a process and a thread range with the "auto:"
|
||||
highest bound. Having both a group and a thread range with the "auto:"
|
||||
prefix is not supported. Only one range is supported, the other one must be
|
||||
a fixed number.
|
||||
|
||||
Note that process ranges are supported for historical reasons. Nowadays, a
|
||||
lone number designates a process and must be 1, and specifying a thread range
|
||||
or number requires to prepend "1/" in front of it. Finally, "1" is strictly
|
||||
equivalent to "1/all" and designates all threads on the process.
|
||||
Note that group ranges are supported for historical reasons. Nowadays, a lone
|
||||
number designates a thread group and must be 1 if thread-groups are not used,
|
||||
and specifying a thread range or number requires prepending "1/" in front of
|
||||
it if thread groups are not used. Finally, "1" is strictly equivalent to
|
||||
"1/all" and designates all threads in the group.
|
||||
|
||||
Examples:
|
||||
cpu-map 1/all 0-3 # bind all threads of the first process on the
|
||||
cpu-map 1/all 0-3 # bind all threads of the first group on the
|
||||
# first 4 CPUs
|
||||
|
||||
cpu-map 1/1- 0- # will be replaced by "cpu-map 1/1-64 0-63"
|
||||
# or "cpu-map 1/1-32 0-31" depending on the machine's
|
||||
# word size.
|
||||
|
||||
# all these lines bind the thread 1 to the cpu 0, the thread 2 to cpu 1
|
||||
# all these lines bind thread 1 to cpu 0, thread 2 to cpu 1
|
||||
# and so on.
|
||||
cpu-map auto:1/1-4 0-3
|
||||
cpu-map auto:1/1-4 0-1 2-3
|
||||
@ -1276,6 +1275,21 @@ cpu-map [auto:]<process-set>[/<thread-set>] <cpu-set>...
|
||||
cpu-map auto:1/1-4 0 # invalid
|
||||
cpu-map auto:1/1 0-3 # invalid
|
||||
|
||||
# map 40 threads of those 4 groups to individual CPUs
|
||||
cpu-map auto:1/1-10 0-9
|
||||
cpu-map auto:2/1-10 10-19
|
||||
cpu-map auto:3/1-10 20-29
|
||||
cpu-map auto:4/1-10 30-39
|
||||
|
||||
# Map 80 threads to one physical socket and 80 others to another socket
|
||||
# without forcing assignment. These are split into 4 groups since no
|
||||
# group may have more than 64 threads.
|
||||
cpu-map 1/1-40 0-39 80-119 # node0, siblings 0 & 1
|
||||
cpu-map 2/1-40 0-39 80-119
|
||||
cpu-map 3/1-40 40-79 120-159 # node1, siblings 0 & 1
|
||||
cpu-map 4/1-40 40-79 120-159
|
||||
|
||||
|
||||
crt-base <dir>
|
||||
Assigns a default directory to fetch SSL certificates from when a relative
|
||||
path is used with "crtfile" or "crt" directives. Absolute locations specified
|
||||
|
@ -48,9 +48,9 @@ struct hap_cpuset {
|
||||
};
|
||||
|
||||
struct cpu_map {
|
||||
struct hap_cpuset proc; /* list of CPU masks for the whole process */
|
||||
struct hap_cpuset proc_t1 ; /* list of CPU masks for the 1st thread of the process */
|
||||
struct hap_cpuset thread[MAX_THREADS]; /* list of CPU masks for the 32/64 first threads of the 1st process */
|
||||
struct hap_cpuset proc; /* list of CPU masks for the whole thread group */
|
||||
struct hap_cpuset proc_t1 ; /* list of CPU masks for the 1st thread of the group */
|
||||
struct hap_cpuset thread[MAX_THREADS_PER_GROUP]; /* list of CPU masks for the 32/64 threads of this group */
|
||||
};
|
||||
|
||||
#endif /* _HAPROXY_CPUSET_T_H */
|
||||
|
@ -3,7 +3,7 @@
|
||||
|
||||
#include <haproxy/cpuset-t.h>
|
||||
|
||||
extern struct cpu_map cpu_map;
|
||||
extern struct cpu_map cpu_map[MAX_TGROUPS];
|
||||
|
||||
/* Unset all indexes in <set>.
|
||||
*/
|
||||
|
@ -1039,12 +1039,12 @@ int cfg_parse_global(const char *file, int linenum, char **args, int kwm)
|
||||
/* map a process list to a CPU set */
|
||||
#ifdef USE_CPU_AFFINITY
|
||||
char *slash;
|
||||
unsigned long proc = 0, thread = 0;
|
||||
int j, n, autoinc;
|
||||
unsigned long tgroup = 0, thread = 0;
|
||||
int g, j, n, autoinc;
|
||||
struct hap_cpuset cpus, cpus_copy;
|
||||
|
||||
if (!*args[1] || !*args[2]) {
|
||||
ha_alert("parsing [%s:%d] : %s expects a process number "
|
||||
ha_alert("parsing [%s:%d] : %s expects a thread group number "
|
||||
" ('all', 'odd', 'even', a number from 1 to %d or a range), "
|
||||
" followed by a list of CPU ranges with numbers from 0 to %d.\n",
|
||||
file, linenum, args[0], LONGBITS, LONGBITS - 1);
|
||||
@ -1055,11 +1055,11 @@ int cfg_parse_global(const char *file, int linenum, char **args, int kwm)
|
||||
if ((slash = strchr(args[1], '/')) != NULL)
|
||||
*slash = 0;
|
||||
|
||||
/* note: we silently ignore processes over MAX_PROCS and
|
||||
* threads over MAX_THREADS so as not to make configurations a
|
||||
/* note: we silently ignore thread group numbers over MAX_TGROUPS
|
||||
* and threads over MAX_THREADS so as not to make configurations a
|
||||
* pain to maintain.
|
||||
*/
|
||||
if (parse_process_number(args[1], &proc, LONGBITS, &autoinc, &errmsg)) {
|
||||
if (parse_process_number(args[1], &tgroup, LONGBITS, &autoinc, &errmsg)) {
|
||||
ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
|
||||
err_code |= ERR_ALERT | ERR_FATAL;
|
||||
goto out;
|
||||
@ -1081,9 +1081,9 @@ int cfg_parse_global(const char *file, int linenum, char **args, int kwm)
|
||||
}
|
||||
|
||||
if (autoinc &&
|
||||
my_popcountl(proc) != ha_cpuset_count(&cpus) &&
|
||||
my_popcountl(tgroup) != ha_cpuset_count(&cpus) &&
|
||||
my_popcountl(thread) != ha_cpuset_count(&cpus)) {
|
||||
ha_alert("parsing [%s:%d] : %s : PROC/THREAD range and CPU sets "
|
||||
ha_alert("parsing [%s:%d] : %s : TGROUP/THREAD range and CPU sets "
|
||||
"must have the same size to be automatically bound\n",
|
||||
file, linenum, args[0]);
|
||||
err_code |= ERR_ALERT | ERR_FATAL;
|
||||
@ -1091,10 +1091,9 @@ int cfg_parse_global(const char *file, int linenum, char **args, int kwm)
|
||||
}
|
||||
|
||||
/* we now have to deal with 3 real cases :
|
||||
* cpu-map P-Q => mapping for whole processes, numbers P to Q
|
||||
* cpu-map P-Q/1 => mapping of first thread of processes P to Q
|
||||
* cpu-map 1/T-U => mapping of threads T to U of process 1
|
||||
* (note: P=Q=1 since 2.5).
|
||||
* cpu-map P-Q => mapping for whole tgroups, numbers P to Q
|
||||
* cpu-map P-Q/1 => mapping of first thread of groups P to Q
|
||||
* cpu-map P/T-U => mapping of threads T to U of tgroup P
|
||||
* Otherwise other combinations are silently ignored since nbthread
|
||||
* and nbproc cannot both be >1 :
|
||||
* cpu-map P-Q/T => mapping for thread T for processes P to Q.
|
||||
@ -1103,37 +1102,48 @@ int cfg_parse_global(const char *file, int linenum, char **args, int kwm)
|
||||
* one of P,U may be > 1, others ignored.
|
||||
*/
|
||||
if (!thread || thread == 0x1) {
|
||||
/* mapping for whole process. E.g. cpu-map 1 0-3 or cpu-map 1/1 0-3 */
|
||||
ha_cpuset_assign(&cpus_copy, &cpus);
|
||||
|
||||
if (!autoinc)
|
||||
ha_cpuset_assign(&cpu_map.proc, &cpus);
|
||||
else {
|
||||
ha_cpuset_zero(&cpu_map.proc);
|
||||
n = ha_cpuset_ffs(&cpus_copy) - 1;
|
||||
ha_cpuset_clr(&cpus_copy, n);
|
||||
ha_cpuset_set(&cpu_map.proc, n);
|
||||
}
|
||||
} else {
|
||||
/* first process, iterate on threads. E.g. cpu-map 1/1-4 0-3 */
|
||||
ha_cpuset_assign(&cpus_copy, &cpus);
|
||||
for (j = n = 0; j < MAX_THREADS; j++) {
|
||||
/* No mapping for this thread */
|
||||
if (!(thread & (1UL << j)))
|
||||
/* mapping for whole tgroups. E.g. cpu-map 1 0-3 or cpu-map 1/1 0-3 */
|
||||
for (g = 0; g < MAX_TGROUPS; g++) {
|
||||
/* No mapping for this tgroup */
|
||||
if (!(tgroup & (1UL << g)))
|
||||
continue;
|
||||
|
||||
ha_cpuset_assign(&cpus_copy, &cpus);
|
||||
if (!autoinc)
|
||||
ha_cpuset_assign(&cpu_map.thread[j], &cpus);
|
||||
ha_cpuset_assign(&cpu_map[g].proc, &cpus);
|
||||
else {
|
||||
ha_cpuset_zero(&cpu_map.thread[j]);
|
||||
ha_cpuset_zero(&cpu_map[g].proc);
|
||||
n = ha_cpuset_ffs(&cpus_copy) - 1;
|
||||
ha_cpuset_clr(&cpus_copy, n);
|
||||
ha_cpuset_set(&cpu_map.thread[j], n);
|
||||
ha_cpuset_set(&cpu_map[g].proc, n);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* first tgroup, iterate on threads. E.g. cpu-map 1/1-4 0-3 */
|
||||
for (g = 0; g < MAX_TGROUPS; g++) {
|
||||
/* No mapping for this tgroup */
|
||||
if (!(tgroup & (1UL << g)))
|
||||
continue;
|
||||
|
||||
ha_cpuset_assign(&cpus_copy, &cpus);
|
||||
for (j = n = 0; j < MAX_THREADS_PER_GROUP; j++) {
|
||||
/* No mapping for this thread */
|
||||
if (!(thread & (1UL << j)))
|
||||
continue;
|
||||
|
||||
if (!autoinc)
|
||||
ha_cpuset_assign(&cpu_map[g].thread[j], &cpus);
|
||||
else {
|
||||
ha_cpuset_zero(&cpu_map[g].thread[j]);
|
||||
n = ha_cpuset_ffs(&cpus_copy) - 1;
|
||||
ha_cpuset_clr(&cpus_copy, n);
|
||||
ha_cpuset_set(&cpu_map[g].thread[j], n);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
HA_DIAG_WARNING_COND(proc != 0x1 && thread != 0x1,
|
||||
"parsing [%s:%d] : cpu-map statement is considered invalid and thus ignored as it addresses multiple processes and threads at the same time. At least one of them should be 1 and only 1.", file, linenum);
|
||||
HA_DIAG_WARNING_COND(tgroup != 0x1 && thread != 0x1,
|
||||
"parsing [%s:%d] : cpu-map statement is considered invalid and thus ignored as it addresses multiple groups and threads at the same time. At least one of them should be 1 and only 1.", file, linenum);
|
||||
}
|
||||
#else
|
||||
ha_alert("parsing [%s:%d] : '%s' is not enabled, please check build options for USE_CPU_AFFINITY.\n",
|
||||
|
@ -5,7 +5,7 @@
|
||||
#include <haproxy/cpuset.h>
|
||||
#include <haproxy/intops.h>
|
||||
|
||||
struct cpu_map cpu_map;
|
||||
struct cpu_map cpu_map[MAX_TGROUPS];
|
||||
|
||||
void ha_cpuset_zero(struct hap_cpuset *set)
|
||||
{
|
||||
|
@ -1518,11 +1518,14 @@ static void init_early(int argc, char **argv)
|
||||
/* Some CPU affinity stuff may have to be initialized */
|
||||
#ifdef USE_CPU_AFFINITY
|
||||
{
|
||||
int i;
|
||||
ha_cpuset_zero(&cpu_map.proc);
|
||||
ha_cpuset_zero(&cpu_map.proc_t1);
|
||||
for (i = 0; i < MAX_THREADS; ++i) {
|
||||
ha_cpuset_zero(&cpu_map.thread[i]);
|
||||
int g, i;
|
||||
|
||||
for (g = 0; g < MAX_TGROUPS; g++) {
|
||||
ha_cpuset_zero(&cpu_map[g].proc);
|
||||
ha_cpuset_zero(&cpu_map[g].proc_t1);
|
||||
for (i = 0; i < MAX_THREADS_PER_GROUP; ++i) {
|
||||
ha_cpuset_zero(&cpu_map[g].thread[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -3405,13 +3408,13 @@ int main(int argc, char **argv)
|
||||
}
|
||||
|
||||
#ifdef USE_CPU_AFFINITY
|
||||
if (!in_parent && ha_cpuset_count(&cpu_map.proc)) { /* only do this if the process has a CPU map */
|
||||
if (!in_parent && ha_cpuset_count(&cpu_map[0].proc)) { /* only do this if the process has a CPU map */
|
||||
|
||||
#if defined(CPUSET_USE_CPUSET) || defined(__DragonFly__)
|
||||
struct hap_cpuset *set = &cpu_map.proc;
|
||||
struct hap_cpuset *set = &cpu_map[0].proc;
|
||||
sched_setaffinity(0, sizeof(set->cpuset), &set->cpuset);
|
||||
#elif defined(__FreeBSD__)
|
||||
struct hap_cpuset *set = &cpu_map.proc;
|
||||
struct hap_cpuset *set = &cpu_map[0].proc;
|
||||
ret = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, sizeof(set->cpuset), &set->cpuset);
|
||||
#endif
|
||||
}
|
||||
|
10
src/thread.c
10
src/thread.c
@ -249,13 +249,13 @@ void set_thread_cpu_affinity()
|
||||
return;
|
||||
|
||||
/* Now the CPU affinity for all threads */
|
||||
if (ha_cpuset_count(&cpu_map.proc))
|
||||
ha_cpuset_and(&cpu_map.thread[tid], &cpu_map.proc);
|
||||
if (ha_cpuset_count(&cpu_map[tgid - 1].proc))
|
||||
ha_cpuset_and(&cpu_map[tgid - 1].thread[ti->ltid], &cpu_map[tgid - 1].proc);
|
||||
|
||||
if (ha_cpuset_count(&cpu_map.thread[tid])) {/* only do this if the thread has a THREAD map */
|
||||
if (ha_cpuset_count(&cpu_map[tgid - 1].thread[ti->ltid])) {/* only do this if the thread has a THREAD map */
|
||||
# if defined(__APPLE__)
|
||||
/* Note: this API is limited to the first 32/64 CPUs */
|
||||
unsigned long set = cpu_map.thread[tid].cpuset;
|
||||
unsigned long set = cpu_map[tgid - 1].thread[ti->ltid].cpuset;
|
||||
int j;
|
||||
|
||||
while ((j = ffsl(set)) > 0) {
|
||||
@ -267,7 +267,7 @@ void set_thread_cpu_affinity()
|
||||
set &= ~(1UL << (j - 1));
|
||||
}
|
||||
# else
|
||||
struct hap_cpuset *set = &cpu_map.thread[tid];
|
||||
struct hap_cpuset *set = &cpu_map[tgid - 1].thread[ti->ltid];
|
||||
|
||||
pthread_setaffinity_np(ha_pthread[tid], sizeof(set->cpuset), &set->cpuset);
|
||||
# endif
|
||||
|
Loading…
Reference in New Issue
Block a user