From 7764a57d3292b6b4f1e488b8ae07ff5699dc73e1 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 16 Jul 2019 15:10:34 +0200 Subject: [PATCH] BUG/MEDIUM: threads: cpu-map designating a single thread/process are ignored Since commit 81492c989 ("MINOR: threads: flatten the per-thread cpu-map"), we don't keep the proc*thread matrix anymore to represent the full binding possibilities, but only the proc and thread ones. The problem is that the per-process binding is not the same for each thread and for the process, and the proc[] array was assumed to store the per-proc first thread value when doing this change. Worse, the logic present there tries to deal with thread ranges and process ranges in a way which automatically excludes the other possibility (since ranges cannot be used on both) but as such fails to apply changes if neither the process nor the thread is expressed as a range. The real problem comes from the fact that specifying cpu-map 1/1 doesn't yet reveal if the per-process mask or the per-thread mask needs to be updated. In practice it's the thread one but then the current storage doesn't allow storing the binding of the first thread of each other process in nbproc>1 configurations. When removing the proc*thread matrix, what ought to have been kept was both the thread column for process 1 and the process line for thread 1, but instead only the thread column was kept. This patch reintroduces the storage of the configuration for the first thread of each process so that it is again possible to store either the per-thread or per-process configuration. As a partial workaround for existing configurations, it is possible to systematically indicate at least two processes or two threads at once and map them by pairs or more so that at least two values are present in the range. 
E.g : # set processes 1-4 to cpus 0-3 : cpu-map auto:1-4/1 0 1 2 3 # or: cpu-map 1-2/1 0 1 cpu-map 3-4/1 2 3 # set threads 1-4 to cpus 0-3 : cpu-map auto:1/1-4 0 1 2 3 # or : cpu-map 1/1-2 0 1 cpu-map 1/3-4 2 3 This fix must be backported to 2.0. --- include/types/global.h | 3 ++- src/cfgparse-global.c | 58 ++++++++++++++++++++++++++++++++---------- src/haproxy.c | 3 +++ 3 files changed, 49 insertions(+), 15 deletions(-) diff --git a/include/types/global.h b/include/types/global.h index b6ba673700..ec1700b5ba 100644 --- a/include/types/global.h +++ b/include/types/global.h @@ -184,7 +184,8 @@ struct global { #ifdef USE_CPU_AFFINITY struct { unsigned long proc[MAX_PROCS]; /* list of CPU masks for the 32/64 first processes */ - unsigned long thread[MAX_THREADS]; /* list of CPU masks for the 32/64 first threads */ + unsigned long proc_t1[MAX_PROCS]; /* list of CPU masks for the 1st thread of each process */ + unsigned long thread[MAX_THREADS]; /* list of CPU masks for the 32/64 first threads of the 1st process */ } cpu_map; #endif }; diff --git a/src/cfgparse-global.c b/src/cfgparse-global.c index 4ecaff1ae9..b117ebc7c2 100644 --- a/src/cfgparse-global.c +++ b/src/cfgparse-global.c @@ -1032,8 +1032,19 @@ int cfg_parse_global(const char *file, int linenum, char **args, int kwm) goto out; } - if (atleast2(proc)) { - /* Mapping at the process level */ + /* we now have to deal with 3 real cases : + * cpu-map P-Q => mapping for whole processes, numbers P to Q + * cpu-map P-Q/1 => mapping of first thread of processes P to Q + * cpu-map 1/T-U => mapping of threads T to U of process 1 + * Otherwise other combinations are silently ignored since nbthread + * and nbproc cannot both be >1 : + * cpu-map P-Q/T => mapping for thread T for processes P to Q. + * Only one of T,Q may be > 1, others ignored. + * cpu-map P/T-U => mapping for threads T to U of process P. Only + * one of P,U may be > 1, others ignored. + */ + if (!thread) { + /* mapping for whole processes. E.g. 
cpu-map 1-4 0-3 */ for (i = n = 0; i < MAX_PROCS; i++) { /* No mapping for this process */ if (!(proc & (1UL << i))) @@ -1046,20 +1057,39 @@ int cfg_parse_global(const char *file, int linenum, char **args, int kwm) global.cpu_map.proc[i] = (1UL << (n-1)); } } - } + } else { + /* Mapping at the thread level. All threads are retained + * for process 1, and only thread 1 is retained for other + * processes. + */ + if (thread == 0x1) { + /* first thread, iterate on processes. E.g. cpu-map 1-4/1 0-3 */ + for (i = n = 0; i < MAX_PROCS; i++) { + /* No mapping for this process */ + if (!(proc & (1UL << i))) + continue; + if (!autoinc) + global.cpu_map.proc_t1[i] = cpus; + else { + n += my_ffsl(cpus >> n); + global.cpu_map.proc_t1[i] = (1UL << (n-1)); + } + } + } - if (atleast2(thread)) { - /* Mapping at the thread level */ - for (j = n = 0; j < MAX_THREADS; j++) { - /* No mapping for this thread */ - if (!(thread & (1UL << j))) - continue; + if (proc == 0x1) { + /* first process, iterate on threads. E.g. cpu-map 1/1-4 0-3 */ + for (j = n = 0; j < MAX_THREADS; j++) { + /* No mapping for this thread */ + if (!(thread & (1UL << j))) + continue; - if (!autoinc) - global.cpu_map.thread[j] = cpus; - else { - n += my_ffsl(cpus >> n); - global.cpu_map.thread[j] = (1UL << (n-1)); + if (!autoinc) + global.cpu_map.thread[j] = cpus; + else { + n += my_ffsl(cpus >> n); + global.cpu_map.thread[j] = (1UL << (n-1)); + } } } } diff --git a/src/haproxy.c b/src/haproxy.c index 617116c2b3..f6f00fc1ac 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -3278,6 +3278,9 @@ int main(int argc, char **argv) #ifdef USE_CPU_AFFINITY /* Now the CPU affinity for all threads */ + if (global.cpu_map.proc_t1[relative_pid-1]) + global.cpu_map.thread[0] &= global.cpu_map.proc_t1[relative_pid-1]; + for (i = 0; i < global.nbthread; i++) { if (global.cpu_map.proc[relative_pid-1]) global.cpu_map.thread[i] &= global.cpu_map.proc[relative_pid-1];