BUG/MEDIUM: threads: cpu-map designating a single thread/process are ignored
Since commit 81492c989
("MINOR: threads: flatten the per-thread cpu-map"),
we don't keep the proc*thread matrix anymore to represent the full binding
possibilities, but only the proc and thread ones. The problem is that the
per-process binding is not the same for each thread and for the process,
and the proc[] array was assumed to store the per-proc first thread value
when doing this change. Worse, the logic present there tries to deal with
thread ranges and process ranges in a way which automatically excluded the
other possibility (since ranges cannot be used on both) but as such fails
to apply changes if neither the process nor the thread is expressed as a
range.
The real problem comes from the fact that specifying cpu-map 1/1 doesn't
yet reveal if the per-process mask or the per-thread mask needs to be
updated. In practice it's the thread one but then the current storage
doesn't allow storing the binding of the first thread of each other
process in nbproc>1 configurations.
When removing the proc*thread matrix, what ought to have been kept was
both the thread column for process 1 and the process line for thread 1,
but instead only the thread column was kept. This patch reintroduces the
storage of the configuration for the first thread of each process so that
it is again possible to store either the per-thread or per-process
configuration.
As a partial workaround for existing configurations, it is possible to
systematically indicate at least two processes or two threads at once
and map them by pairs or more so that at least two values are present
in the range. E.g :
# set processes 1-4 to cpus 0-3 :
cpu-map auto:1-4/1 0 1 2 3
# or:
cpu-map 1-2/1 0 1
cpu-map 3-4/1 2 3
# set threads 1-4 to cpus 0-3 :
cpu-map auto:1/1-4 0 1 2 3
# or :
cpu-map 1/1-2 0 1
cpu-map 1/3-4 2 3
This fix must be backported to 2.0.
This commit is contained in:
parent
885f64fb6d
commit
7764a57d32
|
@ -184,7 +184,8 @@ struct global {
|
|||
#ifdef USE_CPU_AFFINITY
|
||||
struct {
|
||||
unsigned long proc[MAX_PROCS]; /* list of CPU masks for the 32/64 first processes */
|
||||
unsigned long thread[MAX_THREADS]; /* list of CPU masks for the 32/64 first threads */
|
||||
unsigned long proc_t1[MAX_PROCS]; /* list of CPU masks for the 1st thread of each process */
|
||||
unsigned long thread[MAX_THREADS]; /* list of CPU masks for the 32/64 first threads of the 1st process */
|
||||
} cpu_map;
|
||||
#endif
|
||||
};
|
||||
|
|
|
@ -1032,8 +1032,19 @@ int cfg_parse_global(const char *file, int linenum, char **args, int kwm)
|
|||
goto out;
|
||||
}
|
||||
|
||||
if (atleast2(proc)) {
|
||||
/* Mapping at the process level */
|
||||
/* we now have to deal with 3 real cases :
|
||||
* cpu-map P-Q => mapping for whole processes, numbers P to Q
|
||||
* cpu-map P-Q/1 => mapping of first thread of processes P to Q
|
||||
* cpu-map 1/T-U => mapping of threads T to U of process 1
|
||||
* Otherwise other combinations are silently ignored since nbthread
|
||||
* and nbproc cannot both be >1 :
|
||||
* cpu-map P-Q/T => mapping for thread T for processes P to Q.
|
||||
* Only one of T,Q may be > 1, others ignored.
|
||||
* cpu-map P/T-U => mapping for threads T to U of process P. Only
|
||||
* one of P,U may be > 1, others ignored.
|
||||
*/
|
||||
if (!thread) {
|
||||
/* mapping for whole processes. E.g. cpu-map 1-4 0-3 */
|
||||
for (i = n = 0; i < MAX_PROCS; i++) {
|
||||
/* No mapping for this process */
|
||||
if (!(proc & (1UL << i)))
|
||||
|
@ -1046,20 +1057,39 @@ int cfg_parse_global(const char *file, int linenum, char **args, int kwm)
|
|||
global.cpu_map.proc[i] = (1UL << (n-1));
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Mapping at the thread level. All threads are retained
|
||||
* for process 1, and only thread 1 is retained for other
|
||||
* processes.
|
||||
*/
|
||||
if (thread == 0x1) {
|
||||
/* first thread, iterate on processes. E.g. cpu-map 1-4/1 0-3 */
|
||||
for (i = n = 0; i < MAX_PROCS; i++) {
|
||||
/* No mapping for this process */
|
||||
if (!(proc & (1UL << i)))
|
||||
continue;
|
||||
if (!autoinc)
|
||||
global.cpu_map.proc_t1[i] = cpus;
|
||||
else {
|
||||
n += my_ffsl(cpus >> n);
|
||||
global.cpu_map.proc_t1[i] = (1UL << (n-1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (atleast2(thread)) {
|
||||
/* Mapping at the thread level */
|
||||
for (j = n = 0; j < MAX_THREADS; j++) {
|
||||
/* No mapping for this thread */
|
||||
if (!(thread & (1UL << j)))
|
||||
continue;
|
||||
if (proc == 0x1) {
|
||||
/* first process, iterate on threads. E.g. cpu-map 1/1-4 0-3 */
|
||||
for (j = n = 0; j < MAX_THREADS; j++) {
|
||||
/* No mapping for this thread */
|
||||
if (!(thread & (1UL << j)))
|
||||
continue;
|
||||
|
||||
if (!autoinc)
|
||||
global.cpu_map.thread[j] = cpus;
|
||||
else {
|
||||
n += my_ffsl(cpus >> n);
|
||||
global.cpu_map.thread[j] = (1UL << (n-1));
|
||||
if (!autoinc)
|
||||
global.cpu_map.thread[j] = cpus;
|
||||
else {
|
||||
n += my_ffsl(cpus >> n);
|
||||
global.cpu_map.thread[j] = (1UL << (n-1));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3278,6 +3278,9 @@ int main(int argc, char **argv)
|
|||
|
||||
#ifdef USE_CPU_AFFINITY
|
||||
/* Now the CPU affinity for all threads */
|
||||
if (global.cpu_map.proc_t1[relative_pid-1])
|
||||
global.cpu_map.thread[0] &= global.cpu_map.proc_t1[relative_pid-1];
|
||||
|
||||
for (i = 0; i < global.nbthread; i++) {
|
||||
if (global.cpu_map.proc[relative_pid-1])
|
||||
global.cpu_map.thread[i] &= global.cpu_map.proc[relative_pid-1];
|
||||
|
|
Loading…
Reference in New Issue