MINOR: cpu-topo: add a new "performance" cpu-policy

This cpu policy tries to evict efficient core clusters and only
focuses on performance-oriented ones. On an Intel i9-14900K, we can
get 525k rps using only the 8 performance cores this way, versus
594k when using all 24 cores, i.e. barely 13% more throughput for
three times as many cores. The gains from using all these cores are
not significant enough to justify wasting them on this. Also, these
efficiency cores can be much slower at performing SSL handshakes, so
it can make sense to evict them and to keep them for other work
instead, such as handling network interrupts.

Also, on a developer's machine, it can be convenient to leave these
cores available for local tasks and extra tools (load generators,
monitoring, etc).
Willy Tarreau 2025-03-14 15:09:07 +01:00
parent 96cd420dc3
commit dcae2fa4a4
2 changed files with 82 additions and 0 deletions

doc/configuration.txt

@@ -1991,6 +1991,17 @@ cpu-policy <policy>
respected. This is recommended on multi-socket and NUMA
systems, as well as CPUs with bad inter-CCX latencies.
- performance exactly like group-by-cluster above, except that CPU
clusters whose performance is less than half of the
next more performant one are evicted. These are
typically "little" or "efficient" cores, whose addition
generally doesn't bring significant gains and can
easily be counter-productive (e.g. TLS handshakes).
Often, keeping such cores for other tasks such as
network handling is much more effective. On development
systems, these can also be used to run auxiliary tools
such as load generators and monitoring tools.
See also: "cpu-map", "cpu-set", "nbthread"
cpu-set <directive>...
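
For reference, enabling this policy from a configuration file is a
one-liner in the global section (a minimal sketch; any other global
settings are independent of it):

    global
        cpu-policy performance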

src/cpu_topo.c

@@ -53,11 +53,13 @@ static int cpu_policy = 1; // "first-usable-node"
/* list of CPU policies for "cpu-policy". The default one is the first one. */
static int cpu_policy_first_usable_node(int policy, int tmin, int tmax, int gmin, int gmax, char **err);
static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin, int gmax, char **err);
static int cpu_policy_performance(int policy, int tmin, int tmax, int gmin, int gmax, char **err);
static struct ha_cpu_policy ha_cpu_policy[] = {
{ .name = "none", .desc = "use all available CPUs", .fct = NULL },
{ .name = "first-usable-node", .desc = "use only first usable node if nbthreads not set", .fct = cpu_policy_first_usable_node },
{ .name = "group-by-cluster", .desc = "make one thread group per core cluster", .fct = cpu_policy_group_by_cluster },
{ .name = "performance", .desc = "make one thread group per perf. core cluster", .fct = cpu_policy_performance },
{ 0 } /* end */
};
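This table acts as the policy registry, with the all-zero entry as an
end marker. A lookup over it would typically look like the following
sketch (the find_policy() helper is a hypothetical illustration, not
part of the patch):

    /* hypothetical helper: return the policy matching <name>, or NULL.
     * The scan stops on the all-zero sentinel entry.
     */
    static const struct ha_cpu_policy *find_policy(const char *name)
    {
        int i;

        for (i = 0; ha_cpu_policy[i].name; i++) {
            if (strcmp(ha_cpu_policy[i].name, name) == 0)
                return &ha_cpu_policy[i];
        }
        return NULL;
    }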
@@ -531,6 +533,36 @@ void cpu_reorder_by_cluster_capa(struct ha_cpu_topo *topo, int entries)
qsort(topo, entries, sizeof(*topo), _cmp_cpu_cluster_capa);
}
/* functions below act on ha_cpu_cluster structs */
/* function used by qsort to reorder clusters by index */
int _cmp_cluster_index(const void *a, const void *b)
{
const struct ha_cpu_cluster *l = (const struct ha_cpu_cluster *)a;
const struct ha_cpu_cluster *r = (const struct ha_cpu_cluster *)b;
return l->idx - r->idx;
}
/* function used by qsort to order clusters by reverse capacity */
int _cmp_cluster_capa(const void *a, const void *b)
{
const struct ha_cpu_cluster *l = (const struct ha_cpu_cluster *)a;
const struct ha_cpu_cluster *r = (const struct ha_cpu_cluster *)b;
return r->capa - l->capa;
}
/* re-order a cluster array by cluster index only */
void cpu_cluster_reorder_by_index(struct ha_cpu_cluster *clusters, int entries)
{
qsort(clusters, entries, sizeof(*clusters), _cmp_cluster_index);
}
/* re-order a cluster array by reverse (descending) capacity */
void cpu_cluster_reorder_by_capa(struct ha_cpu_cluster *clusters, int entries)
{
qsort(clusters, entries, sizeof(*clusters), _cmp_cluster_capa);
}
/* returns an optimal maxcpus for the current system. It will take into
* account what is reported by the OS, if any, otherwise will fall back
* to the cpuset size, which serves as an upper limit in any case.
@@ -1064,6 +1096,45 @@ static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin,
return 0;
}
/* the "performance" cpu-policy:
* - does nothing if nbthread or thread-groups are set
* - evicts clusters whose total capacity is less than half of the
* last retained, more performant one's
* - tries to create one thread-group per remaining cluster, with as
* many threads as CPUs in the cluster, and binds all the threads of
* this group to all the CPUs of the cluster.
*/
static int cpu_policy_performance(int policy, int tmin, int tmax, int gmin, int gmax, char **err)
{
int cpu, cluster;
int capa;
if (global.nbthread || global.nbtgroups)
return 0;
/* sort clusters by reverse capacity */
cpu_cluster_reorder_by_capa(ha_cpu_clusters, cpu_topo_maxcpus);
capa = 0;
for (cluster = 0; cluster < cpu_topo_maxcpus; cluster++) {
if (capa && ha_cpu_clusters[cluster].capa < capa / 2) {
/* This cluster has less than half the capacity of the
* last retained one, we're not interested in using it.
*/
for (cpu = 0; cpu <= cpu_topo_lastcpu; cpu++) {
if (ha_cpu_topo[cpu].cl_gid == ha_cpu_clusters[cluster].idx)
ha_cpu_topo[cpu].st |= HA_CPU_F_IGNORED;
}
}
else
capa = ha_cpu_clusters[cluster].capa;
}
cpu_cluster_reorder_by_index(ha_cpu_clusters, cpu_topo_maxcpus);
/* and finish using the group-by-cluster strategy */
return cpu_policy_group_by_cluster(policy, tmin, tmax, gmin, gmax, err);
}
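To make the eviction rule concrete, here is a standalone toy program
(the toy_cluster struct and the capacity values are made up for
illustration) that applies the same "less than half of the last
retained capacity" test after a descending sort:

    #include <stdio.h>
    #include <stdlib.h>

    /* simplified stand-in for struct ha_cpu_cluster */
    struct toy_cluster {
        int idx;   /* cluster index */
        int capa;  /* total compute capacity */
    };

    /* qsort comparator: descending capacity, like _cmp_cluster_capa() */
    static int cmp_capa_desc(const void *a, const void *b)
    {
        const struct toy_cluster *l = a, *r = b;
        return r->capa - l->capa;
    }

    int main(void)
    {
        /* e.g. one performance cluster and two efficiency clusters */
        struct toy_cluster clusters[] = {
            { .idx = 0, .capa = 800 },
            { .idx = 1, .capa = 300 },
            { .idx = 2, .capa = 280 },
        };
        int n = sizeof(clusters) / sizeof(clusters[0]);
        int capa = 0;
        int i;

        qsort(clusters, n, sizeof(*clusters), cmp_capa_desc);

        for (i = 0; i < n; i++) {
            if (capa && clusters[i].capa < capa / 2) {
                /* under half the last retained capacity: evict */
                printf("cluster %d (capa %d): evicted\n",
                       clusters[i].idx, clusters[i].capa);
            } else {
                /* retained: becomes the new reference capacity */
                capa = clusters[i].capa;
                printf("cluster %d (capa %d): kept\n",
                       clusters[i].idx, clusters[i].capa);
            }
        }
        return 0;
    }

Note that, as in the patch, the reference capacity is only updated on
retained clusters, so a sequence of clusters each slightly faster than
half the previous one cannot drift below the threshold unnoticed.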
/* apply the chosen CPU policy if no cpu-map was forced. Returns < 0 on failure
* with a message in *err that must be freed by the caller if non-null.
*/