MINOR: cpu-topo: add a new "performance" cpu-policy

This cpu policy tries to evict efficient core clusters and only
focuses on performance-oriented ones. On an Intel i9-14900K, we can
get 525k rps using only the 8 performance cores this way, versus
594k when using all 24 cores, i.e. barely 13% more throughput for
three times as many cores. The gains from using all these cores are
not significant enough to justify wasting them on this. Also, these
efficiency cores can be much slower at performing SSL handshakes, so
it can make sense to evict them and to keep them for other work
instead, such as handling network interrupts.

Also, on a developer's machine, it can be convenient to leave these
cores available for local tasks and extra tools (load generators,
monitoring, etc).
Willy Tarreau 2025-03-14 15:09:07 +01:00
parent 96cd420dc3
commit dcae2fa4a4
2 changed files with 82 additions and 0 deletions

doc/configuration.txt

@@ -1991,6 +1991,17 @@ cpu-policy <policy>
respected. This is recommended on multi-socket and NUMA
systems, as well as CPUs with bad inter-CCX latencies.
- performance exactly like group-by-cluster above, except that CPU
clusters whose performance is less than half of the
next more performant one are evicted. These are
typically "little" or "efficient" cores, whose addition
generally doesn't bring significant gains and can
easily be counter-productive (e.g. TLS handshakes).
Often, keeping such cores for other tasks such as
network handling is much more effective. On development
systems, these can also be used to run auxiliary tools
such as load generators and monitoring tools.
See also: "cpu-map", "cpu-set", "nbthread"
cpu-set <directive>...
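
For reference, enabling this policy from a configuration file is a
one-liner in the global section (a minimal sketch; any other global
settings are independent of it):

    global
        cpu-policy performance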

src/cpu_topo.c

@@ -53,11 +53,13 @@ static int cpu_policy = 1; // "first-usable-node"
/* list of CPU policies for "cpu-policy". The default one is the first one. */
static int cpu_policy_first_usable_node(int policy, int tmin, int tmax, int gmin, int gmax, char **err);
static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin, int gmax, char **err);
static int cpu_policy_performance(int policy, int tmin, int tmax, int gmin, int gmax, char **err);
static struct ha_cpu_policy ha_cpu_policy[] = {
{ .name = "none", .desc = "use all available CPUs", .fct = NULL },
{ .name = "first-usable-node", .desc = "use only first usable node if nbthreads not set", .fct = cpu_policy_first_usable_node },
{ .name = "group-by-cluster", .desc = "make one thread group per core cluster", .fct = cpu_policy_group_by_cluster },
{ .name = "performance", .desc = "make one thread group per perf. core cluster", .fct = cpu_policy_performance },
{ 0 } /* end */
};
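This table acts as the policy registry, with the all-zero entry as an
end marker. A lookup over it would typically look like the following
sketch (the find_policy() helper is a hypothetical illustration, not
part of the patch):

    /* hypothetical helper: return the policy matching <name>, or NULL.
     * The scan stops on the all-zero sentinel entry.
     */
    static const struct ha_cpu_policy *find_policy(const char *name)
    {
        int i;

        for (i = 0; ha_cpu_policy[i].name; i++) {
            if (strcmp(ha_cpu_policy[i].name, name) == 0)
                return &ha_cpu_policy[i];
        }
        return NULL;
    }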
@@ -531,6 +533,36 @@ void cpu_reorder_by_cluster_capa(struct ha_cpu_topo *topo, int entries)
qsort(topo, entries, sizeof(*topo), _cmp_cpu_cluster_capa);
}
/* functions below act on ha_cpu_cluster structs */
/* function used by qsort to reorder clusters by index */
int _cmp_cluster_index(const void *a, const void *b)
{
const struct ha_cpu_cluster *l = (const struct ha_cpu_cluster *)a;
const struct ha_cpu_cluster *r = (const struct ha_cpu_cluster *)b;
return l->idx - r->idx;
}
/* function used by qsort to order clusters by reverse capacity */
int _cmp_cluster_capa(const void *a, const void *b)
{
const struct ha_cpu_cluster *l = (const struct ha_cpu_cluster *)a;
const struct ha_cpu_cluster *r = (const struct ha_cpu_cluster *)b;
return r->capa - l->capa;
}
/* re-order a cluster array by cluster index only */
void cpu_cluster_reorder_by_index(struct ha_cpu_cluster *clusters, int entries)
{
qsort(clusters, entries, sizeof(*clusters), _cmp_cluster_index);
}
/* re-order a cluster array by reverse (descending) capacity */
void cpu_cluster_reorder_by_capa(struct ha_cpu_cluster *clusters, int entries)
{
qsort(clusters, entries, sizeof(*clusters), _cmp_cluster_capa);
}
/* returns an optimal maxcpus for the current system. It will take into
* account what is reported by the OS, if any, otherwise will fall back
* to the cpuset size, which serves as an upper limit in any case.
@@ -1064,6 +1096,45 @@ static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin,
return 0;
}
/* the "performance" cpu-policy:
* - does nothing if nbthread or thread-groups are set
* - evicts clusters whose total capacity is less than half of the
* last retained, more performant one's
* - tries to create one thread-group per remaining cluster, with as
* many threads as CPUs in the cluster, and binds all the threads of
* this group to all the CPUs of the cluster.
*/
static int cpu_policy_performance(int policy, int tmin, int tmax, int gmin, int gmax, char **err)
{
int cpu, cluster;
int capa;
if (global.nbthread || global.nbtgroups)
return 0;
/* sort clusters by reverse capacity */
cpu_cluster_reorder_by_capa(ha_cpu_clusters, cpu_topo_maxcpus);
capa = 0;
for (cluster = 0; cluster < cpu_topo_maxcpus; cluster++) {
if (capa && ha_cpu_clusters[cluster].capa < capa / 2) {
/* This cluster has less than half the capacity of the
* last retained one, we're not interested in using it.
*/
for (cpu = 0; cpu <= cpu_topo_lastcpu; cpu++) {
if (ha_cpu_topo[cpu].cl_gid == ha_cpu_clusters[cluster].idx)
ha_cpu_topo[cpu].st |= HA_CPU_F_IGNORED;
}
}
else
capa = ha_cpu_clusters[cluster].capa;
}
cpu_cluster_reorder_by_index(ha_cpu_clusters, cpu_topo_maxcpus);
/* and finish using the group-by-cluster strategy */
return cpu_policy_group_by_cluster(policy, tmin, tmax, gmin, gmax, err);
}
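To make the eviction rule concrete, here is a standalone toy program
(the toy_cluster struct and the capacity values are made up for
illustration) that applies the same "less than half of the last
retained capacity" test after a descending sort:

    #include <stdio.h>
    #include <stdlib.h>

    /* simplified stand-in for struct ha_cpu_cluster */
    struct toy_cluster {
        int idx;   /* cluster index */
        int capa;  /* total compute capacity */
    };

    /* qsort comparator: descending capacity, like _cmp_cluster_capa() */
    static int cmp_capa_desc(const void *a, const void *b)
    {
        const struct toy_cluster *l = a, *r = b;
        return r->capa - l->capa;
    }

    int main(void)
    {
        /* e.g. one performance cluster and two efficiency clusters */
        struct toy_cluster clusters[] = {
            { .idx = 0, .capa = 800 },
            { .idx = 1, .capa = 300 },
            { .idx = 2, .capa = 280 },
        };
        int n = sizeof(clusters) / sizeof(clusters[0]);
        int capa = 0;
        int i;

        qsort(clusters, n, sizeof(*clusters), cmp_capa_desc);

        for (i = 0; i < n; i++) {
            if (capa && clusters[i].capa < capa / 2) {
                /* under half the last retained capacity: evict */
                printf("cluster %d (capa %d): evicted\n",
                       clusters[i].idx, clusters[i].capa);
            } else {
                /* retained: becomes the new reference capacity */
                capa = clusters[i].capa;
                printf("cluster %d (capa %d): kept\n",
                       clusters[i].idx, clusters[i].capa);
            }
        }
        return 0;
    }

Note that, as in the patch, the reference capacity is only updated on
retained clusters, so a sequence of clusters each slightly faster than
half the previous one cannot drift below the threshold unnoticed.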
/* apply the chosen CPU policy if no cpu-map was forced. Returns < 0 on failure
* with a message in *err that must be freed by the caller if non-null.
*/