[PATCH 1/2] ppc64_cpu: Improve frequency calculation on large systems

13 views
Skip to first unread message

Nysal Jan K.A.

<nysal@linux.ibm.com>
unread,
Feb 10, 2026, 5:48:24 AM
to powerpc-utils-devel@googlegroups.com, Tyrel Datwyler, Srikar Dronamraju, Nysal Jan K.A.
Commit fac783d18d61af ("ppc64_cpu: Limit number of CPUs for frequency
calculation") limits the number of threads used for CPU frequency
calculation to CPU_SETSIZE (currently 1024). It is incorrectly stated to
be a limitation of sched_setaffinity(). We can allocate larger masks
using CPU_ALLOC() and lift the limitation of 1024 CPUs.

Signed-off-by: Nysal Jan K.A. <ny...@linux.ibm.com>
---
src/ppc64_cpu.c | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/ppc64_cpu.c b/src/ppc64_cpu.c
index 4017240..469e2ce 100644
--- a/src/ppc64_cpu.c
+++ b/src/ppc64_cpu.c
@@ -742,15 +742,19 @@ static void check_threads(struct cpu_freq *cpu_freqs, int max_thread)
static void *soak(void *arg)
{
unsigned int cpu = (long)arg;
- cpu_set_t cpumask;
+ cpu_set_t *cpumask;
+ size_t cpumask_size;

- CPU_ZERO(&cpumask);
- CPU_SET(cpu, &cpumask);
+ cpumask = CPU_ALLOC(threads_in_system);
+ cpumask_size = CPU_ALLOC_SIZE(threads_in_system);
+ CPU_ZERO_S(cpumask_size, cpumask);
+ CPU_SET_S(cpu, cpumask_size, cpumask);

- if (sched_setaffinity(0, sizeof(cpumask), &cpumask)) {
+ if (sched_setaffinity(0, cpumask_size, cpumask)) {
perror("sched_setaffinity");
pthread_exit(NULL);
}
+ CPU_FREE(cpumask);

while (1)
; /* Do Nothing */
@@ -930,10 +934,7 @@ static int do_cpu_frequency(int sleep_time)

setrlimit_open_files();

- max_thread = MIN(threads_in_system, CPU_SETSIZE);
- if (max_thread < threads_in_system)
- printf("ppc64_cpu currently supports up to %d CPUs\n",
- CPU_SETSIZE);
+ max_thread = threads_in_system;

cpu_freqs = calloc(max_thread, sizeof(*cpu_freqs));
if (!cpu_freqs)
--
2.52.0

Nysal Jan K.A.

<nysal@linux.ibm.com>
unread,
Feb 10, 2026, 5:48:34 AM
to powerpc-utils-devel@googlegroups.com, Tyrel Datwyler, Srikar Dronamraju, Nysal Jan K.A.
Use a pthread barrier instead of sleeping for an arbitrary amount of
time waiting for all threads to start. This improves the
"ppc64_cpu --frequency" execution time by close to a second.

Signed-off-by: Nysal Jan K.A. <ny...@linux.ibm.com>
---
src/ppc64_cpu.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/ppc64_cpu.c b/src/ppc64_cpu.c
index 469e2ce..ae68ba2 100644
--- a/src/ppc64_cpu.c
+++ b/src/ppc64_cpu.c
@@ -93,6 +93,7 @@ enum energy_freq_attrs {
static int threads_per_cpu = 0;
static int cpus_in_system = 0;
static int threads_in_system = 0;
+static pthread_barrier_t barrier;

static int do_info(void);

@@ -756,6 +757,7 @@ static void *soak(void *arg)
}
CPU_FREE(cpumask);

+ pthread_barrier_wait(&barrier);
while (1)
; /* Do Nothing */
}
@@ -946,6 +948,7 @@ static int do_cpu_frequency(int sleep_time)
return rc;
}

+ pthread_barrier_init(&barrier, NULL, max_thread+1);
/* Start a soak thread on each CPU */
for (i = 0; i < max_thread; i++) {
if (cpu_freqs[i].offline)
@@ -960,7 +963,8 @@ static int do_cpu_frequency(int sleep_time)
}

/* Wait for soak threads to start */
- usleep(1000000);
+ pthread_barrier_wait(&barrier);
+ pthread_barrier_destroy(&barrier);

start_counters(cpu_freqs, max_thread);
/* Count for specified timeout in seconds */
--
2.52.0

Srikar Dronamraju

<srikar@linux.ibm.com>
unread,
Feb 10, 2026, 9:36:07 AM
to Nysal Jan K.A., powerpc-utils-devel@googlegroups.com, Tyrel Datwyler
* Nysal Jan K.A. <ny...@linux.ibm.com> [2026-02-10 16:17:50]:
shouldn't we be calling CPU_FREE(cpumask) here?

pthread_exit(NULL);


> }
> + CPU_FREE(cpumask);
>
> while (1)
> ; /* Do Nothing */
> @@ -930,10 +934,7 @@ static int do_cpu_frequency(int sleep_time)
>
> setrlimit_open_files();
>
> - max_thread = MIN(threads_in_system, CPU_SETSIZE);
> - if (max_thread < threads_in_system)
> - printf("ppc64_cpu currently supports up to %d CPUs\n",
> - CPU_SETSIZE);
> + max_thread = threads_in_system;
>
> cpu_freqs = calloc(max_thread, sizeof(*cpu_freqs));
> if (!cpu_freqs)
> --
> 2.52.0
>

--
Thanks and Regards
Srikar Dronamraju

Srikar Dronamraju

<srikar@linux.ibm.com>
unread,
Feb 10, 2026, 9:41:25 AM
to powerpc-utils-devel@googlegroups.com, Tyrel Datwyler
* Nysal Jan K.A. <ny...@linux.ibm.com> [2026-02-10 16:17:51]:

> Use a pthread barrier instead of sleeping for an arbitrary amount of
> time waiting for all threads to start. This improves the
> "ppc64_cpu --frequency" execution time by close to a second.
>
> Signed-off-by: Nysal Jan K.A. <ny...@linux.ibm.com>
> ---
> src/ppc64_cpu.c | 6 +++++-
> 1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/src/ppc64_cpu.c b/src/ppc64_cpu.c
> index 469e2ce..ae68ba2 100644
> --- a/src/ppc64_cpu.c
> +++ b/src/ppc64_cpu.c
> @@ -93,6 +93,7 @@ enum energy_freq_attrs {
> static int threads_per_cpu = 0;
> static int cpus_in_system = 0;
> static int threads_in_system = 0;
> +static pthread_barrier_t barrier;
>
> static int do_info(void);
>
> @@ -756,6 +757,7 @@ static void *soak(void *arg)
> }
> CPU_FREE(cpumask);
>
> + pthread_barrier_wait(&barrier);

Again, should this be called in the pthread_exit path too?

> while (1)
> ; /* Do Nothing */
> }
> @@ -946,6 +948,7 @@ static int do_cpu_frequency(int sleep_time)
> return rc;
> }
>
> + pthread_barrier_init(&barrier, NULL, max_thread+1);
> /* Start a soak thread on each CPU */
> for (i = 0; i < max_thread; i++) {
> if (cpu_freqs[i].offline)
> @@ -960,7 +963,8 @@ static int do_cpu_frequency(int sleep_time)
> }
>
> /* Wait for soak threads to start */
> - usleep(1000000);
> + pthread_barrier_wait(&barrier);
> + pthread_barrier_destroy(&barrier);
>
> start_counters(cpu_freqs, max_thread);
> /* Count for specified timeout in seconds */
> --
> 2.52.0
>

Nysal Jan K.A.

<nysal@linux.ibm.com>
unread,
Feb 11, 2026, 12:57:36 AM
to Srikar Dronamraju, powerpc-utils-devel@googlegroups.com, Tyrel Datwyler
On Tue, Feb 10, 2026 at 08:05:57PM +0530, Srikar Dronamraju wrote:
> > + cpumask = CPU_ALLOC(threads_in_system);
> > + cpumask_size = CPU_ALLOC_SIZE(threads_in_system);
> > + CPU_ZERO_S(cpumask_size, cpumask);
> > + CPU_SET_S(cpu, cpumask_size, cpumask);
> >
> > - if (sched_setaffinity(0, sizeof(cpumask), &cpumask)) {
> > + if (sched_setaffinity(0, cpumask_size, cpumask)) {
> > perror("sched_setaffinity");
>
> shouldn't we be calling CPU_FREE(cpumask) here?
>
> pthread_exit(NULL);

Thanks for the review. I will address this and the pthread barrier comment in v2.

Regards
--Nysal
Reply all
Reply to author
Forward
0 new messages