[PATCH 1/2] cpu_info_helpers: Add helper function to retrieve present CPU core list

8 views
Skip to first unread message

Tyrel Datwyler

<tyreld@linux.ibm.com>
unread,
Feb 25, 2025, 5:20:06 PMFeb 25
to powerpc-utils-devel@googlegroups.com, Aboorva Devarajan, Tyrel Datwyler
From: Aboorva Devarajan <aboo...@linux.ibm.com>

Introduce get_present_core_list helper function to accurately parse
and retrieve the list of present CPU cores, addressing gaps in core
numbering caused by dynamic addition or removal of CPUs (via CPU DLPAR
operation)

Utilizes the present CPU list from `sys/devices/system/cpu/present`
to handle non-contiguous CPU IDs. Accurately maps core IDs to CPUs
considering specified number of threads per CPU, addressing gaps in
core numbering.

Signed-off-by: Aboorva Devarajan <aboo...@linux.ibm.com>
Signed-off-by: Tyrel Datwyler <tyr...@linux.ibm.com>
---
src/common/cpu_info_helpers.c | 107 ++++++++++++++++++++++++++++++++++
src/common/cpu_info_helpers.h | 9 ++-
2 files changed, 113 insertions(+), 3 deletions(-)

diff --git a/src/common/cpu_info_helpers.c b/src/common/cpu_info_helpers.c
index 8c57db8b9550..756e7925b39e 100644
--- a/src/common/cpu_info_helpers.c
+++ b/src/common/cpu_info_helpers.c
@@ -203,6 +203,113 @@ int __get_one_smt_state(int core, int threads_per_cpu)
return smt_state;
}

+int get_present_cpu_count(void)
+{
+ int start, end, total_cpus = 0;
+ size_t len = 0;
+ char *line = NULL;
+ FILE *fp;
+ char *token;
+
+ fp = fopen(CPU_PRESENT_PATH, "r");
+ if (!fp) {
+ perror("Error opening CPU_PRESENT_PATH");
+ return -1;
+ }
+
+ if (getline(&line, &len, fp) == -1) {
+ perror("Error reading CPU_PRESENT_PATH");
+ fclose(fp);
+ free(line);
+ return -1;
+ }
+ fclose(fp);
+
+ token = strtok(line, ",");
+ while (token) {
+ if (sscanf(token, "%d-%d", &start, &end) == 2) {
+ total_cpus += (end - start + 1);
+ } else if (sscanf(token, "%d", &start) == 1) {
+ total_cpus++;
+ }
+ token = strtok(NULL, ",");
+ }
+
+ free(line);
+ return total_cpus;
+}
+
+int get_present_core_list(int **present_cores, int *num_present_cores, int threads_per_cpu)
+{
+ FILE *fp = NULL;
+ char *line = NULL;
+ char *token = NULL;
+ size_t len = 0;
+ ssize_t read;
+ int core_count = 0;
+ int core_list_size;
+ int *cores = NULL;
+ int start, end, i;
+
+ if (threads_per_cpu <= 0) {
+ fprintf(stderr, "Invalid threads_per_cpu value, got %d expected >= 1\n", threads_per_cpu);
+ return -1;
+ }
+
+ core_list_size = get_present_cpu_count() / threads_per_cpu;
+ if (core_list_size <= 0) {
+ fprintf(stderr, "Error while calculating core list size\n");
+ return -1;
+ }
+
+ cores = malloc(core_list_size * sizeof(int));
+ if (!cores) {
+ perror("Memory allocation failed");
+ goto cleanup;
+ }
+
+ fp = fopen(CPU_PRESENT_PATH, "r");
+ if (!fp) {
+ perror("Error opening file");
+ goto cleanup;
+ }
+
+ read = getline(&line, &len, fp);
+ if (read == -1) {
+ perror("Error reading file");
+ goto cleanup;
+ }
+
+ token = strtok(line, ",");
+ while (token) {
+ if (sscanf(token, "%d-%d", &start, &end) == 2) {
+ for (i = start; i <= end; i++) {
+ if (i % threads_per_cpu == 0) {
+ cores[core_count++] = i / threads_per_cpu;
+ }
+ }
+ } else if (sscanf(token, "%d", &start) == 1) {
+ if (start % threads_per_cpu == 0) {
+ cores[core_count++] = start / threads_per_cpu;
+ }
+ }
+ token = strtok(NULL, ",");
+ }
+
+ *present_cores = cores;
+ *num_present_cores = core_count;
+ free(line);
+ return 0;
+
+cleanup:
+ if (fp) {
+ fclose(fp);
+ }
+ free(line);
+ free(cores);
+ return -1;
+}
+
static void print_cpu_list(const cpu_set_t *cpuset, int cpuset_size,
int cpus_in_system)
{
diff --git a/src/common/cpu_info_helpers.h b/src/common/cpu_info_helpers.h
index c063fff9a4d4..77e6ad7ad5c2 100644
--- a/src/common/cpu_info_helpers.h
+++ b/src/common/cpu_info_helpers.h
@@ -24,9 +24,10 @@
#ifndef _CPU_INFO_HELPERS_H
#define _CPU_INFO_HELPERS_H

-#define SYSFS_CPUDIR "/sys/devices/system/cpu/cpu%d"
-#define SYSFS_SUBCORES "/sys/devices/system/cpu/subcores_per_core"
-#define INTSERV_PATH "/proc/device-tree/cpus/%s/ibm,ppc-interrupt-server#s"
+#define SYSFS_CPUDIR "/sys/devices/system/cpu/cpu%d"
+#define SYSFS_SUBCORES "/sys/devices/system/cpu/subcores_per_core"
+#define INTSERV_PATH "/proc/device-tree/cpus/%s/ibm,ppc-interrupt-server#s"
+#define CPU_PRESENT_PATH "/sys/devices/system/cpu/present"

#define SYSFS_PATH_MAX 128

@@ -39,6 +40,8 @@ extern int num_subcores(void);
extern int get_attribute(char *path, const char *fmt, int *value);
extern int get_cpu_info(int *threads_per_cpu, int *cpus_in_system,
int *threads_in_system);
+extern int get_present_core_list(int **present_cores, int *num_present_cores,
+ int threads_per_cpu);
extern int __is_smt_capable(int threads_in_system);
extern int __get_one_smt_state(int core, int threads_per_cpu);
extern int __do_smt(bool numeric, int cpus_in_system, int threads_per_cpu,
--
2.48.1

Tyrel Datwyler

<tyreld@linux.ibm.com>
unread,
Feb 25, 2025, 5:20:07 PMFeb 25
to powerpc-utils-devel@googlegroups.com, Aboorva Devarajan, Tyrel Datwyler
From: Aboorva Devarajan <aboo...@linux.ibm.com>

In ppc64le environments, adding or removing CPUs dynamically through
DLPAR can create gaps in CPU IDs, such as `0-103,120-151`, in this
case CPUs 104-119 are missing.

ppc64_cpu doesn't handles this scenario and always considers CPU IDs
to be contiguous causing issues in core numbering, cpu info and SMT
mode reporting.

To illustrate the issues this patch fixes, consider the following
system configuration:

$ lscpu
Architecture: ppc64le
Byte Order: Little Endian
CPU(s): 136
On-line CPU(s) list: 0-103,120-151

**Note: CPU IDs are non-contiguous**

-----------------------------------------------------------------
Before Patch:
-----------------------------------------------------------------

$ ppc64_cpu --info
Core 0: 0* 1* 2* 3* 4* 5* 6* 7*
Core 1: 8* 9* 10* 11* 12* 13* 14* 15*
Core 2: 16* 17* 18* 19* 20* 21* 22* 23*
Core 3: 24* 25* 26* 27* 28* 29* 30* 31*
Core 4: 32* 33* 34* 35* 36* 37* 38* 39*
Core 5: 40* 41* 42* 43* 44* 45* 46* 47*
Core 6: 48* 49* 50* 51* 52* 53* 54* 55*
Core 7: 56* 57* 58* 59* 60* 61* 62* 63*
Core 8: 64* 65* 66* 67* 68* 69* 70* 71*
Core 9: 72* 73* 74* 75* 76* 77* 78* 79*
Core 10: 80* 81* 82* 83* 84* 85* 86* 87*
Core 11: 88* 89* 90* 91* 92* 93* 94* 95*
Core 12: 96* 97* 98* 99* 100* 101* 102* 103*
........................................................... *gap*
Core 13: 120* 121* 122* 123* 124* 125* 126* 127*
Core 14: 128* 129* 130* 131* 132* 133* 134* 135*
Core 15: 136* 137* 138* 139* 140* 141* 142* 143*
Core 16: 144* 145* 146* 147* 148* 149* 150* 151*

**Although the CPU IDs are non contiguous, associated core IDs are
represented in contiguous order, which makes it harder to interpret
this clearly.**

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

$ ppc64_cpu --cores-on
Number of cores online = 15

**Expected: Number of online cores = 17**
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

$ ppc64_cpu --offline-cores
Cores offline = 13, 14

**Even though no cores are actually offline, two cores (13, 14)
are displayed as offline.**
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

$ ppc64_cpu --online-cores
Cores online = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 15, 16

**The list of online cores is missing two cores (13, 14).**
-----------------------------------------------------------------

To resolve this, use the present CPU list from sysfs to assign
numbers to CPUs and cores, which will make this accurate.

$ cat /sys/devices/system/cpu/present
0-103,120-151

With this patch, the command output correctly reflects the
current CPU configuration, providing a more precise representation
of the system state.

-----------------------------------------------------------------
After Patch:
-----------------------------------------------------------------

$ ppc64_cpu --info
Core 0: 0* 1* 2* 3* 4* 5* 6* 7*
Core 1: 8* 9* 10* 11* 12* 13* 14* 15*
Core 2: 16* 17* 18* 19* 20* 21* 22* 23*
Core 3: 24* 25* 26* 27* 28* 29* 30* 31*
Core 4: 32* 33* 34* 35* 36* 37* 38* 39*
Core 5: 40* 41* 42* 43* 44* 45* 46* 47*
Core 6: 48* 49* 50* 51* 52* 53* 54* 55*
Core 7: 56* 57* 58* 59* 60* 61* 62* 63*
Core 8: 64* 65* 66* 67* 68* 69* 70* 71*
Core 9: 72* 73* 74* 75* 76* 77* 78* 79*
Core 10: 80* 81* 82* 83* 84* 85* 86* 87*
Core 11: 88* 89* 90* 91* 92* 93* 94* 95*
Core 12: 96* 97* 98* 99* 100* 101* 102* 103*
........................................................... *gap*
Core 15: 120* 121* 122* 123* 124* 125* 126* 127*
Core 16: 128* 129* 130* 131* 132* 133* 134* 135*
Core 17: 136* 137* 138* 139* 140* 141* 142* 143*
Core 18: 144* 145* 146* 147* 148* 149* 150* 151*

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

$ ppc64_cpu --cores-on
Number of cores online = 17

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

$ ppc64_cpu --offline-cores
Cores offline =

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

$ ppc64_cpu --online-cores
Cores online = 0,1,2,3,4,5,6,7,8,9,10,11,12,15,16,17,18

-----------------------------------------------------------------

Signed-off-by: Aboorva Devarajan <aboo...@linux.ibm.com>
Signed-off-by: Tyrel Datwyler <tyr...@linux.ibm.com>
---
src/common/cpu_info_helpers.c | 96 +++++++++-----
src/ppc64_cpu.c | 231 +++++++++++++++++++++++-----------
2 files changed, 221 insertions(+), 106 deletions(-)

diff --git a/src/common/cpu_info_helpers.c b/src/common/cpu_info_helpers.c
index 756e7925b39e..e75cf6e09223 100644
--- a/src/common/cpu_info_helpers.c
+++ b/src/common/cpu_info_helpers.c
@@ -311,67 +311,94 @@ cleanup:
}

static void print_cpu_list(const cpu_set_t *cpuset, int cpuset_size,
- int cpus_in_system)
+ int threads_per_cpu)
{
- int core;
+ int *present_cores = NULL;
+ int num_present_cores;
+ int start, end, i = 0;
const char *comma = "";

- for (core = 0; core < cpus_in_system; core++) {
- int begin = core;
- if (CPU_ISSET_S(core, cpuset_size, cpuset)) {
- while (CPU_ISSET_S(core+1, cpuset_size, cpuset))
- core++;
+ if (get_present_core_list(&present_cores, &num_present_cores, threads_per_cpu) != 0) {
+ fprintf(stderr, "Failed to get present_cores list\n");
+ return;
+ }

- if (core > begin)
- printf("%s%d-%d", comma, begin, core);
- else
- printf("%s%d", comma, core);
+ while (i < num_present_cores) {
+ start = present_cores[i];
+ if (CPU_ISSET_S(start, cpuset_size, cpuset)) {
+ end = start;
+ while (i + 1 < num_present_cores &&
+ CPU_ISSET_S(present_cores[i + 1], cpuset_size, cpuset) &&
+ present_cores[i + 1] == end + 1) {
+ end = present_cores[++i];
+ }
+ if (start == end) {
+ printf("%s%d", comma, start);
+ } else {
+ printf("%s%d-%d", comma, start, end);
+ }
comma = ",";
}
+ i++;
}
+ free(present_cores);
}

-int __do_smt(bool numeric, int cpus_in_system, int threads_per_cpu,
- bool print_smt_state)
+int __do_smt(bool numeric, int cpus_in_system, int threads_per_cpu, bool print_smt_state)
{
- int thread, c, smt_state = 0;
cpu_set_t **cpu_states = NULL;
- int cpu_state_size = CPU_ALLOC_SIZE(cpus_in_system);
- int start_cpu = 0, stop_cpu = cpus_in_system;
+ int thread, smt_state = -1;
+ int cpu_state_size;
int rc = 0;
+ int i, core_id, threads_online;
+ int *present_cores = NULL;
+ int num_present_cores;

- cpu_states = (cpu_set_t **)calloc(threads_per_cpu, sizeof(cpu_set_t));
- if (!cpu_states)
+ if (get_present_core_list(&present_cores, &num_present_cores, threads_per_cpu) != 0) {
+ fprintf(stderr, "Failed to get present core list\n");
return -ENOMEM;
+ }
+ cpu_state_size = CPU_ALLOC_SIZE(num_present_cores);
+ cpu_states = (cpu_set_t **)calloc(threads_per_cpu, sizeof(cpu_set_t *));
+ if (!cpu_states) {
+ rc = -ENOMEM;
+ goto cleanup_present_cores;
+ }

for (thread = 0; thread < threads_per_cpu; thread++) {
- cpu_states[thread] = CPU_ALLOC(cpus_in_system);
+ cpu_states[thread] = CPU_ALLOC(num_present_cores);
+ if (!cpu_states[thread]) {
+ rc = -ENOMEM;
+ goto cleanup_cpu_states;
+ }
CPU_ZERO_S(cpu_state_size, cpu_states[thread]);
}

- for (c = start_cpu; c < stop_cpu; c++) {
- int threads_online = __get_one_smt_state(c, threads_per_cpu);
-
+ for (i = 0; i < num_present_cores; i++) {
+ core_id = present_cores[i];
+ threads_online = __get_one_smt_state(core_id, threads_per_cpu);
if (threads_online < 0) {
rc = threads_online;
- goto cleanup_get_smt;
+ goto cleanup_cpu_states;
+ }
+ if (threads_online) {
+ CPU_SET_S(core_id, cpu_state_size, cpu_states[threads_online - 1]);
}
- if (threads_online)
- CPU_SET_S(c, cpu_state_size,
- cpu_states[threads_online - 1]);
}

for (thread = 0; thread < threads_per_cpu; thread++) {
if (CPU_COUNT_S(cpu_state_size, cpu_states[thread])) {
- if (smt_state == 0)
+ if (smt_state == -1)
smt_state = thread + 1;
else if (smt_state > 0)
smt_state = 0; /* mix of SMT modes */
}
}

- if (!print_smt_state)
- return smt_state;
+ if (!print_smt_state) {
+ rc = smt_state;
+ goto cleanup_cpu_states;
+ }

if (smt_state == 1) {
if (numeric)
@@ -380,11 +407,9 @@ int __do_smt(bool numeric, int cpus_in_system, int threads_per_cpu,
printf("SMT is off\n");
} else if (smt_state == 0) {
for (thread = 0; thread < threads_per_cpu; thread++) {
- if (CPU_COUNT_S(cpu_state_size,
- cpu_states[thread])) {
+ if (CPU_COUNT_S(cpu_state_size, cpu_states[thread])) {
printf("SMT=%d: ", thread + 1);
- print_cpu_list(cpu_states[thread],
- cpu_state_size, cpus_in_system);
+ print_cpu_list(cpu_states[thread], cpu_state_size, threads_per_cpu);
printf("\n");
}
}
@@ -392,9 +417,12 @@ int __do_smt(bool numeric, int cpus_in_system, int threads_per_cpu,
printf("SMT=%d\n", smt_state);
}

-cleanup_get_smt:
+cleanup_cpu_states:
for (thread = 0; thread < threads_per_cpu; thread++)
CPU_FREE(cpu_states[thread]);
+ free(cpu_states);
+cleanup_present_cores:
+ free(present_cores);

return rc;
}
diff --git a/src/ppc64_cpu.c b/src/ppc64_cpu.c
index 40172409ab81..0233d2900354 100644
--- a/src/ppc64_cpu.c
+++ b/src/ppc64_cpu.c
@@ -52,7 +52,6 @@

#define DSCR_DEFAULT_PATH "/sys/devices/system/cpu/dscr_default"

-#define MAX_NR_CPUS 1024
#define DIAGNOSTICS_RUN_MODE 42
#define CPU_OFFLINE -1

@@ -266,21 +265,31 @@ static int get_one_smt_state(int core)
static int get_smt_state(void)
{
int smt_state = -1;
- int i;
+ int i, rc;
+ int *present_cores;
+ int num_present_cores;
+
+ rc = get_present_core_list(&present_cores, &num_present_cores, threads_per_cpu);
+ if (rc != 0) {
+ return -1;
+ }
+
+ for (i = 0; i < num_present_cores; i++) {
+ int cpu_state = get_one_smt_state(present_cores[i]);

- for (i = 0; i < cpus_in_system; i++) {
- int cpu_state = get_one_smt_state(i);
if (cpu_state == 0)
continue;

if (smt_state == -1)
smt_state = cpu_state;
+
if (smt_state != cpu_state) {
smt_state = -1;
break;
}
}

+ free(present_cores);
return smt_state;
}

@@ -313,20 +322,36 @@ static int set_smt_state(int smt_state)
{
int i, j, rc = 0;
int error = 0;
+ int cpu_base, cpu_id, core_id;
+ int *present_cores = NULL;
+ int num_present_cores;

if (!sysattr_is_writeable("online")) {
perror("Cannot set smt state");
return -1;
}

- for (i = 0; i < threads_in_system; i += threads_per_cpu) {
+ rc = get_present_core_list(&present_cores, &num_present_cores, threads_per_cpu);
+
+ if (rc != 0) {
+ fprintf(stderr, "Failed to retrieve present core list\n");
+ return rc;
+ }
+
+ for (i = 0; i < num_present_cores; i++) {
+
+ core_id = present_cores[i];
+ cpu_base = core_id * threads_per_cpu;
+
/* Online means any thread on this core running, so check all
* threads in the core, not just the first. */
for (j = 0; j < threads_per_cpu; j++) {
- if (!cpu_online(i + j))
+ cpu_id = cpu_base + j;
+
+ if (!cpu_online(cpu_id))
continue;

- rc = set_one_smt_state(i, smt_state);
+ rc = set_one_smt_state(cpu_base, smt_state);
/* Record an error, but do not check result: if we
* have failed to set this core, keep trying
* subsequent ones. */
@@ -336,10 +361,13 @@ static int set_smt_state(int smt_state)
}
}

+ free(present_cores);
+
if (error) {
- fprintf(stderr, "One or more cpus could not be on/offlined\n");
+ fprintf(stderr, "One or more CPUs could not be on/offlined\n");
return -1;
}
+
return rc;
}

@@ -459,8 +487,8 @@ static int do_subcores_per_core(char *state)
}
printf("Subcores per core: %d\n", subcore_state);
} else {
- /* Kernel decides what values are valid, so no need to
- * check here. */
+ /* Kernel decides what values are valid, so no need to
+ * check here. */
subcore_state = strtol(state, NULL, 0);
rc = set_attribute(SYSFS_SUBCORES, "%d", subcore_state);
if (rc) {
@@ -1038,7 +1066,7 @@ static int set_all_threads_off(int cpu, int smt_state)
snprintf(path, SYSFS_PATH_MAX, SYSFS_CPUDIR"/%s", i, "online");
rc = offline_thread(path);
if (rc == -1)
- printf("Unable to take cpu%d offline", i);
+ printf("Unable to take CPU %d offline\n", i);
}

return rc;
@@ -1065,11 +1093,13 @@ static int set_one_core(int smt_state, int core, int state)
static int do_online_cores(char *cores, int state)
{
int smt_state;
- int *core_state, *desired_core_state;
+ int *core_state = NULL, *desired_core_state = NULL;
int i, rc = 0;
- int core;
+ int core, valid = 0, core_idx = 0;
char *str, *token, *end_token;
bool first_core = true;
+ int *present_cores = NULL;
+ int num_present_cores;

if (cores) {
if (!sysattr_is_writeable("online")) {
@@ -1083,49 +1113,62 @@ static int do_online_cores(char *cores, int state)
}
}

+ rc = get_present_core_list(&present_cores, &num_present_cores, threads_per_cpu);
+ if (rc != 0) {
+ fprintf(stderr, "Failed to retrieve present core list\n");
+ return rc;
+ }
+
smt_state = get_smt_state();

- core_state = calloc(cpus_in_system, sizeof(int));
- if (!core_state)
+ core_state = calloc(num_present_cores, sizeof(int));
+ if (!core_state) {
+ free(present_cores);
return -ENOMEM;
+ }

- for (i = 0; i < cpus_in_system ; i++)
- core_state[i] = (get_one_smt_state(i) > 0);
+ for (i = 0; i < num_present_cores; i++) {
+ core_state[i] = (get_one_smt_state(present_cores[i]) > 0);
+ }

if (!cores) {
printf("Cores %s = ", state == 0 ? "offline" : "online");
- for (i = 0; i < cpus_in_system; i++) {
+ for (i = 0; i < num_present_cores; i++) {
if (core_state[i] == state) {
if (first_core)
first_core = false;
else
printf(",");
- printf("%d", i);
+ printf("%d", present_cores[i]);
}
}
printf("\n");
free(core_state);
+ free(present_cores);
return 0;
}

if (smt_state == -1) {
printf("Bad or inconsistent SMT state: use ppc64_cpu --smt=on|off to set all\n"
- "cores to have the same number of online threads to continue.\n");
+ "cores to have the same number of online threads to continue.\n");
do_info();
+ free(present_cores);
return -1;
}

- desired_core_state = calloc(cpus_in_system, sizeof(int));
+ desired_core_state = calloc(num_present_cores, sizeof(int));
if (!desired_core_state) {
free(core_state);
+ free(present_cores);
return -ENOMEM;
}

- for (i = 0; i < cpus_in_system; i++)
+ for (i = 0; i < num_present_cores; i++) {
/*
* Not specified on command-line
*/
desired_core_state[i] = -1;
+ }

str = cores;
while (1) {
@@ -1141,42 +1184,57 @@ static int do_online_cores(char *cores, int state)
rc = -1;
continue;
}
- if (core >= cpus_in_system || core < 0) {
+
+ for (i = 0; i < num_present_cores; i++) {
+ if (core == present_cores[i]) {
+ valid = 1;
+ core_idx = i;
+ break;
+ }
+ }
+
+ if (!valid) {
printf("Invalid core to %s: %d\n", state == 0 ? "offline" : "online", core);
rc = -1;
continue;
}
- desired_core_state[core] = state;
+
+ desired_core_state[core_idx] = state;
}

if (rc) {
- free(core_state);
- free(desired_core_state);
- return rc;
+ goto cleanup;
}

- for (i = 0; i < cpus_in_system; i++) {
+ for (i = 0; i < num_present_cores; i++) {
if (desired_core_state[i] != -1) {
- rc = set_one_core(smt_state, i, state);
- if (rc)
+ rc = set_one_core(smt_state, present_cores[i], state);
+ if (rc) {
+ fprintf(stderr, "Failed to set core %d to %s\n", present_cores[i], state == 0 ? "offline" : "online");
break;
+ }
}
}

+cleanup:
free(core_state);
free(desired_core_state);
+ free(present_cores);
+
return rc;
}

static int do_cores_on(char *state)
{
int smt_state;
- int *core_state;
- int cores_now_online = 0;
- int i, rc;
+ int cores_now_online = 0, core_id = 0;
+ int i, rc = 0;
int number_to_have, number_to_change = 0, number_changed = 0;
+ int *core_state = NULL;
int new_state;
char *end_state;
+ int *present_cores = NULL;
+ int num_present_cores;

if (state) {
if (!sysattr_is_writeable("online")) {
@@ -1194,24 +1252,33 @@ static int do_cores_on(char *state)
if (!core_state)
return -ENOMEM;

- for (i = 0; i < cpus_in_system ; i++) {
- core_state[i] = (get_one_smt_state(i) > 0);
- if (core_state[i])
+ rc = get_present_core_list(&present_cores, &num_present_cores, threads_per_cpu);
+ if (rc != 0) {
+ fprintf(stderr, "Failed to retrieve present core list\n");
+ free(core_state);
+ return rc;
+ }
+ for (i = 0; i < num_present_cores; i++) {
+ int core = present_cores[i];
+ core_state[i] = (get_one_smt_state(core) > 0);
+ if (core_state[i]) {
cores_now_online++;
+ }
}

if (!state) {
printf("Number of cores online = %d\n", cores_now_online);
- free(core_state);
- return 0;
+ rc = 0;
+ goto cleanup;
}

smt_state = get_smt_state();
if (smt_state == -1) {
printf("Bad or inconsistent SMT state: use ppc64_cpu --smt=on|off to set all\n"
- "cores to have the same number of online threads to continue.\n");
+ "cores to have the same number of online threads to continue.\n");
do_info();
- return -1;
+ rc = -1;
+ goto cleanup;
}

if (!strcmp(state, "all")) {
@@ -1227,15 +1294,16 @@ static int do_cores_on(char *state)
}

if (number_to_have == cores_now_online) {
- free(core_state);
- return 0;
+ rc = 0;
+ goto cleanup;
}

- if (number_to_have > cpus_in_system) {
- printf("Cannot online more cores than are present.\n");
+ if (number_to_have <= 0 || number_to_have > cpus_in_system) {
+ printf("Error: Invalid number of cores requested: %d, possible values \
+ should be in range: (1-%d)\n", number_to_have, cpus_in_system);
do_cores_present();
- free(core_state);
- return -1;
+ rc = -1;
+ goto cleanup;
}

if (number_to_have > cores_now_online) {
@@ -1248,41 +1316,50 @@ static int do_cores_on(char *state)

if (new_state) {
for (i = 0; i < cpus_in_system; i++) {
+ core_id = present_cores[i];
if (!core_state[i]) {
- rc = set_one_core(smt_state, i, new_state);
- if (!rc)
+ rc = set_one_core(smt_state, core_id, new_state);
+ if (!rc) {
number_changed++;
- if (number_changed >= number_to_change)
+ }
+ if (number_changed >= number_to_change) {
break;
+ }
}
}
} else {
- for (i = cpus_in_system - 1; i > 0; i--) {
+ for (i = cpus_in_system - 1; i >= 0; i--) {
+ core_id = present_cores[i];
if (core_state[i]) {
- rc = set_one_core(smt_state, i, new_state);
- if (!rc)
+ rc = set_one_core(smt_state, core_id, new_state);
+ if (!rc) {
number_changed++;
- if (number_changed >= number_to_change)
+ }
+ if (number_changed >= number_to_change) {
break;
+ }
}
}
}

if (number_changed != number_to_change) {
cores_now_online = 0;
- for (i = 0; i < cpus_in_system ; i++) {
- if (cpu_online(i * threads_per_cpu))
+ for (i = 0; i < cpus_in_system; i++) {
+ core_id = present_cores[i];
+ if (cpu_online(core_id * threads_per_cpu)) {
cores_now_online++;
+ }
}
printf("Failed to set requested number of cores online.\n"
- "Requested: %d cores, Onlined: %d cores\n",
- number_to_have, cores_now_online);
- free(core_state);
- return -1;
+ "Requested: %d cores, Onlined: %d cores\n",
+ number_to_have, cores_now_online);
+ rc = -1;
}

+cleanup:
free(core_state);
- return 0;
+ free(present_cores);
+ return rc;
}

static bool core_is_online(int core)
@@ -1294,35 +1371,45 @@ static int do_info(void)
{
int i, j, thread_num;
char online;
- int core, subcores = 0;
+ int subcores = 0, core_id = 0;
+ int *present_cores = NULL;
+ int num_present_cores;

- if (is_subcore_capable())
+ if (is_subcore_capable()) {
subcores = num_subcores();
+ }

- for (i = 0, core = 0; core < cpus_in_system; i++) {
+ int rc = get_present_core_list(&present_cores, &num_present_cores, threads_per_cpu);
+ if (rc != 0) {
+ fprintf(stderr, "Failed to retrieve present core list\n");
+ return rc;
+ }

- if (!core_is_online(i))
+ for (i = 0; i < num_present_cores; i++) {
+ core_id = present_cores[i];
+ if (!core_is_online(core_id)) {
continue;
+ }

if (subcores > 1) {
- if (core % subcores == 0)
- printf("Core %3d:\n", core/subcores);
- printf(" Subcore %3d: ", core);
+ if (core_id % subcores == 0) {
+ printf("Core %3d:\n", core_id / subcores);
+ }
+ printf(" Subcore %3d: ", core_id);
} else {
- printf("Core %3d: ", core);
+ printf("Core %3d: ", core_id);
}

- thread_num = i * threads_per_cpu;
- for (j = 0; j < threads_per_cpu; j++, thread_num++) {
+ for (j = 0; j < threads_per_cpu; j++) {
+ thread_num = core_id * threads_per_cpu + j;
online = cpu_online(thread_num) ? '*' : ' ';
printf("%4d%c ", thread_num, online);
}
printf("\n");
- core++;
}
+ free(present_cores);
return 0;
}
-
static void usage(void)
{
printf(
--
2.48.1

Tyrel Datwyler

<tyreld@linux.ibm.com>
unread,
Feb 25, 2025, 6:20:47 PMFeb 25
to powerpc-utils-devel@googlegroups.com, Aboorva Devarajan
On 2/25/25 2:19 PM, Tyrel Datwyler wrote:
> From: Aboorva Devarajan <aboo...@linux.ibm.com>
>
> Introduce get_present_core_list helper function to accurately parse
> and retrieve the list of present CPU cores, addressing gaps in core
> numbering caused by dynamic addition or removal of CPUs (via CPU DLPAR
> operation)
>
> Utilizes the present CPU list from `sys/devices/system/cpu/present`
> to handle non-contiguous CPU IDs. Accurately maps core IDs to CPUs
> considering specified number of threads per CPU, addressing gaps in
> core numbering.
>
> Signed-off-by: Aboorva Devarajan <aboo...@linux.ibm.com>
> Signed-off-by: Tyrel Datwyler <tyr...@linux.ibm.com>
> ---

Patches applied to powerpc-utils/next branch.

[1/2]
https://github.com/ibm-power-utilities/powerpc-utils/commit/d6a771d970b0286edfaf50cfdff2673c92df6a47

[2/2]
https://github.com/ibm-power-utilities/powerpc-utils/commit/ae2cfe74ce0256a0c18478d9d0550f4cc88658cb

Thanks,
-Tyrel
Reply all
Reply to author
Forward
0 new messages