drmgr selects the LMBs to remove based on the NUMA node ratio and
calls the kernel interface to remove each selected LMB. The kernel
interface removes an LMB only after all of its pages are isolated,
but this page isolation can take a long time, which may hold up
the memory removal process. The kernel interface returns to
user space if any signals are pending.
So do not allow the kernel interface to execute for more than 30 secs
for each LMB removal. Set up a 30 sec timer that generates a SIGUSR1
signal for each kernel interface call.
Signed-off-by: Haren Myneni <ha...@linux.ibm.com>
---
src/drmgr/common.c | 1 +
src/drmgr/drslot_chrp_mem.c | 68 +++++++++++++++++++++++++++++++++----
2 files changed, 63 insertions(+), 6 deletions(-)
diff --git a/src/drmgr/common.c b/src/drmgr/common.c
index 68041a9..70f5d3b 100644
--- a/src/drmgr/common.c
+++ b/src/drmgr/common.c
@@ -937,6 +937,7 @@ sig_setup(void)
sigdelset(&sigset, SIGALRM);
sigdelset(&sigset, SIGQUIT);
sigdelset(&sigset, SIGABRT);
+ sigdelset(&sigset, SIGUSR1);
/* Now block all remaining signals */
rc = sigprocmask(SIG_SETMASK, &sigset, NULL);
diff --git a/src/drmgr/drslot_chrp_mem.c b/src/drmgr/drslot_chrp_mem.c
index 4a3fe3a..ea8e8ce 100644
--- a/src/drmgr/drslot_chrp_mem.c
+++ b/src/drmgr/drslot_chrp_mem.c
@@ -39,6 +39,10 @@
uint64_t block_sz_bytes = 0;
static char *state_strs[] = {"offline", "online"};
sig_atomic_t numa_mem_timeout = 0;
+sig_atomic_t lmb_rm_timeout = 0;
+struct itimerspec lmb_tval;
+struct sigevent lmb_sevent;
+timer_t lmb_timer;
static char *usagestr = "-c mem {-a | -r} {-q <quantity> -p {variable_weight | ent_capacity} | {-q <quantity> | -s [<drc_name> | <drc_index>]}}";
@@ -62,6 +66,15 @@ void mem_timeout_handler(int sig)
numa_mem_timeout = 1;
}
+/*
+ * SIGUSR1 handler for the LMB removal timeout; used
+ * only for NUMA based memory removal.
+ */
+void lmb_rm_timeout_handler(int sig)
+{
+ if (sig == SIGUSR1) lmb_rm_timeout = 1;
+}
+
/**
* report_resource_count
* @brief Report the number of LMBs that were added or removed.
@@ -1461,12 +1474,36 @@ int valid_mem_options(void)
static int remove_lmb_by_index(uint32_t drc_index)
{
char cmdbuf[128];
- int offset;
+ int offset, rc;
offset = sprintf(cmdbuf, "memory remove index 0x%x", drc_index);
- return do_kernel_dlpar_common(cmdbuf, offset,
- 1 /* Don't report error */);
+ /*
+ * The kernel interface removes LMB only after all pages are
+ * isolated. So sometimes the kernel waits forever to isolate
+ * pages and the drmgr can not make any progress. The kernel
+ * returns to the user space for any pending signals.
+ *
+ * Setup 30 secs timer and generate SIGUSR1 signal in case
+ * the kernel request takes longer than 30 secs.
+ */
+ lmb_rm_timeout = 0;
+ lmb_tval.it_value.tv_sec = 30;
+ timer_settime(lmb_timer, 0, &lmb_tval, NULL);
+
+ rc = do_kernel_dlpar_common(cmdbuf, offset,
+ 1 /* Don't report error */);
+
+ if (!lmb_rm_timeout) {
+ /*
+ * Disable the timer if the kernel request returned before
+ * 30 secs interval.
+ */
+ lmb_tval.it_value.tv_sec = 0;
+ timer_settime(lmb_timer, 0, &lmb_tval, NULL);
+ }
+
+ return rc;
}
static int remove_lmb_from_node(struct ppcnuma_node *node, uint32_t count)
@@ -1707,12 +1744,30 @@ static void clear_numa_lmb_links(void)
* (with -w option). In the case of LMB removal, the kernel
* interface can run longer until all pages in LMB are isolated
* and can return to the user space if any pending signals.
- * This SIGALRM signal can exit the kernel in case LMB removal
- * is taking longer than timeout.
+ * This may cause drmgr to wait forever on a single LMB removal and
+ * make no further progress. So set up a 30 sec SIGUSR1 timer for
+ * each LMB kernel removal request.
+ *
+ * This SIGALRM signal is used to exit drmgr in case the complete
+ * memory removal process takes longer than the timeout value.
*/
static int drmem_timer_setup(void)
{
- struct sigaction sigact;
+ struct sigaction sigact, lmb_sigact;
+
+ lmb_sigact.sa_handler = lmb_rm_timeout_handler;
+ sigemptyset(&lmb_sigact.sa_mask);
+ lmb_sigact.sa_flags = 0;
+ sigaction(SIGUSR1, &lmb_sigact, NULL);
+
+ lmb_sevent.sigev_notify = SIGEV_SIGNAL;
+ lmb_sevent.sigev_signo = SIGUSR1;
+ lmb_sevent.sigev_value.sival_ptr = &lmb_timer;
+ timer_create(CLOCK_MONOTONIC, &lmb_sevent, &lmb_timer);
+ lmb_tval.it_value.tv_sec = 30;
+ lmb_tval.it_value.tv_nsec = 0;
+ lmb_tval.it_interval.tv_sec = 0;
+ lmb_tval.it_interval.tv_nsec = 0;
if (!usr_timeout)
return 0;
@@ -1770,6 +1825,7 @@ static int numa_based_remove(uint32_t count)
out_free:
free_lmbs(lmb_list);
out_clear:
+ timer_delete(lmb_timer);
clear_numa_lmb_links();
report_resource_count(done);
return rc;
--
2.54.0