Hello Petr,
I agree. We should probably store the last woken worker in the worker_pool
structure and print it later.
I've spent some time verifying that the locking and lifecycle management are
correct. While I'm not completely certain, I believe it's getting closer. An
extra pair of eyes would be helpful.
This is the new version of this patch:
commit feccca7e696ead3272669ee4d4dc02b6946d0faf
Author: Breno Leitao <lei...@debian.org>
Date: Mon Mar 16 09:47:09 2026 -0700
workqueue: print diagnostic info when no worker is in running state
show_cpu_pool_busy_workers() iterates over busy workers but gives no
feedback when none are found in running state, which is a key indicator
that a pool may be stuck — unable to wake an idle worker to process
pending work.
Add a diagnostic message when no running workers are found, reporting
pool id, CPU, idle state, and worker counts. Also trigger a single-CPU
backtrace for the stalled CPU.
To identify the task most likely responsible for the stall, add
last_woken_worker (L: pool->lock) to worker_pool and record it in
kick_pool() just before wake_up_process(). This captures the idle
worker that was kicked to take over when the last running worker went to
sleep; if the pool is now stuck with no running worker, that task is the
prime suspect and its backtrace is dumped.
Using struct worker * rather than struct task_struct * avoids any
lifetime concern: workers are only destroyed via set_worker_dying()
which requires pool->lock, and set_worker_dying() clears
last_woken_worker when the dying worker matches. show_cpu_pool_busy_workers()
holds pool->lock while calling sched_show_task(), so last_woken_worker
is either NULL or points to a live worker with a valid task. More
precisely, set_worker_dying() clears last_woken_worker before setting
WORKER_DIE, so a non-NULL last_woken_worker means the kthread has not
yet exited and worker->task is still alive.
The pool info message is printed inside pool->lock using
printk_deferred_enter/exit, the same pattern used by the existing
busy-worker loop, to avoid deadlocks with console drivers that queue
work while holding locks also taken in their write paths.
trigger_single_cpu_backtrace() is called after releasing the lock.
Suggested-by: Petr Mladek <pml...@suse.com>
Signed-off-by: Breno Leitao <lei...@debian.org>
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index b77119d71641a..38aebf4514c03 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -217,6 +217,7 @@ struct worker_pool {
/* L: hash of busy workers */
struct worker *manager; /* L: purely informational */
+ struct worker *last_woken_worker; /* L: last worker woken by kick_pool() */
struct list_head workers; /* A: attached workers */
struct ida worker_ida; /* worker IDs for task name */
@@ -1295,6 +1296,9 @@ static bool kick_pool(struct worker_pool *pool)
}
}
#endif
+ /* Track the last idle worker woken, used for stall diagnostics. */
+ pool->last_woken_worker = worker;
+
wake_up_process(p);
return true;
}
@@ -2902,6 +2906,13 @@ static void set_worker_dying(struct worker *worker, struct list_head *list)
pool->nr_workers--;
pool->nr_idle--;
+ /*
+ * Clear last_woken_worker if it points to this worker, so that
+ * show_cpu_pool_busy_workers() cannot dereference a freed worker.
+ */
+ if (pool->last_woken_worker == worker)
+ pool->last_woken_worker = NULL;
+
worker->flags |= WORKER_DIE;
list_move(&worker->entry, list);
@@ -7582,20 +7593,58 @@ module_param_named(panic_on_stall_time, wq_panic_on_stall_time, uint, 0644);
MODULE_PARM_DESC(panic_on_stall_time, "Panic if stall exceeds this many seconds (0=disabled)");
/*
- * Show workers that might prevent the processing of pending work items.
- * A busy worker that is not running on the CPU (e.g. sleeping in
- * wait_event_idle() with PF_WQ_WORKER cleared) can stall the pool just as
- * effectively as a CPU-bound one, so dump every in-flight worker.
+ * Report that a pool has no worker in running state, which is a sign that the
+ * pool may be stuck. Print pool info. Must be called with pool->lock held;
+ * printing is deferred internally via printk_deferred_enter/exit.
+ */
+static void show_pool_no_running_worker(struct worker_pool *pool)
+{
+ lockdep_assert_held(&pool->lock);
+
+ printk_deferred_enter();
+ pr_info("pool %d: no worker in running state, cpu=%d is %s (nr_workers=%d nr_idle=%d)\n",
+ pool->id, pool->cpu,
+ idle_cpu(pool->cpu) ? "idle" : "busy",
+ pool->nr_workers, pool->nr_idle);
+ pr_info("The pool might have trouble waking an idle worker.\n");
+ /*
+ * last_woken_worker and its task are valid here: set_worker_dying()
+ * clears it under pool->lock before setting WORKER_DIE, so if
+ * last_woken_worker is non-NULL the kthread has not yet exited and
+ * worker->task is still alive.
+ */
+ if (pool->last_woken_worker) {
+ pr_info("Backtrace of last woken worker:\n");
+ sched_show_task(pool->last_woken_worker->task);
+ } else {
+ pr_info("Last woken worker empty\n");
+ }
+ printk_deferred_exit();
+}
+
+/*
+ * Show running workers that might prevent the processing of pending work items.
+ * If no running worker is found, the pool may be stuck waiting for an idle
+ * worker to be woken, so report the pool state and the last woken worker.
*/
static void show_cpu_pool_busy_workers(struct worker_pool *pool)
{
struct worker *worker;
unsigned long irq_flags;
- int bkt;
+ bool found_running = false;
+ int cpu, bkt;
raw_spin_lock_irqsave(&pool->lock, irq_flags);
+ /* Snapshot cpu inside the lock to safely use it after unlock. */
+ cpu = pool->cpu;
+
hash_for_each(pool->busy_hash, bkt, worker, hentry) {
+ /* Skip workers that are not actively running on the CPU. */
+ if (!task_is_running(worker->task))
+ continue;
+
+ found_running = true;
/*
* Defer printing to avoid deadlocks in console
* drivers that queue work while holding locks
@@ -7609,7 +7658,23 @@ static void show_cpu_pool_busy_workers(struct worker_pool *pool)
printk_deferred_exit();
}
+ /*
+ * If no running worker was found, the pool is likely stuck. Print pool
+ * state and the backtrace of the last woken worker, which is the prime
+ * suspect for the stall.
+ */
+ if (!found_running)
+ show_pool_no_running_worker(pool);
+
raw_spin_unlock_irqrestore(&pool->lock, irq_flags);
+
+ /*
+ * Trigger a backtrace on the stalled CPU to capture what it is
+ * currently executing. Called after releasing the lock to avoid
+ * any potential issues with NMI delivery.
+ */
+ if (!found_running)
+ trigger_single_cpu_backtrace(cpu);
}
static void show_cpu_pools_busy_workers(void)