[PATCH PREEMPT_RT] kcov: fix locking splat from kcov_remote_start()

16 views
Skip to first unread message

Clark Williams

unread,
Aug 9, 2021, 4:59:16 PM8/9/21
to Sebastian Andrzej Siewior, Thomas Gleixner, Steven Rostedt, Dmitry Vyukov, Andrey Konovalov, kasa...@googlegroups.com, linux-...@vger.kernel.org
Saw the following splat on 5.14-rc4-rt5 with:

CONFIG_KCOV=y
CONFIG_KCOV_INSTRUMENT_ALL=y
CONFIG_KCOV_IRQ_AREA_SIZE=0x40000
CONFIG_RUNTIME_TESTING_MENU=y

kernel: ehci-pci 0000:00:1d.0: USB 2.0 started, EHCI 1.00
kernel: BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:35
kernel: in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 34, name: ksoftirqd/3
kernel: 4 locks held by ksoftirqd/3/34:
kernel: #0: ffff944376d989f8 ((softirq_ctrl.lock).lock){+.+.}-{2:2}, at: __local_bh_disable_ip+0xe0/0x190
kernel: #1: ffffffffbbfb61e0 (rcu_read_lock){....}-{1:2}, at: rt_spin_lock+0x5/0xd0
kernel: #2: ffffffffbbfb61e0 (rcu_read_lock){....}-{1:2}, at: __local_bh_disable_ip+0xbd/0x190
kernel: #3: ffffffffbc086518 (kcov_remote_lock){....}-{2:2}, at: kcov_remote_start+0x119/0x4a0
kernel: irq event stamp: 4653
kernel: hardirqs last enabled at (4652): [<ffffffffbafb85ce>] _raw_spin_unlock_irqrestore+0x6e/0x80
kernel: hardirqs last disabled at (4653): [<ffffffffba2517c8>] kcov_remote_start+0x298/0x4a0
kernel: softirqs last enabled at (4638): [<ffffffffba110a5b>] run_ksoftirqd+0x9b/0x100
kernel: softirqs last disabled at (4644): [<ffffffffba149f12>] smpboot_thread_fn+0x2b2/0x410
kernel: CPU: 3 PID: 34 Comm: ksoftirqd/3 Not tainted 5.14.0-rc4-rt5+ #3
kernel: Hardware name: /NUC5i7RYB, BIOS RYBDWi35.86A.0359.2016.0906.1028 09/06/2016
kernel: Call Trace:
kernel: dump_stack_lvl+0x7a/0x9b
kernel: ___might_sleep.cold+0xf3/0x107
kernel: rt_spin_lock+0x3a/0xd0
kernel: ? kcov_remote_start+0x119/0x4a0
kernel: kcov_remote_start+0x119/0x4a0
kernel: ? led_trigger_blink_oneshot+0x83/0xa0
kernel: __usb_hcd_giveback_urb+0x161/0x1e0
kernel: usb_giveback_urb_bh+0xb6/0x110
kernel: tasklet_action_common.constprop.0+0xe8/0x110
kernel: __do_softirq+0xe2/0x525
kernel: ? smpboot_thread_fn+0x31/0x410
kernel: run_ksoftirqd+0x8c/0x100
kernel: smpboot_thread_fn+0x2b2/0x410
kernel: ? smpboot_register_percpu_thread+0x130/0x130
kernel: kthread+0x1de/0x210
kernel: ? set_kthread_struct+0x60/0x60
kernel: ret_from_fork+0x22/0x30
kernel: usb usb1: New USB device found, idVendor=1d6b, idProduct=0002, bcdDevice= 5.14


Change kcov_remote_lock from regular spinlock_t to raw_spinlock_t so that
we don't get "sleeping function called from invalid context" on PREEMPT_RT kernel.

Signed-off-by: Clark Williams <will...@redhat.com>
---
kernel/kcov.c | 28 ++++++++++++++--------------
1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/kernel/kcov.c b/kernel/kcov.c
index 80bfe71bbe13..60f903f8a46c 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -82,7 +82,7 @@ struct kcov_remote {
struct hlist_node hnode;
};

-static DEFINE_SPINLOCK(kcov_remote_lock);
+static DEFINE_RAW_SPINLOCK(kcov_remote_lock);
static DEFINE_HASHTABLE(kcov_remote_map, 4);
static struct list_head kcov_remote_areas = LIST_HEAD_INIT(kcov_remote_areas);

@@ -375,7 +375,7 @@ static void kcov_remote_reset(struct kcov *kcov)
struct hlist_node *tmp;
unsigned long flags;

- spin_lock_irqsave(&kcov_remote_lock, flags);
+ raw_spin_lock_irqsave(&kcov_remote_lock, flags);
hash_for_each_safe(kcov_remote_map, bkt, tmp, remote, hnode) {
if (remote->kcov != kcov)
continue;
@@ -384,7 +384,7 @@ static void kcov_remote_reset(struct kcov *kcov)
}
/* Do reset before unlock to prevent races with kcov_remote_start(). */
kcov_reset(kcov);
- spin_unlock_irqrestore(&kcov_remote_lock, flags);
+ raw_spin_unlock_irqrestore(&kcov_remote_lock, flags);
}

static void kcov_disable(struct task_struct *t, struct kcov *kcov)
@@ -638,18 +638,18 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
kcov->t = t;
kcov->remote = true;
kcov->remote_size = remote_arg->area_size;
- spin_lock_irqsave(&kcov_remote_lock, flags);
+ raw_spin_lock_irqsave(&kcov_remote_lock, flags);
for (i = 0; i < remote_arg->num_handles; i++) {
if (!kcov_check_handle(remote_arg->handles[i],
false, true, false)) {
- spin_unlock_irqrestore(&kcov_remote_lock,
+ raw_spin_unlock_irqrestore(&kcov_remote_lock,
flags);
kcov_disable(t, kcov);
return -EINVAL;
}
remote = kcov_remote_add(kcov, remote_arg->handles[i]);
if (IS_ERR(remote)) {
- spin_unlock_irqrestore(&kcov_remote_lock,
+ raw_spin_unlock_irqrestore(&kcov_remote_lock,
flags);
kcov_disable(t, kcov);
return PTR_ERR(remote);
@@ -658,7 +658,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
if (remote_arg->common_handle) {
if (!kcov_check_handle(remote_arg->common_handle,
true, false, false)) {
- spin_unlock_irqrestore(&kcov_remote_lock,
+ raw_spin_unlock_irqrestore(&kcov_remote_lock,
flags);
kcov_disable(t, kcov);
return -EINVAL;
@@ -666,14 +666,14 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
remote = kcov_remote_add(kcov,
remote_arg->common_handle);
if (IS_ERR(remote)) {
- spin_unlock_irqrestore(&kcov_remote_lock,
+ raw_spin_unlock_irqrestore(&kcov_remote_lock,
flags);
kcov_disable(t, kcov);
return PTR_ERR(remote);
}
t->kcov_handle = remote_arg->common_handle;
}
- spin_unlock_irqrestore(&kcov_remote_lock, flags);
+ raw_spin_unlock_irqrestore(&kcov_remote_lock, flags);
/* Put either in kcov_task_exit() or in KCOV_DISABLE. */
kcov_get(kcov);
return 0;
@@ -845,10 +845,10 @@ void kcov_remote_start(u64 handle)
return;
}

- spin_lock(&kcov_remote_lock);
+ raw_spin_lock(&kcov_remote_lock);
remote = kcov_remote_find(handle);
if (!remote) {
- spin_unlock_irqrestore(&kcov_remote_lock, flags);
+ raw_spin_unlock_irqrestore(&kcov_remote_lock, flags);
return;
}
kcov_debug("handle = %llx, context: %s\n", handle,
@@ -869,7 +869,7 @@ void kcov_remote_start(u64 handle)
size = CONFIG_KCOV_IRQ_AREA_SIZE;
area = this_cpu_ptr(&kcov_percpu_data)->irq_area;
}
- spin_unlock_irqrestore(&kcov_remote_lock, flags);
+ raw_spin_unlock_irqrestore(&kcov_remote_lock, flags);

/* Can only happen when in_task(). */
if (!area) {
@@ -1008,9 +1008,9 @@ void kcov_remote_stop(void)
spin_unlock(&kcov->lock);

if (in_task()) {
- spin_lock(&kcov_remote_lock);
+ raw_spin_lock(&kcov_remote_lock);
kcov_remote_area_put(area, size);
- spin_unlock(&kcov_remote_lock);
+ raw_spin_unlock(&kcov_remote_lock);
}

local_irq_restore(flags);
--
2.31.1



--
The United States Coast Guard
Ruining Natural Selection since 1790

Marco Elver

unread,
Aug 10, 2021, 5:43:08 AM8/10/21
to Clark Williams, Sebastian Andrzej Siewior, Thomas Gleixner, Steven Rostedt, Dmitry Vyukov, Andrey Konovalov, kasa...@googlegroups.com, linux-...@vger.kernel.org
Reviewed-by: Marco Elver <el...@google.com>

Indeed, most other debugging tools are using raw_spinlock or
arch_spinlock, I guess KCOV was still lagging behind. Should this go
into mainline?
> --
> You received this message because you are subscribed to the Google Groups "kasan-dev" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to kasan-dev+...@googlegroups.com.
> To view this discussion on the web visit https://groups.google.com/d/msgid/kasan-dev/20210809155909.333073de%40theseus.lan.

Sebastian Andrzej Siewior

unread,
Aug 10, 2021, 5:50:34 AM8/10/21
to Clark Williams, Thomas Gleixner, Steven Rostedt, Dmitry Vyukov, Andrey Konovalov, kasa...@googlegroups.com, linux-...@vger.kernel.org
On 2021-08-09 15:59:09 [-0500], Clark Williams wrote:
> Saw the following splat on 5.14-rc4-rt5 with:

> Change kcov_remote_lock from regular spinlock_t to raw_spinlock_t so that
> we don't get "sleeping function called from invalid context" on PREEMPT_RT kernel.

I'm not entirely happy with that:
- kcov_remote_start() decouples spin_lock_irq() and does local_irq_save()
+ spin_lock() which shouldn't be done as per
Documentation/locking/locktypes.rst
I would prefer to see the local_irq_save() replaced by
local_lock_irqsave() so we get a context on what is going on.

- kcov_remote_reset() has a kfree() with that irq-off lock acquired.

- kcov_remote_add() has a kmalloc() and is invoked with that irq-off
lock acquired.

- kcov_remote_area_put() uses INIT_LIST_HEAD() for no reason (just
happen to notice).

- kcov_remote_stop() does local_irq_save() + spin_lock(&kcov->lock);.
This should also create a splat.

- With lock kcov_remote_lock acquired there is a possible
hash_for_each_safe() and list_for_each() iteration. I don't know what
the limits are here but with a raw_spinlock_t it will contribute to
the maximal latency.

> Signed-off-by: Clark Williams <will...@redhat.com>

Sebastian

Steven Rostedt

unread,
Aug 10, 2021, 3:14:36 PM8/10/21
to Sebastian Andrzej Siewior, Clark Williams, Thomas Gleixner, Dmitry Vyukov, Andrey Konovalov, kasa...@googlegroups.com, linux-...@vger.kernel.org
On Tue, 10 Aug 2021 11:50:32 +0200
Sebastian Andrzej Siewior <big...@linutronix.de> wrote:

> - With lock kcov_remote_lock acquired there is a possible
> hash_for_each_safe() and list_for_each() iteration. I don't know what
> the limits are here but with a raw_spinlock_t it will contribute to
> the maximal latency.

Note, anyone having a kernel with KCOV compiled in, probably doesn't
care about latency ;-) It's like worrying about latency when lockdep is
complied in.

-- Steve

Thomas Gleixner

unread,
Aug 10, 2021, 4:38:33 PM8/10/21
to Sebastian Andrzej Siewior, Clark Williams, Steven Rostedt, Dmitry Vyukov, Andrey Konovalov, kasa...@googlegroups.com, linux-...@vger.kernel.org
On Tue, Aug 10 2021 at 11:50, Sebastian Andrzej Siewior wrote:
> On 2021-08-09 15:59:09 [-0500], Clark Williams wrote:
>> Saw the following splat on 5.14-rc4-rt5 with:
> …
>> Change kcov_remote_lock from regular spinlock_t to raw_spinlock_t so that
>> we don't get "sleeping function called from invalid context" on PREEMPT_RT kernel.
>
> I'm not entirely happy with that:
> - kcov_remote_start() decouples spin_lock_irq() and does local_irq_save()
> + spin_lock() which shouldn't be done as per
> Documentation/locking/locktypes.rst
> I would prefer to see the local_irq_save() replaced by
> local_lock_irqsave() so we get a context on what is going on.

Which does not make it raw unless we create a raw_local_lock.

> - kcov_remote_reset() has a kfree() with that irq-off lock acquired.

That free needs to move out obviously

> - kcov_remote_add() has a kmalloc() and is invoked with that irq-off
> lock acquired.

So does the kmalloc.

> - kcov_remote_area_put() uses INIT_LIST_HEAD() for no reason (just
> happen to notice).
>
> - kcov_remote_stop() does local_irq_save() + spin_lock(&kcov->lock);.
> This should also create a splat.
>
> - With lock kcov_remote_lock acquired there is a possible
> hash_for_each_safe() and list_for_each() iteration. I don't know what
> the limits are here but with a raw_spinlock_t it will contribute to
> the maximal latency.

And that matters because? kcov has a massive overhead and with that
enabled you care as much about latencies as you do when running with
lockdep enabled.

Thanks,

tglx

Sebastian Andrzej Siewior

unread,
Aug 11, 2021, 5:00:36 AM8/11/21
to Thomas Gleixner, Clark Williams, Steven Rostedt, Dmitry Vyukov, Andrey Konovalov, kasa...@googlegroups.com, linux-...@vger.kernel.org
On 2021-08-10 22:38:30 [+0200], Thomas Gleixner wrote:
> On Tue, Aug 10 2021 at 11:50, Sebastian Andrzej Siewior wrote:
> > On 2021-08-09 15:59:09 [-0500], Clark Williams wrote:
> >> Saw the following splat on 5.14-rc4-rt5 with:
> > …
> >> Change kcov_remote_lock from regular spinlock_t to raw_spinlock_t so that
> >> we don't get "sleeping function called from invalid context" on PREEMPT_RT kernel.
> >
> > I'm not entirely happy with that:
> > - kcov_remote_start() decouples spin_lock_irq() and does local_irq_save()
> > + spin_lock() which shouldn't be done as per
> > Documentation/locking/locktypes.rst
> > I would prefer to see the local_irq_save() replaced by
> > local_lock_irqsave() so we get a context on what is going on.
>
> Which does not make it raw unless we create a raw_local_lock.

But why raw? I was thinking about local_lock_irqsave() instead of
local_irq_save() and keeping the spinlock_t.

> > - kcov_remote_reset() has a kfree() with that irq-off lock acquired.
>
> That free needs to move out obviously
>
> > - kcov_remote_add() has a kmalloc() and is invoked with that irq-off
> > lock acquired.
>
> So does the kmalloc.
>
> > - kcov_remote_area_put() uses INIT_LIST_HEAD() for no reason (just
> > happen to notice).
> >
> > - kcov_remote_stop() does local_irq_save() + spin_lock(&kcov->lock);.
> > This should also create a splat.
> >
> > - With lock kcov_remote_lock acquired there is a possible
> > hash_for_each_safe() and list_for_each() iteration. I don't know what
> > the limits are here but with a raw_spinlock_t it will contribute to
> > the maximal latency.
>
> And that matters because? kcov has a massive overhead and with that
> enabled you care as much about latencies as you do when running with
> lockdep enabled.

I wasn't aware of that. However, with that local_irq_save() ->
local_lock_irqsave() swap and that first C code from
Documentation/dev-tools/kcov.rst I don't see any spike in cyclictest's
results. Maybe I'm not using it right…

> Thanks,
>
> tglx

Sebastian

Thomas Gleixner

unread,
Aug 11, 2021, 8:25:30 AM8/11/21
to Sebastian Andrzej Siewior, Clark Williams, Steven Rostedt, Dmitry Vyukov, Andrey Konovalov, kasa...@googlegroups.com, linux-...@vger.kernel.org
The problem starts with remote coverage AFAICT.
Reply all
Reply to author
Forward
0 new messages