[syzbot] [crypto?] INFO: task hung in hwrng_fillfn

syzbot

unread,

Nov 25, 2023, 8:21:29 AM11/25/23

to da...@davemloft.net, her...@gondor.apana.org.au, linux-...@vger.kernel.org, linux-...@vger.kernel.org, oli...@selenic.com, syzkall...@googlegroups.com

Hello,

syzbot found the following issue on:

HEAD commit: 98b1cc82c4af Linux 6.7-rc2
git tree: upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=12e89e10e80000
kernel config: https://syzkaller.appspot.com/x/.config?x=6ae1a4ee971a7305
dashboard link: https://syzkaller.appspot.com/bug?extid=c52ab18308964d248092
compiler: gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40
syz repro: https://syzkaller.appspot.com/x/repro.syz?x=174f0bd4e80000
C reproducer: https://syzkaller.appspot.com/x/repro.c?x=14b83b84e80000

Downloadable assets:
disk image: https://storage.googleapis.com/syzbot-assets/39c6cdad13fc/disk-98b1cc82.raw.xz
vmlinux: https://storage.googleapis.com/syzbot-assets/5a77b5daef9b/vmlinux-98b1cc82.xz
kernel image: https://storage.googleapis.com/syzbot-assets/5e09ae712e0d/bzImage-98b1cc82.xz

IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+c52ab1...@syzkaller.appspotmail.com

INFO: task hwrng:749 blocked for more than 143 seconds.
Not tainted 6.7.0-rc2-syzkaller #0
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:hwrng state:D stack:29040 pid:749 tgid:749 ppid:2 flags:0x00004000
Call Trace:
<TASK>
context_switch kernel/sched/core.c:5376 [inline]
__schedule+0xedb/0x5af0 kernel/sched/core.c:6688
__schedule_loop kernel/sched/core.c:6763 [inline]
schedule+0xe9/0x270 kernel/sched/core.c:6778
schedule_preempt_disabled+0x13/0x20 kernel/sched/core.c:6835
__mutex_lock_common kernel/locking/mutex.c:679 [inline]
__mutex_lock+0x5b9/0x9d0 kernel/locking/mutex.c:747
hwrng_fillfn+0x145/0x430 drivers/char/hw_random/core.c:504
kthread+0x2c6/0x3a0 kernel/kthread.c:388
ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147
ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242
</TASK>

Showing all locks held in the system:
1 lock held by khungtaskd/29:
#0: ffffffff8cfabce0 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:301 [inline]
#0: ffffffff8cfabce0 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:747 [inline]
#0: ffffffff8cfabce0 (rcu_read_lock){....}-{1:2}, at: debug_show_all_locks+0x75/0x340 kernel/locking/lockdep.c:6613
2 locks held by kswapd0/86:
1 lock held by hwrng/749:
#0: ffffffff8dbafee8 (reading_mutex){+.+.}-{3:3}, at: hwrng_fillfn+0x145/0x430 drivers/char/hw_random/core.c:504
2 locks held by getty/4824:
#0: ffff888025fa10a0 (&tty->ldisc_sem){++++}-{0:0}, at: tty_ldisc_ref_wait+0x24/0x80 drivers/tty/tty_ldisc.c:243
#1: ffffc90002f062f0 (&ldata->atomic_read_lock){+.+.}-{3:3}, at: n_tty_read+0xfc6/0x1490 drivers/tty/n_tty.c:2201
2 locks held by syz-executor391/5105:
2 locks held by syz-executor391/5106:

=============================================

NMI backtrace for cpu 1
CPU: 1 PID: 29 Comm: khungtaskd Not tainted 6.7.0-rc2-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106
nmi_cpu_backtrace+0x277/0x390 lib/nmi_backtrace.c:113
nmi_trigger_cpumask_backtrace+0x299/0x300 lib/nmi_backtrace.c:62
trigger_all_cpu_backtrace include/linux/nmi.h:160 [inline]
check_hung_uninterruptible_tasks kernel/hung_task.c:222 [inline]
watchdog+0xf87/0x1210 kernel/hung_task.c:379
kthread+0x2c6/0x3a0 kernel/kthread.c:388
ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147
ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242
</TASK>
Sending NMI from CPU 1 to CPUs 0:
NMI backtrace for cpu 0
CPU: 0 PID: 5105 Comm: syz-executor391 Not tainted 6.7.0-rc2-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023
RIP: 0010:__lock_acquire+0x30/0x3b10 kernel/locking/lockdep.c:4992
Code: f6 41 55 41 54 49 89 fc 55 89 d5 53 44 89 cb 48 81 ec f0 00 00 00 48 8b 84 24 28 01 00 00 48 c7 84 24 90 00 00 00 b3 8a b5 41 <44> 89 44 24 08 44 8b ac 24 48 01 00 00 48 c7 84 24 98 00 00 00 1b
RSP: 0018:ffffc900044271d8 EFLAGS: 00000086
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000002
RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff8cfabce0
RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
R10: ffffffff8f1934d7 R11: 0000000000000002 R12: ffffffff8cfabce0
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
FS: 00007f3c785f96c0(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00005565d7bb9be7 CR3: 000000001af30000 CR4: 0000000000350ef0
Call Trace:
<NMI>
</NMI>
<TASK>
lock_acquire kernel/locking/lockdep.c:5753 [inline]
lock_acquire+0x1ae/0x520 kernel/locking/lockdep.c:5718
rcu_lock_acquire include/linux/rcupdate.h:301 [inline]
rcu_read_lock include/linux/rcupdate.h:747 [inline]
get_mem_cgroup_from_mm+0x4b/0x4c0 mm/memcontrol.c:1081
__mem_cgroup_charge+0x1c/0x140 mm/memcontrol.c:7224
mem_cgroup_charge include/linux/memcontrol.h:684 [inline]
__filemap_add_folio+0x88c/0xed0 mm/filemap.c:854
filemap_add_folio+0xb1/0x1e0 mm/filemap.c:937
page_cache_ra_unbounded+0x1d0/0x5f0 mm/readahead.c:250
do_page_cache_ra mm/readahead.c:299 [inline]
page_cache_ra_order+0x72b/0xa80 mm/readahead.c:546
do_sync_mmap_readahead mm/filemap.c:3141 [inline]
filemap_fault+0x16a8/0x3570 mm/filemap.c:3233
__do_fault+0x107/0x600 mm/memory.c:4265
do_cow_fault mm/memory.c:4662 [inline]
do_fault mm/memory.c:4764 [inline]
do_pte_missing mm/memory.c:3730 [inline]
handle_pte_fault mm/memory.c:5038 [inline]
__handle_mm_fault+0x3a8d/0x3d70 mm/memory.c:5179
handle_mm_fault+0x47a/0xa10 mm/memory.c:5344
do_user_addr_fault+0x3d1/0x1000 arch/x86/mm/fault.c:1413
handle_page_fault arch/x86/mm/fault.c:1505 [inline]
exc_page_fault+0x5d/0xc0 arch/x86/mm/fault.c:1561
asm_exc_page_fault+0x26/0x30 arch/x86/include/asm/idtentry.h:570
RIP: 0010:rep_movs_alternative+0x57/0x70 arch/x86/lib/copy_user_64.S:80
Code: 00 66 90 48 8b 06 48 89 07 48 83 c6 08 48 83 c7 08 83 e9 08 74 df 83 f9 08 73 e8 eb c9 eb 01 c3 48 89 c8 48 c1 e9 03 83 e0 07 <f3> 48 a5 89 c1 85 c9 75 b3 c3 48 8d 0c c8 eb ac 66 0f 1f 84 00 00
RSP: 0018:ffffc90004427bb0 EFLAGS: 00050246
RAX: 0000000000000000 RBX: 0000000000000040 RCX: 0000000000000008
RDX: ffffed1028a4ab48 RSI: ffff888145255a00 RDI: 0000000020019980
RBP: 0000000020019980 R08: 0000000000000000 R09: ffffed1028a4ab47
R10: ffff888145255a3f R11: 0000000000000001 R12: ffff888145255a00
R13: 00000000200199c0 R14: 0000000000000000 R15: dffffc0000000000
copy_user_generic arch/x86/include/asm/uaccess_64.h:112 [inline]
raw_copy_to_user arch/x86/include/asm/uaccess_64.h:133 [inline]
_copy_to_user lib/usercopy.c:41 [inline]
_copy_to_user+0xa8/0xb0 lib/usercopy.c:34
copy_to_user include/linux/uaccess.h:191 [inline]
rng_dev_read+0x184/0x580 drivers/char/hw_random/core.c:255
do_loop_readv_writev fs/read_write.c:755 [inline]
do_loop_readv_writev fs/read_write.c:743 [inline]
do_iter_read+0x567/0x830 fs/read_write.c:797
vfs_readv+0x12d/0x1a0 fs/read_write.c:915
do_preadv fs/read_write.c:1007 [inline]
__do_sys_preadv fs/read_write.c:1057 [inline]
__se_sys_preadv fs/read_write.c:1052 [inline]
__x64_sys_preadv+0x228/0x300 fs/read_write.c:1052
do_syscall_x64 arch/x86/entry/common.c:51 [inline]
do_syscall_64+0x40/0x110 arch/x86/entry/common.c:82
entry_SYSCALL_64_after_hwframe+0x63/0x6b
RIP: 0033:0x7f3c78638b29
Code: Unable to access opcode bytes at 0x7f3c78638aff.
RSP: 002b:00007f3c785f9168 EFLAGS: 00000246 ORIG_RAX: 0000000000000127
RAX: ffffffffffffffda RBX: 00007f3c786c2328 RCX: 00007f3c78638b29
RDX: 0000000000000001 RSI: 0000000020001880 RDI: 0000000000000003
RBP: 00007f3c786c2320 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00007f3c786c232c
R13: 0000000000000000 R14: 00007ffc8a220310 R15: 00007ffc8a2203f8
</TASK>
INFO: NMI handler (nmi_cpu_backtrace_handler) took too long to run: 1.464 msecs

---
This report is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzk...@googlegroups.com.

syzbot will keep track of this issue. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.

If the report is already addressed, let syzbot know by replying with:
#syz fix: exact-commit-title

If you want syzbot to run the reproducer, reply with:
#syz test: git://repo/address.git branch-or-commit-hash
If you attach or paste a git patch, syzbot will apply it before testing.

If you want to overwrite report's subsystems, reply with:
#syz set subsystems: new-subsystem
(See the list of subsystem names on the web dashboard)

If the report is a duplicate of another one, reply with:
#syz dup: exact-subject-of-another-report

If you want to undo deduplication, reply with:
#syz undup

Hillf Danton

unread,

Nov 25, 2023, 8:57:42 PM11/25/23

to syzbot, linux-...@vger.kernel.org, syzkall...@googlegroups.com

On Sat, 25 Nov 2023 05:21:27 -0800

> syzbot found the following issue on:
>
> HEAD commit: 98b1cc82c4af Linux 6.7-rc2
> git tree: upstream

> C reproducer: https://syzkaller.appspot.com/x/repro.c?x=14b83b84e80000

#syz test https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master

--- x/drivers/char/hw_random/core.c
+++ y/drivers/char/hw_random/core.c
@@ -213,6 +213,7 @@ static ssize_t rng_dev_read(struct file
int err = 0;
int bytes_read, len;
struct hwrng *rng;
+ char rnd[32];

while (size) {
rng = get_current_rng();
@@ -245,24 +246,28 @@ static ssize_t rng_dev_read(struct file
err = -EAGAIN;
goto out_unlock_reading;
}
+ mutex_unlock(&reading_mutex);
} else {
len = data_avail;
if (len > size)
len = size;
+ if (len > 32)
+ len = 32;

data_avail -= len;

- if (copy_to_user(buf + ret, rng_buffer + data_avail,
- len)) {
+ memcpy(rnd, rng_buffer + data_avail, len);
+ mutex_unlock(&reading_mutex);
+
+ if (copy_to_user(buf + ret, rnd, len)) {
err = -EFAULT;
- goto out_unlock_reading;
+ goto out_put;
}

size -= len;
ret += len;
}

- mutex_unlock(&reading_mutex);
put_rng(rng);

if (need_resched())
--

syzbot

unread,

Nov 25, 2023, 9:20:06 PM11/25/23

to hda...@sina.com, linux-...@vger.kernel.org, syzkall...@googlegroups.com

Hello,

syzbot has tested the proposed patch and the reproducer did not trigger any issue:

Reported-and-tested-by: syzbot+c52ab1...@syzkaller.appspotmail.com

Tested on:

commit: b46ae77f Merge tag 'xfs-6.7-fixes-3' of git://git.kern..
git tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master
console output: https://syzkaller.appspot.com/x/log.txt?x=129d420ce80000

kernel config: https://syzkaller.appspot.com/x/.config?x=6ae1a4ee971a7305
dashboard link: https://syzkaller.appspot.com/bug?extid=c52ab18308964d248092
compiler: gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40

patch: https://syzkaller.appspot.com/x/patch.diff?x=11f9fd9f680000

Note: testing is done by a robot and is best-effort only.

Edward Adam Davis

unread,

Nov 25, 2023, 10:53:06 PM11/25/23

to syzbot+c52ab1...@syzkaller.appspotmail.com, syzkall...@googlegroups.com

please test task hung in hwrng_fillfn

#syz test https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 98b1cc82c4af

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 420f155d251f..ebfa5eab84df 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -501,7 +501,10 @@ static int hwrng_fillfn(void *unused)
rng = get_current_rng();
if (IS_ERR(rng) || !rng)
break;
- mutex_lock(&reading_mutex);
+ if (mutex_lock_interruptible(&reading_mutex)) {
+ put_rng(rng);
+ return = -ERESTARTSYS;
+ }
rc = rng_get_data(rng, rng_fillbuf,
rng_buffer_size(), 1);
if (current_quality != rng->quality)

syzbot

unread,

Nov 25, 2023, 10:58:06 PM11/25/23

to ead...@qq.com, linux-...@vger.kernel.org, syzkall...@googlegroups.com

Hello,

syzbot tried to test the proposed patch but the build/boot failed:

drivers/char/hw_random/core.c:506:32: error: expected expression before '=' token

Tested on:

commit: 98b1cc82 Linux 6.7-rc2
git tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel config: https://syzkaller.appspot.com/x/.config?x=6ae1a4ee971a7305
dashboard link: https://syzkaller.appspot.com/bug?extid=c52ab18308964d248092
compiler: gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40

patch: https://syzkaller.appspot.com/x/patch.diff?x=1627b408e80000

Edward Adam Davis

unread,

Nov 25, 2023, 11:05:56 PM11/25/23

to syzbot+c52ab1...@syzkaller.appspotmail.com, syzkall...@googlegroups.com

please test task hung in hwrng_fillfn

#syz test https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 98b1cc82c4af

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c

index 420f155d251f..883412bbf9f2 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -225,17 +225,18 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf,
goto out;
}

- if (mutex_lock_interruptible(&reading_mutex)) {
- err = -ERESTARTSYS;
- goto out_put;
- }
if (!data_avail) {
+ if (mutex_lock_interruptible(&reading_mutex)) {
+ err = -ERESTARTSYS;
+ goto out_put;
+ }
bytes_read = rng_get_data(rng, rng_buffer,
rng_buffer_size(),
!(filp->f_flags & O_NONBLOCK));
+ mutex_unlock(&reading_mutex);
if (bytes_read < 0) {
err = bytes_read;

- goto out_unlock_reading;
+ goto out_put;
}

data_avail = bytes_read;
}
@@ -243,7 +244,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf,
if (!data_avail) {
if (filp->f_flags & O_NONBLOCK) {
err = -EAGAIN;

- goto out_unlock_reading;
+ goto out_put;
}

} else {
len = data_avail;
@@ -255,14 +256,13 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf,

if (copy_to_user(buf + ret, rng_buffer + data_avail,

len)) {
err = -EFAULT;
- goto out_unlock_reading;
+ goto out_put;
}

size -= len;
ret += len;
}

- mutex_unlock(&reading_mutex);
put_rng(rng);

if (need_resched())

@@ -276,8 +276,6 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf,
out:
return ret ? : err;

-out_unlock_reading:
- mutex_unlock(&reading_mutex);
out_put:
put_rng(rng);
goto out;
@@ -501,7 +499,10 @@ static int hwrng_fillfn(void *unused)

Edward Adam Davis

unread,

Nov 25, 2023, 11:06:51 PM11/25/23

to syzbot+c52ab1...@syzkaller.appspotmail.com, syzkall...@googlegroups.com

+ return -ERESTARTSYS;

syzbot

unread,

Nov 25, 2023, 11:11:05 PM11/25/23

to ead...@qq.com, linux-...@vger.kernel.org, syzkall...@googlegroups.com

Hello,

syzbot tried to test the proposed patch but the build/boot failed:

drivers/char/hw_random/core.c:504:32: error: expected expression before '=' token

Tested on:

commit: 98b1cc82 Linux 6.7-rc2
git tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel config: https://syzkaller.appspot.com/x/.config?x=6ae1a4ee971a7305
dashboard link: https://syzkaller.appspot.com/bug?extid=c52ab18308964d248092
compiler: gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40

patch: https://syzkaller.appspot.com/x/patch.diff?x=169076d0e80000

syzbot

unread,

Nov 25, 2023, 11:37:06 PM11/25/23

to ead...@qq.com, linux-...@vger.kernel.org, syzkall...@googlegroups.com

Hello,

syzbot has tested the proposed patch and the reproducer did not trigger any issue:

Reported-and-tested-by: syzbot+c52ab1...@syzkaller.appspotmail.com

Tested on:

commit: 98b1cc82 Linux 6.7-rc2
git tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

console output: https://syzkaller.appspot.com/x/log.txt?x=14d59724e80000

kernel config: https://syzkaller.appspot.com/x/.config?x=6ae1a4ee971a7305
dashboard link: https://syzkaller.appspot.com/bug?extid=c52ab18308964d248092
compiler: gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40

patch: https://syzkaller.appspot.com/x/patch.diff?x=103c62e8e80000

Edward Adam Davis

unread,

Nov 26, 2023, 12:35:51 AM11/26/23

to syzbot+c52ab1...@syzkaller.appspotmail.com, syzkall...@googlegroups.com

syzbot

unread,

Nov 26, 2023, 1:08:07 AM11/26/23

to ead...@qq.com, linux-...@vger.kernel.org, syzkall...@googlegroups.com

Hello,

syzbot has tested the proposed patch and the reproducer did not trigger any issue:

Reported-and-tested-by: syzbot+c52ab1...@syzkaller.appspotmail.com

Tested on:

commit: 98b1cc82 Linux 6.7-rc2
git tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

console output: https://syzkaller.appspot.com/x/log.txt?x=161857af680000

kernel config: https://syzkaller.appspot.com/x/.config?x=6ae1a4ee971a7305
dashboard link: https://syzkaller.appspot.com/bug?extid=c52ab18308964d248092
compiler: gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40

patch: https://syzkaller.appspot.com/x/patch.diff?x=16732694e80000

Edward Adam Davis

unread,

Nov 26, 2023, 2:08:02 AM11/26/23

to syzbot+c52ab1...@syzkaller.appspotmail.com, da...@davemloft.net, her...@gondor.apana.org.au, linux-...@vger.kernel.org, linux-...@vger.kernel.org, oli...@selenic.com, syzkall...@googlegroups.com

[Syz report]

INFO: task hwrng:749 blocked for more than 143 seconds.
Not tainted 6.7.0-rc2-syzkaller #0
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:hwrng state:D stack:29040 pid:749 tgid:749 ppid:2 flags:0x00004000
Call Trace:
<TASK>
context_switch kernel/sched/core.c:5376 [inline]
__schedule+0xedb/0x5af0 kernel/sched/core.c:6688
__schedule_loop kernel/sched/core.c:6763 [inline]
schedule+0xe9/0x270 kernel/sched/core.c:6778
schedule_preempt_disabled+0x13/0x20 kernel/sched/core.c:6835
__mutex_lock_common kernel/locking/mutex.c:679 [inline]
__mutex_lock+0x5b9/0x9d0 kernel/locking/mutex.c:747
hwrng_fillfn+0x145/0x430 drivers/char/hw_random/core.c:504
kthread+0x2c6/0x3a0 kernel/kthread.c:388
ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147
ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242
</TASK>

...

[Analysis]
rng_dev_read() holds the reading_mutex lock for too long, so the hwrng
kthread function hwrng_fillfn() stays blocked on that mutex until the hung-task timeout is reported.

[Fix]
Move the code that does not need the protection of this lock out of the critical section.

Reported-and-tested-by: syzbot+c52ab1...@syzkaller.appspotmail.com
Signed-off-by: Edward Adam Davis <ead...@qq.com>
---
drivers/char/hw_random/core.c | 23 ++++++++++++-----------
1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 420f155d251f..7323ddc958ce 100644

--
2.25.1

Edward Adam Davis

unread,

Nov 26, 2023, 2:11:47 AM11/26/23

to syzbot+c52ab1...@syzkaller.appspotmail.com, da...@davemloft.net, her...@gondor.apana.org.au, linux-...@vger.kernel.org, linux-...@vger.kernel.org, oli...@selenic.com, syzkall...@googlegroups.com

[Syz report]

INFO: task hwrng:749 blocked for more than 143 seconds.
Not tainted 6.7.0-rc2-syzkaller #0
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:hwrng state:D stack:29040 pid:749 tgid:749 ppid:2 flags:0x00004000
Call Trace:
<TASK>
context_switch kernel/sched/core.c:5376 [inline]
__schedule+0xedb/0x5af0 kernel/sched/core.c:6688
__schedule_loop kernel/sched/core.c:6763 [inline]
schedule+0xe9/0x270 kernel/sched/core.c:6778
schedule_preempt_disabled+0x13/0x20 kernel/sched/core.c:6835
__mutex_lock_common kernel/locking/mutex.c:679 [inline]
__mutex_lock+0x5b9/0x9d0 kernel/locking/mutex.c:747
hwrng_fillfn+0x145/0x430 drivers/char/hw_random/core.c:504
kthread+0x2c6/0x3a0 kernel/kthread.c:388
ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147
ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242
</TASK>

...

--
2.25.1

Edward Adam Davis

unread,

Nov 28, 2023, 9:59:22 PM11/28/23

to syzbot+c52ab1...@syzkaller.appspotmail.com, syzkall...@googlegroups.com

please test WARNING in rate_control_rate_init

#syz test https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 6e2332e0ab53

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 606b1b2e4123..13d52452a124 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1796,7 +1796,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
!params->supported_rates_len &&
!params->ht_capa && !params->vht_capa &&
!params->he_capa && !params->eht_capa &&
- !params->opmode_notif_used)
+ !params->opmode_notif_used && 0)
return 0;

if (!link || !link_sta)
@@ -1817,6 +1817,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
} else if (new_link) {
return -EINVAL;
}
+ printk("b, %p \n", rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf));

if (params->txpwr_set) {
link_sta->pub->txpwr.type = params->txpwr.type;
@@ -1868,6 +1869,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
params->opmode_notif,
sband->band);
}
+ printk("e, %p \n", rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf));

return ret;
}
@@ -1982,6 +1984,10 @@ static int sta_apply_parameters(struct ieee80211_local *local,
if (params->listen_interval >= 0)
sta->listen_interval = params->listen_interval;

+ printk("b, stp: %d, sa: %d, src: %d\n",
+ test_sta_flag(sta, WLAN_STA_TDLS_PEER),
+ test_sta_flag(sta, WLAN_STA_ASSOC),
+ test_sta_flag(sta, WLAN_STA_RATE_CONTROL));
ret = sta_link_apply_parameters(local, sta, false,
&params->link_sta_params);
if (ret)
@@ -1996,6 +2002,10 @@ static int sta_apply_parameters(struct ieee80211_local *local,
if (params->airtime_weight)
sta->airtime_weight = params->airtime_weight;

+ printk("a, stp: %d, sa: %d, src: %d\n",
+ test_sta_flag(sta, WLAN_STA_TDLS_PEER),
+ test_sta_flag(sta, WLAN_STA_ASSOC),
+ test_sta_flag(sta, WLAN_STA_RATE_CONTROL));
/* set the STA state after all sta info from usermode has been set */
if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) ||
set & BIT(NL80211_STA_FLAG_ASSOCIATED)) {

syzbot

unread,

Nov 28, 2023, 10:23:12 PM11/28/23

to ead...@qq.com, linux-...@vger.kernel.org, syzkall...@googlegroups.com

Hello,

syzbot has tested the proposed patch but the reproducer is still triggering an issue:
INFO: task hung in hwrng_fillfn

INFO: task hwrng:729 blocked for more than 143 seconds.
Not tainted 6.4.0-syzkaller-01647-g6e2332e0ab53-dirty #0

"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.

task:hwrng state:D stack:29760 pid:729 ppid:2 flags:0x00004000
Call Trace:
<TASK>
context_switch kernel/sched/core.c:5381 [inline]
__schedule+0xeda/0x5b60 kernel/sched/core.c:6710
schedule+0xe7/0x1b0 kernel/sched/core.c:6786
schedule_preempt_disabled+0x13/0x20 kernel/sched/core.c:6845
__mutex_lock_common kernel/locking/mutex.c:679 [inline]
__mutex_lock+0x5b4/0x990 kernel/locking/mutex.c:747
hwrng_fillfn+0x145/0x430 drivers/char/hw_random/core.c:505
kthread+0x2c4/0x3a0 kernel/kthread.c:389
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308

</TASK>

Showing all locks held in the system:

1 lock held by rcu_tasks_kthre/13:
#0: ffffffff8c9996f0 (rcu_tasks.tasks_gp_mutex){+.+.}-{3:3}, at: rcu_tasks_one_gp+0x2c/0xe30 kernel/rcu/tasks.h:522
1 lock held by rcu_tasks_trace/14:
#0: ffffffff8c9993f0 (rcu_tasks_trace.tasks_gp_mutex){+.+.}-{3:3}, at: rcu_tasks_one_gp+0x2c/0xe30 kernel/rcu/tasks.h:522
1 lock held by khungtaskd/28:
#0: ffffffff8c99a300 (rcu_read_lock){....}-{1:2}, at: debug_show_all_locks+0x55/0x340 kernel/locking/lockdep.c:6615
1 lock held by hwrng/729:
#0: ffffffff8d4887a8 (reading_mutex){+.+.}-{3:3}, at: hwrng_fillfn+0x145/0x430 drivers/char/hw_random/core.c:505
2 locks held by getty/4749:
#0: ffff8880287f3098 (&tty->ldisc_sem){++++}-{0:0}, at: tty_ldisc_ref_wait+0x24/0x80 drivers/tty/tty_ldisc.c:243
#1: ffffc900015b02f0 (&ldata->atomic_read_lock){+.+.}-{3:3}, at: n_tty_read+0xfc5/0x1460 drivers/tty/n_tty.c:2176
1 lock held by syz-executor.0/5449:
2 locks held by syz-executor.0/5450:
1 lock held by syz-executor.0/5744:
1 lock held by syz-executor.0/5762:

=============================================

NMI backtrace for cpu 1

CPU: 1 PID: 28 Comm: khungtaskd Not tainted 6.4.0-syzkaller-01647-g6e2332e0ab53-dirty #0

Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106
nmi_cpu_backtrace+0x277/0x390 lib/nmi_backtrace.c:113

nmi_trigger_cpumask_backtrace+0x2ac/0x310 lib/nmi_backtrace.c:62
trigger_all_cpu_backtrace include/linux/nmi.h:148 [inline]
check_hung_uninterruptible_tasks kernel/hung_task.c:222 [inline]
watchdog+0xf29/0x11b0 kernel/hung_task.c:379
kthread+0x2c4/0x3a0 kernel/kthread.c:389
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308

</TASK>
Sending NMI from CPU 1 to CPUs 0:
NMI backtrace for cpu 0

CPU: 0 PID: 5744 Comm: syz-executor.0 Not tainted 6.4.0-syzkaller-01647-g6e2332e0ab53-dirty #0

Tested on:

commit: 6e2332e0 Merge tag 'cgroup-for-6.5' of git://git.kerne..
git tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
console output: https://syzkaller.appspot.com/x/log.txt?x=1271b7ece80000
kernel config: https://syzkaller.appspot.com/x/.config?x=102b18358d5797d8

dashboard link: https://syzkaller.appspot.com/bug?extid=c52ab18308964d248092
compiler: gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40

patch: https://syzkaller.appspot.com/x/patch.diff?x=147cc8b4e80000

Herbert Xu

unread,

Dec 1, 2023, 4:57:09 AM12/1/23

to Edward Adam Davis, syzbot+c52ab1...@syzkaller.appspotmail.com, da...@davemloft.net, linux-...@vger.kernel.org, linux-...@vger.kernel.org, oli...@selenic.com, syzkall...@googlegroups.com

Does this change anything at all? Please explain why it was holding
this lock for 143 seconds in the first place. If it's doing it in
rng_get_data, then your change has zero effect.

> @@ -501,7 +499,10 @@ static int hwrng_fillfn(void *unused)
> rng = get_current_rng();
> if (IS_ERR(rng) || !rng)
> break;
> - mutex_lock(&reading_mutex);
> + if (mutex_lock_interruptible(&reading_mutex)) {
> + put_rng(rng);
> + return -ERESTARTSYS;
> + }

No this is just the symptom. The real problem is why is the driver
spending 143 seconds in rng_get_data?

Cheers,
--
Email: Herbert Xu <her...@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

Edward Adam Davis

unread,

Dec 1, 2023, 6:38:00 AM12/1/23

to her...@gondor.apana.org.au, da...@davemloft.net, ead...@qq.com, linux-...@vger.kernel.org, linux-...@vger.kernel.org, oli...@selenic.com, syzbot+c52ab1...@syzkaller.appspotmail.com, syzkall...@googlegroups.com

The patch reduces the scope of the critical section.
The original critical section covers too much code; in particular,
copy_to_user() should not be called inside it.

>
> > @@ -501,7 +499,10 @@ static int hwrng_fillfn(void *unused)
> > rng = get_current_rng();
> > if (IS_ERR(rng) || !rng)
> > break;
> > - mutex_lock(&reading_mutex);
> > + if (mutex_lock_interruptible(&reading_mutex)) {
> > + put_rng(rng);
> > + return -ERESTARTSYS;
> > + }
>
> No this is just the symptom. The real problem is why is the driver
> spending 143 seconds in rng_get_data?

In the second version of the patch, I removed the change to hwrng_fillfn().
But for some reason, the V2 patch did not appear on the mailing list.

I think the 143s delay came from copy_to_user() taking too long while
reading_mutex was held.
So I narrowed the scope of the critical section and moved the
copy_to_user() call out of it.

Edward

Herbert Xu

unread,

Dec 1, 2023, 6:41:21 AM12/1/23

to Edward Adam Davis, da...@davemloft.net, linux-...@vger.kernel.org, linux-...@vger.kernel.org, oli...@selenic.com, syzbot+c52ab1...@syzkaller.appspotmail.com, syzkall...@googlegroups.com

On Fri, Dec 01, 2023 at 07:37:39PM +0800, Edward Adam Davis wrote:
>
> Reduce the scope of critical zone protection.
> The original critical zone contains a too large range, especially like
> copy_to_user() should not be included in the critical zone.

Which part in particular is taking 143 seconds? The buffer is
only 128 bytes long. Why is a 128-byte copy taking 143 seconds,
even with a page fault?

Edward Adam Davis

unread,

Dec 1, 2023, 7:34:24 AM12/1/23

to her...@gondor.apana.org.au, da...@davemloft.net, ead...@qq.com, linux-...@vger.kernel.org, linux-...@vger.kernel.org, oli...@selenic.com, syzbot+c52ab1...@syzkaller.appspotmail.com, syzkall...@googlegroups.com

On Fri, 1 Dec 2023 19:41:11 +0800, Herbert Xu wrote:
> >
> > Reduce the scope of critical zone protection.
> > The original critical zone contains a too large range, especially like
> > copy_to_user() should not be included in the critical zone.
>
> Which part in particular is taking 143 seconds? The buffer is
> only 128 bytes long. Why is a 128-byte copy taking 143 seconds,
> even with a page fault?

According to the splat, after a page fault occurred, the attempt to acquire
rcu_read_lock() failed, resulting in the 143s timeout. This is only my speculation.

Edward

Herbert Xu

unread,

Dec 1, 2023, 6:39:08 PM12/1/23

to Edward Adam Davis, da...@davemloft.net, linux-...@vger.kernel.org, linux-...@vger.kernel.org, oli...@selenic.com, syzbot+c52ab1...@syzkaller.appspotmail.com, syzkall...@googlegroups.com

On Fri, Dec 01, 2023 at 08:34:12PM +0800, Edward Adam Davis wrote:
>
> According to splat, after a page fault occurred, the attempt to retrieve
> rcu_read_lock() failed, resulting in a timeout of 143s. This is my speculation.

Oh I see what's going on. The reproducer is mapping /dev/hwrng, so
the write to user-space is then triggering another read which then
dead-locks.

Let me think about this.

Thanks,

Herbert Xu

unread,

Dec 1, 2023, 8:02:06 PM12/1/23

to Edward Adam Davis, da...@davemloft.net, linux-...@vger.kernel.org, linux-...@vger.kernel.org, oli...@selenic.com, syzbot+c52ab1...@syzkaller.appspotmail.com, syzkall...@googlegroups.com, Ian Molton, Rusty Russell

There is a dead-lock in the hwrng device read path. This triggers
when the user reads from /dev/hwrng into memory also mmap-ed from
/dev/hwrng. The resulting page fault triggers a recursive read
which then dead-locks.

Fix this by copying the data into a stack buffer while holding the lock
and calling copy_to_user on that buffer only after the lock is released.

Reported-by: Edward Adam Davis <ead...@qq.com>
Reported-by: syzbot+c52ab1...@syzkaller.appspotmail.com
Fixes: 9996508b3353 ("hwrng: core - Replace u32 in driver API with byte array")
Cc: <sta...@vger.kernel.org>
Signed-off-by: Herbert Xu <her...@gondor.apana.org.au>

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 420f155d251f..a3bbdd6e60fc 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -23,10 +23,13 @@
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
+#include <linux/string.h>
#include <linux/uaccess.h>

#define RNG_MODULE_NAME "hw_random"

+#define RNG_BUFFER_SIZE (SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES)
+
static struct hwrng *current_rng;
/* the current rng has been explicitly chosen by user via sysfs */
static int cur_rng_set_by_user;
@@ -58,7 +61,7 @@ static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size,

static size_t rng_buffer_size(void)
{
- return SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES;
+ return RNG_BUFFER_SIZE;
}

static void add_early_randomness(struct hwrng *rng)
@@ -209,6 +212,7 @@ static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size,

static ssize_t rng_dev_read(struct file *filp, char __user *buf,

size_t size, loff_t *offp)
{
+ u8 buffer[RNG_BUFFER_SIZE];
ssize_t ret = 0;

int err = 0;
int bytes_read, len;

@@ -236,34 +240,37 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf,

if (bytes_read < 0) {
err = bytes_read;

goto out_unlock_reading;
- }
- data_avail = bytes_read;
- }
-
- if (!data_avail) {
- if (filp->f_flags & O_NONBLOCK) {
+ } else if (bytes_read == 0 &&
+ (filp->f_flags & O_NONBLOCK)) {

err = -EAGAIN;
goto out_unlock_reading;
}

- } else {
- len = data_avail;
+
+ data_avail = bytes_read;
+ }
+
+ len = data_avail;
+ if (len) {

if (len > size)
len = size;

data_avail -= len;

- if (copy_to_user(buf + ret, rng_buffer + data_avail,
- len)) {
+ memcpy(buffer, rng_buffer + data_avail, len);
+ }
+ mutex_unlock(&reading_mutex);
+ put_rng(rng);
+
+ if (len) {
+ if (copy_to_user(buf + ret, buffer, len)) {
err = -EFAULT;
- goto out_unlock_reading;
+ goto out;

}

size -= len;
ret += len;
}

- mutex_unlock(&reading_mutex);

- put_rng(rng);

if (need_resched())
schedule_timeout_interruptible(1);
@@ -274,6 +281,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf,
}
}
out:
+ memzero_explicit(buffer, sizeof(buffer));
return ret ? : err;

out_unlock_reading:

PrasannaKumar Muralidharan

unread,

Dec 12, 2023, 5:00:33 PM12/12/23

to Herbert Xu, Edward Adam Davis, da...@davemloft.net, linux-...@vger.kernel.org, linux-...@vger.kernel.org, oli...@selenic.com, syzbot+c52ab1...@syzkaller.appspotmail.com, syzkall...@googlegroups.com, Ian Molton, Rusty Russell

Reviewed-by: PrasannaKumar Muralidharan <prasanna...@gmail.com>

Regards,
PrasannaKumar

Reply all

Reply to author

Forward

[syzbot] [crypto?] INFO: task hung in hwrng_fillfn

syzbot

Hillf Danton

syzbot

Edward Adam Davis

syzbot

Edward Adam Davis

Edward Adam Davis

syzbot

syzbot

Edward Adam Davis

syzbot

Edward Adam Davis

Edward Adam Davis

Edward Adam Davis

syzbot

Herbert Xu

Edward Adam Davis

Herbert Xu

Edward Adam Davis

Herbert Xu

Herbert Xu

PrasannaKumar Muralidharan