[syzbot] [crypto?] INFO: task hung in hwrng_fillfn

18 views
Skip to first unread message

syzbot

unread,
Nov 25, 2023, 8:21:29 AM11/25/23
to da...@davemloft.net, her...@gondor.apana.org.au, linux-...@vger.kernel.org, linux-...@vger.kernel.org, oli...@selenic.com, syzkall...@googlegroups.com
Hello,

syzbot found the following issue on:

HEAD commit: 98b1cc82c4af Linux 6.7-rc2
git tree: upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=12e89e10e80000
kernel config: https://syzkaller.appspot.com/x/.config?x=6ae1a4ee971a7305
dashboard link: https://syzkaller.appspot.com/bug?extid=c52ab18308964d248092
compiler: gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40
syz repro: https://syzkaller.appspot.com/x/repro.syz?x=174f0bd4e80000
C reproducer: https://syzkaller.appspot.com/x/repro.c?x=14b83b84e80000

Downloadable assets:
disk image: https://storage.googleapis.com/syzbot-assets/39c6cdad13fc/disk-98b1cc82.raw.xz
vmlinux: https://storage.googleapis.com/syzbot-assets/5a77b5daef9b/vmlinux-98b1cc82.xz
kernel image: https://storage.googleapis.com/syzbot-assets/5e09ae712e0d/bzImage-98b1cc82.xz

IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+c52ab1...@syzkaller.appspotmail.com

INFO: task hwrng:749 blocked for more than 143 seconds.
Not tainted 6.7.0-rc2-syzkaller #0
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:hwrng state:D stack:29040 pid:749 tgid:749 ppid:2 flags:0x00004000
Call Trace:
<TASK>
context_switch kernel/sched/core.c:5376 [inline]
__schedule+0xedb/0x5af0 kernel/sched/core.c:6688
__schedule_loop kernel/sched/core.c:6763 [inline]
schedule+0xe9/0x270 kernel/sched/core.c:6778
schedule_preempt_disabled+0x13/0x20 kernel/sched/core.c:6835
__mutex_lock_common kernel/locking/mutex.c:679 [inline]
__mutex_lock+0x5b9/0x9d0 kernel/locking/mutex.c:747
hwrng_fillfn+0x145/0x430 drivers/char/hw_random/core.c:504
kthread+0x2c6/0x3a0 kernel/kthread.c:388
ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147
ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242
</TASK>

Showing all locks held in the system:
1 lock held by khungtaskd/29:
#0: ffffffff8cfabce0 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:301 [inline]
#0: ffffffff8cfabce0 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:747 [inline]
#0: ffffffff8cfabce0 (rcu_read_lock){....}-{1:2}, at: debug_show_all_locks+0x75/0x340 kernel/locking/lockdep.c:6613
2 locks held by kswapd0/86:
1 lock held by hwrng/749:
#0: ffffffff8dbafee8 (reading_mutex){+.+.}-{3:3}, at: hwrng_fillfn+0x145/0x430 drivers/char/hw_random/core.c:504
2 locks held by getty/4824:
#0: ffff888025fa10a0 (&tty->ldisc_sem){++++}-{0:0}, at: tty_ldisc_ref_wait+0x24/0x80 drivers/tty/tty_ldisc.c:243
#1: ffffc90002f062f0 (&ldata->atomic_read_lock){+.+.}-{3:3}, at: n_tty_read+0xfc6/0x1490 drivers/tty/n_tty.c:2201
2 locks held by syz-executor391/5105:
2 locks held by syz-executor391/5106:

=============================================

NMI backtrace for cpu 1
CPU: 1 PID: 29 Comm: khungtaskd Not tainted 6.7.0-rc2-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106
nmi_cpu_backtrace+0x277/0x390 lib/nmi_backtrace.c:113
nmi_trigger_cpumask_backtrace+0x299/0x300 lib/nmi_backtrace.c:62
trigger_all_cpu_backtrace include/linux/nmi.h:160 [inline]
check_hung_uninterruptible_tasks kernel/hung_task.c:222 [inline]
watchdog+0xf87/0x1210 kernel/hung_task.c:379
kthread+0x2c6/0x3a0 kernel/kthread.c:388
ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147
ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242
</TASK>
Sending NMI from CPU 1 to CPUs 0:
NMI backtrace for cpu 0
CPU: 0 PID: 5105 Comm: syz-executor391 Not tainted 6.7.0-rc2-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023
RIP: 0010:__lock_acquire+0x30/0x3b10 kernel/locking/lockdep.c:4992
Code: f6 41 55 41 54 49 89 fc 55 89 d5 53 44 89 cb 48 81 ec f0 00 00 00 48 8b 84 24 28 01 00 00 48 c7 84 24 90 00 00 00 b3 8a b5 41 <44> 89 44 24 08 44 8b ac 24 48 01 00 00 48 c7 84 24 98 00 00 00 1b
RSP: 0018:ffffc900044271d8 EFLAGS: 00000086
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000002
RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff8cfabce0
RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
R10: ffffffff8f1934d7 R11: 0000000000000002 R12: ffffffff8cfabce0
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
FS: 00007f3c785f96c0(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00005565d7bb9be7 CR3: 000000001af30000 CR4: 0000000000350ef0
Call Trace:
<NMI>
</NMI>
<TASK>
lock_acquire kernel/locking/lockdep.c:5753 [inline]
lock_acquire+0x1ae/0x520 kernel/locking/lockdep.c:5718
rcu_lock_acquire include/linux/rcupdate.h:301 [inline]
rcu_read_lock include/linux/rcupdate.h:747 [inline]
get_mem_cgroup_from_mm+0x4b/0x4c0 mm/memcontrol.c:1081
__mem_cgroup_charge+0x1c/0x140 mm/memcontrol.c:7224
mem_cgroup_charge include/linux/memcontrol.h:684 [inline]
__filemap_add_folio+0x88c/0xed0 mm/filemap.c:854
filemap_add_folio+0xb1/0x1e0 mm/filemap.c:937
page_cache_ra_unbounded+0x1d0/0x5f0 mm/readahead.c:250
do_page_cache_ra mm/readahead.c:299 [inline]
page_cache_ra_order+0x72b/0xa80 mm/readahead.c:546
do_sync_mmap_readahead mm/filemap.c:3141 [inline]
filemap_fault+0x16a8/0x3570 mm/filemap.c:3233
__do_fault+0x107/0x600 mm/memory.c:4265
do_cow_fault mm/memory.c:4662 [inline]
do_fault mm/memory.c:4764 [inline]
do_pte_missing mm/memory.c:3730 [inline]
handle_pte_fault mm/memory.c:5038 [inline]
__handle_mm_fault+0x3a8d/0x3d70 mm/memory.c:5179
handle_mm_fault+0x47a/0xa10 mm/memory.c:5344
do_user_addr_fault+0x3d1/0x1000 arch/x86/mm/fault.c:1413
handle_page_fault arch/x86/mm/fault.c:1505 [inline]
exc_page_fault+0x5d/0xc0 arch/x86/mm/fault.c:1561
asm_exc_page_fault+0x26/0x30 arch/x86/include/asm/idtentry.h:570
RIP: 0010:rep_movs_alternative+0x57/0x70 arch/x86/lib/copy_user_64.S:80
Code: 00 66 90 48 8b 06 48 89 07 48 83 c6 08 48 83 c7 08 83 e9 08 74 df 83 f9 08 73 e8 eb c9 eb 01 c3 48 89 c8 48 c1 e9 03 83 e0 07 <f3> 48 a5 89 c1 85 c9 75 b3 c3 48 8d 0c c8 eb ac 66 0f 1f 84 00 00
RSP: 0018:ffffc90004427bb0 EFLAGS: 00050246
RAX: 0000000000000000 RBX: 0000000000000040 RCX: 0000000000000008
RDX: ffffed1028a4ab48 RSI: ffff888145255a00 RDI: 0000000020019980
RBP: 0000000020019980 R08: 0000000000000000 R09: ffffed1028a4ab47
R10: ffff888145255a3f R11: 0000000000000001 R12: ffff888145255a00
R13: 00000000200199c0 R14: 0000000000000000 R15: dffffc0000000000
copy_user_generic arch/x86/include/asm/uaccess_64.h:112 [inline]
raw_copy_to_user arch/x86/include/asm/uaccess_64.h:133 [inline]
_copy_to_user lib/usercopy.c:41 [inline]
_copy_to_user+0xa8/0xb0 lib/usercopy.c:34
copy_to_user include/linux/uaccess.h:191 [inline]
rng_dev_read+0x184/0x580 drivers/char/hw_random/core.c:255
do_loop_readv_writev fs/read_write.c:755 [inline]
do_loop_readv_writev fs/read_write.c:743 [inline]
do_iter_read+0x567/0x830 fs/read_write.c:797
vfs_readv+0x12d/0x1a0 fs/read_write.c:915
do_preadv fs/read_write.c:1007 [inline]
__do_sys_preadv fs/read_write.c:1057 [inline]
__se_sys_preadv fs/read_write.c:1052 [inline]
__x64_sys_preadv+0x228/0x300 fs/read_write.c:1052
do_syscall_x64 arch/x86/entry/common.c:51 [inline]
do_syscall_64+0x40/0x110 arch/x86/entry/common.c:82
entry_SYSCALL_64_after_hwframe+0x63/0x6b
RIP: 0033:0x7f3c78638b29
Code: Unable to access opcode bytes at 0x7f3c78638aff.
RSP: 002b:00007f3c785f9168 EFLAGS: 00000246 ORIG_RAX: 0000000000000127
RAX: ffffffffffffffda RBX: 00007f3c786c2328 RCX: 00007f3c78638b29
RDX: 0000000000000001 RSI: 0000000020001880 RDI: 0000000000000003
RBP: 00007f3c786c2320 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00007f3c786c232c
R13: 0000000000000000 R14: 00007ffc8a220310 R15: 00007ffc8a2203f8
</TASK>
INFO: NMI handler (nmi_cpu_backtrace_handler) took too long to run: 1.464 msecs


---
This report is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzk...@googlegroups.com.

syzbot will keep track of this issue. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.

If the report is already addressed, let syzbot know by replying with:
#syz fix: exact-commit-title

If you want syzbot to run the reproducer, reply with:
#syz test: git://repo/address.git branch-or-commit-hash
If you attach or paste a git patch, syzbot will apply it before testing.

If you want to overwrite report's subsystems, reply with:
#syz set subsystems: new-subsystem
(See the list of subsystem names on the web dashboard)

If the report is a duplicate of another one, reply with:
#syz dup: exact-subject-of-another-report

If you want to undo deduplication, reply with:
#syz undup

Hillf Danton

unread,
Nov 25, 2023, 8:57:42 PM11/25/23
to syzbot, linux-...@vger.kernel.org, syzkall...@googlegroups.com
On Sat, 25 Nov 2023 05:21:27 -0800
> syzbot found the following issue on:
>
> HEAD commit: 98b1cc82c4af Linux 6.7-rc2
> git tree: upstream
> C reproducer: https://syzkaller.appspot.com/x/repro.c?x=14b83b84e80000

#syz test https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master

--- x/drivers/char/hw_random/core.c
+++ y/drivers/char/hw_random/core.c
@@ -213,6 +213,7 @@ static ssize_t rng_dev_read(struct file
int err = 0;
int bytes_read, len;
struct hwrng *rng;
+ char rnd[32];

while (size) {
rng = get_current_rng();
@@ -245,24 +246,28 @@ static ssize_t rng_dev_read(struct file
err = -EAGAIN;
goto out_unlock_reading;
}
+ mutex_unlock(&reading_mutex);
} else {
len = data_avail;
if (len > size)
len = size;
+ if (len > 32)
+ len = 32;

data_avail -= len;

- if (copy_to_user(buf + ret, rng_buffer + data_avail,
- len)) {
+ memcpy(rnd, rng_buffer + data_avail, len);
+ mutex_unlock(&reading_mutex);
+
+ if (copy_to_user(buf + ret, rnd, len)) {
err = -EFAULT;
- goto out_unlock_reading;
+ goto out_put;
}

size -= len;
ret += len;
}

- mutex_unlock(&reading_mutex);
put_rng(rng);

if (need_resched())
--

syzbot

unread,
Nov 25, 2023, 9:20:06 PM11/25/23
to hda...@sina.com, linux-...@vger.kernel.org, syzkall...@googlegroups.com
Hello,

syzbot has tested the proposed patch and the reproducer did not trigger any issue:

Reported-and-tested-by: syzbot+c52ab1...@syzkaller.appspotmail.com

Tested on:

commit: b46ae77f Merge tag 'xfs-6.7-fixes-3' of git://git.kern..
git tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master
console output: https://syzkaller.appspot.com/x/log.txt?x=129d420ce80000
kernel config: https://syzkaller.appspot.com/x/.config?x=6ae1a4ee971a7305
dashboard link: https://syzkaller.appspot.com/bug?extid=c52ab18308964d248092
compiler: gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40
patch: https://syzkaller.appspot.com/x/patch.diff?x=11f9fd9f680000

Note: testing is done by a robot and is best-effort only.

Edward Adam Davis

unread,
Nov 25, 2023, 10:53:06 PM11/25/23
to syzbot+c52ab1...@syzkaller.appspotmail.com, syzkall...@googlegroups.com
please test task hung in hwrng_fillfn

#syz test https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 98b1cc82c4af

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 420f155d251f..ebfa5eab84df 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -501,7 +501,10 @@ static int hwrng_fillfn(void *unused)
rng = get_current_rng();
if (IS_ERR(rng) || !rng)
break;
- mutex_lock(&reading_mutex);
+ if (mutex_lock_interruptible(&reading_mutex)) {
+ put_rng(rng);
+ return = -ERESTARTSYS;
+ }
rc = rng_get_data(rng, rng_fillbuf,
rng_buffer_size(), 1);
if (current_quality != rng->quality)

syzbot

unread,
Nov 25, 2023, 10:58:06 PM11/25/23
to ead...@qq.com, linux-...@vger.kernel.org, syzkall...@googlegroups.com
Hello,

syzbot tried to test the proposed patch but the build/boot failed:

drivers/char/hw_random/core.c:506:32: error: expected expression before '=' token


Tested on:

commit: 98b1cc82 Linux 6.7-rc2
git tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel config: https://syzkaller.appspot.com/x/.config?x=6ae1a4ee971a7305
dashboard link: https://syzkaller.appspot.com/bug?extid=c52ab18308964d248092
compiler: gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40
patch: https://syzkaller.appspot.com/x/patch.diff?x=1627b408e80000

Edward Adam Davis

unread,
Nov 25, 2023, 11:05:56 PM11/25/23
to syzbot+c52ab1...@syzkaller.appspotmail.com, syzkall...@googlegroups.com
please test task hung in hwrng_fillfn

#syz test https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 98b1cc82c4af

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 420f155d251f..883412bbf9f2 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -225,17 +225,18 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf,
goto out;
}

- if (mutex_lock_interruptible(&reading_mutex)) {
- err = -ERESTARTSYS;
- goto out_put;
- }
if (!data_avail) {
+ if (mutex_lock_interruptible(&reading_mutex)) {
+ err = -ERESTARTSYS;
+ goto out_put;
+ }
bytes_read = rng_get_data(rng, rng_buffer,
rng_buffer_size(),
!(filp->f_flags & O_NONBLOCK));
+ mutex_unlock(&reading_mutex);
if (bytes_read < 0) {
err = bytes_read;
- goto out_unlock_reading;
+ goto out_put;
}
data_avail = bytes_read;
}
@@ -243,7 +244,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf,
if (!data_avail) {
if (filp->f_flags & O_NONBLOCK) {
err = -EAGAIN;
- goto out_unlock_reading;
+ goto out_put;
}
} else {
len = data_avail;
@@ -255,14 +256,13 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf,
if (copy_to_user(buf + ret, rng_buffer + data_avail,
len)) {
err = -EFAULT;
- goto out_unlock_reading;
+ goto out_put;
}

size -= len;
ret += len;
}

- mutex_unlock(&reading_mutex);
put_rng(rng);

if (need_resched())
@@ -276,8 +276,6 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf,
out:
return ret ? : err;

-out_unlock_reading:
- mutex_unlock(&reading_mutex);
out_put:
put_rng(rng);
goto out;
@@ -501,7 +499,10 @@ static int hwrng_fillfn(void *unused)

Edward Adam Davis

unread,
Nov 25, 2023, 11:06:51 PM11/25/23
to syzbot+c52ab1...@syzkaller.appspotmail.com, syzkall...@googlegroups.com
+ return -ERESTARTSYS;

syzbot

unread,
Nov 25, 2023, 11:11:05 PM11/25/23
to ead...@qq.com, linux-...@vger.kernel.org, syzkall...@googlegroups.com
Hello,

syzbot tried to test the proposed patch but the build/boot failed:

drivers/char/hw_random/core.c:504:32: error: expected expression before '=' token


Tested on:

commit: 98b1cc82 Linux 6.7-rc2
git tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel config: https://syzkaller.appspot.com/x/.config?x=6ae1a4ee971a7305
dashboard link: https://syzkaller.appspot.com/bug?extid=c52ab18308964d248092
compiler: gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40
patch: https://syzkaller.appspot.com/x/patch.diff?x=169076d0e80000

syzbot

unread,
Nov 25, 2023, 11:37:06 PM11/25/23
to ead...@qq.com, linux-...@vger.kernel.org, syzkall...@googlegroups.com
Hello,

syzbot has tested the proposed patch and the reproducer did not trigger any issue:

Reported-and-tested-by: syzbot+c52ab1...@syzkaller.appspotmail.com

Tested on:

commit: 98b1cc82 Linux 6.7-rc2
git tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
console output: https://syzkaller.appspot.com/x/log.txt?x=14d59724e80000
kernel config: https://syzkaller.appspot.com/x/.config?x=6ae1a4ee971a7305
dashboard link: https://syzkaller.appspot.com/bug?extid=c52ab18308964d248092
compiler: gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40
patch: https://syzkaller.appspot.com/x/patch.diff?x=103c62e8e80000

Edward Adam Davis

unread,
Nov 26, 2023, 12:35:51 AM11/26/23
to syzbot+c52ab1...@syzkaller.appspotmail.com, syzkall...@googlegroups.com

syzbot

unread,
Nov 26, 2023, 1:08:07 AM11/26/23
to ead...@qq.com, linux-...@vger.kernel.org, syzkall...@googlegroups.com
Hello,

syzbot has tested the proposed patch and the reproducer did not trigger any issue:

Reported-and-tested-by: syzbot+c52ab1...@syzkaller.appspotmail.com

Tested on:

commit: 98b1cc82 Linux 6.7-rc2
git tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
console output: https://syzkaller.appspot.com/x/log.txt?x=161857af680000
kernel config: https://syzkaller.appspot.com/x/.config?x=6ae1a4ee971a7305
dashboard link: https://syzkaller.appspot.com/bug?extid=c52ab18308964d248092
compiler: gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40
patch: https://syzkaller.appspot.com/x/patch.diff?x=16732694e80000

Edward Adam Davis

unread,
Nov 26, 2023, 2:08:02 AM11/26/23
to syzbot+c52ab1...@syzkaller.appspotmail.com, da...@davemloft.net, her...@gondor.apana.org.au, linux-...@vger.kernel.org, linux-...@vger.kernel.org, oli...@selenic.com, syzkall...@googlegroups.com
[Syz report]
INFO: task hwrng:749 blocked for more than 143 seconds.
Not tainted 6.7.0-rc2-syzkaller #0
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:hwrng state:D stack:29040 pid:749 tgid:749 ppid:2 flags:0x00004000
Call Trace:
<TASK>
context_switch kernel/sched/core.c:5376 [inline]
__schedule+0xedb/0x5af0 kernel/sched/core.c:6688
__schedule_loop kernel/sched/core.c:6763 [inline]
schedule+0xe9/0x270 kernel/sched/core.c:6778
schedule_preempt_disabled+0x13/0x20 kernel/sched/core.c:6835
__mutex_lock_common kernel/locking/mutex.c:679 [inline]
__mutex_lock+0x5b9/0x9d0 kernel/locking/mutex.c:747
hwrng_fillfn+0x145/0x430 drivers/char/hw_random/core.c:504
kthread+0x2c6/0x3a0 kernel/kthread.c:388
ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147
ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242
</TASK>

...
[Analysis]
rng_dev_read() holds reading_mutex for too long, so the hwrng kthread running
hwrng_fillfn() blocks on that mutex until the hung-task watchdog reports it.

[Fix]
Move code that does not need the protection of this lock out of the critical section.

Reported-and-tested-by: syzbot+c52ab1...@syzkaller.appspotmail.com
Signed-off-by: Edward Adam Davis <ead...@qq.com>
---
drivers/char/hw_random/core.c | 23 ++++++++++++-----------
1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 420f155d251f..7323ddc958ce 100644
--
2.25.1

Edward Adam Davis

unread,
Nov 26, 2023, 2:11:47 AM11/26/23
to syzbot+c52ab1...@syzkaller.appspotmail.com, da...@davemloft.net, her...@gondor.apana.org.au, linux-...@vger.kernel.org, linux-...@vger.kernel.org, oli...@selenic.com, syzkall...@googlegroups.com
[Syz report]
INFO: task hwrng:749 blocked for more than 143 seconds.
Not tainted 6.7.0-rc2-syzkaller #0
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:hwrng state:D stack:29040 pid:749 tgid:749 ppid:2 flags:0x00004000
Call Trace:
<TASK>
context_switch kernel/sched/core.c:5376 [inline]
__schedule+0xedb/0x5af0 kernel/sched/core.c:6688
__schedule_loop kernel/sched/core.c:6763 [inline]
schedule+0xe9/0x270 kernel/sched/core.c:6778
schedule_preempt_disabled+0x13/0x20 kernel/sched/core.c:6835
__mutex_lock_common kernel/locking/mutex.c:679 [inline]
__mutex_lock+0x5b9/0x9d0 kernel/locking/mutex.c:747
hwrng_fillfn+0x145/0x430 drivers/char/hw_random/core.c:504
kthread+0x2c6/0x3a0 kernel/kthread.c:388
ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147
ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242
</TASK>

...
--
2.25.1

Edward Adam Davis

unread,
Nov 28, 2023, 9:59:22 PM11/28/23
to syzbot+c52ab1...@syzkaller.appspotmail.com, syzkall...@googlegroups.com
please test WARNING in rate_control_rate_init

#syz test https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 6e2332e0ab53

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 606b1b2e4123..13d52452a124 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1796,7 +1796,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
!params->supported_rates_len &&
!params->ht_capa && !params->vht_capa &&
!params->he_capa && !params->eht_capa &&
- !params->opmode_notif_used)
+ !params->opmode_notif_used && 0)
return 0;

if (!link || !link_sta)
@@ -1817,6 +1817,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
} else if (new_link) {
return -EINVAL;
}
+ printk("b, %p \n", rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf));

if (params->txpwr_set) {
link_sta->pub->txpwr.type = params->txpwr.type;
@@ -1868,6 +1869,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
params->opmode_notif,
sband->band);
}
+ printk("e, %p \n", rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf));

return ret;
}
@@ -1982,6 +1984,10 @@ static int sta_apply_parameters(struct ieee80211_local *local,
if (params->listen_interval >= 0)
sta->listen_interval = params->listen_interval;

+ printk("b, stp: %d, sa: %d, src: %d\n",
+ test_sta_flag(sta, WLAN_STA_TDLS_PEER),
+ test_sta_flag(sta, WLAN_STA_ASSOC),
+ test_sta_flag(sta, WLAN_STA_RATE_CONTROL));
ret = sta_link_apply_parameters(local, sta, false,
&params->link_sta_params);
if (ret)
@@ -1996,6 +2002,10 @@ static int sta_apply_parameters(struct ieee80211_local *local,
if (params->airtime_weight)
sta->airtime_weight = params->airtime_weight;

+ printk("a, stp: %d, sa: %d, src: %d\n",
+ test_sta_flag(sta, WLAN_STA_TDLS_PEER),
+ test_sta_flag(sta, WLAN_STA_ASSOC),
+ test_sta_flag(sta, WLAN_STA_RATE_CONTROL));
/* set the STA state after all sta info from usermode has been set */
if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) ||
set & BIT(NL80211_STA_FLAG_ASSOCIATED)) {

syzbot

unread,
Nov 28, 2023, 10:23:12 PM11/28/23
to ead...@qq.com, linux-...@vger.kernel.org, syzkall...@googlegroups.com
Hello,

syzbot has tested the proposed patch but the reproducer is still triggering an issue:
INFO: task hung in hwrng_fillfn

INFO: task hwrng:729 blocked for more than 143 seconds.
Not tainted 6.4.0-syzkaller-01647-g6e2332e0ab53-dirty #0
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:hwrng state:D stack:29760 pid:729 ppid:2 flags:0x00004000
Call Trace:
<TASK>
context_switch kernel/sched/core.c:5381 [inline]
__schedule+0xeda/0x5b60 kernel/sched/core.c:6710
schedule+0xe7/0x1b0 kernel/sched/core.c:6786
schedule_preempt_disabled+0x13/0x20 kernel/sched/core.c:6845
__mutex_lock_common kernel/locking/mutex.c:679 [inline]
__mutex_lock+0x5b4/0x990 kernel/locking/mutex.c:747
hwrng_fillfn+0x145/0x430 drivers/char/hw_random/core.c:505
kthread+0x2c4/0x3a0 kernel/kthread.c:389
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308
</TASK>

Showing all locks held in the system:
1 lock held by rcu_tasks_kthre/13:
#0: ffffffff8c9996f0 (rcu_tasks.tasks_gp_mutex){+.+.}-{3:3}, at: rcu_tasks_one_gp+0x2c/0xe30 kernel/rcu/tasks.h:522
1 lock held by rcu_tasks_trace/14:
#0: ffffffff8c9993f0 (rcu_tasks_trace.tasks_gp_mutex){+.+.}-{3:3}, at: rcu_tasks_one_gp+0x2c/0xe30 kernel/rcu/tasks.h:522
1 lock held by khungtaskd/28:
#0: ffffffff8c99a300 (rcu_read_lock){....}-{1:2}, at: debug_show_all_locks+0x55/0x340 kernel/locking/lockdep.c:6615
1 lock held by hwrng/729:
#0: ffffffff8d4887a8 (reading_mutex){+.+.}-{3:3}, at: hwrng_fillfn+0x145/0x430 drivers/char/hw_random/core.c:505
2 locks held by getty/4749:
#0: ffff8880287f3098 (&tty->ldisc_sem){++++}-{0:0}, at: tty_ldisc_ref_wait+0x24/0x80 drivers/tty/tty_ldisc.c:243
#1: ffffc900015b02f0 (&ldata->atomic_read_lock){+.+.}-{3:3}, at: n_tty_read+0xfc5/0x1460 drivers/tty/n_tty.c:2176
1 lock held by syz-executor.0/5449:
2 locks held by syz-executor.0/5450:
1 lock held by syz-executor.0/5744:
1 lock held by syz-executor.0/5762:

=============================================

NMI backtrace for cpu 1
CPU: 1 PID: 28 Comm: khungtaskd Not tainted 6.4.0-syzkaller-01647-g6e2332e0ab53-dirty #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106
nmi_cpu_backtrace+0x277/0x390 lib/nmi_backtrace.c:113
nmi_trigger_cpumask_backtrace+0x2ac/0x310 lib/nmi_backtrace.c:62
trigger_all_cpu_backtrace include/linux/nmi.h:148 [inline]
check_hung_uninterruptible_tasks kernel/hung_task.c:222 [inline]
watchdog+0xf29/0x11b0 kernel/hung_task.c:379
kthread+0x2c4/0x3a0 kernel/kthread.c:389
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308
</TASK>
Sending NMI from CPU 1 to CPUs 0:
NMI backtrace for cpu 0
CPU: 0 PID: 5744 Comm: syz-executor.0 Not tainted 6.4.0-syzkaller-01647-g6e2332e0ab53-dirty #0


Tested on:

commit: 6e2332e0 Merge tag 'cgroup-for-6.5' of git://git.kerne..
git tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
console output: https://syzkaller.appspot.com/x/log.txt?x=1271b7ece80000
kernel config: https://syzkaller.appspot.com/x/.config?x=102b18358d5797d8
dashboard link: https://syzkaller.appspot.com/bug?extid=c52ab18308964d248092
compiler: gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40
patch: https://syzkaller.appspot.com/x/patch.diff?x=147cc8b4e80000

Herbert Xu

unread,
Dec 1, 2023, 4:57:09 AM12/1/23
to Edward Adam Davis, syzbot+c52ab1...@syzkaller.appspotmail.com, da...@davemloft.net, linux-...@vger.kernel.org, linux-...@vger.kernel.org, oli...@selenic.com, syzkall...@googlegroups.com
Does this change anything at all? Please explain why it was holding
this lock for 143 seconds in the first place. If it's doing it in
rng_get_data, then your change has zero effect.

> @@ -501,7 +499,10 @@ static int hwrng_fillfn(void *unused)
> rng = get_current_rng();
> if (IS_ERR(rng) || !rng)
> break;
> - mutex_lock(&reading_mutex);
> + if (mutex_lock_interruptible(&reading_mutex)) {
> + put_rng(rng);
> + return -ERESTARTSYS;
> + }

No this is just the symptom. The real problem is why is the driver
spending 143 seconds in rng_get_data?

Cheers,
--
Email: Herbert Xu <her...@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

Edward Adam Davis

unread,
Dec 1, 2023, 6:38:00 AM12/1/23
to her...@gondor.apana.org.au, da...@davemloft.net, ead...@qq.com, linux-...@vger.kernel.org, linux-...@vger.kernel.org, oli...@selenic.com, syzbot+c52ab1...@syzkaller.appspotmail.com, syzkall...@googlegroups.com
Reduce the scope of critical zone protection.
The original critical zone contains a too large range, especially like
copy_to_user() should not be included in the critical zone.
>
> > @@ -501,7 +499,10 @@ static int hwrng_fillfn(void *unused)
> > rng = get_current_rng();
> > if (IS_ERR(rng) || !rng)
> > break;
> > - mutex_lock(&reading_mutex);
> > + if (mutex_lock_interruptible(&reading_mutex)) {
> > + put_rng(rng);
> > + return -ERESTARTSYS;
> > + }
>
> No this is just the symptom. The real problem is why is the driver
> spending 143 seconds in rng_get_data?
In the second version of the patch, I removed the change to hwrng_fillfn().
But for some reason, the V2 patch did not appear on the mailing list.

I think the 143s delay was caused by copy_to_user() taking too long while
reading_mutex was held.
So I narrowed the scope of the critical section and moved the copy_to_user()
call out of it.

Edward

Herbert Xu

unread,
Dec 1, 2023, 6:41:21 AM12/1/23
to Edward Adam Davis, da...@davemloft.net, linux-...@vger.kernel.org, linux-...@vger.kernel.org, oli...@selenic.com, syzbot+c52ab1...@syzkaller.appspotmail.com, syzkall...@googlegroups.com
On Fri, Dec 01, 2023 at 07:37:39PM +0800, Edward Adam Davis wrote:
>
> Reduce the scope of critical zone protection.
> The original critical zone contains a too large range, especially like
> copy_to_user() should not be included in the critical zone.

Which part in particular is taking 143 seconds? The buffer is
only 128 bytes long. Why is a 128-byte copy taking 143 seconds,
even with a page fault?

Edward Adam Davis

unread,
Dec 1, 2023, 7:34:24 AM12/1/23
to her...@gondor.apana.org.au, da...@davemloft.net, ead...@qq.com, linux-...@vger.kernel.org, linux-...@vger.kernel.org, oli...@selenic.com, syzbot+c52ab1...@syzkaller.appspotmail.com, syzkall...@googlegroups.com
On Fri, 1 Dec 2023 19:41:11 +0800, Herbert Xu wrote:
> >
> > Reduce the scope of critical zone protection.
> > The original critical zone contains a too large range, especially like
> > copy_to_user() should not be included in the critical zone.
>
> Which part in particular is taking 143 seconds? The buffer is
> only 128 bytes long. Why is a 128-byte copy taking 143 seconds,
> even with a page fault?
According to splat, after a page fault occurred, the attempt to retrieve
rcu_read_lock() failed, resulting in a timeout of 143s. This is my speculation.

Edward

Herbert Xu

unread,
Dec 1, 2023, 6:39:08 PM12/1/23
to Edward Adam Davis, da...@davemloft.net, linux-...@vger.kernel.org, linux-...@vger.kernel.org, oli...@selenic.com, syzbot+c52ab1...@syzkaller.appspotmail.com, syzkall...@googlegroups.com
On Fri, Dec 01, 2023 at 08:34:12PM +0800, Edward Adam Davis wrote:
>
> According to splat, after a page fault occurred, the attempt to retrieve
> rcu_read_lock() failed, resulting in a timeout of 143s. This is my speculation.

Oh I see what's going on. The reproducer is mapping /dev/hwrng, so
the write to user-space is then triggering another read which then
dead-locks.

Let me think about this.

Thanks,

Herbert Xu

unread,
Dec 1, 2023, 8:02:06 PM12/1/23
to Edward Adam Davis, da...@davemloft.net, linux-...@vger.kernel.org, linux-...@vger.kernel.org, oli...@selenic.com, syzbot+c52ab1...@syzkaller.appspotmail.com, syzkall...@googlegroups.com, Ian Molton, Rusty Russell
There is a dead-lock in the hwrng device read path. This triggers
when the user reads from /dev/hwrng into memory also mmap-ed from
/dev/hwrng. The resulting page fault triggers a recursive read
which then dead-locks.

Fix this by using a stack buffer when calling copy_to_user.

Reported-by: Edward Adam Davis <ead...@qq.com>
Reported-by: syzbot+c52ab1...@syzkaller.appspotmail.com
Fixes: 9996508b3353 ("hwrng: core - Replace u32 in driver API with byte array")
Cc: <sta...@vger.kernel.org>
Signed-off-by: Herbert Xu <her...@gondor.apana.org.au>

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 420f155d251f..a3bbdd6e60fc 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -23,10 +23,13 @@
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
+#include <linux/string.h>
#include <linux/uaccess.h>

#define RNG_MODULE_NAME "hw_random"

+#define RNG_BUFFER_SIZE (SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES)
+
static struct hwrng *current_rng;
/* the current rng has been explicitly chosen by user via sysfs */
static int cur_rng_set_by_user;
@@ -58,7 +61,7 @@ static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size,

static size_t rng_buffer_size(void)
{
- return SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES;
+ return RNG_BUFFER_SIZE;
}

static void add_early_randomness(struct hwrng *rng)
@@ -209,6 +212,7 @@ static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size,
static ssize_t rng_dev_read(struct file *filp, char __user *buf,
size_t size, loff_t *offp)
{
+ u8 buffer[RNG_BUFFER_SIZE];
ssize_t ret = 0;
int err = 0;
int bytes_read, len;
@@ -236,34 +240,37 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf,
if (bytes_read < 0) {
err = bytes_read;
goto out_unlock_reading;
- }
- data_avail = bytes_read;
- }
-
- if (!data_avail) {
- if (filp->f_flags & O_NONBLOCK) {
+ } else if (bytes_read == 0 &&
+ (filp->f_flags & O_NONBLOCK)) {
err = -EAGAIN;
goto out_unlock_reading;
}
- } else {
- len = data_avail;
+
+ data_avail = bytes_read;
+ }
+
+ len = data_avail;
+ if (len) {
if (len > size)
len = size;

data_avail -= len;

- if (copy_to_user(buf + ret, rng_buffer + data_avail,
- len)) {
+ memcpy(buffer, rng_buffer + data_avail, len);
+ }
+ mutex_unlock(&reading_mutex);
+ put_rng(rng);
+
+ if (len) {
+ if (copy_to_user(buf + ret, buffer, len)) {
err = -EFAULT;
- goto out_unlock_reading;
+ goto out;
}

size -= len;
ret += len;
}

- mutex_unlock(&reading_mutex);
- put_rng(rng);

if (need_resched())
schedule_timeout_interruptible(1);
@@ -274,6 +281,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf,
}
}
out:
+ memzero_explicit(buffer, sizeof(buffer));
return ret ? : err;

out_unlock_reading:

PrasannaKumar Muralidharan

unread,
Dec 12, 2023, 5:00:33 PM12/12/23
to Herbert Xu, Edward Adam Davis, da...@davemloft.net, linux-...@vger.kernel.org, linux-...@vger.kernel.org, oli...@selenic.com, syzbot+c52ab1...@syzkaller.appspotmail.com, syzkall...@googlegroups.com, Ian Molton, Rusty Russell
Reviewed-by: PrasannaKumar Muralidharan <prasanna...@gmail.com>

Regards,
PrasannaKumar
Reply all
Reply to author
Forward
0 new messages