INFO: task can't die in io_uring

syzbot

unread,

Aug 10, 2020, 4:37:19 PM8/10/20

to ax...@kernel.dk, io-u...@vger.kernel.org, linux-...@vger.kernel.org, linux-...@vger.kernel.org, syzkall...@googlegroups.com, vi...@zeniv.linux.org.uk

Hello,

syzbot found the following issue on:

HEAD commit: f80535b9 Add linux-next specific files for 20200810
git tree: linux-next
console output: https://syzkaller.appspot.com/x/log.txt?x=11df00d6900000
kernel config: https://syzkaller.appspot.com/x/.config?x=2055bd0d83d5ee16
dashboard link: https://syzkaller.appspot.com/bug?extid=6d70b15b0d106c3450c5
compiler: gcc (GCC) 10.1.0-syz 20200507

Unfortunately, I don't have any reproducer for this issue yet.

IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+6d70b1...@syzkaller.appspotmail.com

INFO: task syz-executor.5:31048 can't die for more than 143 seconds.
syz-executor.5 D28360 31048 7448 0x00000004
Call Trace:
context_switch kernel/sched/core.c:3778 [inline]
__schedule+0x8e5/0x21e0 kernel/sched/core.c:4527
schedule+0xd0/0x2a0 kernel/sched/core.c:4602
io_uring_cancel_files fs/io_uring.c:7897 [inline]
io_uring_flush+0x740/0xa90 fs/io_uring.c:7914
filp_close+0xb4/0x170 fs/open.c:1276
__close_fd+0x2f/0x50 fs/file.c:671
__do_sys_close fs/open.c:1295 [inline]
__se_sys_close fs/open.c:1293 [inline]
__x64_sys_close+0x69/0x100 fs/open.c:1293
do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x416981
Code: Bad RIP value.
RSP: 002b:00007ffe164f4ff0 EFLAGS: 00000293 ORIG_RAX: 0000000000000003
RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 0000000000416981
RDX: 0000000000000000 RSI: 00000000000010bc RDI: 0000000000000003
RBP: 0000000000000001 R08: 00000000102b50bc R09: 00000000102b50c0
R10: 00007ffe164f50e0 R11: 0000000000000293 R12: 0000000001191d50
R13: 0000000000126257 R14: ffffffffffffffff R15: 000000000118bf2c
INFO: task syz-executor.5:31048 blocked for more than 143 seconds.
Not tainted 5.8.0-next-20200810-syzkaller #0
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
syz-executor.5 D28360 31048 7448 0x00000004
Call Trace:
context_switch kernel/sched/core.c:3778 [inline]
__schedule+0x8e5/0x21e0 kernel/sched/core.c:4527
schedule+0xd0/0x2a0 kernel/sched/core.c:4602
io_uring_cancel_files fs/io_uring.c:7897 [inline]
io_uring_flush+0x740/0xa90 fs/io_uring.c:7914
filp_close+0xb4/0x170 fs/open.c:1276
__close_fd+0x2f/0x50 fs/file.c:671
__do_sys_close fs/open.c:1295 [inline]
__se_sys_close fs/open.c:1293 [inline]
__x64_sys_close+0x69/0x100 fs/open.c:1293
do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x416981
Code: Bad RIP value.
RSP: 002b:00007ffe164f4ff0 EFLAGS: 00000293 ORIG_RAX: 0000000000000003
RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 0000000000416981
RDX: 0000000000000000 RSI: 00000000000010bc RDI: 0000000000000003
RBP: 0000000000000001 R08: 00000000102b50bc R09: 00000000102b50c0
R10: 00007ffe164f50e0 R11: 0000000000000293 R12: 0000000001191d50
R13: 0000000000126257 R14: ffffffffffffffff R15: 000000000118bf2c

Showing all locks held in the system:
1 lock held by khungtaskd/1170:
#0: ffffffff89c66c40 (rcu_read_lock){....}-{1:2}, at: debug_show_all_locks+0x53/0x260 kernel/locking/lockdep.c:5825
1 lock held by in:imklog/6542:
#0: ffff88809e544630 (&f->f_pos_lock){+.+.}-{3:3}, at: __fdget_pos+0xe9/0x100 fs/file.c:930

=============================================

NMI backtrace for cpu 1
CPU: 1 PID: 1170 Comm: khungtaskd Not tainted 5.8.0-next-20200810-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
__dump_stack lib/dump_stack.c:77 [inline]
dump_stack+0x18f/0x20d lib/dump_stack.c:118
nmi_cpu_backtrace.cold+0x70/0xb1 lib/nmi_backtrace.c:101
nmi_trigger_cpumask_backtrace+0x1b3/0x223 lib/nmi_backtrace.c:62
trigger_all_cpu_backtrace include/linux/nmi.h:147 [inline]
check_hung_uninterruptible_tasks kernel/hung_task.c:253 [inline]
watchdog+0xd89/0xf30 kernel/hung_task.c:339
kthread+0x3b5/0x4a0 kernel/kthread.c:292
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294
Sending NMI from CPU 1 to CPUs 0:
NMI backtrace for cpu 0
CPU: 0 PID: 6543 Comm: rs:main Q:Reg Not tainted 5.8.0-next-20200810-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0033:0x55c2017dc011
Code: e7 e8 03 5d fc ff 4c 89 e7 48 89 c5 e8 d8 5c fc ff 48 63 54 24 2c 4c 63 e0 89 44 24 1c 48 89 d8 4c 01 e0 48 01 d0 80 7d 00 20 <41> 0f 95 c5 4d 8d 6c 05 13 41 8b 47 08 49 39 c5 72 13 4c 89 ee 4c
RSP: 002b:00007faa8110e810 EFLAGS: 00000202
RAX: 0000000000000032 RBX: 0000000000000009 RCX: 0000000000000000
RDX: 0000000000000007 RSI: 00007faa8110e840 RDI: 00007faa7401fb70
RBP: 00007faa7401fcb0 R08: 0000000000000000 R09: 0000000000000000
R10: 000055c201a19280 R11: 0000000000000000 R12: 0000000000000022
R13: 0000000000000000 R14: 00007faa7401fd68 R15: 00007faa78019c00
FS: 00007faa8110f700 GS: 0000000000000000

---
This report is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzk...@googlegroups.com.

syzbot will keep track of this issue. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.

Jens Axboe

unread,

Aug 10, 2020, 4:45:19 PM8/10/20

to syzbot, io-u...@vger.kernel.org, linux-...@vger.kernel.org, linux-...@vger.kernel.org, syzkall...@googlegroups.com, vi...@zeniv.linux.org.uk

#syz dup: INFO: task hung in io_uring_flush

--
Jens Axboe

Hillf Danton

unread,

Aug 10, 2020, 11:08:30 PM8/10/20

to syzbot, ax...@kernel.dk, io-u...@vger.kernel.org, linux-...@vger.kernel.org, linux-...@vger.kernel.org, Pavel Begunkov, syzkall...@googlegroups.com, vi...@zeniv.linux.org.uk, Markus Elfring, Hillf Danton

Mon, 10 Aug 2020 13:37:16 -0700

Looks like a wakeup is missing when flushing IO.
It's fixed by collecting the IOs in flight and waiting for all of them to
finish in one go, instead of waiting for them one after another. Sounds like
it fixes nothing.

--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1549,6 +1549,7 @@ static void io_dismantle_req(struct io_k

spin_lock_irqsave(&ctx->inflight_lock, flags);
list_del(&req->inflight_entry);
+ smp_mb();
if (waitqueue_active(&ctx->inflight_wait))
wake_up(&ctx->inflight_wait);
spin_unlock_irqrestore(&ctx->inflight_lock, flags);
@@ -7840,6 +7841,8 @@ static bool io_wq_files_match(struct io_
static void io_uring_cancel_files(struct io_ring_ctx *ctx,
struct files_struct *files)
{
+ LIST_HEAD(io_in_flight);
+
if (list_empty_careful(&ctx->inflight_list))
return;

@@ -7848,7 +7851,6 @@ static void io_uring_cancel_files(struct

while (!list_empty_careful(&ctx->inflight_list)) {
struct io_kiocb *cancel_req = NULL, *req;
- DEFINE_WAIT(wait);

spin_lock_irq(&ctx->inflight_lock);
list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
@@ -7861,8 +7863,7 @@ static void io_uring_cancel_files(struct
break;
}
if (cancel_req)
- prepare_to_wait(&ctx->inflight_wait, &wait,
- TASK_UNINTERRUPTIBLE);
+ list_move(&cancel_req->inflight_entry, &io_in_flight);
spin_unlock_irq(&ctx->inflight_lock);

/* We need to keep going until we don't find a matching req */
@@ -7884,19 +7885,15 @@ static void io_uring_cancel_files(struct
* Put inflight ref and overflow ref. If that's
* all we had, then we're done with this request.
*/
- if (refcount_sub_and_test(2, &cancel_req->refs)) {
+ if (refcount_sub_and_test(2, &cancel_req->refs))
io_free_req(cancel_req);
- finish_wait(&ctx->inflight_wait, &wait);
- continue;
- }
} else {
io_wq_cancel_work(ctx->io_wq, &cancel_req->work);
io_put_req(cancel_req);
}
-
- schedule();
- finish_wait(&ctx->inflight_wait, &wait);
}
+
+ wait_event(ctx->inflight_wait, list_empty(&io_in_flight));
}

static bool io_cancel_task_cb(struct io_wq_work *work, void *data)

Reply all

Reply to author

Forward

INFO: task can't die in io_uring_flush

syzbot

Jens Axboe

Hillf Danton