mm: BUG in khugepaged_scan_mm_slot

Dmitry Vyukov

unread,

Apr 2, 2016, 5:48:42 AM4/2/16

to Andrew Morton, Kirill A. Shutemov, Vlastimil Babka, linu...@kvack.org, LKML, Hugh Dickins, Greg Thelen, Konstantin Khlebnikov, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin

Hello,

The following program triggers a BUG in khugepaged_scan_mm_slot:

vma ffff880032698f90 start 0000000020c57000 end 0000000020c58000
next ffff88003269a1b8 prev ffff88003269ac18 mm ffff88005e274780
prot 35 anon_vma ffff88003182c000 vm_ops (null)
pgoff fed00 file ffff8800324552c0 private_data (null)
flags: 0x5144477(read|write|exec|mayread|maywrite|mayexec|pfnmap|io|dontexpand|account)
------------[ cut here ]------------
kernel BUG at mm/huge_memory.c:2313!
invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN
Modules linked in:
CPU: 2 PID: 1180 Comm: khugepaged Not tainted 4.5.0-rc7+ #337
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
task: ffff88003d910000 ti: ffff88003da70000 task.ti: ffff88003da70000
RIP: 0010:[<ffffffff8178bd07>] [<ffffffff8178bd07>]
hugepage_vma_check+0x117/0x150
RSP: 0018:ffff88003da77bb0 EFLAGS: 00010286
RAX: 0000000000000001 RBX: ffff880032698f90 RCX: 0000000000000000
RDX: 0000000000000001 RSI: ffff88006d616d18 RDI: ffffed0007b4ef4c
RBP: ffff88003da77bc8 R08: 0000000000000001 R09: 0000000000000000
R10: 1ffff100064d31f2 R11: 0000000000000001 R12: 0000000000000001
R13: ffff880032698fe0 R14: 0000000000000806 R15: ffff880032698f90
FS: 0000000000000000(0000) GS:ffff88006d600000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 00007fe965ad9e78 CR3: 0000000007ae9000 CR4: 00000000000006e0
Stack:
0000000020c00000 0000000000000000 ffffffff88937638 ffff88003da77e00
ffffffff81790b33 0000000000000082 ffffffff87fc1580 0000000000000004
ffff88003da77c38 ffff88003d910810 0000000000000000 ffff88003d910000
Call Trace:
[< inline >] khugepaged_scan_mm_slot mm/huge_memory.c:2651
[< inline >] khugepaged_do_scan mm/huge_memory.c:2755
[<ffffffff81790b33>] khugepaged+0x993/0x48e0 mm/huge_memory.c:2790
[<ffffffff813c195f>] kthread+0x23f/0x2d0 drivers/block/aoe/aoecmd.c:1303
[<ffffffff866d1b2f>] ret_from_fork+0x3f/0x70 arch/x86/entry/entry_64.S:468
Code: 00 fc ff df 48 c1 ea 03 80 3c 02 00 75 2c 48 f7 43 50 88 44 44
10 41 bc 01 00 00 00 74 b4 e8 71 cf de ff 48 89 df e8 99 e5 f5 ff <0f>
0b 4c 89 ef e8 ff c0 fe ff e9 0a ff ff ff 4c 89 ef e8 f2 c0
RIP [<ffffffff8178bd07>] hugepage_vma_check+0x117/0x150 mm/huge_memory.c:2313
RSP <ffff88003da77bb0>
---[ end trace 61cae986a344948b ]---

The process itself hangs dead.

// autogenerated by syzkaller (http://github.com/google/syzkaller)
#include <pthread.h>
#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef SYS_userfaultfd
#define SYS_userfaultfd 323
#endif

/*
 * Result slots for the raw syscall return values of the steps below.
 * main() pre-fills the whole array with 0xff bytes (i.e. -1) before any
 * step runs, so a slot that was never written still reads as -1.
 */
long r[29];

/*
 * Thread entry point of the reproducer.
 *
 * arg is a small integer passed through the void* by main(); it selects
 * exactly one step (0..16) to execute. Each step issues one raw syscall
 * (some first store argument data at fixed addresses) and records the
 * return value in r[] so that later steps can reuse it, e.g. as a file
 * descriptor. Values of arg outside 0..16 do nothing.
 *
 * Always returns 0.
 */
void* thr(void* arg)
{
switch ((long)arg) {
case 0:
/*
 * Map 0xc59000 bytes at 0x20000000 (so up to 0x20c59000). Every fixed
 * address that later steps write through lies inside this range.
 * NOTE(review): prot 0x3 / flags 0x32 are presumably
 * PROT_READ|PROT_WRITE and MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS on
 * x86-64 Linux -- confirm against <sys/mman.h>.
 */
r[0] = syscall(SYS_mmap, 0x20000000ul, 0xc59000ul, 0x3ul, 0x32ul,
0xfffffffffffffffful, 0x0ul);
break;
case 1:
r[1] = syscall(SYS_accept, 0x1869ful, 0x20c51ffful, 0x20c51000ul, 0,
0, 0);
break;
case 2:
/* Uses the result of step 1 as the fd argument. */
r[2] = syscall(SYS_fcntl, r[1], 0x9ul, 0, 0, 0, 0);
break;
case 3:
/* Uses the result of step 2 as the second argument. */
r[3] = syscall(SYS_ioprio_set, 0x1ul, r[2], 0xfffffffffffffffful, 0,
0, 0);
break;
case 4:
/* Create a userfaultfd; r[4] is the fd used by steps 5 and 6. */
r[4] = syscall(SYS_userfaultfd, 0x0ul, 0, 0, 0, 0, 0);
break;
case 5:
/*
 * Build a 24-byte ioctl argument (three 64-bit words) in the mapping
 * and pass it to the userfaultfd.
 * NOTE(review): 0xc018aa3f looks like UFFDIO_API with api = 0xaa --
 * confirm against <linux/userfaultfd.h>.
 */
*(uint64_t*)0x20a49fe8 = (uint64_t)0xaa;
*(uint64_t*)0x20a49ff0 = (uint64_t)0x0;
*(uint64_t*)0x20a49ff8 = (uint64_t)0x0;
r[8] =
syscall(SYS_ioctl, r[4], 0xc018aa3ful, 0x20a49fe8ul, 0, 0, 0);
break;
case 6:
/*
 * Build a 32-byte ioctl argument (four 64-bit words) and pass it to the
 * userfaultfd.
 * NOTE(review): 0xc020aa00 looks like UFFDIO_REGISTER for the range
 * start 0x200cb000, length 0x800000, mode 1 -- confirm against
 * <linux/userfaultfd.h>.
 */
*(uint64_t*)0x20c4c000 = (uint64_t)0x200cb000;
*(uint64_t*)0x20c4c008 = (uint64_t)0x800000;
*(uint64_t*)0x20c4c010 = (uint64_t)0x1;
*(uint64_t*)0x20c4c018 = (uint64_t)0x0;
r[13] =
syscall(SYS_ioctl, r[4], 0xc020aa00ul, 0x20c4c000ul, 0, 0, 0);
break;
case 7:
r[14] = syscall(SYS_readahead, 0xfffffffffffffffful, 0x40ul, 0x6ul,
0, 0, 0);
break;
case 8:
r[15] = syscall(SYS_sched_getaffinity, 0x0ul, 0x8ul, 0x20472000ul,
0, 0, 0);
break;
case 9:
r[16] = syscall(SYS_prctl, 0xful, 0x205d5ff8ul, 0, 0, 0, 0);
break;
case 10:
/* Open /dev/hpet; r[18] is the fd used by steps 11, 12, 13 and 16. */
r[18] =
syscall(SYS_open, "/dev/hpet", 0x40ul, 0, 0, 0);
break;
case 11:
*(uint32_t*)0x20c56000 = (uint32_t)0x7fff;
r[20] = syscall(SYS_ioctl, r[18], 0x5420ul, 0x20c56000ul, 0, 0, 0);
break;
case 12:
/*
 * Map one page (0x1000 bytes) at 0x20c57000 backed by the /dev/hpet fd.
 * The range 0x20c57000-0x20c58000 is the one shown for the vma in the
 * crash dump that accompanies this reproducer.
 * NOTE(review): prot 0x7 / flags 0x812 are presumably
 * PROT_READ|PROT_WRITE|PROT_EXEC and MAP_PRIVATE|MAP_FIXED plus other
 * bits -- confirm against <sys/mman.h>.
 */
r[21] = syscall(SYS_mmap, 0x20c57000ul, 0x1000ul, 0x7ul, 0x812ul,
r[18], 0x0ul);
break;
case 13:
/*
 * Write two bytes at the end of the page mapped in step 12. The cast to
 * uint8_t keeps only the low byte of 0x100000001, so the second store
 * writes 0x01.
 */
*(uint8_t*)0x20c57ffe = (uint8_t)0x6;
*(uint8_t*)0x20c57fff = (uint8_t)0x100000001;
r[24] = syscall(SYS_ioctl, r[18], 0x541cul, 0x20c57ffeul, 0, 0, 0);
break;
case 14:
/* The 8 copied bytes spell the NUL-terminated path "./file0". */
memcpy((void*)0x20814ffd, "\x2e\x2f\x66\x69\x6c\x65\x30\x00", 8);
r[26] = syscall(SYS_creat, 0x20814ffdul, 0x80ul, 0, 0, 0, 0);
break;
case 15:
r[27] = syscall(SYS_pipe, 0x205a4ffful, 0, 0, 0, 0, 0);
break;
case 16:
/* Passes the /dev/hpet fd from step 10 as the socket argument. */
r[28] = syscall(SYS_accept4, r[18], 0x20b0112aul, 0x20c55ffeul,
0x80800ul, 0, 0);
break;
}
return 0;
}

/*
 * Reproducer driver.
 *
 * Pre-fills r[] with 0xff bytes (so every slot reads as -1), then starts
 * 17 threads, one per step of thr(), handing each its step number through
 * the void* argument. A 10 ms pause follows every pthread_create() call,
 * and a final 100 ms pause precedes the return. The threads are never
 * joined; the process simply exits with status 0.
 */
int main()
{
	pthread_t th[17];
	long step = 0;

	memset(r, -1, sizeof(r));

	while (step < 17) {
		/* The step index travels to thr() inside the pointer value. */
		pthread_create(&th[step], 0, thr, (void*)step);
		usleep(10000);
		step++;
	}

	usleep(100000);
	return 0;
}

For better reproducibility also do:

$ echo 0 > /sys/kernel/mm/transparent_hugepage/khugepaged/alloc_sleep_millisecs;
echo 0 > /sys/kernel/mm/transparent_hugepage/khugepaged/scan_sleep_millisecs

On commit 8e0f93cda48ed054e1216bab5c60017e1a5fc1e8.

Vlastimil Babka

unread,

Apr 4, 2016, 8:03:57 AM4/4/16

to Dmitry Vyukov, Andrew Morton, Kirill A. Shutemov, linu...@kvack.org, LKML, Hugh Dickins, Greg Thelen, Konstantin Khlebnikov, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin, Andrea Arcangeli

[+CC Andrea]

On 04/02/2016 11:48 AM, Dmitry Vyukov wrote:
> Hello,
>
> The following program triggers a BUG in khugepaged_scan_mm_slot:
>
>
> vma ffff880032698f90 start 0000000020c57000 end 0000000020c58000
> next ffff88003269a1b8 prev ffff88003269ac18 mm ffff88005e274780
> prot 35 anon_vma ffff88003182c000 vm_ops (null)
> pgoff fed00 file ffff8800324552c0 private_data (null)
> flags: 0x5144477(read|write|exec|mayread|maywrite|mayexec|pfnmap|io|dontexpand|account)
> ------------[ cut here ]------------
> kernel BUG at mm/huge_memory.c:2313!
> invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN

That's VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma) in
hugepage_vma_check().

#define VM_NO_THP (VM_SPECIAL | VM_HUGETLB | VM_SHARED | VM_MAYSHARE)

#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)

Of those, we have VM_IO | VM_DONTEXPAND.

I don't know if it's valid for a vma with anon_vma to have such flags,
if yes, we should probably modify hugepage_vma_check(). Called from
khugepaged_scan_mm_slot() it should just return false on VM_NO_THP.
Called from collapse_huge_page() it could keep the VM_BUG_ON. Or maybe
just have VM_BUG_ON(!hugepage_vma_check()) there? Hmm actually no,
there's a mmap_sem release for read and then acquire for write, so we
can't rely on the check done earlier from khugepaged_scan_mm_slot().

So we should probably just change the VM_BUG_ON to another "return
false" condition. Unless the VM_BUG_ON uncovered a real bug and the
earlier conditions in hugepage_vma_check() should guarantee the
VM_BUG_ON be false for any vma.

Kirill A. Shutemov

unread,

Apr 4, 2016, 8:06:28 AM4/4/16

to Vlastimil Babka, Dmitry Vyukov, Andrew Morton, Kirill A. Shutemov, linu...@kvack.org, LKML, Hugh Dickins, Greg Thelen, Konstantin Khlebnikov, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin, Andrea Arcangeli

http://lkml.kernel.org/r/145961146490.28194.16019687861681349309.stgit@zurg

--
Kirill A. Shutemov

Andrea Arcangeli

unread,

Apr 4, 2016, 1:15:27 PM4/4/16

to Kirill A. Shutemov, Vlastimil Babka, Dmitry Vyukov, Andrew Morton, Kirill A. Shutemov, linu...@kvack.org, LKML, Hugh Dickins, Greg Thelen, Konstantin Khlebnikov, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin

Hello,

That's not the only place that assumes vm_ops NULL means anonymous and
not VM_IO though, so I agree with Vlastimil we should think once more
about this fix, either that or extend it to other places.

I wonder if perhaps there was a mistake in checking vm_ops in the
first place and leaving the vm_ops check isn't the right fix. Wouldn't
it be more correct to apply a s/!vm_ops/!vm_file/ and not just there?
What problem would we then run into if we used !vm_file?

The assumption in this vm_ops check is that it was safer than a vm_file
check, but clearly it isn't, as some chardev is not setting vm_ops
(don't they need vm_ops->close?). But all chardevs have vm_file
set, so if we could use that instead, we can retain the VM_BUG_ON or
better convert it to a graceful warn on that bails out.

Thanks,
Andrea

JonD

unread,

Apr 1, 2019, 5:36:44 PM4/1/19

to syzkaller

Hi,

Seeing similar panic running on 4.14.90

What release was this seen on ? Was it resolved ?

Dmitry Vyukov

unread,

Apr 2, 2019, 1:44:49 AM4/2/19

to JonD, syzkaller

On Mon, Apr 1, 2019 at 11:36 PM JonD <b24wa...@gmail.com> wrote:
>
> Hi,
>
> Seeing similar panic running on 4.14.90
>
> What release was this seen on ? Was it resolved ?

I have no idea what this is.

> --
> You received this message because you are subscribed to the Google Groups "syzkaller" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to syzkaller+...@googlegroups.com.
> For more options, visit https://groups.google.com/d/optout.

JonD

unread,

Apr 2, 2019, 10:05:36 AM4/2/19

to syzkaller

It appears many of these panics are random and not reproducible .

Are these tests accurate ?

> To unsubscribe from this group and stop receiving emails from it, send an email to syzk...@googlegroups.com.

Dmitry Vyukov

unread,

Apr 2, 2019, 10:06:30 AM4/2/19

to JonD, syzkaller

On Tue, Apr 2, 2019 at 4:05 PM JonD <b24wa...@gmail.com> wrote:
>
>
> It appears many of these panics are random and not reproducible .
>
> Are these tests accurate ?

What panics and what tests?

> To unsubscribe from this group and stop receiving emails from it, send an email to syzkaller+...@googlegroups.com.

Reply all

Reply to author

Forward

mm: BUG in khugepaged_scan_mm_slot

Dmitry Vyukov

Vlastimil Babka

Kirill A. Shutemov

Andrea Arcangeli

JonD

Dmitry Vyukov

JonD

Dmitry Vyukov