struct pid memory leak

104 views
Skip to first unread message

Dmitry Vyukov

unread,
Jan 22, 2016, 10:09:15 AM1/22/16
to Michal Hocko, Andrew Morton, Paul E. McKenney, Vladimir Davydov, Johannes Weiner, Eric Dumazet, LKML, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin
Hello,

The following program causes struct pid memory leak:

// autogenerated by syzkaller (http://github.com/google/syzkaller)
#include <pthread.h>
#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

long r[37];

/*
 * Thread body of the reproducer.
 *
 * arg is an integer step number smuggled through the void* argument. Each
 * step 0..8 issues one raw syscall (some after filling fixed addresses with
 * data) and stores the result in the global r[] array; any other value does
 * nothing. The function always returns 0.
 *
 * All pointers are hard-coded addresses inside the region that step 0 maps
 * at 0x20000000, so the later steps only touch valid memory if that mapping
 * succeeded first.
 */
void* thr(void* arg)
{
switch ((long)arg) {
case 0:
// Map 0xd000 bytes at the fixed address 0x20000000 with prot 0x3,
// flags 0x32 and fd -1 (presumably PROT_READ|PROT_WRITE and
// MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS on Linux -- TODO confirm).
r[0] = syscall(SYS_mmap, 0x20000000ul, 0xd000ul, 0x3ul, 0x32ul,
0xfffffffffffffffful, 0x0ul);
break;
case 1:
// socketpair(domain 1, type 2) -- presumably AF_UNIX/SOCK_DGRAM on Linux.
// The two descriptors are written at 0x20000ffc and 0x20001000; only the
// second one is kept, in r[2].
r[1] = syscall(SYS_socketpair, 0x1ul, 0x2ul, 0x0ul, 0x20000ffcul, 0,
0);
if (r[1] != -1)
r[2] = *(uint32_t*)0x20001000;
break;
case 2:
// accept() on the descriptor saved in r[2] (still -1 if step 1 failed or
// has not run).
r[3] =
syscall(SYS_accept, r[2], 0x20000ffbul, 0x20000ffful, 0, 0, 0);
break;
case 3:
// socketpair(domain 1, type 1) -- presumably AF_UNIX/SOCK_STREAM on Linux.
// r[5] receives the first descriptor and r[6] the second; steps 4-8
// operate on this pair.
r[4] = syscall(SYS_socketpair, 0x1ul, 0x1ul, 0x0ul, 0x20001ff8ul, 0,
0);
if (r[4] != -1)
r[5] = *(uint32_t*)0x20001ff8;
if (r[4] != -1)
r[6] = *(uint32_t*)0x20001ffc;
break;
case 4:
// Stage a 164-byte (0xa4) fuzzer-generated payload and write it to r[5].
memcpy((void*)0x2000bf5c,
"\xd4\x37\x4c\x81\xff\x25\x00\xf7\x44\x0d\x1a\xe2\x4d\xae"
"\x17\x36\xb0\xef\x85\xd0\xb6\xa2\x0a\x4c\x29\xf0\x43\x3c"
"\x2b\xab\xdf\x9f\x3e\x4b\x9c\x1b\xb0\x36\xce\xe7\x14\x2b"
"\xa4\x33\x47\xd5\x58\x76\x63\x83\x71\xb3\x95\x37\xca\x25"
"\x93\x3f\x46\xd7\xc0\x8f\x8e\x2a\xcf\x0d\x60\xb7\x62\xac"
"\xd9\xaf\x6e\x88\x3f\xe0\xbf\x94\xc3\x57\x74\x8d\x22\xed"
"\x61\x71\x85\x10\x64\x2d\x50\xdf\xae\x9a\xdd\xa2\x5e\x28"
"\xa3\xf8\x14\xf0\x94\x4a\xac\x82\x45\xed\x85\x7a\xb6\x2b"
"\xef\xb4\x0b\x78\xb8\x92\x30\xcc\x5d\xcc\x07\xbf\x70\x4e"
"\x1c\x10\x38\xde\x89\x58\x8b\x87\x97\xc9\x6a\x62\x84\x3b"
"\xcd\x37\xbb\x8d\x41\x50\x65\x24\xa8\x90\x85\xa7\x51\x32"
"\x58\xf9\x71\xb3\x0b\xf0\x0f\xe6\xc4\x81",
164);
r[8] = syscall(SYS_write, r[5], 0x2000bf5cul, 0xa4ul, 0, 0, 0);
break;
case 5:
// Build a 32-byte (0x20) record field by field at 0x2000cb16 and write it
// to r[5]. The (uint32_t) cast on the second field truncates the constant
// to 0xfffffffd.
*(uint32_t*)0x2000cb16 = (uint32_t)0x20;
*(uint32_t*)0x2000cb1a = (uint32_t)0xfffffffffffffffd;
*(uint64_t*)0x2000cb1e = (uint64_t)0x1;
*(uint32_t*)0x2000cb26 = (uint32_t)0xd51;
*(uint32_t*)0x2000cb2a = (uint32_t)0x3;
*(uint32_t*)0x2000cb2e = (uint32_t)0x9;
*(uint32_t*)0x2000cb32 = (uint32_t)0x9;
r[16] = syscall(SYS_write, r[5], 0x2000cb16ul, 0x20ul, 0, 0, 0);
break;
case 6:
// setsockopt(r[6], level 1, optname 0x10) with a 4-byte value of 7.
// NOTE(review): on Linux this looks like SOL_SOCKET/SO_PASSCRED, which
// would make received messages carry sender credentials -- confirm.
*(uint32_t*)0x20005ffc = (uint32_t)0x7;
r[18] = syscall(SYS_setsockopt, r[6], 0x1ul, 0x10ul, 0x20005ffcul,
0x4ul, 0);
break;
case 7:
// Stage a 234-byte (0xea) fuzzer-generated payload and send it on r[5]
// with flags 0x4000; the destination-address pointer aliases the payload
// buffer and the address length is 0.
memcpy((void*)0x20003000,
"\xad\xd4\xf6\xb6\x5d\x21\x41\x96\x29\xc7\x46\x59\xb5\x12"
"\x13\x1f\xc2\xab\x18\x66\x38\x2f\x01\xd0\x78\x07\x19\xe4"
"\x2f\xac\xa5\x81\xc9\x01\x6f\x8d\xeb\x2f\x06\x23\xc8\x42"
"\xf8\x6e\x04\xf6\xcf\x7e\x76\x1a\xb8\xe3\xff\x45\x30\x9b"
"\x0a\x9a\x0d\x1a\x6d\xfe\x01\x94\xc3\xc6\xfb\xc7\xd2\x7d"
"\xe3\x5f\xc9\xdb\xa8\xfc\x9a\x0c\xdf\x4a\xf9\x6c\xf5\xcd"
"\x20\x90\x16\xd6\x2a\xec\x79\xac\x6a\x04\x9d\x92\xd3\x7d"
"\x2c\xf5\x24\x60\xcc\x57\xb1\x1e\x2a\xf9\x33\x54\x7b\xd8"
"\x5b\x23\x26\x79\xdb\x89\x72\xf7\x17\xe0\x1c\x1f\x2e\xc0"
"\x23\x94\xc5\xb1\x7d\xea\x84\xd1\x40\x43\x8a\xc1\x89\xa2"
"\x72\xd8\x8a\xff\xf7\x30\xc9\x96\x5c\x84\x58\x4f\x7e\x04"
"\x84\x45\x1b\x83\x51\xb6\x90\x4a\x17\x4e\x95\x09\xb9\x37"
"\x6e\xe6\xb0\x5e\xb5\x11\xb6\x2f\x06\x75\x31\x57\xda\xc2"
"\xfe\x5d\x84\x0e\x6a\x29\xb7\xe6\x22\xf2\xc9\x00\xa5\x80"
"\x0f\x48\x42\x38\x5a\x66\x32\x91\x5a\xe5\x5e\xfe\xce\xc0"
"\x98\x16\x19\x39\x21\x4b\x60\xe1\xa5\x7a\xba\x62\xd4\x38"
"\x96\x2d\x79\x09\x30\x2c\x75\x54\x68\xca",
234);
r[20] = syscall(SYS_sendto, r[5], 0x20003000ul, 0xeaul, 0x4000ul,
0x20003000ul, 0x0ul);
break;
case 8:
// Hand-assemble the message header passed to recvmsg() at 0x2000a000.
// Assumes the x86-64 struct msghdr layout (name, namelen, iov, iovlen,
// control, controllen, flags) -- TODO confirm. Under that layout the name
// pointer refers back to the header itself, there are 4 iovec entries at
// 0x2000ac60, and 0x1000 bytes of control buffer at 0x2000a8b3.
*(uint64_t*)0x2000a000 = (uint64_t)0x2000a000;
*(uint32_t*)0x2000a008 = (uint32_t)0x1c;
*(uint64_t*)0x2000a010 = (uint64_t)0x2000ac60;
*(uint64_t*)0x2000a018 = (uint64_t)0x4;
*(uint64_t*)0x2000a020 = (uint64_t)0x2000a8b3;
*(uint64_t*)0x2000a028 = (uint64_t)0x1000;
*(uint32_t*)0x2000a030 = (uint32_t)0x0;
// Four (pointer, length) pairs, presumably the iovec array referenced above.
*(uint64_t*)0x2000ac60 = (uint64_t)0x2000afb4;
*(uint64_t*)0x2000ac68 = (uint64_t)0x7a;
*(uint64_t*)0x2000ac70 = (uint64_t)0x2000affe;
*(uint64_t*)0x2000ac78 = (uint64_t)0x10;
*(uint64_t*)0x2000ac80 = (uint64_t)0x2000afe2;
*(uint64_t*)0x2000ac88 = (uint64_t)0x2f;
*(uint64_t*)0x2000ac90 = (uint64_t)0x2000afdb;
*(uint64_t*)0x2000ac98 = (uint64_t)0xd4;
// Receive on r[6], the peer of the socket written to in steps 4, 5 and 7,
// with flags 0x12100.
r[36] =
syscall(SYS_recvmsg, r[6], 0x2000a000ul, 0x12100ul, 0, 0, 0);
break;
}
return 0;
}

/*
 * Reproducer driver.
 *
 * Fills r[] with 0xff bytes so that every slot reads as -1, then starts the
 * nine thr() steps twice over. In the first pass each thread is followed by
 * a 10 ms pause; in the second pass only even-numbered steps are followed by
 * a pause, so neighbouring steps run closer together. The threads are never
 * joined: after a final 100 ms wait main() returns.
 */
int main()
{
    pthread_t th[9];
    long pass;
    long step;

    memset(r, -1, sizeof(r));

    for (pass = 0; pass < 2; pass++) {
        for (step = 0; step < 9; step++) {
            pthread_create(&th[step], 0, thr, (void*)step);
            /* pass 0: pause after every thread; pass 1: only after even steps */
            if (pass == 0 || step % 2 == 0)
                usleep(10000);
        }
    }

    usleep(100000);
    return 0;
}

unreferenced object 0xffff8800324af200 (size 112):
comm "syz-executor", pid 18413, jiffies 4295500287 (age 14.321s)
hex dump (first 32 bytes):
01 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 ................
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
backtrace:
[<ffffffff86315673>] kmemleak_alloc+0x63/0xa0 mm/kmemleak.c:916
[< inline >] kmemleak_alloc_recursive include/linux/kmemleak.h:47
[< inline >] slab_post_alloc_hook mm/slub.c:1337
[< inline >] slab_alloc_node mm/slub.c:2596
[< inline >] slab_alloc mm/slub.c:2604
[<ffffffff81758b49>] kmem_cache_alloc+0x149/0x2d0 mm/slub.c:2609
[<ffffffff813adabd>] alloc_pid+0x5d/0xc90 kernel/pid.c:306
[<ffffffff8134de09>] copy_process.part.35+0x3759/0x57a0 kernel/fork.c:1462
[< inline >] copy_process kernel/fork.c:1274
[<ffffffff8135017c>] _do_fork+0x1bc/0xcb0 kernel/fork.c:1723
[< inline >] SYSC_clone kernel/fork.c:1832
[<ffffffff81350d47>] SyS_clone+0x37/0x50 kernel/fork.c:1826
[<ffffffff86332bb6>] entry_SYSCALL_64_fastpath+0x16/0x7a
arch/x86/entry/entry_64.S:185
[<ffffffffffffffff>] 0xffffffffffffffff


# cat /proc/slabinfo | grep pid
pid 297 532 576 28 4 : tunables 0 0
0 : slabdata 19 19 0
...
pid 412 532 576 28 4 : tunables 0 0
0 : slabdata 19 19 0
...
pid 1107 1176 576 28 4 : tunables 0 0
0 : slabdata 42 42 0
...
pid 1545 1652 576 28 4 : tunables 0 0
0 : slabdata 59 59 0


On commit 30f05309bde49295e02e45c7e615f73aa4e0ccc2 (Jan 20).

Eric Dumazet

unread,
Jan 22, 2016, 11:50:03 AM1/22/16
to Dmitry Vyukov, netdev, willy tarreau, Rainer Weikusat, Michal Hocko, Andrew Morton, Paul E. McKenney, Vladimir Davydov, Johannes Weiner, Eric Dumazet, LKML, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin
CC netdev, as it looks like some af_unix issue ...

Willy Tarreau

unread,
Jan 23, 2016, 1:00:17 PM1/23/16
to Eric Dumazet, Dmitry Vyukov, netdev, Rainer Weikusat, Michal Hocko, Andrew Morton, Paul E. McKenney, Vladimir Davydov, Johannes Weiner, Eric Dumazet, LKML, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin
Hi Eric, Dmitry,

On Fri, Jan 22, 2016 at 08:50:01AM -0800, Eric Dumazet wrote:
> CC netdev, as it looks some af_unix issue ...
>
> On Fri, 2016-01-22 at 16:08 +0100, Dmitry Vyukov wrote:
> > Hello,
> >
> > The following program causes struct pid memory leak:
> >
> > // autogenerated by syzkaller (http://github.com/google/syzkaller)
(...)
> > unreferenced object 0xffff8800324af200 (size 112):
> > comm "syz-executor", pid 18413, jiffies 4295500287 (age 14.321s)
> > hex dump (first 32 bytes):
> > 01 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 ................
> > 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
> > backtrace:
> > [<ffffffff86315673>] kmemleak_alloc+0x63/0xa0 mm/kmemleak.c:916
> > [< inline >] kmemleak_alloc_recursive include/linux/kmemleak.h:47
(...)
> > On commit 30f05309bde49295e02e45c7e615f73aa4e0ccc2 (Jan 20).

I can't reproduce this with the indicated commit. I'm unsure how/what
I'm supposed to see. Is a certain config needed? I've enabled kmemleak
in my .config but there is too little information here to go further,
unfortunately.

Regards,
Willy

Dmitry Vyukov

unread,
Jan 23, 2016, 1:14:53 PM1/23/16
to Willy Tarreau, Eric Dumazet, netdev, Rainer Weikusat, Michal Hocko, Andrew Morton, Paul E. McKenney, Vladimir Davydov, Johannes Weiner, Eric Dumazet, LKML, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin
Hi Willy,

I've attached my .config.
Also run this program in a parallel loop. I think it's leaking not
every time, probably some race is involved.
.config

Willy Tarreau

unread,
Jan 23, 2016, 1:40:13 PM1/23/16
to Dmitry Vyukov, Eric Dumazet, netdev, Rainer Weikusat, Michal Hocko, Andrew Morton, Paul E. McKenney, Vladimir Davydov, Johannes Weiner, Eric Dumazet, LKML, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin
On Sat, Jan 23, 2016 at 07:14:33PM +0100, Dmitry Vyukov wrote:
> I've attached my .config.
> Also run this program in a parallel loop. I think it's leaking not
> every time, probably some race is involved.

Thank you. Just in order to confirm, am I supposed to see the
messages you quoted in dmesg ?

Thanks,
Willy

Dmitry Vyukov

unread,
Jan 23, 2016, 1:47:05 PM1/23/16
to Willy Tarreau, Eric Dumazet, netdev, Rainer Weikusat, Michal Hocko, Andrew Morton, Paul E. McKenney, Vladimir Davydov, Johannes Weiner, Eric Dumazet, LKML, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin
I think the simplest way to confirm that you can reproduce it locally
is to check /proc/slabinfo. When I run this program in a parallel
loop, number of objects in pid cache was constantly growing:

# cat /proc/slabinfo | grep pid
pid 297 532 576 28 4 : tunables 0 0
0 : slabdata 19 19 0
...
pid 412 532 576 28 4 : tunables 0 0
0 : slabdata 19 19 0
...
pid 1107 1176 576 28 4 : tunables 0 0
0 : slabdata 42 42 0
...
pid 1545 1652 576 28 4 : tunables 0 0
0 : slabdata 59 59 0


If you want to use kmemleak, then you need to run this program in a
parallel loop for some time, then stop it and then:

$ echo scan > /sys/kernel/debug/kmemleak
$ cat /sys/kernel/debug/kmemleak

If kmemleak has detected any leaks, cat will show them. I noticed that
kmemleak can report leaks with a significant delay, so I usually scan
at least 5 times.

Eric Dumazet

unread,
Jan 23, 2016, 2:50:15 PM1/23/16
to Dmitry Vyukov, Willy Tarreau, Eric Dumazet, netdev, Rainer Weikusat, Michal Hocko, Andrew Morton, Paul E. McKenney, Vladimir Davydov, Johannes Weiner, LKML, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin
Note that kmemleak is not needed.

Just run a normal kernel (possibly using the slab_nomerge=1 boot cmd to
make sure the 'pid' slab is not shared)

It seems that the bug is rather old, as linux-4.0 has it.

Willy Tarreau

unread,
Jan 23, 2016, 9:11:50 PM1/23/16
to Dmitry Vyukov, Eric Dumazet, netdev, Rainer Weikusat, Michal Hocko, Andrew Morton, Paul E. McKenney, Vladimir Davydov, Johannes Weiner, Eric Dumazet, LKML, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin
On Sat, Jan 23, 2016 at 07:46:45PM +0100, Dmitry Vyukov wrote:
> On Sat, Jan 23, 2016 at 7:40 PM, Willy Tarreau <w...@1wt.eu> wrote:
> > On Sat, Jan 23, 2016 at 07:14:33PM +0100, Dmitry Vyukov wrote:
> >> I've attached my .config.
> >> Also run this program in a parallel loop. I think it's leaking not
> >> every time, probably some race is involved.
> >
> > Thank you. Just in order to confirm, am I supposed to see the
> > messages you quoted in dmesg ?
>
>
> I think the simplest way to confirm that you can reproduce it locally
> is to check /proc/slabinfo. When I run this program in a parallel
> loop, number of objects in pid cache was constantly growing:
>
> # cat /proc/slabinfo | grep pid
> pid 297 532 576 28 4 : tunables 0 0
> 0 : slabdata 19 19 0
> ...
> pid 412 532 576 28 4 : tunables 0 0
> 0 : slabdata 19 19 0
> ...
> pid 1107 1176 576 28 4 : tunables 0 0
> 0 : slabdata 42 42 0
> ...
> pid 1545 1652 576 28 4 : tunables 0 0
> 0 : slabdata 59 59 0

OK got it and indeed I can see it grow. In fact, the active column grows and
once it reaches the num objects, this one grows in turn, which makes sense.

All I can say now is that it doesn't need to run over multiple processes
to leak, though that makes it easier. SMP is not needed either.

> If you want to use kmemleak, then you need to run this program in a
> parallel loop for some time, then stop it and then:
>
> $ echo scan > /sys/kernel/debug/kmemleak
> $ cat /sys/kernel/debug/kmemleak
>
> If kmemleak has detected any leaks, cat will show them. I noticed that
> kmemleak can delay leaks with significant delay, so usually I do scan
> at least 5 times.

Thank you for this information.

I've tested on an older (3.14) kernel and I can see the effect there as well.
I don't have "pid" in slabinfo, but launching 1000 processes at a time uses
a few tens to hundreds kB of RAM on each round. 3.10 doesn't seem affected,
I'm seeing the memory grow to a fixed point if I increase the number of
parallel processes but then even after a few tens of thousands of processes,
the reported used memory doesn't seem to increase (remember no "pid" entry
here).

kmemleak indeed reports me something on 3.14 which seems to match your
trace as I'm seeing bash as the process (instead of syz-executor in your
case) and alloc_pid() calls kmem_cache_alloc() :

Unreferenced object 0xffff88003facd000 (size 128):
comm "bash", pid 1822, jiffies 4294951223 (age 15.280s)
hex dump (first 32 bytes):
01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
backtrace:
[<ffffffff810dfc22>] kmem_cache_alloc+0x92/0xe0
[<ffffffff81065d74>] alloc_pid+0x24/0x4a0
[<ffffffff81180a93>] cpumask_any_but+0x23/0x40
[<ffffffff8104b258>] copy_process.part.66+0x1068/0x16e0
[<ffffffff812038db>] n_tty_write+0x37b/0x4f0
[<ffffffff812003d1>] tty_write+0x1c1/0x2a0
[<ffffffff8104ba90>] do_fork+0xe0/0x340
[<ffffffff81058b30>] __set_task_blocked+0x30/0x80
[<ffffffff8105af38>] __set_current_blocked+0x38/0x60
[<ffffffff813b6e39>] stub_clone+0x69/0x90
[<ffffffff813b6b59>] system_call_fastpath+0x16/0x1b
[<ffffffffffffffff>] 0xffffffffffffffff

It doesn't report this on 3.10.

Unfortunately I feel totally incompetent on the subject :-/

Willy

Willy Tarreau

unread,
Jan 23, 2016, 9:38:29 PM1/23/16
to Dmitry Vyukov, Eric Dumazet, netdev, Rainer Weikusat, Michal Hocko, Andrew Morton, Paul E. McKenney, Vladimir Davydov, Johannes Weiner, Eric Dumazet, LKML, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin
On Sun, Jan 24, 2016 at 03:11:45AM +0100, Willy Tarreau wrote:
> It doesn't report this on 3.10.

To be more precise, kmemleak reports the issue on 3.13 and not on 3.12.
I'm not sure if it's reliable enough to run a bisect though.

Willy

Eric Dumazet

unread,
Jan 23, 2016, 9:50:12 PM1/23/16
to Willy Tarreau, Dmitry Vyukov, Eric Dumazet, netdev, Rainer Weikusat, Michal Hocko, Andrew Morton, Paul E. McKenney, Vladimir Davydov, Johannes Weiner, LKML, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin
I have the leak on linux-3.11.

I believe even linux-3.3 gets the leak, although I had to wait about
one hour to be confident the leak was there.

Willy Tarreau

unread,
Jan 23, 2016, 10:04:17 PM1/23/16
to Eric Dumazet, Dmitry Vyukov, Eric Dumazet, netdev, Rainer Weikusat, Michal Hocko, Andrew Morton, Paul E. McKenney, Vladimir Davydov, Johannes Weiner, LKML, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin
OK so I'm stopping my bisect. It's possible it's affected by some option
which changed across the various "make oldconfig" runs at each step. Thanks for
letting me know.

Willy

Eric Dumazet

unread,
Jan 24, 2016, 4:11:23 PM1/24/16
to Willy Tarreau, Dmitry Vyukov, Eric Dumazet, netdev, Rainer Weikusat, Michal Hocko, Andrew Morton, Paul E. McKenney, Vladimir Davydov, Johannes Weiner, LKML, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin
Bug was added in 2.6.38 . I am testing a fix and will send it today.

Eric Dumazet

unread,
Jan 24, 2016, 4:53:52 PM1/24/16
to Willy Tarreau, Eric Dumazet, Dmitry Vyukov, netdev, Rainer Weikusat, Michal Hocko, Andrew Morton, Paul E. McKenney, Vladimir Davydov, Johannes Weiner, LKML, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin
From: Eric Dumazet <edum...@google.com>

Dmitry reported a struct pid leak detected by a syzkaller program.

Bug happens in unix_stream_recvmsg() when we break the loop when a
signal is pending, without properly releasing scm.

Fixes: b3ca9b02b007 ("net: fix multithreaded signal handling in unix recv routines")
Reported-by: Dmitry Vyukov <dvy...@google.com>
Signed-off-by: Eric Dumazet <edum...@google.com>
Cc: Rainer Weikusat <rwei...@mobileactivedefense.com>
---
net/unix/af_unix.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c5bf5ef2bf89..49d5093eb055 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2339,6 +2339,7 @@ again:

if (signal_pending(current)) {
err = sock_intr_errno(timeo);
+ scm_destroy(&scm);
goto out;
}



Willy Tarreau

unread,
Jan 24, 2016, 8:06:33 PM1/24/16
to Eric Dumazet, Eric Dumazet, Dmitry Vyukov, netdev, Rainer Weikusat, Michal Hocko, Andrew Morton, Paul E. McKenney, Vladimir Davydov, Johannes Weiner, LKML, syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin
Hi Eric,
Good job on this one! FWIW, I managed to test it on 3.14 and I confirm it
completely fixes the leak there as well. I had to modify it a little bit
however since there's no scm local variable there :

- scm_destroy(&scm);
+ scm_destroy(siocb->scm);

Cheers,
Willy

Reply all
Reply to author
Forward
0 new messages