KASAN: use-after-free Read in hci_chan_del

517 views
Skip to first unread message

syzbot

unread,
Aug 2, 2020, 4:45:21 PM8/2/20
to da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com
Hello,

syzbot found the following issue on:

HEAD commit: ac3a0c84 Merge git://git.kernel.org/pub/scm/linux/kernel/g..
git tree: upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=11b8d570900000
kernel config: https://syzkaller.appspot.com/x/.config?x=e59ee776d5aa8d55
dashboard link: https://syzkaller.appspot.com/bug?extid=305a91e025a73e4fd6ce
compiler: clang version 10.0.0 (https://github.com/llvm/llvm-project/ c2443155a0fb245c8f17f2c1c72b6ea391e86e81)
syz repro: https://syzkaller.appspot.com/x/repro.syz?x=11f7ceea900000
C reproducer: https://syzkaller.appspot.com/x/repro.c?x=17e5de04900000

IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+305a91...@syzkaller.appspotmail.com

IPVS: ftp: loaded support on port[0] = 21
==================================================================
BUG: KASAN: use-after-free in hci_chan_del+0x33/0x130 net/bluetooth/hci_conn.c:1707
Read of size 8 at addr ffff8880a9591f18 by task syz-executor081/6793

CPU: 0 PID: 6793 Comm: syz-executor081 Not tainted 5.8.0-rc7-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
__dump_stack lib/dump_stack.c:77 [inline]
dump_stack+0x1f0/0x31e lib/dump_stack.c:118
print_address_description+0x66/0x5a0 mm/kasan/report.c:383
__kasan_report mm/kasan/report.c:513 [inline]
kasan_report+0x132/0x1d0 mm/kasan/report.c:530
hci_chan_del+0x33/0x130 net/bluetooth/hci_conn.c:1707
l2cap_conn_del+0x4c2/0x650 net/bluetooth/l2cap_core.c:1900
hci_disconn_cfm include/net/bluetooth/hci_core.h:1355 [inline]
hci_conn_hash_flush+0x127/0x200 net/bluetooth/hci_conn.c:1536
hci_dev_do_close+0xb7b/0x1040 net/bluetooth/hci_core.c:1761
hci_unregister_dev+0x16d/0x1590 net/bluetooth/hci_core.c:3606
vhci_release+0x73/0xc0 drivers/bluetooth/hci_vhci.c:340
__fput+0x2f0/0x750 fs/file_table.c:281
task_work_run+0x137/0x1c0 kernel/task_work.c:135
exit_task_work include/linux/task_work.h:25 [inline]
do_exit+0x601/0x1f80 kernel/exit.c:805
do_group_exit+0x161/0x2d0 kernel/exit.c:903
__do_sys_exit_group+0x13/0x20 kernel/exit.c:914
__se_sys_exit_group+0x10/0x10 kernel/exit.c:912
__x64_sys_exit_group+0x37/0x40 kernel/exit.c:912
do_syscall_64+0x73/0xe0 arch/x86/entry/common.c:384
entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x444fe8
Code: Bad RIP value.
RSP: 002b:00007ffe96e46e68 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7
RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 0000000000444fe8
RDX: 0000000000000001 RSI: 000000000000003c RDI: 0000000000000001
RBP: 00000000004ccdd0 R08: 00000000000000e7 R09: ffffffffffffffd0
R10: 00007f5ee25cd700 R11: 0000000000000246 R12: 0000000000000001
R13: 00000000006e0200 R14: 0000000000000000 R15: 0000000000000000

Allocated by task 6821:
save_stack mm/kasan/common.c:48 [inline]
set_track mm/kasan/common.c:56 [inline]
__kasan_kmalloc+0x103/0x140 mm/kasan/common.c:494
kmem_cache_alloc_trace+0x234/0x300 mm/slab.c:3551
kmalloc include/linux/slab.h:555 [inline]
kzalloc include/linux/slab.h:669 [inline]
hci_chan_create+0x9a/0x270 net/bluetooth/hci_conn.c:1692
l2cap_conn_add+0x66/0xb00 net/bluetooth/l2cap_core.c:7699
l2cap_connect_cfm+0xdb/0x12b0 net/bluetooth/l2cap_core.c:8097
hci_connect_cfm include/net/bluetooth/hci_core.h:1340 [inline]
hci_remote_features_evt net/bluetooth/hci_event.c:3210 [inline]
hci_event_packet+0x1164c/0x18260 net/bluetooth/hci_event.c:6061
hci_rx_work+0x236/0x9c0 net/bluetooth/hci_core.c:4705
process_one_work+0x789/0xfc0 kernel/workqueue.c:2269
worker_thread+0xaa4/0x1460 kernel/workqueue.c:2415
kthread+0x37e/0x3a0 drivers/block/aoe/aoecmd.c:1234
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293

Freed by task 1530:
save_stack mm/kasan/common.c:48 [inline]
set_track mm/kasan/common.c:56 [inline]
kasan_set_free_info mm/kasan/common.c:316 [inline]
__kasan_slab_free+0x114/0x170 mm/kasan/common.c:455
__cache_free mm/slab.c:3426 [inline]
kfree+0x10a/0x220 mm/slab.c:3757
hci_disconn_loglink_complete_evt net/bluetooth/hci_event.c:4999 [inline]
hci_event_packet+0x304e/0x18260 net/bluetooth/hci_event.c:6188
hci_rx_work+0x236/0x9c0 net/bluetooth/hci_core.c:4705
process_one_work+0x789/0xfc0 kernel/workqueue.c:2269
worker_thread+0xaa4/0x1460 kernel/workqueue.c:2415
kthread+0x37e/0x3a0 drivers/block/aoe/aoecmd.c:1234
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293

The buggy address belongs to the object at ffff8880a9591f00
which belongs to the cache kmalloc-128 of size 128
The buggy address is located 24 bytes inside of
128-byte region [ffff8880a9591f00, ffff8880a9591f80)
The buggy address belongs to the page:
page:ffffea0002a56440 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff8880a9591800
flags: 0xfffe0000000200(slab)
raw: 00fffe0000000200 ffffea0002a5a648 ffffea00028a4a08 ffff8880aa400700
raw: ffff8880a9591800 ffff8880a9591000 000000010000000a 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
ffff8880a9591e00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
ffff8880a9591e80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>ffff8880a9591f00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
^
ffff8880a9591f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
ffff8880a9592000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
==================================================================


---
This report is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzk...@googlegroups.com.

syzbot will keep track of this issue. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
syzbot can test patches for this issue, for details see:
https://goo.gl/tpsmEJ#testing-patches

syzbot

unread,
Aug 3, 2020, 1:08:07 PM8/3/20
to core...@netfilter.org, da...@davemloft.net, de...@driverdev.osuosl.org, er...@anholt.net, gre...@linuxfoundation.org, johan....@gmail.com, ka...@trash.net, kad...@blackhole.kfki.hu, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, mar...@holtmann.org, net...@vger.kernel.org, netfilt...@vger.kernel.org, pa...@netfilter.org, syzkall...@googlegroups.com
syzbot has bisected this issue to:

commit 166beccd47e11e4d27477e8ca1d7eda47cf3b2da
Author: Eric Anholt <er...@anholt.net>
Date: Mon Oct 3 18:52:06 2016 +0000

staging/vchi: Convert to current get_user_pages() arguments.

bisection log: https://syzkaller.appspot.com/x/bisect.txt?x=178321a4900000
start commit: 5a30a789 Merge tag 'x86-urgent-2020-08-02' of git://git.ke..
git tree: upstream
final oops: https://syzkaller.appspot.com/x/report.txt?x=144321a4900000
console output: https://syzkaller.appspot.com/x/log.txt?x=104321a4900000
syz repro: https://syzkaller.appspot.com/x/repro.syz?x=127dd914900000
C reproducer: https://syzkaller.appspot.com/x/repro.c?x=122a94ec900000

Reported-by: syzbot+305a91...@syzkaller.appspotmail.com
Fixes: 166beccd47e1 ("staging/vchi: Convert to current get_user_pages() arguments.")

For information about bisection process see: https://goo.gl/tpsmEJ#bisection

syzbot

unread,
Aug 4, 2020, 11:41:09 AM8/4/20
to coib...@gmail.com, syzkall...@googlegroups.com
Hello,

syzbot has tested the proposed patch but the reproducer is still triggering an issue:
KASAN: use-after-free Read in hci_send_acl

==================================================================
BUG: KASAN: use-after-free in hci_send_acl+0xabe/0xc60 net/bluetooth/hci_core.c:3991
Read of size 8 at addr ffff8880a7fa5e18 by task kworker/u5:1/8259

CPU: 1 PID: 8259 Comm: kworker/u5:1 Not tainted 5.8.0-rc7-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Workqueue: hci0 hci_rx_work
Call Trace:
__dump_stack lib/dump_stack.c:77 [inline]
dump_stack+0x18f/0x20d lib/dump_stack.c:118
print_address_description.constprop.0.cold+0xae/0x436 mm/kasan/report.c:383
__kasan_report mm/kasan/report.c:513 [inline]
kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530
hci_send_acl+0xabe/0xc60 net/bluetooth/hci_core.c:3991
l2cap_send_cmd+0x6d5/0x8a0 net/bluetooth/l2cap_core.c:949
l2cap_send_move_chan_cfm_icid net/bluetooth/l2cap_core.c:4917 [inline]
l2cap_move_fail net/bluetooth/l2cap_core.c:5401 [inline]
l2cap_move_channel_rsp net/bluetooth/l2cap_core.c:5440 [inline]
l2cap_bredr_sig_cmd net/bluetooth/l2cap_core.c:5719 [inline]
l2cap_sig_channel net/bluetooth/l2cap_core.c:6418 [inline]
l2cap_recv_frame+0x6936/0xae10 net/bluetooth/l2cap_core.c:7660
l2cap_recv_acldata+0x7f6/0x8e0 net/bluetooth/l2cap_core.c:8313
hci_acldata_packet net/bluetooth/hci_core.c:4520 [inline]
hci_rx_work+0x4c7/0xb10 net/bluetooth/hci_core.c:4710
process_one_work+0x94c/0x1670 kernel/workqueue.c:2269
worker_thread+0x64c/0x1120 kernel/workqueue.c:2415
kthread+0x3b5/0x4a0 kernel/kthread.c:291
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293

Allocated by task 1533:
save_stack+0x1b/0x40 mm/kasan/common.c:48
set_track mm/kasan/common.c:56 [inline]
__kasan_kmalloc.constprop.0+0xc2/0xd0 mm/kasan/common.c:494
kmem_cache_alloc_trace+0x14f/0x2d0 mm/slab.c:3551
kmalloc include/linux/slab.h:555 [inline]
kzalloc include/linux/slab.h:669 [inline]
hci_chan_create+0x9b/0x330 net/bluetooth/hci_conn.c:1692
l2cap_conn_add.part.0+0x1e/0xe10 net/bluetooth/l2cap_core.c:7699
l2cap_conn_add net/bluetooth/l2cap_core.c:8139 [inline]
l2cap_connect_cfm+0x23b/0x1090 net/bluetooth/l2cap_core.c:8097
hci_connect_cfm include/net/bluetooth/hci_core.h:1340 [inline]
hci_remote_features_evt net/bluetooth/hci_event.c:3210 [inline]
hci_event_packet+0x3e01/0x86f5 net/bluetooth/hci_event.c:6061
hci_rx_work+0x22e/0xb10 net/bluetooth/hci_core.c:4705
process_one_work+0x94c/0x1670 kernel/workqueue.c:2269
worker_thread+0x64c/0x1120 kernel/workqueue.c:2415
kthread+0x3b5/0x4a0 kernel/kthread.c:291
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293

Freed by task 8259:
save_stack+0x1b/0x40 mm/kasan/common.c:48
set_track mm/kasan/common.c:56 [inline]
kasan_set_free_info mm/kasan/common.c:316 [inline]
__kasan_slab_free+0xf5/0x140 mm/kasan/common.c:455
__cache_free mm/slab.c:3426 [inline]
kfree+0x103/0x2c0 mm/slab.c:3757
hci_disconn_loglink_complete_evt net/bluetooth/hci_event.c:4999 [inline]
hci_event_packet+0x319a/0x86f5 net/bluetooth/hci_event.c:6188
hci_rx_work+0x22e/0xb10 net/bluetooth/hci_core.c:4705
process_one_work+0x94c/0x1670 kernel/workqueue.c:2269
worker_thread+0x64c/0x1120 kernel/workqueue.c:2415
kthread+0x3b5/0x4a0 kernel/kthread.c:291
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293

The buggy address belongs to the object at ffff8880a7fa5e00
which belongs to the cache kmalloc-128 of size 128
The buggy address is located 24 bytes inside of
128-byte region [ffff8880a7fa5e00, ffff8880a7fa5e80)
The buggy address belongs to the page:
page:ffffea00029fe940 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0
flags: 0xfffe0000000200(slab)
raw: 00fffe0000000200 ffffea0002454a08 ffffea0002a02248 ffff8880aa000700
raw: 0000000000000000 ffff8880a7fa5000 0000000100000010 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
ffff8880a7fa5d00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
ffff8880a7fa5d80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>ffff8880a7fa5e00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
^
ffff8880a7fa5e80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
ffff8880a7fa5f00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
==================================================================


Tested on:

commit: 916d16c2 Replace hci_conn_put with hci_conn_drop
git tree: https://github.com/coiby/linux.git syzbot4
console output: https://syzkaller.appspot.com/x/log.txt?x=15da8d66900000
kernel config: https://syzkaller.appspot.com/x/.config?x=c0cfcf935bcc94d2
dashboard link: https://syzkaller.appspot.com/bug?extid=305a91e025a73e4fd6ce
compiler: gcc (GCC) 10.1.0-syz 20200507

syzbot

unread,
Aug 5, 2020, 5:20:06 AM8/5/20
to coib...@gmail.com, syzkall...@googlegroups.com
Hello,

syzbot has tested the proposed patch and the reproducer did not trigger any issue:

Reported-and-tested-by: syzbot+305a91...@syzkaller.appspotmail.com

Tested on:

commit: 45c266f6 Use l2cap_conn_del when destroying AMP logical link
Note: testing is done by a robot and is best-effort only.

ETenal

unread,
May 4, 2021, 5:50:08 PM5/4/21
to da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com
Hi,

This is SyzScope, a research project that aims to reveal high-risk
primitives from a seemingly low-risk bug (UAF/OOB read, WARNING, BUG, etc.).

We are currently testing seemingly low-risk bugs on syzbot's open
section(https://syzkaller.appspot.com/upstream), and try to reach out to
kernel developers as long as SyzScope discovers any high-risk primitives.

Please let us know if SyzScope indeed helps, and any suggestions/feedback.

Regarding the bug "KASAN: use-after-free Read in hci_chan_del", SyzScope
reports 3 memory write capabilities.

The detailed comments can be found at
https://sites.google.com/view/syzscope/kasan-use-after-free-read-in-hci_chan_del

Dan Carpenter

unread,
May 6, 2021, 2:01:52 AM5/6/21
to ETenal, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com
On Tue, May 04, 2021 at 02:50:03PM -0700, ETenal wrote:
> Hi,
>
> This is SyzScope, a research project that aims to reveal high-risk
> primitives from a seemingly low-risk bug (UAF/OOB read, WARNING, BUG, etc.).
>
> We are currently testing seemingly low-risk bugs on syzbot's open
> section(https://syzkaller.appspot.com/upstream), and try to reach out to
> kernel developers as long as SyzScope discovers any high-risk primitives.
>
> Please let us know if SyzScope indeed helps, and any suggestions/feedback.
>
> Regarding the bug "KASAN: use-after-free Read in hci_chan_del", SyzScope
> reports 3 memory write capabilities.
>
> The detailed comments can be found at https://sites.google.com/view/syzscope/kasan-use-after-free-read-in-hci_chan_del
>

I don't understand what you are saying at all. This looks like a normal
syzbot email. Are you saying that part of it generated by SyzScope?
I don't think there is anyone who thinks a UAF/OOB read is low impact.

There are no comments at the "detailed comments" URL.

regards,
dan carpenter
> --
> You received this message because you are subscribed to the Google Groups "syzkaller-bugs" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to syzkaller-bug...@googlegroups.com.
> To view this discussion on the web visit https://groups.google.com/d/msgid/syzkaller-bugs/c2004663-e54a-7fbc-ee19-b2749549e2dd%40gmail.com.

SyzScope

unread,
May 6, 2021, 2:42:27 AM5/6/21
to Dan Carpenter, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com
Hi Dan,

Sorry for the confusion. This bug was reported by a normal syzbot email.
What we are trying to do is discovering more primitives such as memory
write or function pointer dereference hidden behind the primitive shown
on syzbot(which is a memory read in hci_chan_del() in this case).

We realized that some primitives may not be found by fuzzing because of
the heap layout. By symbolizing the UAF/OOB memory and performing
symbolic execution, we are able to go deeper into the buggy code instead
of encountering a kernel panic (NULL pointer dereference from UAF/OOB
memory) or complicated constraints that prevent fuzzing from entering.

In our measurement, we found that memory write bugs are usually fixed
faster than memory read bugs or non-security bugs (e.g., WARNING). Thus,
we think evaluating the real impact of a bug helps people understand
how risky the bug really is and benefits the patching process.

Regarding this bug, syzbot originally reported a memory read primitive
(KASAN read in hci_chan_del()). In the detailed comments URL, we show
that we found a memory write primitive using the same PoC from
syzbot; we believe the memory write primitive makes the bug more risky.

SyzScope

unread,
May 28, 2021, 5:12:05 PM5/28/21
to syzbot, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com
Sorry for the confusion in our last email. We have done a little more
analysis since then and hope to help developers fix this bug.

The bug was reported by syzbot first in Aug 2020. Since it remains
unpatched to this date, we have conducted some analysis to determine its
security impact and root causes, which hopefully can help with the
patching decisions.
Specifically, we find that even though it is labeled as "UAF read" by
syzbot, it can in fact lead to double free and control flow hijacking as
well. Here is our analysis below (on this kernel version:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/log/?id=af5043c89a8ef6b6949a245fff355a552eaed240)

----------------------------- Root cause analysis:
--------------------------
The use-after-free bug happened because the object has two different
references. But when it was freed, only one reference was removed,
allowing the other reference to be used incorrectly.

Specifically, the object of type "struct hci_chan" can be referenced in
two places from an object called hcon (or conn in hci_chan_create) of type
"struct hci_conn": "hcon->chan_list" and "hcon->l2cap_data->hchan". But
only one of them (conn->chan_list) was deleted when freeing "struct
hci_chan" from "hci_disconn_loglink_complete_evt()".

The function "hci_chan_create" shows how the first reference is created.

struct hci_chan *hci_chan_create(struct hci_conn *conn)
{
    struct hci_dev *hdev = conn->hdev;
    struct hci_chan *chan;

    ...
    chan = kzalloc(sizeof(*chan), GFP_KERNEL);
    ...
    list_add_rcu(&chan->list, &conn->chan_list); // Assign chan to
hcon->chan_list. This is the first reference created.

    return chan;
}

"l2cap_conn_add" is the caller of the previous function which shows how
the second reference is created.

static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon)
{
    struct l2cap_conn *conn = hcon->l2cap_data;
    struct hci_chan *hchan;

    ...

    hchan = hci_chan_create(hcon); //"hchan" was created in hci_chan_create
    if (!hchan)
        return NULL;

    conn = kzalloc(sizeof(*conn), GFP_KERNEL);
    ...
    kref_init(&conn->ref);
    hcon->l2cap_data = conn;
    conn->hcon = hci_conn_get(hcon);
    conn->hchan = hchan; // "hchan" was assigned to "hcon->l2cap_data->hchan". This is the second reference.
    ...
}

When the chan was freed in "hci_disconn_loglink_complete_evt"
(hci_disconn_loglink_complete_evt()->amp_destroy_logical_link()->hci_chan_del()),
we only deleted the reference of "((struct hci_conn *)hcon)->chan_list"
(effectively removing the entry from the list), but the reference of
"((struct hci_conn *)hcon)->l2cap_data->hchan" is still valid.

The function below shows exactly how the free of the object occurs and
how its first reference is removed.

void hci_chan_del(struct hci_chan *chan)
{

    struct hci_conn *conn = chan->conn;
    struct hci_dev *hdev = conn->hdev;

    BT_DBG("%s hcon %p chan %p", hdev->name, conn, chan);
    list_del_rcu(&chan->list); // removed "chan" from the list (the
first reference). The second reference((struct hci_conn
*)hcon->l2cap_data->hchan) remains however.
    synchronize_rcu();
    set_bit(HCI_CONN_DROP, &conn->flags);
    hci_conn_put(conn);

    skb_queue_purge(&chan->data_q);

    kfree(chan); // free "chan"
}

----------------------------- Potential fix: --------------------------
Based on the analysis, it appears that in hci_chan_del() we should also
remove the second reference, "((struct hci_conn *)hcon)->l2cap_data->hchan",
e.g., by setting it to NULL.

-------------------------- Control flow hijacking Primitive:
-----------------------------

This function is where the bug impact was originally reported on syzbot

void hci_chan_del(struct hci_chan *chan) //"chan" was freed
{

    struct hci_conn *conn = chan->conn; // Syzbot reported the UAF read
    struct hci_dev *hdev = conn->hdev;

    ...

    skb_queue_purge(&chan->data_q); // "data_q" comes from the freed
object "chan" therefore it can point to an arbitrary memory address
    kfree(chan);
}


The skb was dequeued from the list, however the list is controllable by
an attacker and it can point to an arbitrary memory address.

void skb_queue_purge(struct sk_buff_head *list)
{
    struct sk_buff *skb;

    while ((skb = skb_dequeue(list)) != NULL) // skb is also controllable
        kfree_skb(skb); // dangerous use of skb further down
}

After going through a long call chain:
skb_queue_purge->kfree_skb->__kfree_skb->skb_release_all->skb_release_data,
skb enters "skb_zcopy_clear".

static void skb_release_data(struct sk_buff *skb)
{
    ...
    skb_zcopy_clear(skb, true); // skb entered skb_zcopy_clear() and
will dereference a function pointer inside.
    skb_free_head(skb);
}



static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
{
    struct ubuf_info *uarg = skb_zcopy(skb); // uarg comes from skb,
therefore it also controllable by attacker

    if (uarg) {
        if (skb_zcopy_is_nouarg(skb)) {
            /* no notification callback */
        } else if (uarg->callback == sock_zerocopy_callback) {
            uarg->zerocopy = uarg->zerocopy && zerocopy;
            sock_zerocopy_put(uarg); // uarg enters sock_zerocopy_put()
        }
...
    }
}

Inside the function below, uarg's function pointer will be dereferenced.
This makes a control flow hijacking possible because uarg is totally
controllable by attackers.

void sock_zerocopy_put(struct ubuf_info *uarg)

{
    if (uarg && refcount_dec_and_test(&uarg->refcnt)) {
        if (uarg->callback)
            uarg->callback(uarg, uarg->zerocopy); // uarg dereferences
a function pointer, and thus we grant a control flow hijacking primitive
        ...
    }

}


SyzScope Team.

On 8/2/2020 1:45 PM, syzbot wrote:

SyzScope

unread,
Jun 3, 2021, 2:30:13 PM6/3/21
to syzbot, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com, gre...@linuxfoundation.org
Hi developers,

Besides the control flow hijacking primitive we sent before, we managed
to discover an additional double free primitive in this bug, making this
bug even more dangerous.

We created a web page with detailed descriptions:
https://sites.google.com/view/syzscope/kasan-use-after-free-read-in-hci_chan_del

We understand that creating a patch can be time-consuming and there is
probably a long list of bugs pending fixes. We hope that our security
analysis can enable an informed decision on which bugs to fix first
(prioritization).

Since the bug has been on syzbot for over ten months (first found on
08-03-2020 and still can be triggered on 05-08-2021), it is best to have
the bug fixed early enough to avoid it being weaponized.

Greg KH

unread,
Jun 3, 2021, 2:36:43 PM6/3/21
to SyzScope, syzbot, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com
On Thu, Jun 03, 2021 at 11:30:08AM -0700, SyzScope wrote:
> Hi developers,
>
> Besides the control flow hijacking primitive we sent before, we managed to
> discover an additional double free primitive in this bug, making this bug
> even more dangerous.
>
> We created a web page with detailed descriptions: https://sites.google.com/view/syzscope/kasan-use-after-free-read-in-hci_chan_del
>
> We understand that creating a patch can be time-consuming and there is
> probably a long list of bugs pending fixes. We hope that our security
> analysis can enable an informed decision on which bugs to fix first
> (prioritization).
>
> Since the bug has been on syzbot for over ten months (first found on
> 08-03-2020 and still can be triggered on 05-08-2021), it is best to have the
> bug fixed early enough to avoid it being weaponized.

Wonderful, please help out by sending a fix for this.

thanks,

greg k-h

Greg KH

unread,
Jun 4, 2021, 5:48:22 AM6/4/21
to ETenal, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com
On Tue, May 04, 2021 at 02:50:03PM -0700, ETenal wrote:
> Hi,
>
> This is SyzScope, a research project that aims to reveal high-risk
> primitives from a seemingly low-risk bug (UAF/OOB read, WARNING, BUG, etc.).

Who is working on and doing this "research project"? And what is it
doing to actually fix the issues that syzbot finds? Seems like that
would be a better solution instead of just trying to send emails saying,
in short "why isn't this reported issue fixed yet?"

thanks,

greg k-h

SyzScope

unread,
Jun 4, 2021, 1:11:06 PM6/4/21
to Greg KH, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com
Hi Greg,

> Who is working on and doing this "research project"?
We are a group of researchers from University of California, Riverside
(we introduced ourselves in an earlier email to secu...@kernel.org if
you recall).  Please allow us to articulate the goal of our research.
We'd be happy to hear your feedback and suggestions.

> And what is it
> doing to actually fix the issues that syzbot finds? Seems like that
> would be a better solution instead of just trying to send emails saying,
> in short "why isn't this reported issue fixed yet?"
From our limited understanding, we know a key problem with syzbot bugs
is that there are too many of them - more than what can be handled by
developers and maintainers. Therefore, it seems some form of
prioritization on bug fixing would be helpful. The goal of the SyzScope
project is to *automatically* analyze the security impact of syzbot
bugs, which helps with prioritizing bug fixes. In other words, when a
syzbot bug is reported, we aim to attach a corresponding security impact
"signal" to help developers make an informed decision on which ones to
fix first.

Currently,  SyzScope is a standalone prototype system that we plan to
open source. We hope to keep developing it to make it more and more
useful and have it eventually integrated into syzbot (we are in talks
with Dmitry).

We are happy to talk more offline (perhaps even in a zoom meeting if you
would like). Thanks in advance for any feedback and suggestions you may
have.

Greg KH

unread,
Jun 5, 2021, 3:43:48 AM6/5/21
to SyzScope, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com
On Fri, Jun 04, 2021 at 10:11:03AM -0700, SyzScope wrote:
> Hi Greg,
>
> > Who is working on and doing this "research project"?
> We are a group of researchers from University of California, Riverside (we
> introduced ourselves in an earlier email to secu...@kernel.org if you
> recall).

I do not recall that, sorry, when was that?

> Please allow us to articulate the goal of our research. We'd be
> happy to hear your feedback and suggestions.
>
> > And what is it
> > doing to actually fix the issues that syzbot finds? Seems like that
> > would be a better solution instead of just trying to send emails saying,
> > in short "why isn't this reported issue fixed yet?"
> From our limited understanding, we know a key problem with syzbot bugs is
> that there are too many of them - more than what can be handled by
> developers and maintainers. Therefore, it seems some form of prioritization
> on bug fixing would be helpful. The goal of the SyzScope project is to
> *automatically* analyze the security impact of syzbot bugs, which helps with
> prioritizing bug fixes. In other words, when a syzbot bug is reported, we
> aim to attach a corresponding security impact "signal" to help developers
> make an informed decision on which ones to fix first.

Is that really the reason why syzbot-reported problems are not being
fixed? Just because we don't know which ones are more "important"?

As someone who has been managing many interns for a year or so working
on these, I do not think that is the problem, but hey, what do I know...

> Currently,  SyzScope is a standalone prototype system that we plan to open
> source. We hope to keep developing it to make it more and more useful and
> have it eventually integrated into syzbot (we are in talks with Dmitry).
>
> We are happy to talk more offline (perhaps even in a zoom meeting if you
> would like). Thanks in advance for any feedback and suggestions you may
> have.

Meetings are not really how kernel development works, sorry.

At the moment, these emails really do not seem all that useful, trying
to tell other people what to do does not get you very far when dealing
with people who you have no "authority" over...

Technical solutions to human issues almost never work, however writing a
procmail filter to keep me from having to see these will work quite well :)

good luck!

greg k-h

SyzScope

unread,
Jun 5, 2021, 2:12:51 PM6/5/21
to Greg KH, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com
Hi Greg,

> I do not recall that, sorry, when was that?
We sent an email to secu...@kernel.org from xzo...@ucr.edu account on
May 20, the title is "KASAN: use-after-free Read in hci_chan_del has
dangerous security impact".
> Is that really the reason why syzbot-reported problems are not being
> fixed? Just because we don't know which ones are more "important"?
>
> As someone who has been managing many interns for a year or so working
> on these, I do not think that is the problem, but hey, what do I know...

Perhaps we misunderstood the problem of syzbot-generated bugs. Our
understanding is that if a syzbot-generated bug is exploited in the wild
and/or the exploit code is made publicly available somehow, then the bug
will be fixed in a prioritized fashion. If our understanding is correct,
wouldn't it be nice if we, as good guys, can figure out which bugs are
security-critical and patch them before the bad guys exploit them.

Leon Romanovsky

unread,
Jun 6, 2021, 1:06:09 AM6/6/21
to Greg KH, SyzScope, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com
My 2 cents, as the one who is fixing these external and internal syzkaller bugs
in RDMA: I would say that the main reason is the lack of specific knowledge to fix
them and/or the amount of work needed to actually do it.

Many of such failures are in neglected parts of code.

And no, I personally won't care if someone adds security score or not to
syzkaller report, all reports should be fixed.

Thanks

Greg KH

unread,
Jun 6, 2021, 1:16:05 AM6/6/21
to SyzScope, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com
On Sat, Jun 05, 2021 at 11:12:49AM -0700, SyzScope wrote:
> Hi Greg,
>
> > I do not recall that, sorry, when was that?
> We sent an email to secu...@kernel.org from xzo...@ucr.edu account on May
> 20, the title is "KASAN: use-after-free Read in hci_chan_del has dangerous
> security impact".

So you used a different email address and we were supposed to know how
to correlate between the two? How?

> > Is that really the reason why syzbot-reported problems are not being
> > fixed? Just because we don't know which ones are more "important"?
> >
> > As someone who has been managing many interns for a year or so working
> > on these, I do not think that is the problem, but hey, what do I know...
>
> Perhaps we misunderstood the problem of syzbot-generated bugs. Our
> understanding is that if a syzbot-generated bug is exploited in the wild
> and/or the exploit code is made publicly available somehow, then the bug
> will be fixed in a prioritized fashion. If our understanding is correct,
> wouldn't it be nice if we, as good guys, can figure out which bugs are
> security-critical and patch them before the bad guys exploit them.

The "problem" is that no one seems willing to provide the resources to
fix the issues being found as quickly as they are being found. It
usually takes an exponentially longer amount of time for a fix than to
find the problem. Try it yourself and see! Fix these issues that your
tool is somehow categorizing as "more important" and let us know how it
goes.

Or is just fixing found bugs somehow not as much fun as writing new
tools?

good luck!

greg k-h

Leon Romanovsky

unread,
Jun 6, 2021, 1:29:40 AM6/6/21
to Greg KH, SyzScope, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com
On Sun, Jun 06, 2021 at 07:16:00AM +0200, Greg KH wrote:
> On Sat, Jun 05, 2021 at 11:12:49AM -0700, SyzScope wrote:
> > Hi Greg,

<...>

> > Perhaps we misunderstood the problem of syzbot-generated bugs. Our
> > understanding is that if a syzbot-generated bug is exploited in the wild
> > and/or the exploit code is made publicly available somehow, then the bug
> > will be fixed in a prioritized fashion. If our understanding is correct,
> > wouldn't it be nice if we, as good guys, can figure out which bugs are
> > security-critical and patch them before the bad guys exploit them.
>
> The "problem" is that no one seems willing to provide the resources to
> fix the issues being found as quickly as they are being found. It
> usually takes an exponentially longer amount of time for a fix than to
> find the problem.

And this is even an easy case; the more complex and common situation
is where a repro is not available or it doesn't reproduce locally because
it is a race.

Thanks

Hillf Danton

unread,
Jun 6, 2021, 4:50:29 AM6/6/21
to Leon Romanovsky, Greg KH, SyzScope, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, Luiz Augusto von Dentz, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com
On 2020-08-02 20:45
To fix the uaf reported, add reference count to hci channel to track users.
Then only channels with zero users will be released.

It is now only for thoughts.

+++ x/include/net/bluetooth/hci_core.h
@@ -704,6 +704,7 @@ struct hci_chan {
struct sk_buff_head data_q;
unsigned int sent;
__u8 state;
+ atomic_t ref;
};

struct hci_conn_params {
+++ x/net/bluetooth/hci_conn.c
@@ -1769,6 +1769,8 @@ void hci_chan_del(struct hci_chan *chan)
struct hci_conn *conn = chan->conn;
struct hci_dev *hdev = conn->hdev;

+ if (atomic_read(&chan->ref))
+ return;
BT_DBG("%s hcon %p chan %p", hdev->name, conn, chan);

list_del_rcu(&chan->list);
+++ x/net/bluetooth/l2cap_core.c
@@ -1903,6 +1903,7 @@ static void l2cap_conn_del(struct hci_co

mutex_unlock(&conn->chan_lock);

+ atomic_dec(&conn->hchan->ref);
hci_chan_del(conn->hchan);

if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT)
@@ -7716,6 +7717,8 @@ static struct l2cap_conn *l2cap_conn_add
kref_init(&conn->ref);
hcon->l2cap_data = conn;
conn->hcon = hci_conn_get(hcon);
+ /* dec in l2cap_conn_del() */
+ atomic_inc(&hchan->ref);
conn->hchan = hchan;

BT_DBG("hcon %p conn %p hchan %p", hcon, conn, hchan);

Greg KH

unread,
Jun 6, 2021, 5:54:27 AM6/6/21
to Hillf Danton, Leon Romanovsky, SyzScope, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, Luiz Augusto von Dentz, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com
Please no, never use "raw" atomic variables. Especially for something
like this, use a kref.

thanks,

greg k-h

Hillf Danton

unread,
Jun 7, 2021, 3:48:48 AM6/7/21
to Greg KH, Hillf Danton, Leon Romanovsky, SyzScope, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, Luiz Augusto von Dentz, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com
Fair, thanks for taking a look at it.

Spin with care for the race the added ref fails to cut.
To ease review the full syzreport is also attached.

===
To fix uaf, add user track to hci channel and we will only release channel if
its user hits zero. And a dryrun mechanism is also added to take care of the
race user track fails to cut.

CPU0 CPU1
---- ----
hci_chan_del l2cap_conn_del
chan->user = 0;

if (chan->user != 0)
return;
synchronize_rcu();
kfree(chan);

hci_chan_del();

It is now only for thoughts.

+++ x/include/net/bluetooth/hci_core.h
@@ -704,6 +704,10 @@ struct hci_chan {
struct sk_buff_head data_q;
unsigned int sent;
__u8 state;
+ __u8 user;
+ __u8 release;
+
+#define HCHAN_RELEASE_DRYRUN 1
};

struct hci_conn_params {
+++ x/net/bluetooth/l2cap_core.c
@@ -1903,6 +1903,12 @@ static void l2cap_conn_del(struct hci_co

mutex_unlock(&conn->chan_lock);

+ /* see comment in hci_chan_del() */
+ conn->hchan->release = HCHAN_RELEASE_DRYRUN;
+ smp_wmb();
+ conn->hchan->user--;
+ hci_chan_del(conn->hchan);
+ conn->hchan->release = 0;
hci_chan_del(conn->hchan);

if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT)
@@ -7716,6 +7722,8 @@ static struct l2cap_conn *l2cap_conn_add
kref_init(&conn->ref);
hcon->l2cap_data = conn;
conn->hcon = hci_conn_get(hcon);
+ /* dec in l2cap_conn_del() */
+ hchan->user++;
conn->hchan = hchan;

BT_DBG("hcon %p conn %p hchan %p", hcon, conn, hchan);
+++ x/net/bluetooth/hci_conn.c
@@ -1769,12 +1769,26 @@ void hci_chan_del(struct hci_chan *chan)
struct hci_conn *conn = chan->conn;
struct hci_dev *hdev = conn->hdev;

+ if (chan->user != 0)
+ return;
+
BT_DBG("%s hcon %p chan %p", hdev->name, conn, chan);

- list_del_rcu(&chan->list);
+ if (!list_empty(&chan->list))
+ list_del_rcu(&chan->list);

synchronize_rcu();

+ if (chan->release == HCHAN_RELEASE_DRYRUN) {
+ /*
+ * after list_del chan is only visible to the owner of dryrun,
+ * which is needed to take care of the race chan->user fails to
+ * cut, and return to owner.
+ */
+ INIT_LIST_HEAD(&chan->list);
+ return;
+ }
+
/* Prevent new hci_chan's to be created for this hci_conn */
set_bit(HCI_CONN_DROP, &conn->flags);

--

Greg KH

unread,
Jun 7, 2021, 3:55:36 AM6/7/21
to Hillf Danton, Leon Romanovsky, SyzScope, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, Luiz Augusto von Dentz, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com
On Mon, Jun 07, 2021 at 03:48:28PM +0800, Hillf Danton wrote:
> On Sun, 6 Jun 2021 11:54:22 +0200 Greg KH wrote:
> >On Sun, Jun 06, 2021 at 04:50:04PM +0800, Hillf Danton wrote:
> >>
> >> To fix the uaf reported, add reference count to hci channel to track users.
> >> Then only channels with zero users will be released.
> >>
> >> It is now only for thoughts.
> >>
> >> +++ x/include/net/bluetooth/hci_core.h
> >> @@ -704,6 +704,7 @@ struct hci_chan {
> >> struct sk_buff_head data_q;
> >> unsigned int sent;
> >> __u8 state;
> >> + atomic_t ref;
> >
> >Please no, never use "raw" atomic variables. Especially for something
> >like this, use a kref.
>
> Fair, thanks for taking a look at it.
>
> Spin with care for the race the added ref fails to cut.

I do not understand what you mean here.

> To ease review the full syzreport is also attached.
>
> To fix uaf, add user track to hci channel and we will only release channel if
> its user hits zero. And a dryrun mechanism is also added to take care of the
> race user track fails to cut.
>
> CPU0 CPU1
> ---- ----
> hci_chan_del l2cap_conn_del
> chan->user = 0;
>
> if (chan->user != 0)
> return;
> synchronize_rcu();
> kfree(chan);
>
> hci_chan_del();
>
> It is now only for thoughts.
>
> +++ x/include/net/bluetooth/hci_core.h
> @@ -704,6 +704,10 @@ struct hci_chan {
> struct sk_buff_head data_q;
> unsigned int sent;
> __u8 state;
> + __u8 user;

No.

> + __u8 release;

No please no.

> +
> +#define HCHAN_RELEASE_DRYRUN 1
> };
>
> struct hci_conn_params {
> +++ x/net/bluetooth/l2cap_core.c
> @@ -1903,6 +1903,12 @@ static void l2cap_conn_del(struct hci_co
>
> mutex_unlock(&conn->chan_lock);
>
> + /* see comment in hci_chan_del() */
> + conn->hchan->release = HCHAN_RELEASE_DRYRUN;
> + smp_wmb();
> + conn->hchan->user--;

And the reason you are open-coding a kref is why???

Please again no.

> + hci_chan_del(conn->hchan);
> + conn->hchan->release = 0;
> hci_chan_del(conn->hchan);
>
> if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT)
> @@ -7716,6 +7722,8 @@ static struct l2cap_conn *l2cap_conn_add
> kref_init(&conn->ref);
> hcon->l2cap_data = conn;
> conn->hcon = hci_conn_get(hcon);
> + /* dec in l2cap_conn_del() */
> + hchan->user++;

{sigh}

No, there is a reason we wrote kref many _decades_ ago. Please use it,
your original attempt with an atomic was just fine, just use the proper
data structures the kernel provides you as this is obviously a reference
counted object.

thanks,

greg k-h

Hillf Danton

unread,
Jun 7, 2021, 6:02:23 AM6/7/21
to Greg KH, Hillf Danton, Leon Romanovsky, SyzScope, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, Luiz Augusto von Dentz, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com
I see your concern. I thought this is a simpler user track than kref and
open coded a couple of lines. I see it is incorrect. Sorry for that.

After taking another look at the added user track, I realised that it serves
no more than a one-off state word that prevents channel from being released.
Then the race behind the uaf can be fixed by adding a state on top of the
dryrun introduced even without tracking users.

The state machine works as the following,
1) it is initialised to be backoff that means channel cannot be released
at the moment.
2) it is changed to be dryrun on releasing to cut the race that survived
backoff.
3) it is finally set to zero for release after cutting the chance for race.


+++ x/include/net/bluetooth/hci_core.h
@@ -704,6 +704,10 @@ struct hci_chan {
struct sk_buff_head data_q;
unsigned int sent;
__u8 state;
+ __u8 release;
+
+#define HCHAN_RELEASE_DRYRUN 1
+#define HCHAN_RELEASE_BACKOFF 2
};

struct hci_conn_params {
+++ x/net/bluetooth/l2cap_core.c
@@ -1903,6 +1903,10 @@ static void l2cap_conn_del(struct hci_co

mutex_unlock(&conn->chan_lock);

+ /* see comment in hci_chan_del() */
+ conn->hchan->release = HCHAN_RELEASE_DRYRUN;
+ hci_chan_del(conn->hchan);
+ conn->hchan->release = 0;
hci_chan_del(conn->hchan);

if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT)
@@ -7716,6 +7720,8 @@ static struct l2cap_conn *l2cap_conn_add
kref_init(&conn->ref);
hcon->l2cap_data = conn;
conn->hcon = hci_conn_get(hcon);
+ /* release is changed in l2cap_conn_del() */
+ hchan->release = HCHAN_RELEASE_BACKOFF;
conn->hchan = hchan;

BT_DBG("hcon %p conn %p hchan %p", hcon, conn, hchan);
+++ x/net/bluetooth/hci_conn.c
@@ -1769,12 +1769,26 @@ void hci_chan_del(struct hci_chan *chan)
struct hci_conn *conn = chan->conn;
struct hci_dev *hdev = conn->hdev;

+ if (chan->release == HCHAN_RELEASE_BACKOFF)
+ return;
+
BT_DBG("%s hcon %p chan %p", hdev->name, conn, chan);

- list_del_rcu(&chan->list);
+ if (!list_empty(&chan->list))
+ list_del_rcu(&chan->list);

synchronize_rcu();

+ if (chan->release == HCHAN_RELEASE_DRYRUN) {
+ /*
+ * after list_del chan is only visible to the owner of dryrun,
+ * which is needed to cut the race that survived backoff,
+ * and simply return to owner.

Jason A. Donenfeld

unread,
Jun 7, 2021, 6:21:18 AM6/7/21
to SyzScope, syzbot, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com, kernel-h...@lists.openwall.com
Hi SyzScope,

On Fri, May 28, 2021 at 02:12:01PM -0700, SyzScope wrote:

> The bug was reported by syzbot first in Aug 2020. Since it remains
> unpatched to this date, we have conducted some analysis to determine its
> security impact and root causes, which hopefully can help with the
> patching decisions.
> Specifically, we find that even though it is labeled as "UAF read" by
> syzbot, it can in fact lead to double free and control flow hijacking as
> well. Here is our analysis below (on this kernel version:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/log/?id=af5043c89a8ef6b6949a245fff355a552eaed240)
>
> ----------------------------- Root cause analysis:
> --------------------------
> The use-after-free bug happened because the object has two different
> references. But when it was freed, only one reference was removed,
> allowing the other reference to be used incorrectly.
> [...]

Thank you very much for your detailed analysis. I think this is very
valuable work, and I appreciate you doing it. I wanted to jump in to
this thread here so as not to discourage you, following Greg's hasty
dismissal. The bad arguments I've seen made have been something like:

- Who cares about the impact? Bugs are bugs and these should be fixed
regardless. Severity ratings are a waste of time.
- Spend your time writing patches, not writing tools to discover
security issues.
- This doesn't help my interns.
- "research project" scare quotes.

I think this entire set of argumentation is entirely bogus, and I really
hope it doesn't dissuade you from continuing to conduct useful research
on the kernel.

Specifically, it sounds like your tool is scanning through syzbot
reports, loading them into a symbolic execution engine, and seeing what
other primitives you can finesse out of the bugs, all in an automated
way. So, in the end, a developer gets a report that, rather than just
saying "4 byte out of bounds read into all zeroed memory so not a big
deal anyway even if it should be fixed," the developer gets a report
that says, "4 byte out of bounds read, or a UaF if approached in this
other way." Knowing that seems like very useful information, not just
for prioritization, but also for the urgency at which patches might be
deployed. For example, that's a meaningful distinction were that kind of
bug found in core networking stack or in wifi or ethernet drivers. I
also think it's great that you're pushing forward the field of automated
vulnerability discovery and exploit writing. Over time, hopefully that
leads to crushing all sorts of classes of bugs. It's also impressive
that you're able to do so much with kernel code in a symbolic execution
environment; this sounds a few steps beyond Angr ;-)...

My one suggestion would be that your email alerts / follow-ups to syzbot
reports, if automated, contain a bit more "dumbed-down" information
about what's happening. Not all kernel developers speak security, and as
you've seen, in some places it might be an uphill battle to have your
contributions taken seriously. On the other hand, it sounds like you
might already be working with Dmitry to integrate this into the
syzkaller infrastructure itself, somehow? If so, that'd be great.

Regards,
Jason

Dmitry Vyukov

unread,
Jun 7, 2021, 6:28:26 AM6/7/21
to Jason A. Donenfeld, SyzScope, syzbot, David Miller, Johan Hedberg, Jakub Kicinski, linux-bluetooth, LKML, Marcel Holtmann, netdev, syzkaller-bugs, Kernel Hardening
We discussed this with authors, but no integration work is happening
right now yet.
Yes, it would be useful for syzbot to do this assessment automatically
for all bugs and, say, tag bugs on the dashboard (less noisy than
sending separate emails). If/when syzbot sends, say, monthly
per-subsystem summary, that priority info could also be included
there.

Greg KH

unread,
Jun 7, 2021, 6:31:43 AM6/7/21
to Hillf Danton, Leon Romanovsky, SyzScope, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, Luiz Augusto von Dentz, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com
Adding another state on top of this feels rough, does it really solve
the race here? Normally a reference count should be enough to properly
tear things down when needed, rolling back from a "can I try this now"
state still seems racy without the needed lock somewhere.
checkpatch will ding you for this not being the proper format for
networking, it hit me last week as well :)

thanks,

greg k-h

Greg KH

unread,
Jun 7, 2021, 7:20:39 AM6/7/21
to Jason A. Donenfeld, SyzScope, syzbot, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com, kernel-h...@lists.openwall.com
Ok, I'd like to apologize if that was the attitude that came across
here, as I did not mean it that way.

What I saw here was an anonymous email, saying "here is a whole bunch of
information about a random syzbot report that means you should fix this
sooner!" When there's a dump this big of "information", but no patch,
that's almost always a bad sign that the information really isn't all
that good, otherwise the author would have just sent a patch to fix it.

We are drowning in syzbot bugs at the moment, with almost no one helping
to fix them. So much so that the only people I know of working on this
are the interns that the LF has funded because no other company
seems willing to help out with this task.

That's not the syzbot author's fault, it's the fault of every other
company that relies on Linux at the moment. By not providing time for
their engineers to fix these found bugs, and only add new features, it's
not going to get any better.

So this combined two things I'm really annoyed at, anonymous
contributions combined with "why are you not fixing this" type of
a report. Neither of which were, in the end, actually helpful to us.

I'm not asking for any help for my interns, nor am I telling anyone what
to work on. I am saying please don't annoy the maintainers who are
currently overwhelmed at the moment with additional reports of this type
when they obviously can not handle the ones that we have.

Working with the syzbot people to provide a more in-depth analysis of the
problem is wonderful, and will go a long way toward helping being able
to do semi-automatic fixing of problems like this, which would be
wonderful. But how were we supposed to know this anonymous gmail
account, with a half-completed google pages web site was not just a
troll trying to waste our time?

What proof did we have that this really was a correct report if a real
person didn't even provide their name to it?

thanks,

greg k-h

SyzScope

unread,
Jun 7, 2021, 2:26:30 PM6/7/21
to Greg KH, Jason A. Donenfeld, syzbot, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com, kernel-h...@lists.openwall.com
Hi all,
We are really thankful for all the suggestions and concerns. We are
definitely interested in continuing this line of research.

Just to clarify:  SyzScope is an ongoing research project that is
currently under submission, which has an anonymity requirement.
Therefore we chose to use a gmail address initially in the public
channel. Since Greg asked, we did reveal our university affiliation and
email address, as well as cross-referenced a private email (again using
university address) to secu...@kernel.org. We are sorry for the chaos
of using several different email addresses. In the future, we will try
to use our university address directly (we checked with other
researchers and it seems to be okay).

syzbot

unread,
Jun 7, 2021, 6:25:08 PM6/7/21
to Ja...@zx2c4.com, anmol.k...@gmail.com, coib...@gmail.com, core...@netfilter.org, da...@davemloft.net, de...@driverdev.osuosl.org, dsa...@kernel.org, dvy...@google.com, er...@anholt.net, f...@strlen.de, gr...@kroah.com, gre...@linuxfoundation.org, johan....@gmail.com, ka...@trash.net, kad...@blackhole.kfki.hu, kad...@netfilter.org, kernel-h...@lists.openwall.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-kern...@lists.linuxfoundation.org, linux-...@vger.kernel.org, mar...@holtmann.org, net...@vger.kernel.org, netfilt...@vger.kernel.org, pa...@netfilter.org, ph...@philpotter.co.uk, syzkall...@googlegroups.com, syzs...@gmail.com, yosh...@linux-ipv6.org
syzbot suspects this issue was fixed by commit:

commit 43016d02cf6e46edfc4696452251d34bba0c0435
Author: Florian Westphal <f...@strlen.de>
Date: Mon May 3 11:51:15 2021 +0000

netfilter: arptables: use pernet ops struct during unregister

bisection log: https://syzkaller.appspot.com/x/bisect.txt?x=1325d967d00000
start commit: af5043c8 Merge tag 'acpi-5.10-rc4' of git://git.kernel.org..
git tree: upstream
kernel config: https://syzkaller.appspot.com/x/.config?x=f9aa2432c01bcb1f
dashboard link: https://syzkaller.appspot.com/bug?extid=305a91e025a73e4fd6ce
syz repro: https://syzkaller.appspot.com/x/repro.syz?x=130152a1500000
C reproducer: https://syzkaller.appspot.com/x/repro.c?x=102b1bba500000

If the result looks correct, please mark the issue as fixed by replying with:

#syz fix: netfilter: arptables: use pernet ops struct during unregister

For information about bisection process see: https://goo.gl/tpsmEJ#bisection

Hillf Danton

unread,
Jun 8, 2021, 4:18:20 AM6/8/21
to Greg KH, Hillf Danton, Leon Romanovsky, SyzScope, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, Luiz Augusto von Dentz, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com
On Mon, 7 Jun 2021 12:31:39 +0200 Greg KH wrote:
>On Mon, Jun 07, 2021 at 06:02:01PM +0800, Hillf Danton wrote:
>> After taking another look at the added user track, I realised that it serves
>> no more than a one-off state word that prevents channel from being released.
>> Then the race behind the uaf can be fixed by adding a state on top of the
>> dryrun introduced even without tracking users.
>>
>> The state machine works as the following,
>> 1) it is initialised to be backoff that means channel cannot be released
>> at the moment.
>> 2) it is changed to be dryrun on releasing to cut the race that survived
>> backoff.
>> 3) it is finally set to zero for release after cutting the chance for race.
>
>Adding another state on top of this feels rough, does it really solve
>the race here?

No, frankly, given the list_del_rcu() in hci_chan_del().

>Normally a reference count should be enough to properly
>tear things down when needed, rolling back from a "can I try this now"
>state still seems racy without the needed lock somewhere.

The rollback is added only for making sure that the channel released in
l2cap_conn_del() would not be freed in the other paths. That exclusiveness
adds more barriers than thought to fixing the rare race with kref and spinlock
in the usual and simple manner.

If OTOH channel is created with the exclusiveness taken into account by adding
the exclusive create and delete methods for l2cap, then the race can be fixed
by checking the exclusive mark in addition to acquiring the hdev lock at release
time.

+++ x/include/net/bluetooth/hci_core.h
@@ -704,6 +704,7 @@ struct hci_chan {
struct sk_buff_head data_q;
unsigned int sent;
__u8 state;
+ __u8 exclusive;
};

struct hci_conn_params {
@@ -1082,6 +1083,8 @@ void hci_conn_check_pending(struct hci_d

struct hci_chan *hci_chan_create(struct hci_conn *conn);
void hci_chan_del(struct hci_chan *chan);
+struct hci_chan *hci_chan_create_exclusive(struct hci_conn *conn);
+void hci_chan_del_exclusive(struct hci_chan *chan);
void hci_chan_list_flush(struct hci_conn *conn);
struct hci_chan *hci_chan_lookup_handle(struct hci_dev *hdev, __u16 handle);

+++ x/net/bluetooth/l2cap_core.c
@@ -1903,7 +1903,7 @@ static void l2cap_conn_del(struct hci_co

mutex_unlock(&conn->chan_lock);

- hci_chan_del(conn->hchan);
+ hci_chan_del_exclusive(conn->hchan);

if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT)
cancel_delayed_work_sync(&conn->info_timer);
@@ -7703,13 +7703,13 @@ static struct l2cap_conn *l2cap_conn_add
if (conn)
return conn;

- hchan = hci_chan_create(hcon);
+ hchan = hci_chan_create_exclusive(hcon);
if (!hchan)
return NULL;

conn = kzalloc(sizeof(*conn), GFP_KERNEL);
if (!conn) {
- hci_chan_del(hchan);
+ hci_chan_del_exclusive(hchan);
return NULL;
}

+++ x/net/bluetooth/hci_conn.c
@@ -1739,7 +1739,8 @@ int hci_get_auth_info(struct hci_dev *hd
return copy_to_user(arg, &req, sizeof(req)) ? -EFAULT : 0;
}

-struct hci_chan *hci_chan_create(struct hci_conn *conn)
+static struct hci_chan *__hci_chan_create(struct hci_conn *conn,
+ int exclusive)
{
struct hci_dev *hdev = conn->hdev;
struct hci_chan *chan;
@@ -1758,13 +1759,25 @@ struct hci_chan *hci_chan_create(struct
chan->conn = hci_conn_get(conn);
skb_queue_head_init(&chan->data_q);
chan->state = BT_CONNECTED;
+ if (exclusive)
+ chan->exclusive = 1;

list_add_rcu(&chan->list, &conn->chan_list);

return chan;
}

-void hci_chan_del(struct hci_chan *chan)
+struct hci_chan *hci_chan_create(struct hci_conn *conn)
+{
+ return __hci_chan_create(conn, 0);
+}
+
+struct hci_chan *hci_chan_create_exclusive(struct hci_conn *conn)
+{
+ return __hci_chan_create(conn, 1);
+}
+
+static void __hci_chan_del(struct hci_chan *chan)
{
struct hci_conn *conn = chan->conn;
struct hci_dev *hdev = conn->hdev;
@@ -1784,6 +1797,23 @@ void hci_chan_del(struct hci_chan *chan)
kfree(chan);
}

+void hci_chan_del(struct hci_chan *chan)
+{
+ if (!chan->exclusive)
+ __hci_chan_del(chan);
+}
+
+void hci_chan_del_exclusive(struct hci_chan *chan)
+{
+ if (chan->exclusive) {
+ struct hci_dev *hdev = chan->conn->hdev;
+
+ hci_dev_lock(hdev);
+ __hci_chan_del(chan);
+ hci_dev_unlock(hdev);
+ }
+}
+
void hci_chan_list_flush(struct hci_conn *conn)
{
struct hci_chan *chan, *n;

Greg KH

unread,
Jun 8, 2021, 4:40:16 AM6/8/21
to Hillf Danton, Leon Romanovsky, SyzScope, da...@davemloft.net, johan....@gmail.com, ku...@kernel.org, linux-b...@vger.kernel.org, linux-...@vger.kernel.org, Luiz Augusto von Dentz, mar...@holtmann.org, net...@vger.kernel.org, syzkall...@googlegroups.com
One would think that the state machine for the channel would fix this
whole mess, why do we need an "additional" state here in the first
place?

Would be nice if one of the bluetooth maintainers weighed in on this...

thanks,

greg k-h

Greg KH

unread,
Jun 8, 2021, 4:46:08 AM6/8/21