[PATCH] perf: Fix data race in perf_event_set_bpf_handler()

11 views
Skip to first unread message

Henry Zhang

unread,
Jan 27, 2026, 2:30:37 AMJan 27
to pet...@infradead.org, mi...@redhat.com, ac...@kernel.org, linux-pe...@vger.kernel.org, linux-...@vger.kernel.org, syzkall...@googlegroups.com, Henry Zhang, syzbot+2a077c...@syzkaller.appspotmail.com
KCSAN reported a data race where perf_event_set_bpf_handler() writes
event->prog while __perf_event_overflow() reads it concurrently from
interrupt context:

BUG: KCSAN: data-race in __perf_event_overflow / __perf_event_set_bpf_prog

write to 0xffff88811b219168 of 8 bytes by task 13065 on cpu 0:
perf_event_set_bpf_handler kernel/events/core.c:10352 [inline]
__perf_event_set_bpf_prog+0x418/0x470 kernel/events/core.c:11303
...

read to 0xffff88811b219168 of 8 bytes by interrupt on cpu 1:
__perf_event_overflow+0x252/0x920 kernel/events/core.c:10410
...

Annotate event->prog access with WRITE_ONCE/READ_ONCE.

Reported-by: syzbot+2a077c...@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=2a077cb788749964cf68
Signed-off-by: Henry Zhang <ze...@umich.edu>
---
kernel/events/core.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index a0fa488bce84..1f3ed9e87507 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10349,7 +10349,7 @@ static inline int perf_event_set_bpf_handler(struct perf_event *event,
return -EPROTO;
}

- event->prog = prog;
+ WRITE_ONCE(event->prog, prog);
event->bpf_cookie = bpf_cookie;
return 0;
}
@@ -10407,7 +10407,9 @@ static int __perf_event_overflow(struct perf_event *event,
if (event->attr.aux_pause)
perf_event_aux_pause(event->aux_event, true);

- if (event->prog && event->prog->type == BPF_PROG_TYPE_PERF_EVENT &&
+ struct bpf_prog *prog = READ_ONCE(event->prog);
+
+ if (prog && prog->type == BPF_PROG_TYPE_PERF_EVENT &&
!bpf_overflow_handler(event, data, regs))
goto out;

--
2.34.1

Qing Wang

unread,
Jan 27, 2026, 3:37:28 AMJan 27
to henryzh...@gmail.com, ac...@kernel.org, linux-...@vger.kernel.org, linux-pe...@vger.kernel.org, mi...@redhat.com, pet...@infradead.org, syzbot+2a077c...@syzkaller.appspotmail.com, syzkall...@googlegroups.com, ze...@umich.edu
On Tue, 27 Jan 2026 at 10:36, Henry Zhang <henryzh...@gmail.com> wrote:
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index a0fa488bce84..1f3ed9e87507 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -10349,7 +10349,7 @@ static inline int perf_event_set_bpf_handler(struct perf_event *event,
> return -EPROTO;
> }
>
> - event->prog = prog;
> + WRITE_ONCE(event->prog, prog);
> event->bpf_cookie = bpf_cookie;
> return 0;
> }
> @@ -10407,7 +10407,9 @@ static int __perf_event_overflow(struct perf_event *event,
> if (event->attr.aux_pause)
> perf_event_aux_pause(event->aux_event, true);
>
> - if (event->prog && event->prog->type == BPF_PROG_TYPE_PERF_EVENT &&
> + struct bpf_prog *prog = READ_ONCE(event->prog);
> +
> + if (prog && prog->type == BPF_PROG_TYPE_PERF_EVENT &&
> !bpf_overflow_handler(event, data, regs))
> goto out;

Looking at this code, I think there may be a serious issue: a potential
use-after-free (UAF) risk when accessing event->prog in __perf_event_overflow().

CPU 0 (interrupt context) CPU 1 (process context)
read event->prog
perf_event_free_bpf_handler()
put(prog)
free(prog)
access memory pointed to by prog

This scenario needs more analysis.

--
Qing

Qing Wang

unread,
Jan 27, 2026, 5:36:21 AMJan 27
to henryzh...@gmail.com, ac...@kernel.org, linux-...@vger.kernel.org, linux-pe...@vger.kernel.org, mi...@redhat.com, pet...@infradead.org, syzbot+2a077c...@syzkaller.appspotmail.com, syzkall...@googlegroups.com, ze...@umich.edu
On Tue, 27 Jan 2026 at 16:37, Qing Wang <wangqi...@gmail.com> wrote:
> On Tue, 27 Jan 2026 at 10:36, Henry Zhang <henryzh...@gmail.com> wrote:
> > diff --git a/kernel/events/core.c b/kernel/events/core.c
> > index a0fa488bce84..1f3ed9e87507 100644
> > --- a/kernel/events/core.c
> > +++ b/kernel/events/core.c
> > @@ -10349,7 +10349,7 @@ static inline int perf_event_set_bpf_handler(struct perf_event *event,
> > return -EPROTO;
> > }
> >
> > - event->prog = prog;
> > + WRITE_ONCE(event->prog, prog);
> > event->bpf_cookie = bpf_cookie;
> > return 0;
> > }
> > @@ -10407,7 +10407,9 @@ static int __perf_event_overflow(struct perf_event *event,
> > if (event->attr.aux_pause)
> > perf_event_aux_pause(event->aux_event, true);
> >
> > - if (event->prog && event->prog->type == BPF_PROG_TYPE_PERF_EVENT &&
> > + struct bpf_prog *prog = READ_ONCE(event->prog);
> > +
> > + if (prog && prog->type == BPF_PROG_TYPE_PERF_EVENT &&
> > !bpf_overflow_handler(event, data, regs))
> > goto out;
>
> Looking at this code, I guess there may be an serious issue: a potential
> use-after-free (UAF) risk when accessing event->prog in __perf_event_overflow.
>
> CPU 0 (interrupt context) CPU 1 (process context)
> read event->prog
> perf_event_free_bpf_handler()
> put(prog)
> free(prog)
> access memory pointed to by prog
>
> This scenario need to be more analysis.
>
> --
> Qing

This is my idea for fixing both the data race and the potential UAF.

diff --git a/kernel/events/core.c b/kernel/events/core.c
index a0fa488bce84..3abf3689157d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10291,7 +10291,12 @@ static inline bool sample_is_allowed(struct perf_event *event, struct pt_regs *r
}

#ifdef CONFIG_BPF_SYSCALL
+/*
+ * Execute the attached BPF program. Caller must ensure prog is non-NULL
+ * and of type BPF_PROG_TYPE_PERF_EVENT under RCU protection.
+ */
static int bpf_overflow_handler(struct perf_event *event,
+ struct bpf_prog *prog,
struct perf_sample_data *data,
struct pt_regs *regs)
{
@@ -10299,22 +10304,17 @@ static int bpf_overflow_handler(struct perf_event *event,
.data = data,
.event = event,
};
- struct bpf_prog *prog;
int ret = 0;

ctx.regs = perf_arch_bpf_user_pt_regs(regs);
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
goto out;
- rcu_read_lock();
- prog = READ_ONCE(event->prog);
- if (prog) {
- perf_prepare_sample(data, event, regs);
- ret = bpf_prog_run(prog, &ctx);
- }
- rcu_read_unlock();
+
+ perf_prepare_sample(data, event, regs);
+ ret = bpf_prog_run(prog, &ctx);
+
out:
__this_cpu_dec(bpf_prog_active);
-
return ret;
}

@@ -10349,7 +10349,7 @@ static inline int perf_event_set_bpf_handler(struct perf_event *event,
return -EPROTO;
}

- event->prog = prog;
+ WRITE_ONCE(event->prog, prog);
event->bpf_cookie = bpf_cookie;
return 0;
}
@@ -10361,13 +10361,14 @@ static inline void perf_event_free_bpf_handler(struct perf_event *event)
if (!prog)
return;

- event->prog = NULL;
+ WRITE_ONCE(event->prog, NULL);
bpf_prog_put(prog);
}
#else
static inline int bpf_overflow_handler(struct perf_event *event,
- struct perf_sample_data *data,
- struct pt_regs *regs)
+ struct bpf_prog *prog,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
{
return 1;
}
@@ -10407,9 +10408,19 @@ static int __perf_event_overflow(struct perf_event *event,
if (event->attr.aux_pause)
perf_event_aux_pause(event->aux_event, true);

- if (event->prog && event->prog->type == BPF_PROG_TYPE_PERF_EVENT &&
- !bpf_overflow_handler(event, data, regs))
+ /*
+ * For BPF-based overflow handling. If a BPF_PROG_TYPE_PERF_EVENT
+ * program is attached, execute it and skip default overflow handling.
+ */
+ rcu_read_lock();
+ struct bpf_prog *prog = rcu_dereference(event->prog);
+
+ if (prog && prog->type == BPF_PROG_TYPE_PERF_EVENT &&
+ !bpf_overflow_handler(event, prog, data, regs)) {
+ rcu_read_unlock();
goto out;
+ }
+ rcu_read_unlock();

/*
* XXX event_limit might not quite work as expected on inherited

What do you think about this solution? Looking forward to your review.
--
Qing

Qing Wang

unread,
Jan 27, 2026, 9:48:48 PMJan 27
to ze...@umich.edu, ac...@kernel.org, henryzh...@gmail.com, linux-...@vger.kernel.org, linux-pe...@vger.kernel.org, mi...@redhat.com, pet...@infradead.org, syzbot+2a077c...@syzkaller.appspotmail.com, syzkall...@googlegroups.com, wangqi...@gmail.com
On Wed, 28 Jan 2026 at 05:29, Henry Zhang <ze...@umich.edu> wrote:
> Thanks, this looks good.

Thanks for your review. Let's wait for the maintainers' reply.

--
Qing

Qing Wang

unread,
Jan 29, 2026, 10:34:28 PMJan 29
to ze...@umich.edu, ac...@kernel.org, henryzh...@gmail.com, linux-...@vger.kernel.org, linux-pe...@vger.kernel.org, mi...@redhat.com, pet...@infradead.org, syzbot+2a077c...@syzkaller.appspotmail.com, syzkall...@googlegroups.com, wangqi...@gmail.com
On Wed, 28 Jan 2026 at 05:29, Henry Zhang <ze...@umich.edu> wrote:
> Thanks, this looks good.

After further review, I think my patch is incorrect.

1. RCU protects 'prog->aux' but not 'prog', so 'rcu_dereference(event->prog)'
is incorrect.
2. The UAF issue of 'prog' may not exist. It needs further analysis.

Let's forget my patch.

Your patch fixes the data race correctly, but I still have one small
suggestion:

The 'event->prog = NULL' store in perf_event_free_bpf_handler() also needs WRITE_ONCE().

--
Best regards,
Qing

Henry Zhang

unread,
Jan 30, 2026, 4:14:52 AMJan 30
to Qing Wang, henryzh...@gmail.com, ac...@kernel.org, linux-...@vger.kernel.org, linux-pe...@vger.kernel.org, mi...@redhat.com, pet...@infradead.org, syzbot+2a077c...@syzkaller.appspotmail.com, syzkall...@googlegroups.com
Thanks, this looks good.

--
Henry

Peter Zijlstra

unread,
Jan 30, 2026, 5:07:38 AMJan 30
to Qing Wang, henryzh...@gmail.com, ac...@kernel.org, linux-...@vger.kernel.org, linux-pe...@vger.kernel.org, mi...@redhat.com, syzbot+2a077c...@syzkaller.appspotmail.com, syzkall...@googlegroups.com, ze...@umich.edu
This can only happen if the event can overlap with removal, which it
typically cannot -- but I'll have to audit the software events.

Specifically, events happen in IRQ/NMI context, and event removal
involves an IPI to that very CPU, which by necessity will then have to
wait for event completion.

Peter Zijlstra

unread,
Jan 30, 2026, 5:23:07 AMJan 30
to Henry Zhang, mi...@redhat.com, ac...@kernel.org, linux-pe...@vger.kernel.org, linux-...@vger.kernel.org, syzkall...@googlegroups.com, Henry Zhang, syzbot+2a077c...@syzkaller.appspotmail.com, and...@kernel.org
What about that cookie thing? The consumer seems to be a bpf function
(bpf_get_attach_cookie_pe) which can equally run concurrently, no?

Also, there seems to be a coherency issue here, if prog runs, it expects
cookie to be present and all that.

Would that not suggest something like:

WRITE_ONCE(event->bpf_cookie, bpf_cookie);
smp_store_release(&event->prog, prog);

> return 0;
> }
> @@ -10407,7 +10407,9 @@ static int __perf_event_overflow(struct perf_event *event,
> if (event->attr.aux_pause)
> perf_event_aux_pause(event->aux_event, true);
>
> - if (event->prog && event->prog->type == BPF_PROG_TYPE_PERF_EVENT &&
> + struct bpf_prog *prog = READ_ONCE(event->prog);

smp_load_acquire(&event->prog);

> +
> + if (prog && prog->type == BPF_PROG_TYPE_PERF_EVENT &&
> !bpf_overflow_handler(event, data, regs))
> goto out;
>

Hmm?

Henry Zhang

unread,
Jan 30, 2026, 8:48:42 AMJan 30
to Peter Zijlstra, Henry Zhang, mi...@redhat.com, ac...@kernel.org, linux-pe...@vger.kernel.org, linux-...@vger.kernel.org, syzkall...@googlegroups.com, syzbot+2a077c...@syzkaller.appspotmail.com, and...@kernel.org
Thanks for the feedback. I'll send a v2 later in the day.

--
Henry

Andrii Nakryiko

unread,
Jan 30, 2026, 3:32:44 PMJan 30
to Peter Zijlstra, Henry Zhang, mi...@redhat.com, ac...@kernel.org, linux-pe...@vger.kernel.org, linux-...@vger.kernel.org, syzkall...@googlegroups.com, Henry Zhang, syzbot+2a077c...@syzkaller.appspotmail.com, and...@kernel.org
yeah, once we set event->prog, we can theoretically have that BPF
program triggered before we set event->bpf_cookie.

But this setup thing is one-time and can be expensive...

>
> > return 0;
> > }
> > @@ -10407,7 +10407,9 @@ static int __perf_event_overflow(struct perf_event *event,
> > if (event->attr.aux_pause)
> > perf_event_aux_pause(event->aux_event, true);
> >
> > - if (event->prog && event->prog->type == BPF_PROG_TYPE_PERF_EVENT &&
> > + struct bpf_prog *prog = READ_ONCE(event->prog);
>
> smp_load_acquire(&event->prog);

while this is very frequent. So shouldn't we try to avoid unnecessary
overhead here? Maybe just use more expensive memory barriers in
perf_event_set_bpf_handler() to ensure that bpf_cookie will always be
set before event->prog can be seen by any CPU?

Peter Zijlstra

unread,
Feb 24, 2026, 7:29:15 AM (20 hours ago) Feb 24
to Qing Wang, henryzh...@gmail.com, ac...@kernel.org, linux-...@vger.kernel.org, linux-pe...@vger.kernel.org, mi...@redhat.com, syzbot+2a077c...@syzkaller.appspotmail.com, syzkall...@googlegroups.com, ze...@umich.edu
---
Subject: perf: Fix __perf_event_overflow() vs perf_remove_from_context() race

Make sure that __perf_event_overflow() runs with IRQs disabled for all
possible callchains. Specifically the software events can end up running
it with only preemption disabled.

This opens up a race vs perf_event_exit_event() and friends that will go
and free various things the overflow path expects to be present, like
the BPF program.

Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
---
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 22a0f405585b..1f5699b339ec 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10777,6 +10777,13 @@ int perf_event_overflow(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
+ /*
+ * Entry point from hardware PMI, interrupts should be disabled here.
+ * This serializes us against perf_event_remove_from_context() in
+ * things like perf_event_release_kernel().
+ */
+ lockdep_assert_irqs_disabled();
+
return __perf_event_overflow(event, 1, data, regs);
}

@@ -10853,6 +10860,19 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
{
struct hw_perf_event *hwc = &event->hw;

+ /*
+ * This is:
+ * - software preempt
+ * - tracepoint preempt
+ * - tp_target_task irq (ctx->lock)
+ * - uprobes preempt/irq
+ * - kprobes preempt/irq
+ * - hw_breakpoint irq
+ *
+ * Any of these are sufficient to hold off RCU and thus ensure @event
+ * exists.
+ */
+ lockdep_assert_preemption_disabled();
local64_add(nr, &event->count);

if (!regs)
@@ -10861,6 +10881,16 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
if (!is_sampling_event(event))
return;

+ /*
+ * Serialize against event_function_call() IPIs like normal overflow
+ * event handling. Specifically, must not allow
+ * perf_event_release_kernel() -> perf_remove_from_context() to make
+ * progress and 'release' the event from under us.
+ */
+ guard(irqsave)();
+ if (event->state != PERF_EVENT_STATE_ACTIVE)
+ return;
+
if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) {
data->period = nr;
return perf_swevent_overflow(event, 1, data, regs);
@@ -11359,6 +11389,11 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
struct perf_sample_data data;
struct perf_event *event;

+ /*
+ * Per being a tracepoint, this runs with preemption disabled.
+ */
+ lockdep_assert_preemption_disabled();
+
struct perf_raw_record raw = {
.frag = {
.size = entry_size,
@@ -11691,6 +11726,11 @@ void perf_bp_event(struct perf_event *bp, void *data)
struct perf_sample_data sample;
struct pt_regs *regs = data;

+ /*
+ * Exception context, will have interrupts disabled.
+ */
+ lockdep_assert_irqs_disabled();
+
perf_sample_data_init(&sample, bp->attr.bp_addr, 0);

if (!bp->hw.state && !perf_exclude_event(bp, regs))
@@ -12155,7 +12195,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)

if (regs && !perf_exclude_event(event, regs)) {
if (!(event->attr.exclude_idle && is_idle_task(current)))
- if (__perf_event_overflow(event, 1, &data, regs))
+ if (perf_event_overflow(event, &data, regs))
ret = HRTIMER_NORESTART;
}

Reply all
Reply to author
Forward
0 new messages