[PATCH 0/3] KCOV function entry/exit records

0 views
Skip to first unread message

Jann Horn

unread,
Mar 11, 2026, 5:06:51 PM (10 days ago) Mar 11
to Dmitry Vyukov, Andrey Konovalov, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, linux-...@vger.kernel.org, kasa...@googlegroups.com, ll...@lists.linux.dev, Jann Horn, Ingo Molnar, Peter Zijlstra, Josh Poimboeuf
This series adds a KCOV feature that userspace can use to keep track of
the current call stack. When userspace enables the new mode
KCOV_TRACE_PC_EXT, collected instruction addresses are tagged with one
of three types:

- function entry
- non-entry basic block
- function exit

This requires corresponding LLVM support; an LLVM patch implementing
this feature has been uploaded for review at
<https://github.com/llvm/llvm-project/pull/185972>, but hasn't landed yet.

A simple example of how to use KCOV_TRACE_PC_EXT:
```
user@vm:~/kcov/u$ cat kcov-u.c

typeof(x) __res = (x); \
if (__res == (typeof(x))-1) \
err(1, "SYSCHK(" #x ")"); \
__res; \
})

/* Print one space per nesting level to visualize the current call depth. */
static void indent(int depth)
{
	int remaining = depth;

	while (remaining-- > 0)
		printf(" ");
}

/*
 * Demo for KCOV_TRACE_PC_EXT: enable extended coverage collection, record
 * kernel activity around a short sleep, then print the collected records
 * as an indented call tree (ENTER/EXIT/BB per record type).
 */
int main(void)
{
	int kcov_fd = SYSCHK(open("/sys/kernel/debug/kcov", O_RDWR));

	SYSCHK(ioctl(kcov_fd, KCOV_INIT_TRACE, COVER_SIZE));
	unsigned long *area = (unsigned long *)SYSCHK(
		mmap(NULL, COVER_SIZE * sizeof(unsigned long), PROT_READ | PROT_WRITE,
		     MAP_SHARED, kcov_fd, 0));
	SYSCHK(ioctl(kcov_fd, KCOV_ENABLE, KCOV_TRACE_PC_EXT));

	usleep(1000); /* fault in stuff */
	__atomic_store_n(&area[0], 0, __ATOMIC_RELAXED); /* start recording */
	usleep(1000);
	/* area[0] holds the number of records collected so far. */
	unsigned long num_records = __atomic_load_n(&area[0], __ATOMIC_RELAXED);

	int depth = 0;
	for (unsigned long idx = 0; idx < num_records; idx++) {
		unsigned long record = area[1 + idx];
		/* Restore a canonical kernel address from the masked PC bits. */
		unsigned long pc = record | ~KCOV_RECORD_IP_MASK;
		unsigned long type = record & KCOV_RECORDFLAG_TYPEMASK;

		if (type == KCOV_RECORDFLAG_TYPE_NORMAL) {
			indent(depth);
			printf("BB 0x%lx\n", pc);
		} else if (type == KCOV_RECORDFLAG_TYPE_ENTRY) {
			indent(depth);
			printf("ENTER 0x%lx\n", pc);
			depth++;
		} else if (type == KCOV_RECORDFLAG_TYPE_EXIT) {
			if (depth == 0)
				errx(1, "exit at depth 0");
			depth--;
			indent(depth);
			printf("EXIT 0x%lx\n", pc);
		} else {
			errx(1, "unknown record type in 0x%016lx", record);
		}
	}
}
user@vm:~/kcov/u$ cat symbolize.py

import sys
import bisect

# Load the (address, name) table from /proc/kallsyms.
# kallsyms output is sorted by address, which lets us binary-search below
# instead of the original per-line linear scan over ~100k symbols.
syms = []
with open('/proc/kallsyms') as f:
    for line in f:
        parts = line.strip().split(' ')
        if len(parts) < 3:
            continue
        syms.append((int(parts[0], 16), parts[2]))

# Parallel list of addresses for bisect.
addrs = [addr for addr, _name in syms]

for line in sys.stdin:
    # Expect exactly one "0x<hex>" per line (e.g. "ENTER 0x...");
    # anything else is passed over silently, matching the raw tool output.
    parts = line.rstrip().split('0x')
    if len(parts) != 2:
        continue
    record_pc = int(parts[1], 16)
    # Greatest index whose address is <= record_pc; clamp to 0 so that a PC
    # below the first symbol still resolves to syms[0] (negative offset),
    # as the original linear scan did.
    i = max(bisect.bisect_right(addrs, record_pc) - 1, 0)
    # Only print when a following symbol exists, i.e. the PC is not past the
    # last table entry (same suppression as the original loop bound).
    if i < len(syms) - 1:
        print(parts[0] + syms[i][1] + '+' + hex(record_pc - syms[i][0]))
user@vm:~/kcov/u$ gcc -o kcov-u kcov-u.c -Wall
user@vm:~/kcov/u$ sudo ./kcov-u | sudo ./symbolize.py
ENTER __audit_syscall_entry+0x2c
BB __audit_syscall_entry+0xa4
BB __audit_syscall_entry+0xd2
BB __audit_syscall_entry+0x1ab
ENTER ktime_get_coarse_real_ts64+0x1a
BB ktime_get_coarse_real_ts64+0x3f
BB ktime_get_coarse_real_ts64+0x96
EXIT ktime_get_coarse_real_ts64+0x9b
EXIT __audit_syscall_entry+0x12b
ENTER __x64_sys_clock_nanosleep+0x18
ENTER __se_sys_clock_nanosleep+0x33
BB __se_sys_clock_nanosleep+0x10e
ENTER get_timespec64+0x29
ENTER _copy_from_user+0x17
BB _copy_from_user+0x5d
EXIT _copy_from_user+0x62
BB get_timespec64+0xaf
EXIT get_timespec64+0xd5
BB __se_sys_clock_nanosleep+0x1c0
ENTER common_nsleep+0x1f
ENTER hrtimer_nanosleep+0x2f
ENTER hrtimer_setup_sleeper_on_stack+0x20
BB hrtimer_setup_sleeper_on_stack+0x2a
BB hrtimer_setup_sleeper_on_stack+0x7e
EXIT hrtimer_setup_sleeper_on_stack+0x14c
ENTER do_nanosleep+0x2d
BB do_nanosleep+0x3b
ENTER hrtimer_start_range_ns+0x28
BB hrtimer_start_range_ns+0x67
ENTER remove_hrtimer+0x22
BB remove_hrtimer+0x4b
EXIT remove_hrtimer+0x1ea
BB hrtimer_start_range_ns+0x173
ENTER __hrtimer_cb_get_time+0x11
BB __hrtimer_cb_get_time+0x32
ENTER ktime_get+0x17
BB ktime_get+0x33
BB ktime_get+0x58
ENTER kvm_clock_get_cycles+0xc
BB kvm_clock_get_cycles+0x48
EXIT kvm_clock_get_cycles+0x4d
BB ktime_get+0xb7
BB ktime_get+0x149
EXIT ktime_get+0x151
EXIT __hrtimer_cb_get_time+0x84
BB hrtimer_start_range_ns+0x3bc
BB hrtimer_start_range_ns+0x5a4
ENTER enqueue_hrtimer+0x20
BB enqueue_hrtimer+0x2a
BB enqueue_hrtimer+0x5b
ENTER timerqueue_add+0x1c
BB timerqueue_add+0x41
BB timerqueue_add+0xb2
BB timerqueue_add+0xb2
BB timerqueue_add+0xf9
EXIT timerqueue_add+0x150
EXIT enqueue_hrtimer+0xaf
BB hrtimer_start_range_ns+0x714
ENTER hrtimer_reprogram+0x1b
BB hrtimer_reprogram+0x65
BB hrtimer_reprogram+0x13a
BB hrtimer_reprogram+0x1dc
ENTER tick_program_event+0x25
BB tick_program_event+0x65
ENTER clockevents_program_event+0x20
BB clockevents_program_event+0x7e
ENTER ktime_get+0x17
BB ktime_get+0x33
BB ktime_get+0x58
ENTER kvm_clock_get_cycles+0xc
BB kvm_clock_get_cycles+0x48
EXIT kvm_clock_get_cycles+0x4d
BB ktime_get+0xb7
BB ktime_get+0x149
EXIT ktime_get+0x151
BB clockevents_program_event+0x219
EXIT clockevents_program_event+0x22b
EXIT tick_program_event+0x89
EXIT hrtimer_reprogram+0x211
EXIT hrtimer_start_range_ns+0x74d
BB do_nanosleep+0x9c
ENTER sched_clock+0xc
BB sched_clock+0x40
EXIT sched_clock+0x45
ENTER arch_scale_cpu_capacity+0x13
BB arch_scale_cpu_capacity+0x1a
EXIT arch_scale_cpu_capacity+0x24
ENTER __cgroup_account_cputime+0x1b
ENTER css_rstat_updated+0x2c
BB css_rstat_updated+0x77
BB css_rstat_updated+0xbe
EXIT css_rstat_updated+0x1bc
BB __cgroup_account_cputime+0x81
EXIT __cgroup_account_cputime+0x86
ENTER sched_clock+0xc
BB sched_clock+0x40
EXIT sched_clock+0x45
ENTER sched_clock+0xc
BB sched_clock+0x40
EXIT sched_clock+0x45
ENTER __msecs_to_jiffies+0x13
BB __msecs_to_jiffies+0x25
EXIT __msecs_to_jiffies+0x4c
ENTER prandom_u32_state+0x15
EXIT prandom_u32_state+0xbe
ENTER hrtimer_try_to_cancel+0x1e
BB hrtimer_try_to_cancel+0x6a
BB hrtimer_try_to_cancel+0x1da
EXIT hrtimer_try_to_cancel+0x1be
BB do_nanosleep+0xbf
BB do_nanosleep+0x166
BB do_nanosleep+0x177
BB do_nanosleep+0x275
EXIT do_nanosleep+0x2d5
BB hrtimer_nanosleep+0x182
EXIT hrtimer_nanosleep+0x194
EXIT common_nsleep+0x77
EXIT __se_sys_clock_nanosleep+0x15d
EXIT __x64_sys_clock_nanosleep+0x62
ENTER __audit_syscall_exit+0x1d
BB __audit_syscall_exit+0x5c
ENTER audit_reset_context+0x1e
BB audit_reset_context+0x52
EXIT audit_reset_context+0x5f6
EXIT __audit_syscall_exit+0x168
ENTER fpregs_assert_state_consistent+0x11
BB fpregs_assert_state_consistent+0x48
BB fpregs_assert_state_consistent+0xa6
EXIT fpregs_assert_state_consistent+0xcc
ENTER switch_fpu_return+0xe
ENTER fpregs_restore_userregs+0x12
BB fpregs_restore_userregs+0x4c
BB fpregs_restore_userregs+0xb8
EXIT fpregs_restore_userregs+0x107
EXIT switch_fpu_return+0x18
```

Signed-off-by: Jann Horn <ja...@google.com>
---
Jann Horn (3):
sched: Ensure matching stack state for kcov disable/enable on switch
kcov: wire up compiler instrumentation for CONFIG_KCOV_EXT_RECORDS
kcov: introduce extended PC coverage collection mode

include/linux/kcov.h | 2 ++
include/linux/sched.h | 6 ++--
include/uapi/linux/kcov.h | 12 ++++++++
kernel/kcov.c | 76 +++++++++++++++++++++++++++++++++++++++--------
kernel/sched/core.c | 13 ++++++--
lib/Kconfig.debug | 14 +++++++++
scripts/Makefile.kcov | 2 ++
tools/objtool/check.c | 2 ++
8 files changed, 110 insertions(+), 17 deletions(-)
---
base-commit: b29fb8829bff243512bb8c8908fd39406f9fd4c3
change-id: 20260311-kcov-extrecord-6e0d9a2b0a8c

--
Jann Horn <ja...@google.com>

Jann Horn

unread,
Mar 11, 2026, 5:06:52 PM (10 days ago) Mar 11
to Dmitry Vyukov, Andrey Konovalov, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, linux-...@vger.kernel.org, kasa...@googlegroups.com, ll...@lists.linux.dev, Jann Horn, Ingo Molnar, Peter Zijlstra
Ensure that kcov is disabled and enabled with the same call stack.
This will be relied on by subsequent patches for recording function
entry/exit records via kcov.

This patch should not affect compilation of normal kernels without KCOV
(though it changes "inline" to "__always_inline").

To: Ingo Molnar <mi...@redhat.com>
To: Peter Zijlstra <pet...@infradead.org>
Signed-off-by: Jann Horn <ja...@google.com>
---
kernel/sched/core.c | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b7f77c165a6e..c470f0a669ec 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5072,8 +5072,10 @@ static inline void kmap_local_sched_in(void)
*
* prepare_task_switch sets up locking and calls architecture specific
* hooks.
+ *
+ * Must be inlined for kcov_prepare_switch().
*/
-static inline void
+static __always_inline void
prepare_task_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next)
__must_hold(__rq_lockp(rq))
@@ -5149,7 +5151,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
tick_nohz_task_switch();
finish_lock_switch(rq);
finish_arch_post_lock_switch();
- kcov_finish_switch(current);
/*
* kmap_local_sched_out() is invoked with rq::lock held and
* interrupts disabled. There is no requirement for that, but the
@@ -5295,7 +5296,13 @@ context_switch(struct rq *rq, struct task_struct *prev,
switch_to(prev, next, prev);
barrier();

- return finish_task_switch(prev);
+ rq = finish_task_switch(prev);
+ /*
+ * This has to happen outside finish_task_switch() to ensure that
+ * entry/exit records are balanced.
+ */
+ kcov_finish_switch(current);
+ return rq;
}

/*

--
2.53.0.473.g4a7958ca14-goog

Jann Horn

unread,
Mar 11, 2026, 5:06:53 PM (10 days ago) Mar 11
to Dmitry Vyukov, Andrey Konovalov, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, linux-...@vger.kernel.org, kasa...@googlegroups.com, ll...@lists.linux.dev, Jann Horn, Josh Poimboeuf, Peter Zijlstra
This is the first half of CONFIG_KCOV_EXT_RECORDS.

Set the appropriate compiler flags to call separate hooks for function
entry/exit, and provide these hooks, but don't make it visible in the KCOV
UAPI yet.

With -fsanitize-coverage=trace-pc-entry-exit, the compiler behavior changes
as follows:

- The __sanitizer_cov_trace_pc() call on function entry is replaced with a
call to __sanitizer_cov_trace_pc_entry(); so for now,
__sanitizer_cov_trace_pc_entry() must be treated the same way as
__sanitizer_cov_trace_pc().
- On function exit, an extra call to __sanitizer_cov_trace_pc_exit()
happens; since function exit produced no coverage in the old UAPI,
__sanitizer_cov_trace_pc_exit() should do nothing for now.

Cc: Josh Poimboeuf <jpoi...@kernel.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Signed-off-by: Jann Horn <ja...@google.com>
---
include/linux/kcov.h | 2 ++
kernel/kcov.c | 30 +++++++++++++++++++++++-------
lib/Kconfig.debug | 14 ++++++++++++++
scripts/Makefile.kcov | 2 ++
tools/objtool/check.c | 2 ++
5 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/include/linux/kcov.h b/include/linux/kcov.h
index 0143358874b0..e5502d674029 100644
--- a/include/linux/kcov.h
+++ b/include/linux/kcov.h
@@ -81,6 +81,8 @@ typedef unsigned long long kcov_u64;
#endif

void __sanitizer_cov_trace_pc(void);
+void __sanitizer_cov_trace_pc_entry(void);
+void __sanitizer_cov_trace_pc_exit(void);
void __sanitizer_cov_trace_cmp1(u8 arg1, u8 arg2);
void __sanitizer_cov_trace_cmp2(u16 arg1, u16 arg2);
void __sanitizer_cov_trace_cmp4(u32 arg1, u32 arg2);
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 0b369e88c7c9..2cc48b65384b 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -202,15 +202,10 @@ static notrace unsigned long canonicalize_ip(unsigned long ip)
return ip;
}

-/*
- * Entry point from instrumented code.
- * This is called once per basic-block/edge.
- */
-void notrace __sanitizer_cov_trace_pc(void)
+static void notrace kcov_add_pc_record(unsigned long record)
{
struct task_struct *t;
unsigned long *area;
- unsigned long ip = canonicalize_ip(_RET_IP_);
unsigned long pos;

t = current;
@@ -230,11 +225,32 @@ void notrace __sanitizer_cov_trace_pc(void)
*/
WRITE_ONCE(area[0], pos);
barrier();
- area[pos] = ip;
+ area[pos] = record;
}
}
+
+/*
+ * Entry point from instrumented code.
+ * This is called once per basic-block/edge.
+ */
+void notrace __sanitizer_cov_trace_pc(void)
+{
+ kcov_add_pc_record(canonicalize_ip(_RET_IP_));
+}
EXPORT_SYMBOL(__sanitizer_cov_trace_pc);

+#ifdef CONFIG_KCOV_EXT_RECORDS
+void notrace __sanitizer_cov_trace_pc_entry(void)
+{
+ unsigned long record = canonicalize_ip(_RET_IP_);
+
+ kcov_add_pc_record(record);
+}
+void notrace __sanitizer_cov_trace_pc_exit(void)
+{
+}
+#endif
+
#ifdef CONFIG_KCOV_ENABLE_COMPARISONS
static void notrace write_comp_data(u64 type, u64 arg1, u64 arg2, u64 ip)
{
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 93f356d2b3d9..dddc330ad3ca 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2215,6 +2215,20 @@ config KCOV

For more details, see Documentation/dev-tools/kcov.rst.

+config KCOV_EXT_RECORDS
+ bool "Support extended KCOV records with function entry/exit records"
+ depends on KCOV
+ depends on 64BIT
+ # TODO: check CLANG_VERSION instead once this has landed in an LLVM
+ # release
+ depends on $(cc-option,-fsanitize-coverage=trace-pc-entry-exit)
+ help
+ Extended KCOV records allow distinguishing between multiple types of
+ records: Normal edge coverage, function entry, and function exit.
+
+ This will likely cause a small additional slowdown compared to normal
+ KCOV.
+
config KCOV_ENABLE_COMPARISONS
bool "Enable comparison operands collection by KCOV"
depends on KCOV
diff --git a/scripts/Makefile.kcov b/scripts/Makefile.kcov
index 78305a84ba9d..aa0be904268f 100644
--- a/scripts/Makefile.kcov
+++ b/scripts/Makefile.kcov
@@ -1,10 +1,12 @@
# SPDX-License-Identifier: GPL-2.0-only
kcov-flags-y += -fsanitize-coverage=trace-pc
+kcov-flags-$(CONFIG_KCOV_EXT_RECORDS) += -fsanitize-coverage=trace-pc-entry-exit
kcov-flags-$(CONFIG_KCOV_ENABLE_COMPARISONS) += -fsanitize-coverage=trace-cmp

kcov-rflags-y += -Cpasses=sancov-module
kcov-rflags-y += -Cllvm-args=-sanitizer-coverage-level=3
kcov-rflags-y += -Cllvm-args=-sanitizer-coverage-trace-pc
+kcov-rflags-$(CONFIG_KCOV_EXT_RECORDS) += -Cllvm-args=-sanitizer-coverage-trace-pc-entry-exit
kcov-rflags-$(CONFIG_KCOV_ENABLE_COMPARISONS) += -Cllvm-args=-sanitizer-coverage-trace-compares

export CFLAGS_KCOV := $(kcov-flags-y)
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index a30379e4ff97..ae3127227621 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1251,6 +1251,8 @@ static const char *uaccess_safe_builtin[] = {
"write_comp_data",
"check_kcov_mode",
"__sanitizer_cov_trace_pc",
+ "__sanitizer_cov_trace_pc_entry",
+ "__sanitizer_cov_trace_pc_exit",
"__sanitizer_cov_trace_const_cmp1",
"__sanitizer_cov_trace_const_cmp2",
"__sanitizer_cov_trace_const_cmp4",

--
2.53.0.473.g4a7958ca14-goog

Jann Horn

unread,
Mar 11, 2026, 5:06:55 PM (10 days ago) Mar 11
to Dmitry Vyukov, Andrey Konovalov, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, linux-...@vger.kernel.org, kasa...@googlegroups.com, ll...@lists.linux.dev, Jann Horn
This is the second half of CONFIG_KCOV_EXT_RECORDS.

Introduce a new KCOV mode KCOV_TRACE_PC_EXT which replaces the upper 8 bits
of recorded instruction pointers with metadata. For now, userspace can use
this metadata to distinguish three types of records:

- function entry
- function exit
- normal basic block inside the function

Signed-off-by: Jann Horn <ja...@google.com>
---
include/linux/sched.h | 6 ++++--
include/uapi/linux/kcov.h | 12 ++++++++++++
kernel/kcov.c | 46 +++++++++++++++++++++++++++++++++++++++++-----
3 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a7b4a980eb2f..9a297d2d2abc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1519,8 +1519,10 @@ struct task_struct {
int kcov_sequence;

/* Collect coverage from softirq context: */
- unsigned int kcov_softirq;
-#endif
+ unsigned int kcov_softirq : 1;
+ /* Emit KCOV records in extended format: */
+ unsigned int kcov_ext_format : 1;
+#endif /* CONFIG_KCOV */

#ifdef CONFIG_MEMCG_V1
struct mem_cgroup *memcg_in_oom;
diff --git a/include/uapi/linux/kcov.h b/include/uapi/linux/kcov.h
index ed95dba9fa37..8d8a233bd61f 100644
--- a/include/uapi/linux/kcov.h
+++ b/include/uapi/linux/kcov.h
@@ -35,8 +35,20 @@ enum {
KCOV_TRACE_PC = 0,
/* Collecting comparison operands mode. */
KCOV_TRACE_CMP = 1,
+ /*
+ * Extended PC coverage collection mode.
+ * In this mode, the top byte of the PC is replaced with flag bits
+ * (KCOV_RECORDFLAG_*).
+ */
+ KCOV_TRACE_PC_EXT = 2,
};

+#define KCOV_RECORD_IP_MASK 0x00ffffffffffffff
+#define KCOV_RECORDFLAG_TYPEMASK 0xf000000000000000
+#define KCOV_RECORDFLAG_TYPE_NORMAL 0xf000000000000000
+#define KCOV_RECORDFLAG_TYPE_ENTRY 0x0000000000000000
+#define KCOV_RECORDFLAG_TYPE_EXIT 0x1000000000000000
+
/*
* The format for the types of collected comparisons.
*
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 2cc48b65384b..3482044a7bd5 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -71,6 +71,8 @@ struct kcov {
* kcov_remote_stop(), see the comment there.
*/
int sequence;
+ /* Whether emitted records should have type bits. */
+ unsigned int kcov_ext_format : 1 __guarded_by(&lock);
};

struct kcov_remote_area {
@@ -97,6 +99,7 @@ struct kcov_percpu_data {
void *saved_area;
struct kcov *saved_kcov;
int saved_sequence;
+ unsigned int saved_kcov_ext_format : 1;
};

static DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data) = {
@@ -235,6 +238,12 @@ static void notrace kcov_add_pc_record(unsigned long record)
*/
void notrace __sanitizer_cov_trace_pc(void)
{
+ /*
+ * No bitops are needed here for setting the record type because
+ * KCOV_RECORDFLAG_TYPE_NORMAL has the high bits set.
+ * This relies on userspace not caring about the rest of the top byte
+ * for KCOV_RECORDFLAG_TYPE_NORMAL records.
+ */
kcov_add_pc_record(canonicalize_ip(_RET_IP_));
}
EXPORT_SYMBOL(__sanitizer_cov_trace_pc);
@@ -244,10 +253,26 @@ void notrace __sanitizer_cov_trace_pc_entry(void)
{
unsigned long record = canonicalize_ip(_RET_IP_);

+ /*
+ * This hook replaces __sanitizer_cov_trace_pc() for the function entry
+ * basic block; it should still emit a record even in classic kcov mode.
+ */
+ if (current->kcov_ext_format)
+ record = (record & KCOV_RECORD_IP_MASK) | KCOV_RECORDFLAG_TYPE_ENTRY;
kcov_add_pc_record(record);
}
void notrace __sanitizer_cov_trace_pc_exit(void)
{
+ unsigned long record;
+
+ /*
+ * Unlike __sanitizer_cov_trace_pc_entry(), this PC should only be
+ * reported in extended mode.
+ */
+ if (!current->kcov_ext_format)
+ return;
+ record = (canonicalize_ip(_RET_IP_) & KCOV_RECORD_IP_MASK) | KCOV_RECORDFLAG_TYPE_EXIT;
+ kcov_add_pc_record(record);
}
#endif

@@ -371,7 +396,7 @@ EXPORT_SYMBOL(__sanitizer_cov_trace_switch);

static void kcov_start(struct task_struct *t, struct kcov *kcov,
unsigned int size, void *area, enum kcov_mode mode,
- int sequence)
+ int sequence, unsigned int kcov_ext_format)
{
kcov_debug("t = %px, size = %u, area = %px\n", t, size, area);
t->kcov = kcov;
@@ -379,6 +404,7 @@ static void kcov_start(struct task_struct *t, struct kcov *kcov,
t->kcov_size = size;
t->kcov_area = area;
t->kcov_sequence = sequence;
+ t->kcov_ext_format = kcov_ext_format;
/* See comment in check_kcov_mode(). */
barrier();
WRITE_ONCE(t->kcov_mode, mode);
@@ -398,6 +424,7 @@ static void kcov_task_reset(struct task_struct *t)
kcov_stop(t);
t->kcov_sequence = 0;
t->kcov_handle = 0;
+ t->kcov_ext_format = 0;
}

void kcov_task_init(struct task_struct *t)
@@ -570,6 +597,8 @@ static int kcov_get_mode(unsigned long arg)
#else
return -ENOTSUPP;
#endif
+ else if (arg == KCOV_TRACE_PC_EXT)
+ return IS_ENABLED(CONFIG_KCOV_EXT_RECORDS) ? KCOV_MODE_TRACE_PC : -ENOTSUPP;
else
return -EINVAL;
}
@@ -636,8 +665,9 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
return mode;
kcov_fault_in_area(kcov);
kcov->mode = mode;
+ kcov->kcov_ext_format = (arg == KCOV_TRACE_PC_EXT);
kcov_start(t, kcov, kcov->size, kcov->area, kcov->mode,
- kcov->sequence);
+ kcov->sequence, kcov->kcov_ext_format);
kcov->t = t;
/* Put either in kcov_task_exit() or in KCOV_DISABLE. */
kcov_get(kcov);
@@ -668,7 +698,8 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
return -EINVAL;
kcov->mode = mode;
t->kcov = kcov;
- t->kcov_mode = KCOV_MODE_REMOTE;
+ t->kcov_mode = KCOV_MODE_REMOTE;
+ kcov->kcov_ext_format = (remote_arg->trace_mode == KCOV_TRACE_PC_EXT);
kcov->t = t;
kcov->remote = true;
kcov->remote_size = remote_arg->area_size;
@@ -853,6 +884,7 @@ static void kcov_remote_softirq_start(struct task_struct *t)
data->saved_area = t->kcov_area;
data->saved_sequence = t->kcov_sequence;
data->saved_kcov = t->kcov;
+ data->saved_kcov_ext_format = t->kcov_ext_format;
kcov_stop(t);
}
}
@@ -865,12 +897,14 @@ static void kcov_remote_softirq_stop(struct task_struct *t)
if (data->saved_kcov) {
kcov_start(t, data->saved_kcov, data->saved_size,
data->saved_area, data->saved_mode,
- data->saved_sequence);
+ data->saved_sequence,
+ data->saved_kcov_ext_format);
data->saved_mode = 0;
data->saved_size = 0;
data->saved_area = NULL;
data->saved_sequence = 0;
data->saved_kcov = NULL;
+ data->saved_kcov_ext_format = 0;
}
}

@@ -884,6 +918,7 @@ void kcov_remote_start(u64 handle)
unsigned int size;
int sequence;
unsigned long flags;
+ unsigned int kcov_ext_format;

if (WARN_ON(!kcov_check_handle(handle, true, true, true)))
return;
@@ -930,6 +965,7 @@ void kcov_remote_start(u64 handle)
* acquired _after_ kcov->lock elsewhere.
*/
mode = context_unsafe(kcov->mode);
+ kcov_ext_format = context_unsafe(kcov->kcov_ext_format);
sequence = kcov->sequence;
if (in_task()) {
size = kcov->remote_size;
@@ -958,7 +994,7 @@ void kcov_remote_start(u64 handle)
kcov_remote_softirq_start(t);
t->kcov_softirq = 1;
}
- kcov_start(t, kcov, size, area, mode, sequence);
+ kcov_start(t, kcov, size, area, mode, sequence, kcov_ext_format);

local_unlock_irqrestore(&kcov_percpu_data.lock, flags);


--
2.53.0.473.g4a7958ca14-goog

Peter Zijlstra

unread,
Mar 11, 2026, 6:57:24 PM (10 days ago) Mar 11
to Jann Horn, Dmitry Vyukov, Andrey Konovalov, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, linux-...@vger.kernel.org, kasa...@googlegroups.com, ll...@lists.linux.dev, Ingo Molnar, Josh Poimboeuf
On Wed, Mar 11, 2026 at 10:06:13PM +0100, Jann Horn wrote:
> This series adds a KCOV feature that userspace can use to keep track of
> the current call stack. When userspace enables the new mode
> KCOV_TRACE_PC_EXT, collected instruction addresses are tagged with one
> of three types:
>
> - function entry
> - non-entry basic block
> - function exit
>
> This requires corresponding LLVM support; an LLVM patch implementing
> this feature has been uploaded for review at
> <https://github.com/llvm/llvm-project/pull/185972>, but hasn't landed yet.

And I don't suppose KCOV will finally honour noinstr ?

Jann Horn

unread,
Mar 12, 2026, 7:45:27 AM (9 days ago) Mar 12
to Peter Zijlstra, Dmitry Vyukov, Andrey Konovalov, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, linux-...@vger.kernel.org, kasa...@googlegroups.com, ll...@lists.linux.dev, Ingo Molnar, Josh Poimboeuf
On Wed, Mar 11, 2026 at 11:57 PM Peter Zijlstra <pet...@infradead.org> wrote:
> And I don't suppose KCOV will finally honour noinstr ?

I believe it should, as long as the compiler is new enough?

We have this in include/linux/compiler-clang.h, and I just checked
that, for example, the noinstr function fpu_idle_fpregs indeed doesn't
get instrumented with clang:
```
/*
* Support for __has_feature(coverage_sanitizer) was added in Clang 13 together
* with no_sanitize("coverage"). Prior versions of Clang support coverage
* instrumentation, but cannot be queried for support by the preprocessor.
*/
#if __has_feature(coverage_sanitizer)
#define __no_sanitize_coverage __attribute__((no_sanitize("coverage")))
#else
#define __no_sanitize_coverage
#endif
```

And include/linux/compiler-gcc.h has this, which also seems to be
effective in a GCC build:
```
/*
* Only supported since gcc >= 12
*/
#if defined(CONFIG_KCOV) && __has_attribute(__no_sanitize_coverage__)
#define __no_sanitize_coverage __attribute__((__no_sanitize_coverage__))
#else
#define __no_sanitize_coverage
#endif
```

Dmitry Vyukov

unread,
Mar 13, 2026, 3:54:14 AM (8 days ago) Mar 13
to Jann Horn, Andrey Konovalov, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, linux-...@vger.kernel.org, kasa...@googlegroups.com, ll...@lists.linux.dev, Josh Poimboeuf, Peter Zijlstra
I think the compiler option check is actually better, since it will
allow us to test earlier and supports compiler backports.
But it may be good to add a reference to the compiler patch in the
commit description.

Otherwise:

Reviewed-by: Dmitry Vyukov <dvy...@google.com>

Dmitry Vyukov

unread,
Mar 13, 2026, 3:59:00 AM (8 days ago) Mar 13
to Jann Horn, Alexander Potapenko, Andrey Konovalov, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, linux-...@vger.kernel.org, kasa...@googlegroups.com, ll...@lists.linux.dev
On Wed, 11 Mar 2026 at 22:06, Jann Horn <ja...@google.com> wrote:
>
Setting/saving/restoring this flag is fragile. I'm afraid some future
patches could break it in some corner cases.
Can we have a new kcov_mode and use some mask check on tracing fast
path, so that it's as cheap as the current == kcov_mode comparison?

+glider, what did you do in your coverage deduplication patch? I have
some vague memories we tried to do something similar.
It would help to explain _why_. The fact that it's not traced is
already in the code.

Dmitry Vyukov

unread,
Mar 13, 2026, 3:59:51 AM (8 days ago) Mar 13
to Jann Horn, Andrey Konovalov, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, linux-...@vger.kernel.org, kasa...@googlegroups.com, ll...@lists.linux.dev, Josh Poimboeuf, Peter Zijlstra
__always_inline, just in case

Jann Horn

unread,
Mar 13, 2026, 8:32:39 AM (8 days ago) Mar 13
to Dmitry Vyukov, Alexander Potapenko, Andrey Konovalov, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, linux-...@vger.kernel.org, kasa...@googlegroups.com, ll...@lists.linux.dev
On Fri, Mar 13, 2026 at 8:58 AM Dmitry Vyukov <dvy...@google.com> wrote:
> On Wed, 11 Mar 2026 at 22:06, Jann Horn <ja...@google.com> wrote:
> > This is the second half of CONFIG_KCOV_EXT_RECORDS.
> >
> > Introduce a new KCOV mode KCOV_TRACE_PC_EXT which replaces the upper 8 bits
> > of recorded instruction pointers with metadata. For now, userspace can use
> > this metadata to distinguish three types of records:
[...]
> > @@ -1519,8 +1519,10 @@ struct task_struct {
> > int kcov_sequence;
> >
> > /* Collect coverage from softirq context: */
> > - unsigned int kcov_softirq;
> > -#endif
> > + unsigned int kcov_softirq : 1;
> > + /* Emit KCOV records in extended format: */
> > + unsigned int kcov_ext_format : 1;
>
> Setting/saving/restoring this flag is fragile. I afraid some of future
> patches can break it in some corner cases.
> Can we have a new kcov_mode and use some mask check on tracing fast
> path, so that it's as cheap as the current == kcov_mode comparison?

Yeah, I also thought that what I'm doing here didn't look particularly
pretty... I'll try to implement something like what you suggested for
v2.

> > void notrace __sanitizer_cov_trace_pc_exit(void)
> > {
> > + unsigned long record;
> > +
> > + /*
> > + * Unlike __sanitizer_cov_trace_pc_entry(), this PC should only be
> > + * reported in extended mode.
>
> It would help to explain _why_. The fact that it's not traced is
> already in the code.

Right, I'll change the comment to explain that this callback isn't at
the start of a basic block, and that the basic block is already
covered by a preceding hook call.

Jann Horn

unread,
Mar 13, 2026, 8:40:41 AM (8 days ago) Mar 13
to Dmitry Vyukov, Andrey Konovalov, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, linux-...@vger.kernel.org, kasa...@googlegroups.com, ll...@lists.linux.dev, Josh Poimboeuf, Peter Zijlstra
On Fri, Mar 13, 2026 at 8:54 AM Dmitry Vyukov <dvy...@google.com> wrote:
> On Wed, 11 Mar 2026 at 22:06, Jann Horn <ja...@google.com> wrote:
> > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> > index 93f356d2b3d9..dddc330ad3ca 100644
> > --- a/lib/Kconfig.debug
> > +++ b/lib/Kconfig.debug
> > @@ -2215,6 +2215,20 @@ config KCOV
> >
> > For more details, see Documentation/dev-tools/kcov.rst.
> >
> > +config KCOV_EXT_RECORDS
> > + bool "Support extended KCOV records with function entry/exit records"
> > + depends on KCOV
> > + depends on 64BIT
> > + # TODO: check CLANG_VERSION instead once this has landed in an LLVM
> > + # release
>
> I think the compiler option check is actually better, since it will
> allow us to test earlier and supports compiler backports.

Makes sense, I'll remove the TODO. (I was thinking about it from the
perspective that every compiler flag test increases the kernel build
time a tiny bit, because it causes an extra compiler invocation at the
start of the build.)

> But it may be good to add a reference to the compiler patch in the
> commit description.

Ack, will add that.

> Otherwise:
>
> Reviewed-by: Dmitry Vyukov <dvy...@google.com>

Thanks!

Jann Horn

unread,
Mar 13, 2026, 8:41:10 AM (8 days ago) Mar 13
to Dmitry Vyukov, Andrey Konovalov, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, linux-...@vger.kernel.org, kasa...@googlegroups.com, ll...@lists.linux.dev, Josh Poimboeuf, Peter Zijlstra
On Fri, Mar 13, 2026 at 8:59 AM Dmitry Vyukov <dvy...@google.com> wrote:
> On Fri, 13 Mar 2026 at 08:53, Dmitry Vyukov <dvy...@google.com> wrote:
> > On Wed, 11 Mar 2026 at 22:06, Jann Horn <ja...@google.com> wrote:
> > > @@ -202,15 +202,10 @@ static notrace unsigned long canonicalize_ip(unsigned long ip)
> > > return ip;
> > > }
> > >
> > > -/*
> > > - * Entry point from instrumented code.
> > > - * This is called once per basic-block/edge.
> > > - */
> > > -void notrace __sanitizer_cov_trace_pc(void)
> > > +static void notrace kcov_add_pc_record(unsigned long record)
>
> __alwaysinline just in case

Ack, will add in v2.

Alexander Potapenko

unread,
Mar 16, 2026, 12:51:21 PM (5 days ago) Mar 16
to Dmitry Vyukov, Jann Horn, Andrey Konovalov, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, linux-...@vger.kernel.org, kasa...@googlegroups.com, ll...@lists.linux.dev
> Setting/saving/restoring this flag is fragile. I afraid some of future
> patches can break it in some corner cases.
> Can we have a new kcov_mode and use some mask check on tracing fast
> path, so that it's as cheap as the current == kcov_mode comparison?
>
> +glider, what did you do in your coverage deduplication patch? I have
> some vague memories we tried to do something similar.

In fact, no, we do switch (mode) for __sanitizer_cov_trace_pc_guard() here:
https://patchew.org/linux/20250731115139....@google.com/20250731115139....@google.com/

Jann Horn

unread,
Mar 18, 2026, 12:27:11 PM (3 days ago) Mar 18
to Dmitry Vyukov, Andrey Konovalov, Alexander Potapenko, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, linux-...@vger.kernel.org, kasa...@googlegroups.com, ll...@lists.linux.dev, Jann Horn, Ingo Molnar, Peter Zijlstra, Josh Poimboeuf
This series adds a KCOV feature that userspace can use to keep track of
the current call stack. When userspace enables the new mode
KCOV_TRACE_PC_EXT, collected instruction addresses are tagged with one
of three types:

- function entry
- non-entry basic block
- function exit

This requires corresponding LLVM support, which was recently added in
LLVM commit:
https://github.com/llvm/llvm-project/commit/dc5c6d008f487eea8f5d646011f9b3dca6caebd7

Signed-off-by: Jann Horn <ja...@google.com>
---
Changes in v2:
- patch 2: change commit message (dvyukov)
- patch 2: add __always_inline (dvyukov)
- patch 2: add comment in __sanitizer_cov_trace_pc_entry
- replaced patch 3 with patches 3+4
- store extended record format flag as part of kcov_mode (dvyukov)
- clarify comment in __sanitizer_cov_trace_pc_exit (dvyukov)
- Link to v1: https://lore.kernel.org/r/20260311-kcov-extrec...@google.com

---
Jann Horn (4):
sched: Ensure matching stack state for kcov disable/enable on switch
kcov: wire up compiler instrumentation for CONFIG_KCOV_EXT_RECORDS
kcov: refactor mode check out of check_kcov_mode()
kcov: introduce extended PC coverage collection mode

include/linux/kcov.h | 9 +++++
include/uapi/linux/kcov.h | 12 ++++++
kernel/kcov.c | 94 ++++++++++++++++++++++++++++++++++++-----------
kernel/sched/core.c | 13 +++++--
lib/Kconfig.debug | 12 ++++++
scripts/Makefile.kcov | 2 +
tools/objtool/check.c | 2 +
7 files changed, 120 insertions(+), 24 deletions(-)

Jann Horn

unread,
Mar 18, 2026, 12:27:13 PM (3 days ago) Mar 18
to Dmitry Vyukov, Andrey Konovalov, Alexander Potapenko, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, linux-...@vger.kernel.org, kasa...@googlegroups.com, ll...@lists.linux.dev, Jann Horn, Josh Poimboeuf, Peter Zijlstra
This is the first half of CONFIG_KCOV_EXT_RECORDS.

Set the appropriate compiler flags to call separate hooks for function
entry/exit, and provide these hooks, but don't make it visible in the KCOV
UAPI yet.

With -fsanitize-coverage=trace-pc-entry-exit, the compiler behavior changes
as follows:

- The __sanitizer_cov_trace_pc() call on function entry is replaced with a
call to __sanitizer_cov_trace_pc_entry(); so for now,
__sanitizer_cov_trace_pc_entry() must be treated the same way as
__sanitizer_cov_trace_pc().
- On function exit, an extra call to __sanitizer_cov_trace_pc_exit()
happens; since function exit produced no coverage in the old UAPI,
__sanitizer_cov_trace_pc_exit() should do nothing for now.

This feature was added to LLVM in commit:
https://github.com/llvm/llvm-project/commit/dc5c6d008f487eea8f5d646011f9b3dca6caebd7

Cc: Josh Poimboeuf <jpoi...@kernel.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Reviewed-by: Dmitry Vyukov <dvy...@google.com>
Signed-off-by: Jann Horn <ja...@google.com>
---
include/linux/kcov.h | 2 ++
kernel/kcov.c | 34 +++++++++++++++++++++++++++-------
lib/Kconfig.debug | 12 ++++++++++++
scripts/Makefile.kcov | 2 ++
tools/objtool/check.c | 2 ++
5 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/include/linux/kcov.h b/include/linux/kcov.h
index 0143358874b0..e5502d674029 100644
--- a/include/linux/kcov.h
+++ b/include/linux/kcov.h
@@ -81,6 +81,8 @@ typedef unsigned long long kcov_u64;
#endif

void __sanitizer_cov_trace_pc(void);
+void __sanitizer_cov_trace_pc_entry(void);
+void __sanitizer_cov_trace_pc_exit(void);
void __sanitizer_cov_trace_cmp1(u8 arg1, u8 arg2);
void __sanitizer_cov_trace_cmp2(u16 arg1, u16 arg2);
void __sanitizer_cov_trace_cmp4(u32 arg1, u32 arg2);
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 0b369e88c7c9..86b681c7865c 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -202,15 +202,10 @@ static notrace unsigned long canonicalize_ip(unsigned long ip)
return ip;
}

-/*
- * Entry point from instrumented code.
- * This is called once per basic-block/edge.
- */
-void notrace __sanitizer_cov_trace_pc(void)
+static __always_inline void notrace kcov_add_pc_record(unsigned long record)
{
struct task_struct *t;
unsigned long *area;
- unsigned long ip = canonicalize_ip(_RET_IP_);
unsigned long pos;

t = current;
@@ -230,11 +225,36 @@ void notrace __sanitizer_cov_trace_pc(void)
*/
WRITE_ONCE(area[0], pos);
barrier();
- area[pos] = ip;
+ area[pos] = record;
}
}
+
+/*
+ * Entry point from instrumented code.
+ * This is called once per basic-block/edge.
+ */
+void notrace __sanitizer_cov_trace_pc(void)
+{
+ kcov_add_pc_record(canonicalize_ip(_RET_IP_));
+}
EXPORT_SYMBOL(__sanitizer_cov_trace_pc);

+#ifdef CONFIG_KCOV_EXT_RECORDS
+void notrace __sanitizer_cov_trace_pc_entry(void)
+{
+ unsigned long record = canonicalize_ip(_RET_IP_);
+
+ /*
+ * This hook replaces __sanitizer_cov_trace_pc() for the function entry
+ * basic block; it should still emit a record even in classic kcov mode.
+ */
+ kcov_add_pc_record(record);
+}
+void notrace __sanitizer_cov_trace_pc_exit(void)
+{
+}
+#endif
+
#ifdef CONFIG_KCOV_ENABLE_COMPARISONS
static void notrace write_comp_data(u64 type, u64 arg1, u64 arg2, u64 ip)
{
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 93f356d2b3d9..58686a99c40a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2215,6 +2215,18 @@ config KCOV

For more details, see Documentation/dev-tools/kcov.rst.

+config KCOV_EXT_RECORDS
+ bool "Support extended KCOV records with function entry/exit records"
+ depends on KCOV
+ depends on 64BIT
2.53.0.851.ga537e3e6e9-goog

Jann Horn

unread,
Mar 18, 2026, 12:27:13 PM (3 days ago) Mar 18
to Dmitry Vyukov, Andrey Konovalov, Alexander Potapenko, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, linux-...@vger.kernel.org, kasa...@googlegroups.com, ll...@lists.linux.dev, Jann Horn, Ingo Molnar, Peter Zijlstra
Ensure that kcov is disabled and enabled with the same call stack.
This will be relied on by subsequent patches for recording function
entry/exit records via kcov.

This patch should not affect compilation of normal kernels without KCOV
(though it changes "inline" to "__always_inline").

To: Ingo Molnar <mi...@redhat.com>
To: Peter Zijlstra <pet...@infradead.org>
Signed-off-by: Jann Horn <ja...@google.com>
---
+ /*
+ * This has to happen outside finish_task_switch() to ensure that
+ * entry/exit records are balanced.
+ */
+ kcov_finish_switch(current);
+ return rq;
}

/*

--
2.53.0.851.ga537e3e6e9-goog

Jann Horn

unread,
Mar 18, 2026, 12:27:15 PM (3 days ago) Mar 18
to Dmitry Vyukov, Andrey Konovalov, Alexander Potapenko, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, linux-...@vger.kernel.org, kasa...@googlegroups.com, ll...@lists.linux.dev, Jann Horn
The following patch will need to check t->kcov_mode in different ways at
different check_kcov_mode() call sites. In preparation for that, move the
mode check up the call hierarchy.

Signed-off-by: Jann Horn <ja...@google.com>
---
kernel/kcov.c | 31 +++++++++++++++++--------------
1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/kernel/kcov.c b/kernel/kcov.c
index 86b681c7865c..7edb39e18bfe 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -171,10 +171,8 @@ static __always_inline bool in_softirq_really(void)
return in_serving_softirq() && !in_hardirq() && !in_nmi();
}

-static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t)
+static notrace bool check_kcov_context(struct task_struct *t)
{
- unsigned int mode;
-
/*
* We are interested in code coverage as a function of a syscall inputs,
* so we ignore code executed in interrupts, unless we are in a remote
@@ -182,7 +180,6 @@ static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_stru
*/
if (!in_task() && !(in_softirq_really() && t->kcov_softirq))
return false;
- mode = READ_ONCE(t->kcov_mode);
/*
* There is some code that runs in interrupts but for which
* in_interrupt() returns false (e.g. preempt_schedule_irq()).
@@ -191,7 +188,7 @@ static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_stru
* kcov_start().
*/
barrier();
- return mode == needed_mode;
+ return true;
}

static notrace unsigned long canonicalize_ip(unsigned long ip)
@@ -202,14 +199,12 @@ static notrace unsigned long canonicalize_ip(unsigned long ip)
return ip;
}

-static __always_inline void notrace kcov_add_pc_record(unsigned long record)
+static __always_inline void notrace kcov_add_pc_record(struct task_struct *t, unsigned long record)
{
- struct task_struct *t;
unsigned long *area;
unsigned long pos;

- t = current;
- if (!check_kcov_mode(KCOV_MODE_TRACE_PC, t))
+ if (!check_kcov_context(t))
return;

area = t->kcov_area;
@@ -217,7 +212,7 @@ static __always_inline void notrace kcov_add_pc_record(unsigned long record)
pos = READ_ONCE(area[0]) + 1;
if (likely(pos < t->kcov_size)) {
/* Previously we write pc before updating pos. However, some
- * early interrupt code could bypass check_kcov_mode() check
+ * early interrupt code could bypass check_kcov_context() check
* and invoke __sanitizer_cov_trace_pc(). If such interrupt is
* raised between writing pc and updating pos, the pc could be
 * overwritten by the recursive __sanitizer_cov_trace_pc().
@@ -235,20 +230,28 @@ static __always_inline void notrace kcov_add_pc_record(unsigned long record)
*/
void notrace __sanitizer_cov_trace_pc(void)
{
- kcov_add_pc_record(canonicalize_ip(_RET_IP_));
+ struct task_struct *cur = current;
+
+ if (READ_ONCE(cur->kcov_mode) != KCOV_MODE_TRACE_PC)
+ return;
+ kcov_add_pc_record(cur, canonicalize_ip(_RET_IP_));
}
EXPORT_SYMBOL(__sanitizer_cov_trace_pc);

#ifdef CONFIG_KCOV_EXT_RECORDS
void notrace __sanitizer_cov_trace_pc_entry(void)
{
+ struct task_struct *cur = current;
unsigned long record = canonicalize_ip(_RET_IP_);
+ unsigned int kcov_mode = READ_ONCE(cur->kcov_mode);

/*
* This hook replaces __sanitizer_cov_trace_pc() for the function entry
* basic block; it should still emit a record even in classic kcov mode.
*/
- kcov_add_pc_record(record);
+ if (kcov_mode != KCOV_MODE_TRACE_PC)
+ return;
+ kcov_add_pc_record(cur, record);
}
void notrace __sanitizer_cov_trace_pc_exit(void)
{
@@ -263,7 +266,7 @@ static void notrace write_comp_data(u64 type, u64 arg1, u64 arg2, u64 ip)
u64 count, start_index, end_pos, max_pos;

t = current;
- if (!check_kcov_mode(KCOV_MODE_TRACE_CMP, t))
+ if (READ_ONCE(t->kcov_mode) != KCOV_MODE_TRACE_CMP || !check_kcov_context(t))
return;

ip = canonicalize_ip(ip);
@@ -383,7 +386,7 @@ static void kcov_start(struct task_struct *t, struct kcov *kcov,
t->kcov_size = size;
t->kcov_area = area;
t->kcov_sequence = sequence;
- /* See comment in check_kcov_mode(). */
+ /* See comment in check_kcov_context(). */
barrier();
WRITE_ONCE(t->kcov_mode, mode);
}

--
2.53.0.851.ga537e3e6e9-goog

Jann Horn

unread,
Mar 18, 2026, 12:27:17 PM (3 days ago) Mar 18
to Dmitry Vyukov, Andrey Konovalov, Alexander Potapenko, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, linux-...@vger.kernel.org, kasa...@googlegroups.com, ll...@lists.linux.dev, Jann Horn
This is the second half of CONFIG_KCOV_EXT_RECORDS.

Introduce a new KCOV mode KCOV_TRACE_PC_EXT which replaces the upper 8 bits
of recorded instruction pointers with metadata. For now, userspace can use
this metadata to distinguish three types of records:

- function entry
- function exit
- normal basic block inside the function

Internally, this new mode is represented as a variant of
KCOV_MODE_TRACE_PC, distinguished with the flag KCOV_EXT_FORMAT.
Store this flag as part of the mode in task_struct::kcov_mode and in
kcov::mode to avoid having to pass it around separately everywhere.

Signed-off-by: Jann Horn <ja...@google.com>
---
include/linux/kcov.h | 7 +++++++
include/uapi/linux/kcov.h | 12 ++++++++++++
kernel/kcov.c | 39 ++++++++++++++++++++++++++++++++++-----
3 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/include/linux/kcov.h b/include/linux/kcov.h
index e5502d674029..455302b1cd1c 100644
--- a/include/linux/kcov.h
+++ b/include/linux/kcov.h
@@ -25,8 +25,15 @@ enum kcov_mode {
KCOV_MODE_REMOTE = 4,
};

+/*
+ * Modifier for KCOV_MODE_TRACE_PC to record function entry/exit marked with
+ * metadata bits.
+ */
+#define KCOV_EXT_FORMAT (1 << 29)
#define KCOV_IN_CTXSW (1 << 30)

+#define KCOV_MODE_TRACE_PC_EXT (KCOV_MODE_TRACE_PC | KCOV_EXT_FORMAT)
+
void kcov_task_init(struct task_struct *t);
void kcov_task_exit(struct task_struct *t);

diff --git a/include/uapi/linux/kcov.h b/include/uapi/linux/kcov.h
index ed95dba9fa37..8d8a233bd61f 100644
--- a/include/uapi/linux/kcov.h
+++ b/include/uapi/linux/kcov.h
@@ -35,8 +35,20 @@ enum {
KCOV_TRACE_PC = 0,
/* Collecting comparison operands mode. */
KCOV_TRACE_CMP = 1,
+ /*
+ * Extended PC coverage collection mode.
+ * In this mode, the top byte of the PC is replaced with flag bits
+ * (KCOV_RECORDFLAG_*).
+ */
+ KCOV_TRACE_PC_EXT = 2,
};

+#define KCOV_RECORD_IP_MASK 0x00ffffffffffffff
+#define KCOV_RECORDFLAG_TYPEMASK 0xf000000000000000
+#define KCOV_RECORDFLAG_TYPE_NORMAL 0xf000000000000000
+#define KCOV_RECORDFLAG_TYPE_ENTRY 0x0000000000000000
+#define KCOV_RECORDFLAG_TYPE_EXIT 0x1000000000000000
+
/*
* The format for the types of collected comparisons.
*
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 7edb39e18bfe..3cd4ee4cc310 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -55,7 +55,12 @@ struct kcov {
refcount_t refcount;
/* The lock protects mode, size, area and t. */
spinlock_t lock;
- enum kcov_mode mode __guarded_by(&lock);
+ /*
+ * Mode, consists of:
+ * - enum kcov_mode
+ * - flag KCOV_EXT_FORMAT
+ */
+ unsigned int mode __guarded_by(&lock);
/* Size of arena (in long's). */
unsigned int size __guarded_by(&lock);
/* Coverage buffer shared with user space. */
@@ -232,8 +237,14 @@ void notrace __sanitizer_cov_trace_pc(void)
{
struct task_struct *cur = current;

- if (READ_ONCE(cur->kcov_mode) != KCOV_MODE_TRACE_PC)
+ if ((READ_ONCE(cur->kcov_mode) & ~KCOV_EXT_FORMAT) != KCOV_MODE_TRACE_PC)
return;
+ /*
+ * No bitops are needed here for setting the record type because
+ * KCOV_RECORDFLAG_TYPE_NORMAL has the high bits set.
+ * This relies on userspace not caring about the rest of the top byte
+ * for KCOV_RECORDFLAG_TYPE_NORMAL records.
+ */
kcov_add_pc_record(cur, canonicalize_ip(_RET_IP_));
}
EXPORT_SYMBOL(__sanitizer_cov_trace_pc);
@@ -249,12 +260,28 @@ void notrace __sanitizer_cov_trace_pc_entry(void)
* This hook replaces __sanitizer_cov_trace_pc() for the function entry
* basic block; it should still emit a record even in classic kcov mode.
*/
- if (kcov_mode != KCOV_MODE_TRACE_PC)
+ if ((kcov_mode & ~KCOV_EXT_FORMAT) != KCOV_MODE_TRACE_PC)
return;
+ if ((kcov_mode & KCOV_EXT_FORMAT) != 0)
+ record = (record & KCOV_RECORD_IP_MASK) | KCOV_RECORDFLAG_TYPE_ENTRY;
kcov_add_pc_record(cur, record);
}
void notrace __sanitizer_cov_trace_pc_exit(void)
{
+ struct task_struct *cur = current;
+ unsigned long record;
+
+ /*
+ * This hook is not called at the beginning of a basic block; the basic
+ * block from which the hook was invoked is already covered by a
+ * preceding hook call.
+ * So unlike __sanitizer_cov_trace_pc_entry(), this PC should only be
+ * reported in extended mode, where function exit events are recorded.
+ */
+ if (READ_ONCE(cur->kcov_mode) != KCOV_MODE_TRACE_PC_EXT)
+ return;
+ record = (canonicalize_ip(_RET_IP_) & KCOV_RECORD_IP_MASK) | KCOV_RECORDFLAG_TYPE_EXIT;
+ kcov_add_pc_record(cur, record);
}
#endif

@@ -377,7 +404,7 @@ EXPORT_SYMBOL(__sanitizer_cov_trace_switch);
#endif /* ifdef CONFIG_KCOV_ENABLE_COMPARISONS */

static void kcov_start(struct task_struct *t, struct kcov *kcov,
- unsigned int size, void *area, enum kcov_mode mode,
+ unsigned int size, void *area, unsigned int mode,
int sequence)
{
kcov_debug("t = %px, size = %u, area = %px\n", t, size, area);
@@ -577,6 +604,8 @@ static int kcov_get_mode(unsigned long arg)
#else
return -ENOTSUPP;
#endif
+ else if (arg == KCOV_TRACE_PC_EXT)
+ return IS_ENABLED(CONFIG_KCOV_EXT_RECORDS) ? KCOV_MODE_TRACE_PC_EXT : -ENOTSUPP;
else
return -EINVAL;
}
@@ -1089,7 +1118,7 @@ void kcov_remote_stop(void)
* and kcov_remote_stop(), hence the sequence check.
*/
if (sequence == kcov->sequence && kcov->remote)
- kcov_move_area(kcov->mode, kcov->area, kcov->size, area);
+ kcov_move_area(kcov->mode & ~KCOV_EXT_FORMAT, kcov->area, kcov->size, area);
spin_unlock(&kcov->lock);

if (in_task()) {

--
2.53.0.851.ga537e3e6e9-goog

Peter Zijlstra

unread,
Mar 20, 2026, 6:10:56 PM (19 hours ago) Mar 20
to Jann Horn, Dmitry Vyukov, Andrey Konovalov, Alexander Potapenko, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, linux-...@vger.kernel.org, kasa...@googlegroups.com, ll...@lists.linux.dev, Ingo Molnar
That's not exactly right; the requirement is that kcov_prepare_switch()
and kcov_finish_switch() are called from the exact same frame.

The wording above "outside finish_task_switch" could be anywhere and
doesn't cover the relation to prepare_switch().

> + kcov_finish_switch(current);
> + return rq;
> }

That said; there was a patch that marked finish_task_switch() as
__always_inline too:

https://lkml.kernel.org/r/20260301083520.11...@gmail.com

Except I think that does a little too much for one patch.

Anyway, I'm a little divided on this. Perhaps the simplest and most
obvious way is something like so.

But what about compiler funnies like the various IPA optimizations that
can do partial clones and whatnot? That could result in violating this
constraint, no?

---
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6e509e292f99..d9925220d51b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5135,7 +5135,6 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next)
__must_hold(__rq_lockp(rq))
{
- kcov_prepare_switch(prev);
sched_info_switch(rq, prev, next);
perf_event_task_sched_out(prev, next);
fire_sched_out_preempt_notifiers(prev, next);
@@ -5206,7 +5205,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
tick_nohz_task_switch();
finish_lock_switch(rq);
finish_arch_post_lock_switch();
- kcov_finish_switch(current);
/*
* kmap_local_sched_out() is invoked with rq::lock held and
* interrupts disabled. There is no requirement for that, but the
@@ -5294,6 +5292,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next, struct rq_flags *rf)
__releases(__rq_lockp(rq))
{
+ kcov_prepare_switch(prev);
prepare_task_switch(rq, prev, next);

/*
@@ -5352,7 +5351,13 @@ context_switch(struct rq *rq, struct task_struct *prev,
switch_to(prev, next, prev);
barrier();

- return finish_task_switch(prev);
+ rq = finish_task_switch(prev);
+ /*
+ * kcov_prepare_switch() above, and kcov_finish_switch() must be
+ * called from the same stack frame.
Reply all
Reply to author
Forward
0 new messages