[PATCH] x86/traps: Enable UBSAN traps on x86

0 views
Skip to first unread message

Gatlin Newhouse

unread,
May 28, 2024, 10:20:48 PMMay 28
to Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x...@kernel.org, H. Peter Anvin, Kees Cook, Marco Elver, Andrey Konovalov, Andrey Ryabinin, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, Gatlin Newhouse, Andrew Morton, Baoquan He, Rick Edgecombe, Changbin Du, Pengfei Xu, Josh Poimboeuf, Xin Li, Jason Gunthorpe, Kirill A. Shutemov, linux-...@vger.kernel.org, kasa...@googlegroups.com, linux-h...@vger.kernel.org, ll...@lists.linux.dev
Bring x86 to parity with arm64, similar to commit 25b84002afb9
("arm64: Support Clang UBSAN trap codes for better reporting").
Enable the output of UBSAN type information on x86 architectures
compiled with clang when CONFIG_UBSAN_TRAP=y. Currently ARM
architectures output which specific sanitizer caused the trap,
via the encoded data in the trap instruction. Clang on x86
currently encodes the same data in ud1 instructions but the x86
handle_bug() and is_valid_bugaddr() functions currently only look
at ud2s.

Signed-off-by: Gatlin Newhouse <gatlin....@gmail.com>
---
MAINTAINERS | 2 ++
arch/x86/include/asm/bug.h | 8 ++++++++
arch/x86/include/asm/ubsan.h | 21 +++++++++++++++++++++
arch/x86/kernel/Makefile | 1 +
arch/x86/kernel/traps.c | 34 ++++++++++++++++++++++++++++------
arch/x86/kernel/ubsan.c | 32 ++++++++++++++++++++++++++++++++
6 files changed, 92 insertions(+), 6 deletions(-)
create mode 100644 arch/x86/include/asm/ubsan.h
create mode 100644 arch/x86/kernel/ubsan.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 28e20975c26f..b8512887ffb1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -22635,6 +22635,8 @@ L: kasa...@googlegroups.com
L: linux-h...@vger.kernel.org
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening
+F: arch/x86/include/asm/ubsan.h
+F: arch/x86/kernel/ubsan.c
F: Documentation/dev-tools/ubsan.rst
F: include/linux/ubsan.h
F: lib/Kconfig.ubsan
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index a3ec87d198ac..e3fbed9073f8 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -13,6 +13,14 @@
#define INSN_UD2 0x0b0f
#define LEN_UD2 2

+/*
+ * In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit.
+ */
+#define INSN_UD1 0xb90f
+#define LEN_UD1 2
+#define INSN_REX 0x67
+#define LEN_REX 1
+
#ifdef CONFIG_GENERIC_BUG

#ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/ubsan.h b/arch/x86/include/asm/ubsan.h
new file mode 100644
index 000000000000..5235822eb4ae
--- /dev/null
+++ b/arch/x86/include/asm/ubsan.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_UBSAN_H
+#define _ASM_X86_UBSAN_H
+
+/*
+ * Clang Undefined Behavior Sanitizer trap mode support.
+ */
+#include <linux/bug.h>
+#include <linux/ubsan.h>
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_UBSAN_TRAP
+enum bug_trap_type handle_ubsan_failure(struct pt_regs *regs, int insn);
+#else
+static inline enum bug_trap_type handle_ubsan_failure(struct pt_regs *regs, int insn)
+{
+ return BUG_TRAP_TYPE_NONE;
+}
+#endif /* CONFIG_UBSAN_TRAP */
+
+#endif /* _ASM_X86_UBSAN_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 74077694da7d..fe1d9db27500 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -145,6 +145,7 @@ obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o

obj-$(CONFIG_CFI_CLANG) += cfi.o
+obj-$(CONFIG_UBSAN_TRAP) += ubsan.o

obj-$(CONFIG_CALL_THUNKS) += callthunks.o

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 4fa0b17e5043..7876449e97a0 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -67,6 +67,7 @@
#include <asm/vdso.h>
#include <asm/tdx.h>
#include <asm/cfi.h>
+#include <asm/ubsan.h>

#ifdef CONFIG_X86_64
#include <asm/x86_init.h>
@@ -79,6 +80,9 @@

DECLARE_BITMAP(system_vectors, NR_VECTORS);

+/*
+ * Check for UD1, UD2, with or without REX instructions.
+ */
__always_inline int is_valid_bugaddr(unsigned long addr)
{
if (addr < TASK_SIZE_MAX)
@@ -88,7 +92,13 @@ __always_inline int is_valid_bugaddr(unsigned long addr)
* We got #UD, if the text isn't readable we'd have gotten
* a different exception.
*/
- return *(unsigned short *)addr == INSN_UD2;
+ if (*(u16 *)addr == INSN_UD2)
+ return INSN_UD2;
+ if (*(u16 *)addr == INSN_UD1)
+ return INSN_UD1;
+ if (*(u8 *)addr == INSN_REX && *(u16 *)(addr + 1) == INSN_UD1)
+ return INSN_REX;
+ return 0;
}

static nokprobe_inline int
@@ -216,6 +226,7 @@ static inline void handle_invalid_op(struct pt_regs *regs)
static noinstr bool handle_bug(struct pt_regs *regs)
{
bool handled = false;
+ int insn;

/*
* Normally @regs are unpoisoned by irqentry_enter(), but handle_bug()
@@ -223,7 +234,8 @@ static noinstr bool handle_bug(struct pt_regs *regs)
* irqentry_enter().
*/
kmsan_unpoison_entry_regs(regs);
- if (!is_valid_bugaddr(regs->ip))
+ insn = is_valid_bugaddr(regs->ip);
+ if (insn == 0)
return handled;

/*
@@ -236,10 +248,20 @@ static noinstr bool handle_bug(struct pt_regs *regs)
*/
if (regs->flags & X86_EFLAGS_IF)
raw_local_irq_enable();
- if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN ||
- handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
- regs->ip += LEN_UD2;
- handled = true;
+
+ if (insn == INSN_UD2) {
+ if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN ||
+ handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
+ regs->ip += LEN_UD2;
+ handled = true;
+ }
+ } else {
+ if (handle_ubsan_failure(regs, insn) == BUG_TRAP_TYPE_WARN) {
+ if (insn == INSN_REX)
+ regs->ip += LEN_REX;
+ regs->ip += LEN_UD1;
+ handled = true;
+ }
}
if (regs->flags & X86_EFLAGS_IF)
raw_local_irq_disable();
diff --git a/arch/x86/kernel/ubsan.c b/arch/x86/kernel/ubsan.c
new file mode 100644
index 000000000000..6cae11f4fe23
--- /dev/null
+++ b/arch/x86/kernel/ubsan.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Clang Undefined Behavior Sanitizer trap mode support.
+ */
+#include <linux/bug.h>
+#include <linux/string.h>
+#include <linux/printk.h>
+#include <linux/ubsan.h>
+#include <asm/ptrace.h>
+#include <asm/ubsan.h>
+
+/*
+ * Checks for the information embedded in the UD1 trap instruction
+ * for the UB Sanitizer in order to pass along debugging output.
+ */
+enum bug_trap_type handle_ubsan_failure(struct pt_regs *regs, int insn)
+{
+ u32 type = 0;
+
+ if (insn == INSN_REX) {
+ type = (*(u16 *)(regs->ip + LEN_REX + LEN_UD1));
+ if ((type & 0xFF) == 0x40)
+ type = (type >> 8) & 0xFF;
+ } else {
+ type = (*(u16 *)(regs->ip + LEN_UD1));
+ if ((type & 0xFF) == 0x40)
+ type = (type >> 8) & 0xFF;
+ }
+ pr_crit("%s at %pS\n", report_ubsan_failure(regs, type), (void *)regs->ip);
+
+ return BUG_TRAP_TYPE_NONE;
+}
--
2.25.1

Marco Elver

unread,
May 29, 2024, 3:26:01 AMMay 29
to Gatlin Newhouse, Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x...@kernel.org, H. Peter Anvin, Kees Cook, Andrey Konovalov, Andrey Ryabinin, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, Andrew Morton, Baoquan He, Rick Edgecombe, Changbin Du, Pengfei Xu, Josh Poimboeuf, Xin Li, Jason Gunthorpe, Kirill A. Shutemov, linux-...@vger.kernel.org, kasa...@googlegroups.com, linux-h...@vger.kernel.org, ll...@lists.linux.dev
On Wed, 29 May 2024 at 04:20, Gatlin Newhouse <gatlin....@gmail.com> wrote:
[...]
> if (regs->flags & X86_EFLAGS_IF)
> raw_local_irq_enable();
> - if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN ||
> - handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
> - regs->ip += LEN_UD2;
> - handled = true;
> +
> + if (insn == INSN_UD2) {
> + if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN ||
> + handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
> + regs->ip += LEN_UD2;
> + handled = true;
> + }
> + } else {
> + if (handle_ubsan_failure(regs, insn) == BUG_TRAP_TYPE_WARN) {

handle_ubsan_failure currently only returns BUG_TRAP_TYPE_NONE?
Shouldn't this return BUG_TRAP_TYPE_WARN?

Gatlin Newhouse

unread,
May 29, 2024, 2:16:59 PMMay 29
to Marco Elver, Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x...@kernel.org, H. Peter Anvin, Kees Cook, Andrey Konovalov, Andrey Ryabinin, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, Andrew Morton, Baoquan He, Rick Edgecombe, Changbin Du, Pengfei Xu, Josh Poimboeuf, Xin Li, Jason Gunthorpe, Kirill A. Shutemov, linux-...@vger.kernel.org, kasa...@googlegroups.com, linux-h...@vger.kernel.org, ll...@lists.linux.dev
So as far as I understand, UBSAN trap mode never warns. Perhaps it does on
arm64, although it calls die() so I am unsure. Maybe the condition in
handle_bug() should be rewritten in the case of UBSAN ud1s? Do you have any
suggestions?

Marco Elver

unread,
May 29, 2024, 2:31:01 PMMay 29
to Gatlin Newhouse, Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x...@kernel.org, H. Peter Anvin, Kees Cook, Andrey Konovalov, Andrey Ryabinin, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, Andrew Morton, Baoquan He, Rick Edgecombe, Changbin Du, Pengfei Xu, Josh Poimboeuf, Xin Li, Jason Gunthorpe, Kirill A. Shutemov, linux-...@vger.kernel.org, kasa...@googlegroups.com, linux-h...@vger.kernel.org, ll...@lists.linux.dev
AFAIK on arm64 it's basically a kernel OOPS.

The main thing I just wanted to point out though is that your newly added branch

> if (handle_ubsan_failure(regs, insn) == BUG_TRAP_TYPE_WARN) {

will never be taken, because I don't see where handle_ubsan_failure()
returns BUG_TRAP_TYPE_WARN.

That means 'handled' will be false, and the code in exc_invalid_op
will proceed to call handle_invalid_op() which is probably not what
you intended - i.e. it's definitely not BUG_TRAP_TYPE_NONE, but one of
TYPE_WARN of TYPE_BUG.

Did you test it and you got the behaviour you expected?

Gatlin Newhouse

unread,
May 29, 2024, 4:36:44 PMMay 29
to Marco Elver, Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x...@kernel.org, H. Peter Anvin, Kees Cook, Andrey Konovalov, Andrey Ryabinin, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, Andrew Morton, Baoquan He, Rick Edgecombe, Changbin Du, Pengfei Xu, Josh Poimboeuf, Xin Li, Jason Gunthorpe, Kirill A. Shutemov, linux-...@vger.kernel.org, kasa...@googlegroups.com, linux-h...@vger.kernel.org, ll...@lists.linux.dev
Initially I wrote this with some symmetry to the KCFI checks nearby, but I
was unsure if this would be considered handled or not.

>
> That means 'handled' will be false, and the code in exc_invalid_op
> will proceed to call handle_invalid_op() which is probably not what
> you intended - i.e. it's definitely not BUG_TRAP_TYPE_NONE, but one of
> TYPE_WARN of TYPE_BUG.
>

This remains a question to me as to whether it should be considered handled
or not. Which is why I'm happy to change this branch which is never taken to
something else that still outputs the UBSAN type information before calling
handle_invalid_op().

>
> Did you test it and you got the behaviour you expected?
>

Testing with LKDTM provided the output I expected. The UBSAN type information
along with file and offsets are output before an illegal op and trace.

Kees Cook

unread,
May 29, 2024, 7:32:43 PMMay 29
to Gatlin Newhouse, Marco Elver, Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x...@kernel.org, H. Peter Anvin, Andrey Konovalov, Andrey Ryabinin, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, Andrew Morton, Baoquan He, Rick Edgecombe, Changbin Du, Pengfei Xu, Josh Poimboeuf, Xin Li, Jason Gunthorpe, Kirill A. Shutemov, linux-...@vger.kernel.org, kasa...@googlegroups.com, linux-h...@vger.kernel.org, ll...@lists.linux.dev
Yeah, that seemed like the right "style" to me too. Perhaps, since it
can never warn, we could just rewrite it so it's a void function avoid
the checking, etc.

--
Kees Cook

Andrew Cooper

unread,
May 29, 2024, 8:25:00 PMMay 29
to Gatlin Newhouse, Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x...@kernel.org, H. Peter Anvin, Kees Cook, Marco Elver, Andrey Konovalov, Andrey Ryabinin, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, Andrew Morton, Baoquan He, Rick Edgecombe, Changbin Du, Pengfei Xu, Josh Poimboeuf, Xin Li, Jason Gunthorpe, Kirill A. Shutemov, linux-...@vger.kernel.org, kasa...@googlegroups.com, linux-h...@vger.kernel.org, ll...@lists.linux.dev
On 29/05/2024 3:20 am, Gatlin Newhouse wrote:
> diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
> index a3ec87d198ac..e3fbed9073f8 100644
> --- a/arch/x86/include/asm/bug.h
> +++ b/arch/x86/include/asm/bug.h
> @@ -13,6 +13,14 @@
> #define INSN_UD2 0x0b0f
> #define LEN_UD2 2
>
> +/*
> + * In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit.
> + */
> +#define INSN_UD1 0xb90f
> +#define LEN_UD1 2
> +#define INSN_REX 0x67
> +#define LEN_REX 1

That's an address size override prefix, not a REX prefix.

What information is actually encoded in this UD1 instruction?  I can't
find anything any documentation which actually discusses how the ModRM
byte is encoded.

~Andrew

Gatlin Newhouse

unread,
May 31, 2024, 4:36:39 PMMay 31
to Andrew Cooper, Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x...@kernel.org, H. Peter Anvin, Kees Cook, Marco Elver, Andrey Konovalov, Andrey Ryabinin, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, Andrew Morton, Baoquan He, Rick Edgecombe, Changbin Du, Pengfei Xu, Josh Poimboeuf, Xin Li, Jason Gunthorpe, Kirill A. Shutemov, linux-...@vger.kernel.org, kasa...@googlegroups.com, linux-h...@vger.kernel.org, ll...@lists.linux.dev
On Thu, May 30, 2024 at 01:24:56AM UTC, Andrew Cooper wrote:
> On 29/05/2024 3:20 am, Gatlin Newhouse wrote:
> > diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
> > index a3ec87d198ac..e3fbed9073f8 100644
> > --- a/arch/x86/include/asm/bug.h
> > +++ b/arch/x86/include/asm/bug.h
> > @@ -13,6 +13,14 @@
> > #define INSN_UD2 0x0b0f
> > #define LEN_UD2 2
> >
> > +/*
> > + * In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit.
> > + */
> > +#define INSN_UD1 0xb90f
> > +#define LEN_UD1 2
> > +#define INSN_REX 0x67
> > +#define LEN_REX 1
>
> That's an address size override prefix, not a REX prefix.

Good to know, thanks.

> What information is actually encoded in this UD1 instruction?  I can't
> find anything any documentation which actually discusses how the ModRM
> byte is encoded.

lib/ubsan.h has a comment before the ubsan_checks enum which links to line 113
in LLVM's clang/lib/CodeGen/CodeGenFunction.h which defines the values for the
ModRM byte. I think the Undefined Behavior Sanitizer pass does the actual
encoding of UB type to values but I'm not an expert in LLVM.

> ~Andrew

Gatlin Newhouse

unread,
May 31, 2024, 11:10:38 PMMay 31
to Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x...@kernel.org, H. Peter Anvin, Kees Cook, Marco Elver, Andrey Konovalov, Andrey Ryabinin, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, Gatlin Newhouse, Andrew Morton, Rick Edgecombe, Baoquan He, Changbin Du, Pengfei Xu, Josh Poimboeuf, Xin Li, Jason Gunthorpe, Tina Zhang, Uros Bizjak, Kirill A. Shutemov, linux-...@vger.kernel.org, kasa...@googlegroups.com, linux-h...@vger.kernel.org, ll...@lists.linux.dev
Bring x86 to parity with arm64, similar to commit 25b84002afb9
("arm64: Support Clang UBSAN trap codes for better reporting").
Enable the output of UBSAN type information on x86 architectures
compiled with clang when CONFIG_UBSAN_TRAP=y. Currently ARM
architectures output which specific sanitizer caused the trap,
via the encoded data in the trap instruction. Clang on x86
currently encodes the same data in ud1 instructions but the x86
handle_bug() and is_valid_bugaddr() functions currently only look
at ud2s.

Signed-off-by: Gatlin Newhouse <gatlin....@gmail.com>
---
Changes in v2:
- Name the new constants 'LEN_ASOP' and 'INSN_ASOP' instead of
'LEN_REX' and 'INSN_REX'
- Change handle_ubsan_failure() from enum bug_trap_type to void
function

v1: https://lore.kernel.org/linux-hardening/20240529022043.36617...@gmail.com/
---
MAINTAINERS | 2 ++
arch/x86/include/asm/bug.h | 8 ++++++++
arch/x86/include/asm/ubsan.h | 18 ++++++++++++++++++
arch/x86/kernel/Makefile | 1 +
arch/x86/kernel/traps.c | 29 +++++++++++++++++++++++------
arch/x86/kernel/ubsan.c | 30 ++++++++++++++++++++++++++++++
6 files changed, 82 insertions(+), 6 deletions(-)
create mode 100644 arch/x86/include/asm/ubsan.h
create mode 100644 arch/x86/kernel/ubsan.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 28e20975c26f..b8512887ffb1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -22635,6 +22635,8 @@ L: kasa...@googlegroups.com
L: linux-h...@vger.kernel.org
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening
+F: arch/x86/include/asm/ubsan.h
+F: arch/x86/kernel/ubsan.c
F: Documentation/dev-tools/ubsan.rst
F: include/linux/ubsan.h
F: lib/Kconfig.ubsan
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index a3ec87d198ac..1023c149f93d 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -13,6 +13,14 @@
#define INSN_UD2 0x0b0f
#define LEN_UD2 2

+/*
+ * In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit.
+ */
+#define INSN_UD1 0xb90f
+#define LEN_UD1 2
+#define INSN_ASOP 0x67
+#define LEN_ASOP 1
+
#ifdef CONFIG_GENERIC_BUG

#ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/ubsan.h b/arch/x86/include/asm/ubsan.h
new file mode 100644
index 000000000000..896ad7bf587f
--- /dev/null
+++ b/arch/x86/include/asm/ubsan.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_UBSAN_H
+#define _ASM_X86_UBSAN_H
+
+/*
+ * Clang Undefined Behavior Sanitizer trap mode support.
+ */
+#include <linux/bug.h>
+#include <linux/ubsan.h>
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_UBSAN_TRAP
+void handle_ubsan_failure(struct pt_regs *regs, int insn);
+#else
+static inline void handle_ubsan_failure(struct pt_regs *regs, int insn) { return; }
+#endif /* CONFIG_UBSAN_TRAP */
+
+#endif /* _ASM_X86_UBSAN_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 74077694da7d..fe1d9db27500 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -145,6 +145,7 @@ obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o

obj-$(CONFIG_CFI_CLANG) += cfi.o
+obj-$(CONFIG_UBSAN_TRAP) += ubsan.o

obj-$(CONFIG_CALL_THUNKS) += callthunks.o

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 4fa0b17e5043..ee77c868090a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -67,6 +67,7 @@
#include <asm/vdso.h>
#include <asm/tdx.h>
#include <asm/cfi.h>
+#include <asm/ubsan.h>

#ifdef CONFIG_X86_64
#include <asm/x86_init.h>
@@ -79,6 +80,9 @@

DECLARE_BITMAP(system_vectors, NR_VECTORS);

+/*
+ * Check for UD1, UD2, with or without Address Size Override Prefixes instructions.
+ */
__always_inline int is_valid_bugaddr(unsigned long addr)
{
if (addr < TASK_SIZE_MAX)
@@ -88,7 +92,13 @@ __always_inline int is_valid_bugaddr(unsigned long addr)
* We got #UD, if the text isn't readable we'd have gotten
* a different exception.
*/
- return *(unsigned short *)addr == INSN_UD2;
+ if (*(u16 *)addr == INSN_UD2)
+ return INSN_UD2;
+ if (*(u16 *)addr == INSN_UD1)
+ return INSN_UD1;
+ if (*(u8 *)addr == INSN_ASOP && *(u16 *)(addr + 1) == INSN_UD1)
+ return INSN_ASOP;
+ return 0;
}

static nokprobe_inline int
@@ -216,6 +226,7 @@ static inline void handle_invalid_op(struct pt_regs *regs)
static noinstr bool handle_bug(struct pt_regs *regs)
{
bool handled = false;
+ int insn;

/*
* Normally @regs are unpoisoned by irqentry_enter(), but handle_bug()
@@ -223,7 +234,8 @@ static noinstr bool handle_bug(struct pt_regs *regs)
* irqentry_enter().
*/
kmsan_unpoison_entry_regs(regs);
- if (!is_valid_bugaddr(regs->ip))
+ insn = is_valid_bugaddr(regs->ip);
+ if (insn == 0)
return handled;

/*
@@ -236,10 +248,15 @@ static noinstr bool handle_bug(struct pt_regs *regs)
*/
if (regs->flags & X86_EFLAGS_IF)
raw_local_irq_enable();
- if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN ||
- handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
- regs->ip += LEN_UD2;
- handled = true;
+
+ if (insn == INSN_UD2) {
+ if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN ||
+ handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
+ regs->ip += LEN_UD2;
+ handled = true;
+ }
+ } else {
+ handle_ubsan_failure(regs, insn);
}
if (regs->flags & X86_EFLAGS_IF)
raw_local_irq_disable();
diff --git a/arch/x86/kernel/ubsan.c b/arch/x86/kernel/ubsan.c
new file mode 100644
index 000000000000..35b2039a3b8f
--- /dev/null
+++ b/arch/x86/kernel/ubsan.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Clang Undefined Behavior Sanitizer trap mode support.
+ */
+#include <linux/bug.h>
+#include <linux/string.h>
+#include <linux/printk.h>
+#include <linux/ubsan.h>
+#include <asm/ptrace.h>
+#include <asm/ubsan.h>
+
+/*
+ * Checks for the information embedded in the UD1 trap instruction
+ * for the UB Sanitizer in order to pass along debugging output.
+ */
+void handle_ubsan_failure(struct pt_regs *regs, int insn)
+{
+ u32 type = 0;
+
+ if (insn == INSN_ASOP) {
+ type = (*(u16 *)(regs->ip + LEN_ASOP + LEN_UD1));
+ if ((type & 0xFF) == 0x40)
+ type = (type >> 8) & 0xFF;
+ } else {
+ type = (*(u16 *)(regs->ip + LEN_UD1));
+ if ((type & 0xFF) == 0x40)
+ type = (type >> 8) & 0xFF;
+ }
+ pr_crit("%s at %pS\n", report_ubsan_failure(regs, type), (void *)regs->ip);
+}
--
2.25.1

Kees Cook

unread,
Jun 1, 2024, 10:06:40 AMJun 1
to Gatlin Newhouse, Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x...@kernel.org, H. Peter Anvin, Marco Elver, Andrey Konovalov, Andrey Ryabinin, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, Andrew Morton, Rick Edgecombe, Baoquan He, Changbin Du, Pengfei Xu, Josh Poimboeuf, Xin Li, Jason Gunthorpe, Tina Zhang, Uros Bizjak, Kirill A. Shutemov, linux-...@vger.kernel.org, kasa...@googlegroups.com, linux-h...@vger.kernel.org, ll...@lists.linux.dev
On Sat, Jun 01, 2024 at 03:10:05AM +0000, Gatlin Newhouse wrote:
> +void handle_ubsan_failure(struct pt_regs *regs, int insn)
> +{
> + u32 type = 0;
> +
> + if (insn == INSN_ASOP) {
> + type = (*(u16 *)(regs->ip + LEN_ASOP + LEN_UD1));
> + if ((type & 0xFF) == 0x40)
> + type = (type >> 8) & 0xFF;
> + } else {
> + type = (*(u16 *)(regs->ip + LEN_UD1));
> + if ((type & 0xFF) == 0x40)
> + type = (type >> 8) & 0xFF;
> + }

The if/else code is repeated, but the only difference is the offset to
read from. Also, if the 0x40 is absent, we likely don't want to report
anything. So, perhaps:

u16 offset = LEN_UD1;
u32 type;

if (insn == INSN_ASOP)
offset += INSN_ASOP;
type = *(u16 *)(regs->ip + offset);
if ((type & 0xFF) != 0x40)
return;

type = (type >> 8) & 0xFF;
pr_crit("%s at %pS\n", report_ubsan_failure(regs, type), (void *)regs->ip);



--
Kees Cook

Thomas Gleixner

unread,
Jun 3, 2024, 12:13:56 PMJun 3
to Gatlin Newhouse, Ingo Molnar, Borislav Petkov, Dave Hansen, x...@kernel.org, H. Peter Anvin, Kees Cook, Marco Elver, Andrey Konovalov, Andrey Ryabinin, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, Andrew Morton, Rick Edgecombe, Baoquan He, Changbin Du, Pengfei Xu, Josh Poimboeuf, Xin Li, Jason Gunthorpe, Tina Zhang, Uros Bizjak, Kirill A. Shutemov, linux-...@vger.kernel.org, kasa...@googlegroups.com, linux-h...@vger.kernel.org, ll...@lists.linux.dev
On Sat, Jun 01 2024 at 03:10, Gatlin Newhouse wrote:

> Bring x86 to parity with arm64, similar to commit 25b84002afb9
> ("arm64: Support Clang UBSAN trap codes for better reporting").
> Enable the output of UBSAN type information on x86 architectures
> compiled with clang when CONFIG_UBSAN_TRAP=y. Currently ARM
> architectures output which specific sanitizer caused the trap,
> via the encoded data in the trap instruction. Clang on x86
> currently encodes the same data in ud1 instructions but the x86
> handle_bug() and is_valid_bugaddr() functions currently only look
> at ud2s.

Please structure your change log properly instead of one paragraph of
unstructured word salad. See:

https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#changelog

> +/*
> + * Check for UD1, UD2, with or without Address Size Override Prefixes instructions.
> + */
> __always_inline int is_valid_bugaddr(unsigned long addr)
> {
> if (addr < TASK_SIZE_MAX)
> @@ -88,7 +92,13 @@ __always_inline int is_valid_bugaddr(unsigned long addr)
> * We got #UD, if the text isn't readable we'd have gotten
> * a different exception.
> */
> - return *(unsigned short *)addr == INSN_UD2;
> + if (*(u16 *)addr == INSN_UD2)
> + return INSN_UD2;
> + if (*(u16 *)addr == INSN_UD1)
> + return INSN_UD1;
> + if (*(u8 *)addr == INSN_ASOP && *(u16 *)(addr + 1) == INSN_UD1)

s/1/LEN_ASOP/ ?

> + return INSN_ASOP;
> + return 0;

I'm not really a fan of the reuse of the INSN defines here. Especially
not about INSN_ASOP. Also 0 is just lame.

Neither does the function name make sense anymore. is_valid_bugaddr() is
clearly telling that it's a boolean check (despite the return value
being int for hysterical raisins). But now you turn it into a
non-boolean integer which returns a instruction encoding. That's
hideous. Programming should result in obvious code and that should be
pretty obvious to people who create tools to validate code.

Also all UBSAN cares about is the actual failure type and not the
instruction itself:

#define INSN_UD_MASK 0xFFFF
#define INSN_ASOP_MASK 0x00FF

#define BUG_UD_NONE 0xFFFF
#define BUG_UD2 0xFFFE

__always_inline u16 get_ud_type(unsigned long addr)
{
u16 insn;

if (addr < TASK_SIZE_MAX)
return BUD_UD_NONE;

insn = *(u16 *)addr;
if ((insn & INSN_UD_MASK) == INSN_UD2)
return BUG_UD2;

if ((insn & INSN_ASOP_MASK) == INSN_ASOP)
insn = *(u16 *)(++addr);

// UBSAN encodes the failure type in the two bytes after UD1
if ((insn & INSN_UD_MASK) == INSN_UD1)
return *(u16 *)(addr + LEN_UD1);

return BUG_UD_NONE;
}

No?

> static nokprobe_inline int
> @@ -216,6 +226,7 @@ static inline void handle_invalid_op(struct pt_regs *regs)
> static noinstr bool handle_bug(struct pt_regs *regs)
> {
> bool handled = false;
> + int insn;
>
> /*
> * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug()
> @@ -223,7 +234,8 @@ static noinstr bool handle_bug(struct pt_regs *regs)
> * irqentry_enter().
> */
> kmsan_unpoison_entry_regs(regs);
> - if (!is_valid_bugaddr(regs->ip))
> + insn = is_valid_bugaddr(regs->ip);
> + if (insn == 0)

Sigh.

But with the above sanitized (pun intended) this becomes obvious by
itself:

ud_type = get_ud_type(regs->ip);
if (ud_type == BUG_UD_NONE)
return false;

See?

> return handled;
>
> /*
> @@ -236,10 +248,15 @@ static noinstr bool handle_bug(struct pt_regs *regs)
> */
> if (regs->flags & X86_EFLAGS_IF)
> raw_local_irq_enable();
> - if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN ||
> - handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
> - regs->ip += LEN_UD2;
> - handled = true;
> +
> + if (insn == INSN_UD2) {
> + if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN ||
> + handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {

Please indent the second condition properly:

if (a ||
b) {

I know you just added another tab, but when touching code, then please
do it right.

> + regs->ip += LEN_UD2;
> + handled = true;

> +/*
> + * Checks for the information embedded in the UD1 trap instruction
> + * for the UB Sanitizer in order to pass along debugging output.
> + */
> +void handle_ubsan_failure(struct pt_regs *regs, int insn)
> +{
> + u32 type = 0;

Pointless initialization.

> + if (insn == INSN_ASOP) {
> + type = (*(u16 *)(regs->ip + LEN_ASOP + LEN_UD1));
> + if ((type & 0xFF) == 0x40)

No magic constants please. What does 0x40 mean?

> + type = (type >> 8) & 0xFF;

That mask is pointless as u16 is zero extended when assigned to u32, but
why not using u16 in the first place to make it clear?

> + } else {
> + type = (*(u16 *)(regs->ip + LEN_UD1));
> + if ((type & 0xFF) == 0x40)
> + type = (type >> 8) & 0xFF;
> + }

Copy & pasta rules!

unsigned long addr = regs->ip + LEN_UD1;
u16 type;

type = insn == INSN_UD1 ? *(u16 *)addr : *(u16 *)(addr + LEN_ASOP);

if ((type & 0xFF) == UBSAN_MAGICALLY_USE_2ND_BYTE)
type >>= 8;
pr_crit("%s\n", report_ubsan_failure(regs, type));

I don't see the point for printing regs->ip as this is followed by a
stack trace anyway, but I don't have a strong opinion about it either.

Though with the above get_ud_type() variant this becomes even simpler:

void handle_ubsan_failure(struct pt_regs *regs, u16 type)
{
if ((type & 0xFF) == UBSAN_MAGICALLY_USE_2ND_BYTE)
type >>= 8;
pr_crit("%s\n", report_ubsan_failure(regs, type));
}

Thanks,

tglx

Gatlin Newhouse

unread,
Jun 11, 2024, 4:26:14 PMJun 11
to Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x...@kernel.org, H. Peter Anvin, Kees Cook, Marco Elver, Andrey Konovalov, Andrey Ryabinin, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, Andrew Morton, Rick Edgecombe, Baoquan He, Changbin Du, Pengfei Xu, Josh Poimboeuf, Xin Li, Jason Gunthorpe, Tina Zhang, Uros Bizjak, Kirill A. Shutemov, linux-...@vger.kernel.org, kasa...@googlegroups.com, linux-h...@vger.kernel.org, ll...@lists.linux.dev
Thanks for the feedback.

It seems that is_valid_bugaddr() needs to be implemented on all architectures
and the function get_ud_type() replaces it here. So how should the patch handle
is_valid_bugaddr()? Should the function remain as-is in traps.c despite no
longer being used?

Kees Cook

unread,
Jun 12, 2024, 2:42:05 PMJun 12
to Thomas Gleixner, Gatlin Newhouse, Ingo Molnar, Borislav Petkov, Dave Hansen, x...@kernel.org, H. Peter Anvin, Marco Elver, Andrey Konovalov, Andrey Ryabinin, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, Andrew Morton, Rick Edgecombe, Baoquan He, Changbin Du, Pengfei Xu, Josh Poimboeuf, Xin Li, Jason Gunthorpe, Tina Zhang, Uros Bizjak, Kirill A. Shutemov, linux-...@vger.kernel.org, kasa...@googlegroups.com, linux-h...@vger.kernel.org, ll...@lists.linux.dev
Yeah, this is why I'd suggested to Gatlin in early designs to reuse
is_valid_bugaddr()'s int value. It's a required function, so it seemed
sensible to just repurpose it from yes/no to no/type1/type2/type3/etc.

-Kees

--
Kees Cook

Thomas Gleixner

unread,
Jun 17, 2024, 6:13:35 PMJun 17
to Kees Cook, Gatlin Newhouse, Ingo Molnar, Borislav Petkov, Dave Hansen, x...@kernel.org, H. Peter Anvin, Marco Elver, Andrey Konovalov, Andrey Ryabinin, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, Andrew Morton, Rick Edgecombe, Baoquan He, Changbin Du, Pengfei Xu, Josh Poimboeuf, Xin Li, Jason Gunthorpe, Tina Zhang, Uros Bizjak, Kirill A. Shutemov, linux-...@vger.kernel.org, kasa...@googlegroups.com, linux-h...@vger.kernel.org, ll...@lists.linux.dev
It's not sensible, it's just tasteless.

If is_valid_bugaddr() is globaly required in it's boolean form then it
should just stay that way and not be abused just because it can be
abused.

What's wrong with doing:

__always_inline u16 get_ud_type(unsigned long addr)
{
....
}

int is_valid_bugaddr(unsigned long addr)
{
return get_ud_type() != BUG_UD_NONE;
}

Hmm?

In fact is_valid_bugaddr() should be globally fixed up to return bool to
match what the function name suggests.

The UD type information is x86 specific and has zero business in a
generic architecture agnostic function return value.

It's a sad state of affairs that I have to explain this to people who
care about code correctness. Readability and consistency are substantial
parts of correctness, really.

Thanks,

tglx

Kees Cook

unread,
Jun 17, 2024, 7:06:51 PMJun 17
to Thomas Gleixner, Gatlin Newhouse, Ingo Molnar, Borislav Petkov, Dave Hansen, x...@kernel.org, H. Peter Anvin, Marco Elver, Andrey Konovalov, Andrey Ryabinin, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, Andrew Morton, Rick Edgecombe, Baoquan He, Changbin Du, Pengfei Xu, Josh Poimboeuf, Xin Li, Jason Gunthorpe, Tina Zhang, Uros Bizjak, Kirill A. Shutemov, linux-...@vger.kernel.org, kasa...@googlegroups.com, linux-h...@vger.kernel.org, ll...@lists.linux.dev
Well, it's trade-offs. If get_ud_type() is in is_valid_bugaddr(), we
have to call it _again_ outside of is_valid_bugaddr(). That's suboptimal
as well. I was trying to find a reasonable way to avoid refactoring all
architectures and to avoid code code.

Looking at it all again, I actually think arch/x86/kernel/traps.c
shouldn't call is_valid_bugaddr() at all. That usage can continue to
stay in lib/bug.c, which is only ever used by x86 during very early
boot, according to the comments in early_fixup_exception(). So just a
direct replacement of is_valid_bugaddr() with the proposed get_ud_type()
should be fine in arch/x86/kernel/traps.c.

What do you think?

--
Kees Cook

Thomas Gleixner

unread,
Jun 17, 2024, 7:57:52 PMJun 17
to Kees Cook, Gatlin Newhouse, Ingo Molnar, Borislav Petkov, Dave Hansen, x...@kernel.org, H. Peter Anvin, Marco Elver, Andrey Konovalov, Andrey Ryabinin, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, Andrew Morton, Rick Edgecombe, Baoquan He, Changbin Du, Pengfei Xu, Josh Poimboeuf, Xin Li, Jason Gunthorpe, Tina Zhang, Uros Bizjak, Kirill A. Shutemov, linux-...@vger.kernel.org, kasa...@googlegroups.com, linux-h...@vger.kernel.org, ll...@lists.linux.dev
On Mon, Jun 17 2024 at 16:06, Kees Cook wrote:
> On Tue, Jun 18, 2024 at 12:13:27AM +0200, Thomas Gleixner wrote:
>> In fact is_valid_bugaddr() should be globally fixed up to return bool to
>> match what the function name suggests.
>>
>> The UD type information is x86 specific and has zero business in a
>> generic architecture agnostic function return value.
>>
>> It's a sad state of affairs that I have to explain this to people who
>> care about code correctness. Readability and consistency are substantial
>> parts of correctness, really.
>
> Well, it's trade-offs. If get_ud_type() is in is_valid_bugaddr(), we
> have to call it _again_ outside of is_valid_bugaddr(). That's suboptimal
> as well. I was trying to find a reasonable way to avoid refactoring all
> architectures and to avoid code code.

There is not much of a trade-off. This is not the context switch hot
path, right?

Aside of that what is wrong with refactoring? If something does not fit
or the name does not make sense anymore then refactoring is the right
thing to do, no? It's not rocket science and just a little bit more work
but benefitial at the end.

> Looking at it all again, I actually think arch/x86/kernel/traps.c
> shouldn't call is_valid_bugaddr() at all. That usage can continue to
> stay in lib/bug.c, which is only ever used by x86 during very early
> boot, according to the comments in early_fixup_exception(). So just a
> direct replacement of is_valid_bugaddr() with the proposed get_ud_type()
> should be fine in arch/x86/kernel/traps.c.

I haven't looked at the details, but if that's the case then there is
even less of a reason to abuse is_valid_bugaddr().

That said it would still be sensible to convert is_valid_bugaddr() to a
boolean return value :)

Thanks,

tglx

Gatlin Newhouse

unread,
Jun 24, 2024, 11:25:45 PMJun 24
to Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x...@kernel.org, H. Peter Anvin, Kees Cook, Marco Elver, Andrey Konovalov, Andrey Ryabinin, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, Gatlin Newhouse, Andrew Morton, Mike Rapoport (IBM), Baoquan He, Rick Edgecombe, Changbin Du, Pengfei Xu, Xin Li, Jason Gunthorpe, Uros Bizjak, Arnd Bergmann, Kirill A. Shutemov, linux-...@vger.kernel.org, kasa...@googlegroups.com, linux-h...@vger.kernel.org, ll...@lists.linux.dev
Currently ARM architectures output which specific sanitizer caused
the trap, via the encoded data in the trap instruction. Clang on
x86 currently encodes the same data in ud1 instructions but the x86
handle_bug() and is_valid_bugaddr() functions currently only look
at ud2s.

Bring x86 to parity with arm64, similar to commit 25b84002afb9
("arm64: Support Clang UBSAN trap codes for better reporting").
Enable the output of UBSAN type information on x86 architectures
compiled with clang when CONFIG_UBSAN_TRAP=y.

Signed-off-by: Gatlin Newhouse <gatlin....@gmail.com>
---
Changes in v3:
- Address Thomas's remarks about: change log structure,
get_ud_type() instead of is_valid_bugaddr(), handle_bug()
changes, and handle_ubsan_failure().

Changes in v2:
- Name the new constants 'LEN_ASOP' and 'INSN_ASOP' instead of
'LEN_REX' and 'INSN_REX'
- Change handle_ubsan_failure() from enum bug_trap_type to void
function

v1: https://lore.kernel.org/linux-hardening/20240529022043.36617...@gmail.com/
v2: https://lore.kernel.org/linux-hardening/20240601031019.37087...@gmail.com/
---
MAINTAINERS | 2 ++
arch/x86/include/asm/bug.h | 11 ++++++++++
arch/x86/include/asm/ubsan.h | 23 +++++++++++++++++++++
arch/x86/kernel/Makefile | 1 +
arch/x86/kernel/traps.c | 40 +++++++++++++++++++++++++++++++-----
arch/x86/kernel/ubsan.c | 21 +++++++++++++++++++
6 files changed, 93 insertions(+), 5 deletions(-)
create mode 100644 arch/x86/include/asm/ubsan.h
create mode 100644 arch/x86/kernel/ubsan.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 28e20975c26f..b8512887ffb1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -22635,6 +22635,8 @@ L: kasa...@googlegroups.com
L: linux-h...@vger.kernel.org
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening
+F: arch/x86/include/asm/ubsan.h
+F: arch/x86/kernel/ubsan.c
F: Documentation/dev-tools/ubsan.rst
F: include/linux/ubsan.h
F: lib/Kconfig.ubsan
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index a3ec87d198ac..a363d13c263b 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -13,6 +13,17 @@
#define INSN_UD2 0x0b0f
#define LEN_UD2 2

+/*
+ * In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit.
+ */
+#define INSN_UD1 0xb90f
+#define INSN_UD_MASK 0xFFFF
+#define LEN_UD1 2
+#define INSN_ASOP 0x67
+#define INSN_ASOP_MASK 0x00FF
+#define BUG_UD_NONE 0xFFFF
+#define BUG_UD2 0xFFFE
+
#ifdef CONFIG_GENERIC_BUG

#ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/ubsan.h b/arch/x86/include/asm/ubsan.h
new file mode 100644
index 000000000000..ac2080984e83
--- /dev/null
+++ b/arch/x86/include/asm/ubsan.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_UBSAN_H
+#define _ASM_X86_UBSAN_H
+
+/*
+ * Clang Undefined Behavior Sanitizer trap mode support.
+ */
+#include <linux/bug.h>
+#include <linux/ubsan.h>
+#include <asm/ptrace.h>
+
+/*
+ * UBSAN uses the EAX register to encode its type in the ModRM byte.
+ */
+#define UBSAN_REG 0x40
+
+#ifdef CONFIG_UBSAN_TRAP
+void handle_ubsan_failure(struct pt_regs *regs, u16 insn);
+#else
+static inline void handle_ubsan_failure(struct pt_regs *regs, u16 insn) { return; }
+#endif /* CONFIG_UBSAN_TRAP */
+
+#endif /* _ASM_X86_UBSAN_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 74077694da7d..fe1d9db27500 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -145,6 +145,7 @@ obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o

obj-$(CONFIG_CFI_CLANG) += cfi.o
+obj-$(CONFIG_UBSAN_TRAP) += ubsan.o

obj-$(CONFIG_CALL_THUNKS) += callthunks.o

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 4fa0b17e5043..aef21287e7ed 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -67,6 +67,7 @@
#include <asm/vdso.h>
#include <asm/tdx.h>
#include <asm/cfi.h>
+#include <asm/ubsan.h>

#ifdef CONFIG_X86_64
#include <asm/x86_init.h>
@@ -91,6 +92,29 @@ __always_inline int is_valid_bugaddr(unsigned long addr)
return *(unsigned short *)addr == INSN_UD2;
}

+/*
+ * Check for UD1, UD2, with or without Address Size Override Prefixes instructions.
+ */
+__always_inline u16 get_ud_type(unsigned long addr)
+{
+ u16 insn;
+
+ if (addr < TASK_SIZE_MAX)
+ return BUG_UD_NONE;
+ insn = *(u16 *)addr;
+ if ((insn & INSN_UD_MASK) == INSN_UD2)
+ return BUG_UD2;
+ if ((insn & INSN_ASOP_MASK) == INSN_ASOP)
+ insn = *(u16 *)(++addr);
+
+ // UBSAN encode the failure type in the two bytes after UD1
+ if ((insn & INSN_UD_MASK) == INSN_UD1)
+ return *(u16 *)(addr + LEN_UD1);
+
+ return BUG_UD_NONE;
+}
+
+
static nokprobe_inline int
do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str,
struct pt_regs *regs, long error_code)
@@ -216,6 +240,7 @@ static inline void handle_invalid_op(struct pt_regs *regs)
static noinstr bool handle_bug(struct pt_regs *regs)
{
bool handled = false;
+ int ud_type;

/*
* Normally @regs are unpoisoned by irqentry_enter(), but handle_bug()
@@ -223,7 +248,8 @@ static noinstr bool handle_bug(struct pt_regs *regs)
* irqentry_enter().
*/
kmsan_unpoison_entry_regs(regs);
- if (!is_valid_bugaddr(regs->ip))
+ ud_type = get_ud_type(regs->ip);
+ if (ud_type == BUG_UD_NONE)
return handled;

/*
@@ -236,10 +262,14 @@ static noinstr bool handle_bug(struct pt_regs *regs)
*/
if (regs->flags & X86_EFLAGS_IF)
raw_local_irq_enable();
- if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN ||
- handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
- regs->ip += LEN_UD2;
- handled = true;
+ if (ud_type == INSN_UD2) {
+ if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN ||
+ handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
+ regs->ip += LEN_UD2;
+ handled = true;
+ }
+ } else {
+ handle_ubsan_failure(regs, ud_type);
}
if (regs->flags & X86_EFLAGS_IF)
raw_local_irq_disable();
diff --git a/arch/x86/kernel/ubsan.c b/arch/x86/kernel/ubsan.c
new file mode 100644
index 000000000000..c90e337a1b6a
--- /dev/null
+++ b/arch/x86/kernel/ubsan.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Clang Undefined Behavior Sanitizer trap mode support.
+ */
+#include <linux/bug.h>
+#include <linux/string.h>
+#include <linux/printk.h>
+#include <linux/ubsan.h>
+#include <asm/ptrace.h>
+#include <asm/ubsan.h>
+
+/*
+ * Checks for the information embedded in the UD1 trap instruction
+ * for the UB Sanitizer in order to pass along debugging output.
+ */
+void handle_ubsan_failure(struct pt_regs *regs, u16 type)
+{
+ if ((type & 0xFF) == UBSAN_REG)
+ type >>= 8;
+ pr_crit("%s at %pS\n", report_ubsan_failure(regs, type), (void *)regs->ip);
+}
--
2.25.1

Xin Li

unread,
Jun 25, 2024, 5:04:36 AMJun 25
to Gatlin Newhouse, Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x...@kernel.org, H. Peter Anvin, Kees Cook, Marco Elver, Andrey Konovalov, Andrey Ryabinin, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, Andrew Morton, Mike Rapoport (IBM), Baoquan He, Rick Edgecombe, Changbin Du, Pengfei Xu, Xin Li, Jason Gunthorpe, Uros Bizjak, Arnd Bergmann, Kirill A. Shutemov, linux-...@vger.kernel.org, kasa...@googlegroups.com, linux-h...@vger.kernel.org, ll...@lists.linux.dev
Add an empty line for better readability.

> + insn = *(u16 *)addr;
> + if ((insn & INSN_UD_MASK) == INSN_UD2)
> + return BUG_UD2;

Ditto.

There are extra empty lines in tglx's suggestion.

> + if ((insn & INSN_ASOP_MASK) == INSN_ASOP)
> + insn = *(u16 *)(++addr);
> +
> + // UBSAN encode the failure type in the two bytes after UD1
> + if ((insn & INSN_UD_MASK) == INSN_UD1)
> + return *(u16 *)(addr + LEN_UD1);
> +
> + return BUG_UD_NONE;
> +}
> +
> +

Better to add only one empty line.
Add one empty line.

Peter Zijlstra

unread,
Jun 25, 2024, 5:38:00 AMJun 25
to Gatlin Newhouse, Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x...@kernel.org, H. Peter Anvin, Kees Cook, Marco Elver, Andrey Konovalov, Andrey Ryabinin, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, Andrew Morton, Mike Rapoport (IBM), Baoquan He, Rick Edgecombe, Changbin Du, Pengfei Xu, Xin Li, Jason Gunthorpe, Uros Bizjak, Arnd Bergmann, Kirill A. Shutemov, linux-...@vger.kernel.org, kasa...@googlegroups.com, linux-h...@vger.kernel.org, ll...@lists.linux.dev
Please look at 790d1ce71de. Also your style above is inconsistent,
please use lower case consistently for the hex values.
Given that insn is u16, this INSN_UD_MASK seems eminently pointless.

Are the bytes after UD1 a proper ModRM such that the whole forms a
decodable instruction? You seem to not mention this anywhere. It is
paramount that the instruction stream is still correctly decodable.

Also, wouldn't it be saner to write this something like:

__always_inline int decode_bug(unsigned long addr, u32 *imm)
{
u8 v;

if (addr < TASK_SIZE)
return BUG_NONE;

v = *(u8 *)(addr++);
if (v == 0x67)
v = *(u8 *)(addr++);
if (v != 0x0f)
return BUG_NONE;
v = *(u8 *)(addr++);
if (v == 0x0b)
return BUG_UD2;
if (v != 0xb9)
return BUG_NONE;

if (X86_MODRM_RM(v) == 4)
addr++; /* consume SiB */

*imm = 0;
if (X86_MODRM_MOD(v) == 1)
*imm = *(u8 *)addr;
if (X86_MORRM_MOD(v) == 2)
*imm = *(u32 *)addr;

// WARN on MOD(v)==3 ??

return BUG_UD1;
}

Why does the thing emit the asop prefix at all through? afaict it
doesn't affect the immediate you want to get at. And if it does this
prefix, should we worry about other prefixes? Ideally we'd not accept
any prefixes.


Kees Cook

unread,
Jun 26, 2024, 3:07:57 PMJun 26
to Peter Zijlstra, Gatlin Newhouse, Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x...@kernel.org, H. Peter Anvin, Marco Elver, Andrey Konovalov, Andrey Ryabinin, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, Andrew Morton, Mike Rapoport (IBM), Baoquan He, Rick Edgecombe, Changbin Du, Pengfei Xu, Xin Li, Jason Gunthorpe, Uros Bizjak, Arnd Bergmann, Kirill A. Shutemov, linux-...@vger.kernel.org, kasa...@googlegroups.com, linux-h...@vger.kernel.org, ll...@lists.linux.dev
Thanks for the example! (I think it should use macros instead of
open-coded "0x67", "0x0f", etc, but yeah.)

> Why does the thing emit the asop prefix at all through? afaict it
> doesn't affect the immediate you want to get at. And if it does this
> prefix, should we worry about other prefixes? Ideally we'd not accept
> any prefixes.

AFAICT it's because it's a small immediate? For an x86_64 build, this is
how Clang is generating the UD1.

-Kees

--
Kees Cook

Peter Zijlstra

unread,
Jun 28, 2024, 2:04:55 PMJun 28
to Kees Cook, Gatlin Newhouse, Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x...@kernel.org, H. Peter Anvin, Marco Elver, Andrey Konovalov, Andrey Ryabinin, Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt, Andrew Morton, Mike Rapoport (IBM), Baoquan He, Rick Edgecombe, Changbin Du, Pengfei Xu, Xin Li, Jason Gunthorpe, Uros Bizjak, Arnd Bergmann, Kirill A. Shutemov, linux-...@vger.kernel.org, kasa...@googlegroups.com, linux-h...@vger.kernel.org, ll...@lists.linux.dev
On Wed, Jun 26, 2024 at 12:07:52PM -0700, Kees Cook wrote:
> On Tue, Jun 25, 2024 at 11:37:19AM +0200, Peter Zijlstra wrote:
> > Also, wouldn't it be saner to write this something like:
> >
> > __always_inline int decode_bug(unsigned long addr, u32 *imm)
> > {
> > u8 v;
> >
> > if (addr < TASK_SIZE)
> > return BUG_NONE;
> >
> > v = *(u8 *)(addr++);
> > if (v == 0x67)
> > v = *(u8 *)(addr++);
> > if (v != 0x0f)
> > return BUG_NONE;
> > v = *(u8 *)(addr++);
> > if (v == 0x0b)
> > return BUG_UD2;
> > if (v != 0xb9)
> > return BUG_NONE;
> >

Looks like I lost:

v = *(u8 *)(addr++);
> > if (X86_MODRM_RM(v) == 4)
> > addr++; /* consume SiB */
> >
> > *imm = 0;
> > if (X86_MODRM_MOD(v) == 1)
> > *imm = *(u8 *)addr;
> > if (X86_MORRM_MOD(v) == 2)
> > *imm = *(u32 *)addr;
> >
> > // WARN on MOD(v)==3 ??
> >
> > return BUG_UD1;
> > }
>
> Thanks for the example! (I think it should use macros instead of
> open-coded "0x67", "0x0f", etc, but yeah.)

Yeah, I didn't feel like hunting down pre-existing defines for all of
them, but yeah.

> > Why does the thing emit the asop prefix at all through? afaict it
> > doesn't affect the immediate you want to get at. And if it does this
> > prefix, should we worry about other prefixes? Ideally we'd not accept
> > any prefixes.
>
> AFAICT it's because it's a small immediate? For an x86_64 build, this is
> how Clang is generating the UD1.

So the disp8 immediate comes from MOD==1, MOD==2 has a disp32. What the
prefix does is change the size of the memory being referenced from 32bit
to 16bit iirc, but since UD does not actually perform the load, this is
entirely superfluous afaict.

It might be good to figure out *why* clang thinks it needs this.

A REX prefix is far more likely to be useful (upper 8 destination
register for instance).

Anyway, it seems to basically boil down to needing a fairly complete
instruction decoder without being able the use the normal one :/
Reply all
Reply to author
Forward
0 new messages