[PATCH] aarch64: handle exceptions on dedicated stack

17 views
Skip to first unread message

Waldemar Kozaczuk

unread,
Apr 29, 2022, 12:10:12 AM4/29/22
to osv...@googlegroups.com, Waldemar Kozaczuk
This patch changes the exception handling mechanism to use a dedicated
exception stack instead of the default stack provided for kernel and
application threads. This is critical to support Golang apps, which
are known to use tiny stacks in coroutines; the exception handler of the
svc instruction cannot use a single byte of the application stack in such
a case. Having a separate exception stack has other benefits for debugging
and will allow a future implementation of "lazy" stacks. This also makes
the aarch64 port similar to x64, where we use dedicated stacks as well.

To support dedicated stacks, we take advantage of the fact that at every
exception level but EL0 there are two stack registers available -
SP_ELx and SP_EL0. OSv runs at the exception level EL1 and in boot.S
selects SP_EL1 to be used by default. The SP register is effectively an
alias to one of the two stack registers and can be changed by setting
the system register SPSel (stack selector).

This patch changes all exception handlers (both synchronous and
asynchronous (interrupts)) in entry.S to switch to the new exception
stack before pushing a frame by setting the SPSel to #0 which makes
SP point to SP_EL0. We have to switch to SP_EL0 even in the case of the
nested exception when we are on SP_EL0 as per ARM specification the SP
is always reset to SP_ELx (in our case SP_EL1) after taking an
exception. The typical case of nested exception is handling of a page
fault where we enable exceptions downstream in the page fault handler
(arch/aarch64/mmu.cc) and it may be interrupted by an asynchronous
exception like a timer one. To that end we also add the exception
handlers for curr_el_sp0, which the system invokes when code is running
with SP pointing to SP_EL0.

Finally, we also change the context switch code in sched.S to make
it save not only the default stack register but also explicitly save
SP_EL0, SP_EL1 and SPSel for the old thread, and then restore those
from arch_thread_state for the new thread. This makes the context switch
slightly more expensive and has been measured to add around 5% of
overhead.

This patch effectively enhances OSv to allow running Golang apps on
AArch64.

Fixes #1155

Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>
---
arch/aarch64/arch-cpu.hh | 1 +
arch/aarch64/arch-switch.hh | 12 +++--
arch/aarch64/arch-thread-state.hh | 3 ++
arch/aarch64/entry.S | 73 +++++++++++++++++++++----------
arch/aarch64/sched.S | 18 +++++++-
5 files changed, 78 insertions(+), 29 deletions(-)

diff --git a/arch/aarch64/arch-cpu.hh b/arch/aarch64/arch-cpu.hh
index 15edbdaa..8848d880 100644
--- a/arch/aarch64/arch-cpu.hh
+++ b/arch/aarch64/arch-cpu.hh
@@ -33,6 +33,7 @@ struct arch_cpu {
};

struct arch_thread {
+ char exception_stack[4096*4] __attribute__((aligned(16)));
};

struct arch_fpu {
diff --git a/arch/aarch64/arch-switch.hh b/arch/aarch64/arch-switch.hh
index 0401a4b8..c8848605 100644
--- a/arch/aarch64/arch-switch.hh
+++ b/arch/aarch64/arch-switch.hh
@@ -33,13 +33,15 @@ void thread::switch_to_first()
remote_thread_local_var(percpu_base) = _detached_state->_cpu->percpu_base;

asm volatile("\n"
- "ldp x29, x0, %2 \n"
- "ldp x22, x21, %3 \n"
+ "ldp x29, x0, %3 \n"
+ "ldp x22, x21, %4 \n"
"mov sp, x22 \n"
+ "ldr x22, %5 \n"
+ "msr sp_el0, x22 \n"
"blr x21 \n"
: // No output operands - this is to designate the input operands as earlyclobbers
- "=&Ump"(this->_state.fp), "=&Ump"(this->_state.sp)
- : "Ump"(this->_state.fp), "Ump"(this->_state.sp)
+ "=&Ump"(this->_state.fp), "=&Ump"(this->_state.sp), "=&Ump"(this->_state.exception_sp)
+ : "Ump"(this->_state.fp), "Ump"(this->_state.sp), "Ump"(this->_state.exception_sp)
: "x0", "x19", "x20", "x21", "x22", "x23", "x24",
"x25", "x26", "x27", "x28", "x30", "memory");
}
@@ -59,6 +61,8 @@ void thread::init_stack()
_state.thread = this;
_state.sp = stacktop;
_state.pc = reinterpret_cast<void*>(thread_main);
+ _state.exception_sp = _arch.exception_stack + sizeof(_arch.exception_stack);
+ _state.stack_selector = 1; //Select SP_ELx
}

void thread::setup_tcb()
diff --git a/arch/aarch64/arch-thread-state.hh b/arch/aarch64/arch-thread-state.hh
index 6f1b680d..f6a27ff2 100644
--- a/arch/aarch64/arch-thread-state.hh
+++ b/arch/aarch64/arch-thread-state.hh
@@ -15,6 +15,9 @@ struct thread_state {
void* sp;
void* pc;
void* tcb;
+
+ void* exception_sp; //SP_EL0
+ u64 stack_selector; //1 - selects SP_ELx (default), 0 - selects SP_EL0 (exceptions)
};

#endif /* ARCH_THREAD_STATE_HH_ */
diff --git a/arch/aarch64/entry.S b/arch/aarch64/entry.S
index 25354359..8322ee90 100644
--- a/arch/aarch64/entry.S
+++ b/arch/aarch64/entry.S
@@ -57,9 +57,16 @@ exception_vectors:
vector_entry lower_el_aarch32 fiq
vector_entry lower_el_aarch32 serror

-/* keep in sync with the struct in exceptions.hh */
-.macro push_state_to_exception_frame
- sub sp, sp, #48 // make space for align2, align1+ESR, PSTATE, PC, SP
+/* keep in sync with the struct in exceptions.hh
+ the switch argument (1 or 0) indicates if we would be switching from
+ SP_ELx -> SP_EL0 (1) or staying on the same stack - SP_EL0 -> SP_EL0 (0) */
+.macro push_state_to_exception_frame switch
+ // Regardless which stack (pointed by SP_ELx or SP_EL0) was in use when
+ // exception was taken, the stack is always reset to SP_ELx before exception
+ // handler is executed. To make sure the exception handler uses the exception
+ // stack pointed by SP_EL0 we need to set SPSEL to #0.
+ msr spsel, #0 // switch to exception stack by selecting SP_EL0
+ sub sp, sp, #48 // make space for align2, align1+ESR, PSTATE, PC, SP
.cfi_adjust_cfa_offset 48
push_pair x28, x29
push_pair x26, x27
@@ -76,7 +83,13 @@ exception_vectors:
push_pair x4, x5
push_pair x2, x3
push_pair x0, x1
+ .if \switch == 1
+ msr spsel, #1 // switch to regular stack (SP_ELx) for brief moment to read it
+ mov x1, sp // fetch SP of regular stack (spsel 1)
+ msr spsel, #0 // switch back to exception stack
+ .else
add x1, sp, #288 // x1 := old SP (48 + 16 * 15 = 288)
+ .endif
mrs x2, elr_el1
mrs x3, spsr_el1
stp x30, x1, [sp, #240] // store lr, old SP
@@ -102,6 +115,10 @@ exception_vectors:
pop_pair x24, x25
pop_pair x26, x27
pop_pair x28, x29
+ // please note we do not need to explicitly switch the stack when returning
+ // from exception by resetting the stack selector register, as it will
+ // happen automatically based on the value of spsr_el1 which we restored above
+ // (the spsr_el1 holds PSTATE and EL and SP selector)
ldr x30, [sp], #48
.cfi_adjust_cfa_offset -48
.endm /* pop_state_to_exception_frame */
@@ -143,7 +160,7 @@ entry_\level\()_\type:
.cfi_offset x30, -32 // Point to the elr register located at the -32 offset
// of the exception frame to help gdb link to the
// address when interrupt was raised
- push_state_to_exception_frame
+ push_state_to_exception_frame 1
mrs x1, esr_el1
str w1, [sp, #272] // Store Exception Syndrom Register in the frame
mov x0, sp // Save exception_frame to x0
@@ -165,8 +182,6 @@ entry_\level\()_\type:
.equ EX_TYPE_FIQ, 0x2
.equ EX_TYPE_SERROR, 0x3

-entry_unexpected_exception curr_el_sp0, sync, #CURR_EL_SP0, #EX_TYPE_SYNC
-entry_unexpected_exception curr_el_sp0, irq, #CURR_EL_SP0, #EX_TYPE_IRQ
entry_unexpected_exception curr_el_sp0, fiq, #CURR_EL_SP0, #EX_TYPE_FIQ
entry_unexpected_exception curr_el_sp0, serror, #CURR_EL_SP0, #EX_TYPE_SERROR

@@ -183,38 +198,39 @@ entry_unexpected_exception lower_el_aarch32, irq, #LOWER_EL_AARCH32, #EX_TYPE_IR
entry_unexpected_exception lower_el_aarch32, fiq, #LOWER_EL_AARCH32, #EX_TYPE_FIQ
entry_unexpected_exception lower_el_aarch32, serror, #LOWER_EL_AARCH32, #EX_TYPE_SERROR

-.global entry_curr_el_spx_sync
-.hidden entry_curr_el_spx_sync
-.type entry_curr_el_spx_sync, @function
-entry_curr_el_spx_sync:
+.macro entry_curr_el_sync stack, switch
+.global entry_curr_el_sp\stack\()_sync
+.hidden entry_curr_el_sp\stack\()_sync
+.type entry_curr_el_sp\stack\()_sync, @function
+entry_curr_el_sp\stack\()_sync:
.cfi_startproc simple
.cfi_signal_frame
.cfi_def_cfa sp, 0
.cfi_offset x30, -32 // Point to the elr register located at the -32 offset
// of the exception frame to help gdb link to the
// address when interrupt was raised
- push_state_to_exception_frame
+ push_state_to_exception_frame \switch
mrs x1, esr_el1
str w1, [sp, #272] // Store Exception Syndrom Register in the frame
ubfm x2, x1, #ESR_EC_BEG, #ESR_EC_END // Exception Class -> X2
ubfm x3, x1, #ESR_FLT_BEG, #ESR_FLT_END // FLT -> X3
cmp x2, #ESR_EC_SVC64
- b.eq handle_system_call
+ b.eq handle_system_call_sp\stack
cmp x2, #ESR_EC_DATA_ABORT
- b.eq handle_mem_abort
+ b.eq handle_mem_abort_sp\stack
cmp x2, #ESR_EC_INSN_ABORT
- b.ne unexpected_sync_exception
-handle_mem_abort:
- cbz x3, unexpected_sync_exception
+ b.ne unexpected_sync_exception_sp\stack
+handle_mem_abort_sp\stack:
+ cbz x3, unexpected_sync_exception_sp\stack
cmp x3, #3
- b.hi unexpected_sync_exception
+ b.hi unexpected_sync_exception_sp\stack

mov x0, sp // save exception_frame to x0
bl page_fault
pop_state_from_exception_frame
eret
.cfi_endproc
-handle_system_call:
+handle_system_call_sp\stack:
.cfi_startproc
//see https://man7.org/linux/man-pages/man2/syscall.2.html for details
//about calling convention for arm64
@@ -237,7 +253,7 @@ handle_system_call:
pop_state_from_exception_frame
eret
.cfi_endproc
-unexpected_sync_exception:
+unexpected_sync_exception_sp\stack:
.cfi_startproc
mov x0, sp // save exception_frame to x0
mov x1, #CURR_EL_SPX
@@ -246,23 +262,32 @@ unexpected_sync_exception:
pop_state_from_exception_frame
bl abort
.cfi_endproc
+.endm
+
+entry_curr_el_sync 0, 0 // the synchronous exception handler used when the SP_EL0 is active
+entry_curr_el_sync x, 1 // the synchronous exception handler used when the SP_ELx is active

-.global entry_curr_el_spx_irq
-.hidden entry_curr_el_spx_irq
-.type entry_curr_el_spx_irq, @function
-entry_curr_el_spx_irq:
+.macro entry_curr_el_irq stack, switch
+.global entry_curr_el_sp\stack\()_irq
+.hidden entry_curr_el_sp\stack\()_irq
+.type entry_curr_el_sp\stack\()_irq, @function
+entry_curr_el_sp\stack\()_irq:
.cfi_startproc simple
.cfi_signal_frame
.cfi_def_cfa sp, 0
.cfi_offset x30, -32 // Point to the elr register located at the -32 offset
// of the exception frame to help gdb link to the
// address when interrupt was raised
- push_state_to_exception_frame
+ push_state_to_exception_frame \switch
mov x0, sp
bl interrupt // extern "C"
pop_state_from_exception_frame
eret
.cfi_endproc
+.endm
+
+entry_curr_el_irq 0, 0 // the asynchronous exception handler used when the SP_EL0 is active
+entry_curr_el_irq x, 1 // the asynchronous exception handler used when the SP_ELx is active

.global call_signal_handler_thunk
.hidden call_signal_handler_thunk
diff --git a/arch/aarch64/sched.S b/arch/aarch64/sched.S
index 5949459a..ab049a6f 100644
--- a/arch/aarch64/sched.S
+++ b/arch/aarch64/sched.S
@@ -39,15 +39,31 @@ reschedule_from_interrupt:
isb

str x29, [x0, #0] //Save frame pointer of the old thread
+
+ mrs x2, spsel //Fetch old thread stack selector
+ msr spsel, #1 //Select SP_ELx
mov x3, sp //Fetch old thread stack pointer
+
adr x4, 1f //Fetch old thread instruction point
stp x3, x4, [x0, #16] //Save old thread sp and pc

+ msr spsel, #0 //Select SP_EL0
+ mov x3, sp //Fetch old thread exception stack pointer
+ stp x3, x2, [x0, #40] //Save old thread exception stack pointer and stack selector
+
ldp x29, x0, [x1, #0] //Set frame pointer of the new thread and this (x0) of the new thread
//Please note that the pc may point to thread_main_c(thread*) which is
//why we have to set x0 (1st argument) to new thread object
ldp x3, x4, [x1, #16] //Fetch new thread sp and pc
- mov sp, x3 //Set new thread stack pointer
+
+ msr spsel, #1 //Select SP_ELx
+ mov sp, x3 //Restore new thread stack pointer
+
+ ldp x3, x2, [x1, #40] //Load new thread exception stack pointer and stack selector
+ msr spsel, #0 //Select SP_EL0
+ mov sp, x3 //Restore new thread exception stack pointer
+ msr spsel, x2 //Restore new thread stack selector (1-SP_ELx,0-SP_EL0)
+
blr x4 //Jump to the new thread pc

1:
--
2.27.0

Commit Bot

unread,
May 4, 2022, 9:33:02 PM5/4/22
to osv...@googlegroups.com, Waldemar Kozaczuk
From: Waldemar Kozaczuk <jwkoz...@gmail.com>
Committer: Waldemar Kozaczuk <jwkoz...@gmail.com>
Branch: master

aarch64: handle exceptions on dedicated stack
diff --git a/arch/aarch64/arch-cpu.hh b/arch/aarch64/arch-cpu.hh
--- a/arch/aarch64/arch-cpu.hh
+++ b/arch/aarch64/arch-cpu.hh
@@ -33,6 +33,7 @@ struct arch_cpu {
};

struct arch_thread {
+ char exception_stack[4096*4] __attribute__((aligned(16)));
};

struct arch_fpu {
diff --git a/arch/aarch64/arch-switch.hh b/arch/aarch64/arch-switch.hh
--- a/arch/aarch64/arch-thread-state.hh
+++ b/arch/aarch64/arch-thread-state.hh
@@ -15,6 +15,9 @@ struct thread_state {
void* sp;
void* pc;
void* tcb;
+
+ void* exception_sp; //SP_EL0
+ u64 stack_selector; //1 - selects SP_ELx (default), 0 - selects SP_EL0 (exceptions)
};

#endif /* ARCH_THREAD_STATE_HH_ */
diff --git a/arch/aarch64/entry.S b/arch/aarch64/entry.S
Reply all
Reply to author
Forward
0 new messages