My main language is C/C++ and so I'm used to seeing function signatures such as the following:
void Func(void);
int Func(void);
void Func(double, int, char*);
which would be written as follows in Visual Basic:
Private Sub Func()
Private Function Func() As Integer
Private Sub Func(ByVal arg1 As Double, ByVal arg2 As Integer, ByVal arg3 As String)
A few months ago, I devised a way in x86_64 assembler and C++ of dynamically allocating a separate stack to use for a single invocation of a function, and in the end I even got it working with exception handling. However, it only worked for functions that take no arguments and return no value.
Since then, I've tried to come up with a universal solution that will work for all function signatures (even those with supernumerary parameters, i.e. more arguments than fit in registers so that the extras are passed on the stack, or those that return a very big struct by value), and I've got it working for System V x86_64.
Here's what I currently have; the assembler is inline within the C++ code:
https://godbolt.org/z/W4x7vbE7q
And here it is copy-pasted:
#include <cassert> // assert
#include <cstddef> // size_t
#include <memory> // unique_ptr
#include <utility> // forward
// Per-thread state shared between the C++ wrappers and the assembler routines.
// The assembler code addresses these by symbol name (fs:<name>@tpoff), so the
// names must not change.
thread_local char *p_original      = nullptr;  // stack pointer saved on entry to the trampoline, restored on the way out
thread_local char *p_replacement   = nullptr;  // starting top-of-stack address inside the replacement stack
thread_local void (*f)()           = nullptr;  // target function, stored type-erased; the trampoline jumps to it
thread_local char *bottom_of_stack = nullptr;  // upper bound of the region of the old stack that gets copied

// Routines implemented in top-level inline assembler elsewhere in this file.
extern "C" {
    // Records an address just above its own return address in 'bottom_of_stack'.
    void Assembler_set_bottom_of_stack() noexcept;

    // Trampoline: copies part of the current stack onto the replacement stack,
    // switches to it and jumps to 'f'. It is never called through this
    // signature; callers cast it to the target function's real type first.
    void Assembler_set_stack_pointer_and_invoke() noexcept;
}
/* void Assembler_set_bottom_of_stack(void)

   Stores (rsp + 16) in the thread-local 'bottom_of_stack'. On entry rsp
   points at this routine's own return address, so +8 steps over that
   return address and a further +8 is a safety margin. Only r10 is
   modified. */
__asm("Assembler_set_bottom_of_stack:                          \n"
      ".intel_syntax noprefix                                  \n"
      "    lea  r10, [rsp + 16]                                \n"  // skip the return address (+8) plus 8 bytes of margin
      "    mov  QWORD PTR fs:bottom_of_stack@tpoff, r10        \n"  // publish it for the trampoline
      "    ret                                                 \n"
      ".att_syntax");
/* Callable handle returned by Stacker::operator(). Constructing one stores
   the replacement stack pointer and the target function in thread-local
   variables; calling it runs the target through the assembler trampoline.
   Only Stacker can construct an Invoker. */
template<typename R, typename... Params>
class Invoker {
    using Target = R (*)(Params...);   // the real signature of the function to run

    friend class Stacker;

    // Stashes both values where the assembler trampoline will look for them.
    Invoker(char *const arg_p, Target const arg_f) noexcept
    {
        f = reinterpret_cast<void (*)(void)>(arg_f);   // thread_local: type-erased target
        p_replacement = arg_p;                         // thread_local: top of the replacement stack
    }
public:
    // Calls the stored target with 'args' and returns its result.
    // Both calls below are deliberately kept in this one function: the first
    // records a stack address for the current frame, and the trampoline then
    // copies everything between that address and its own entry stack pointer.
    R operator()(Params... args)
    {
        Assembler_set_bottom_of_stack();
        // Call the trampoline as though it had the target's signature, so the
        // compiler places the arguments exactly where the target expects them.
        return reinterpret_cast<Target>(Assembler_set_stack_pointer_and_invoke)( std::forward<Params>(args)... );
    }
};
/* Stacker describes a block of memory that is to serve as a replacement
   call stack, and hands out Invoker objects that run a function on it.

   The block is either allocated (and owned) by the Stacker, or supplied by
   the caller, in which case the caller keeps ownership and must keep the
   block alive for as long as it is in use.

   NOTE: the Invoker returned by operator() refers to this Stacker's block
   through a thread-local pointer, so it must be called while the Stacker
   (and its block) still exists. */
class Stacker {
    char *p;                          // address at which the trampoline starts filling the new stack
    std::unique_ptr<char[]> mystack;  // owns the block when it was allocated here; empty for a caller-supplied block

    /* Computes the initial replacement stack pointer for the block
       [base, base + len).

       The trampoline enters the target function with rsp equal to this
       pointer minus the size of the copied stack region, and that size is a
       multiple of 16 (both ends of the region are "call-site rsp +/- 8").
       The System V x86_64 ABI requires rsp % 16 == 8 on function entry, so
       the pointer returned here must itself satisfy p % 16 == 8. Rounding
       the end of the block down to a 16-byte boundary and subtracting 24
       achieves that and keeps the first 8-byte store at [p, p+8) at least
       16 bytes below the end of the block, for any base alignment and any
       length. */
    static char *initial_top(char *const base, std::size_t const len) noexcept
    {
        // Pointer <-> size_t round trip is fine on x86_64, the only target of this code.
        std::size_t const first       = reinterpret_cast<std::size_t>(base);
        std::size_t const aligned_end = (first + len) & ~static_cast<std::size_t>(15u);
        return base + (aligned_end - 24u - first);
    }
public:
    /* Allocates a block of 'len' bytes (at least 128) for the replacement stack.
       Throws std::bad_alloc if the allocation fails. */
    Stacker(std::size_t const len) noexcept(false)
    {
        assert( len >= 128u );
        mystack.reset( new char[len] );   // deliberately left uninitialised: the block may be very large
        p = initial_top(mystack.get(), len);
    }

    /* Uses the caller-supplied block [arg, arg + len) as the replacement
       stack; 'arg' must be non-null and 'len' at least 128. Ownership stays
       with the caller. */
    Stacker(char *const arg, std::size_t const len) noexcept
    {
        assert( nullptr != arg );
        assert( len >= 128u );
        p = initial_top(arg, len);
    }

    /* Returns an Invoker that, when called with the arguments of 'arg',
       runs 'arg' on this Stacker's replacement stack. */
    template<typename R, typename... Params>
    Invoker<R,Params...> operator()( R(*const arg)(Params...) )
    {
        return Invoker<R,Params...>(this->p, arg);
    }
};
/* Assembler_set_stack_pointer_and_invoke -- trampoline written in x86_64
   assembler for the System V calling convention.

   It is entered through a function pointer that has been cast to the
   target's real signature, so on entry every argument register and every
   stack argument is already set up for the target. The routine:
     1. saves the incoming stack pointer in 'p_original';
     2. copies the old stack between the entry stack pointer and
        'bottom_of_stack' (stack-passed arguments live there) to the
        replacement stack that starts at 'p_replacement';
     3. switches rsp to the replacement stack, plants its own return
        address there and jumps to the target 'f';
     4. when the target returns, restores rsp from 'p_original' and returns
        to the original caller, leaving the target's return registers
        untouched.

   Registers: argument registers must reach the target unmodified, so only
   r10 and r11 (free scratch registers) and rax are used as temporaries.
   rax carries no argument data for a non-variadic target (it only holds
   the vector-register count for variadic calls). r15 is borrowed but
   saved and restored around its use.

   Labels are assembler-local (.L...) so they cannot clash with other
   symbols or with the 'loop' instruction mnemonic, and the return label is
   taken RIP-relative so the code also links as a position-independent
   executable.

   NOTE(review): 'p_original' is a single thread-local slot, so a target
   that itself goes through this trampoline would overwrite it -- nested
   use looks unsupported; confirm before relying on it.
   NOTE(review): no unwind (CFI) information is emitted here, so an
   exception propagating out of the target presumably cannot unwind
   through this frame -- confirm. */
__asm("Assembler_set_stack_pointer_and_invoke:                 \n"
      ".intel_syntax noprefix                                  \n"
      // Step 1: Save the original stack pointer (it points at our return address)
      "    mov  QWORD PTR fs:p_original@tpoff, rsp             \n"
      // Step 2: Load the three addresses that drive the copy
      "    push r15                                            \n"  // callee-saved: keep the caller's value
      "    lea  r15, [rsp + 8]                                 \n"  // r15 = entry rsp = top of the old stack
      "    mov  r10, QWORD PTR fs:p_replacement@tpoff          \n"  // r10 = top of the new stack
      "    mov  rax, QWORD PTR fs:bottom_of_stack@tpoff        \n"  // rax = bottom of the old stack
      // Step 3: Copy the old stack to the new stack, one 8-byte slot at a
      //         time, from the bottom (high address) towards the top:
      //             while ( rax > r15 ) { *r10 = *rax;  r10 -= 8;  rax -= 8; }
      //         Stack addresses are always multiples of 8, so rax lands
      //         exactly on r15. The unsigned 'above' test (instead of 'not
      //         equal') also stops the loop if 'bottom_of_stack' is not
      //         above the entry stack pointer.
      "    jmp  .Lstacker_copy_cond                            \n"
      ".Lstacker_copy_loop:                                    \n"
      "    mov  r11, QWORD PTR [rax]                           \n"
      "    mov  QWORD PTR [r10], r11                           \n"
      "    sub  r10, 8                                         \n"
      "    sub  rax, 8                                         \n"
      ".Lstacker_copy_cond:                                    \n"
      "    cmp  rax, r15                                       \n"
      "    ja   .Lstacker_copy_loop                            \n"
      "    pop  r15                                            \n"  // restore the caller's r15
      // Step 4: Switch to the new stack; r10 now addresses the slot that
      //         corresponds to the return-address slot of the old stack
      "    mov  rsp, r10                                       \n"
      // Step 5: Make the target return to the label below instead of to
      //         the original caller
      "    lea  r10, [rip + .Lstacker_jump_back]               \n"
      "    mov  QWORD PTR [rsp], r10                           \n"
      // Step 6: Invoke the target function
      "    jmp  QWORD PTR fs:f@tpoff                           \n"
      ".Lstacker_jump_back:                                    \n"
      // The target's 'ret' has already popped the label address.
      // Step 7: Restore the original stack pointer
      "    mov  rsp, QWORD PTR fs:p_original@tpoff             \n"
      // Step 8: Return to the original caller
      "    ret                                                 \n"
      ".att_syntax");
// =================== And now the test code ===============================================
#include <iostream> // cout, endl
using std::cout, std::endl;
// Test payload: a struct large enough that it cannot be returned in
// registers, so it is returned by value through memory.
struct VeryBigStruct {
    double a[3];
    int b[3];
    double c[3];
    int d[3];
    double e[3];
    int f[3];
};

// Test target: takes more integer arguments than fit in registers (so some
// are passed on the stack) and returns a VeryBigStruct by value.
// The sum of all ten arguments is stored in f[2]; every other member is zero.
VeryBigStruct Func2(int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8, int a9, int a10)
{
    VeryBigStruct vbs{};   // value-initialise so no indeterminate members are returned
    vbs.f[2] = a1+a2+a3+a4+a5+a6+a7+a8+a9+a10;
    return vbs;
}
// Demo driver: prints the result of Func2 called the ordinary way, then the
// result of the same call routed through a Stacker-provided stack, so the
// two "Retval" lines can be compared.
int main()
{
    std::cout << "first line in main\n";

    // Reference: plain call on the thread's own stack.
    std::cout << "Retval: ";
    std::cout << Func2(1,2,3,4,5,6,7,8,9,10).f[2];
    std::cout << std::endl;

    // Same call, executed on a freshly allocated 1048576000-byte stack.
    std::cout << "Retval: ";
    std::cout << Stacker(1048576000u)(Func2)(1,2,3,4,5,6,7,8,9,10).f[2];
    std::cout << std::endl;

    std::cout << "last line in main\n";
}