Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

Invoke any function with custom stack (irrespective of signature)

22 views
Skip to first unread message

Frederick Virchanza Gotham

unread,
Jul 14, 2023, 8:12:54 AM
to

My main language is C/C++ and so I'm used to seeing function signatures such as the following:

void Func(void);
int Func(void);
void Func(double, int, char*);

which would be written as follows in Visual Basic:

Private Sub Func()
Private Function Func() As Integer
Private Sub Func(ByVal arg1 As Double, ByVal arg2 As Integer, ByVal arg3 As String)

A few months ago, I devised a way in x86_64 assembler and C++ of dynamically allocating a separate stack to use for a single invocation of a function, and in the end I even got it working with exception handling. But it only worked for functions that don't take an argument and don't return a value.

Since then, I've tried to come up with a universal solution that will work for all function signatures (even those with supernumerary parameters, or that return a very big struct by value), and I've got it working for System V x86_64.

Here's what I currently have, the assembler is inline among the C++ code:

https://godbolt.org/z/W4x7vbE7q

And here it is copy-pasted:

#include <cassert> // assert
#include <cstddef> // size_t
#include <memory> // unique_ptr
#include <utility> // forward

// Per-thread scratch slots shared between the C++ wrappers and the assembler
// routines. The assembler addresses each one as fs:<name>@tpoff, so the
// names must stay exactly as they are.
thread_local char *p_original;        // stack pointer saved by the trampoline so it can be restored afterwards
thread_local char *p_replacement;     // address in the replacement stack where the copied bytes start
thread_local void (*f)(void);         // function the trampoline jumps to
thread_local char *bottom_of_stack;   // address in the old stack where copying starts (the copy runs downwards)

// Entry points implemented in file-scope assembler.
extern "C" void Assembler_set_bottom_of_stack(void) noexcept;
extern "C" void Assembler_set_stack_pointer_and_invoke(void) noexcept;

// Stores (stack pointer inside this routine + 16) in the thread-local
// 'bottom_of_stack'. The trampoline later copies the old stack downwards
// starting from that address. Only r10 is modified.
__asm("Assembler_set_bottom_of_stack:                          \n"
      ".intel_syntax noprefix                                  \n"
      "    lea   r10, [rsp + 16]                               \n"   // +8 skips our own return address, +8 more as a safety margin
      "    mov   QWORD PTR fs:bottom_of_stack@tpoff, r10       \n"
      "    ret                                                 \n"
      ".att_syntax");

template<typename R, typename... Params>
class Invoker {

Invoker(char *const arg_p, R(*const arg_f)(Params...)) noexcept
{
p_replacement = arg_p; // sets a thread_local variable
f = reinterpret_cast<void (*)(void)>(arg_f); // sets a thread_local variable
}

public:
R operator()(Params... args) // This could be static function but I like operator()
{
Assembler_set_bottom_of_stack();
R (*const funcptr)(Params...) = reinterpret_cast<R(*)(Params...)>(Assembler_set_stack_pointer_and_invoke);
return funcptr( std::forward<Params>(args)... );
}

friend class Stacker;
};

/* Supplies the memory for a replacement stack and hands out Invoker
   objects bound to it. The memory is either allocated here (and released
   by the destructor through 'mystack') or borrowed from the caller. */
class Stacker {
    char *p;                            // 16 bytes below the end of the stack memory
    std::unique_ptr<char[]> mystack;    // owning storage; stays empty when the memory is caller-supplied

public:

    // Allocates 'len' bytes (at least 128) for the stack.
    Stacker(std::size_t const len) noexcept(false) // might throw bad_alloc
    {
        assert( len >= 128u );

        mystack = std::unique_ptr<char[]>( new char[len] );
        p = &mystack[len - 16u];
    }

    // Uses the caller's buffer 'arg' of 'len' bytes (at least 128).
    // The buffer is not owned and is never freed here.
    Stacker(char *const arg, std::size_t const len) noexcept
    {
        assert( nullptr != arg );
        assert( len >= 128u );

        p = &arg[len - 16u];
    }

    // Binds the function 'arg' to this stack and returns the callable
    // that will run it there.
    template<typename R, typename... Params>
    Invoker<R,Params...> operator()( R(*const arg)(Params...) )
    {
        char *const stack_top = this->p;
        return Invoker<R,Params...>(stack_top, arg);
    }
};

/* Trampoline written in x86_64 assembler for the System V calling
   convention. It is entered through a function pointer that has the
   target's signature, so every argument register (and rax, whose low byte
   carries the vector-register count for variadic callees) is live and
   must reach the target untouched. Only r10 and r11 are clobbered; r15 is
   borrowed as a temporary and restored before the jump.

   Inputs (thread-local):
     bottom_of_stack  address in the old stack where copying starts
     p_replacement    address in the new stack that receives those bytes
     f                function to run on the new stack

   The copy runs downwards in 8-byte steps, from bottom_of_stack to just
   above this routine's own return-address slot, so stack-passed arguments
   travel with the call. The loop labels use the '.L' prefix, which keeps
   them assembler-local so they cannot collide with other symbols. */

__asm("Assembler_set_stack_pointer_and_invoke:\n"
      ".intel_syntax noprefix                                  \n"
      // Step 1: Save the original stack pointer
      "    mov   QWORD PTR fs:p_original@tpoff, rsp            \n"
      // Step 2: Set up the copy
      "    push  r15                                           \n"   // save to restore later
      "    mov   r11, QWORD PTR fs:bottom_of_stack@tpoff       \n"   // r11 = read cursor, starts at bottom of old stack
      "    mov   r10, QWORD PTR fs:p_replacement@tpoff         \n"   // r10 = where that first qword goes in the new stack
      "    sub   r10, r11                                      \n"   // r10 = displacement (new stack - old stack)
      // Right now: R11 is the read cursor in the old stack
      //            R10 is the displacement from old stack to new stack
      //            [rsp + 8] is our return-address slot, i.e. the top of the old stack
      // We want to do:
      //     while ( r11 > rsp + 8 ) { *(r11 + r10) = *r11;  r11 -= 8; }
      // Step 3: Copy the old stack to the new stack (it might contain supernumerary arguments or a big return struct)
      "    jmp   .LStackCopy_Cond                              \n"   // Jump to condition of 'while' loop
      ".LStackCopy_Loop:                                       \n"   // ----<----<----<----<----
      "    mov   r15, QWORD PTR [r11]                          \n"   //                         |
      "    mov   QWORD PTR [r11 + r10], r15                    \n"   //                         ^
      "    sub   r11, 8                                        \n"   //                         |  Loop
      ".LStackCopy_Cond:                                       \n"   //                         ^
      "    lea   r15, [rsp + 8]                                \n"   //                         |  (top of old stack)
      "    cmp   r11, r15                                      \n"   //                         |
      "    ja    .LStackCopy_Loop                              \n"   // ---->---->---->---->----
      "    pop   r15                                           \n"   // restore original value
      // Step 4: Change the stack pointer to the new stack =============================================
      "    add   rsp, r10                                      \n"   // ================================================= new stack
      // Step 5: Make the target return to the instruction after the 'jmp'
      "    lea   r10, [rip + Label_Jump_Back]                  \n"   // rip-relative, so it also links as position-independent code
      "    mov   QWORD PTR [rsp], r10                          \n"   // overwrite the return-address slot on the new stack
      // Step 6: Invoke the function
      "    jmp   QWORD PTR fs:f@tpoff                          \n"   // --- Invoke the function!
      "Label_Jump_Back:                                        \n"
      // Note: The label has already been popped off the stack by the callee
      // Step 7: Restore the original stack pointer
      "    mov   rsp, QWORD PTR fs:p_original@tpoff            \n"
      // Step 8: Return to the original caller
      "    ret                                                 \n"
      ".att_syntax");

// =================== And now the test code ===============================================

#include <iostream> // cout, endl
using std::cout, std::endl;

// Test aggregate: nine doubles and nine ints, i.e. far more than 16 bytes,
// so it exercises the "very big struct returned by value" case.
struct VeryBigStruct {
    double a[3];
    int b[3];
    double c[3];
    int d[3];
    double e[3];
    int f[3];
};

// Test function with ten int parameters and a large by-value result.
// Returns a VeryBigStruct whose f[2] holds the sum of all ten arguments;
// every other element is zero.
VeryBigStruct Func2(int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8, int a9, int a10)
{
    VeryBigStruct vbs{};   // value-initialise so no indeterminate elements are copied out
    vbs.f[2] = a1+a2+a3+a4+a5+a6+a7+a8+a9+a10;
    return vbs;
}

// Test driver: prints the result of Func2 called normally, then the result
// of the same call routed through a freshly allocated replacement stack.
int main()
{
    cout << "first line in main\n";

    // Baseline: ordinary call on the thread's own stack.
    cout << "Retval: ";
    cout << Func2(1,2,3,4,5,6,7,8,9,10).f[2] << endl;

    // Same call on a 1048576000-byte stack. The Stacker temporary (and the
    // memory it owns) lives only until the end of this statement.
    cout << "Retval: ";
    cout << Stacker(1048576000u)(Func2)(1,2,3,4,5,6,7,8,9,10).f[2] << endl;

    cout << "last line in main\n";
}


Frederick Virchanza Gotham

unread,
Jul 15, 2023, 9:38:55 AM
to
On Friday, July 14, 2023 at 1:12:54 PM UTC+1, Frederick Virchanza Gotham wrote:
>
> A few months ago, I devised a way in x86_64 assembler and C++ of dynamically allocating a separate stack to use for a single invocation of a function, and in the end I even got it working with exception handling. But it only worked for functions that don't take an argument and don't return a value.
>
> Since then, I've tried to come up with a universal solution that will work for all function signatures (even those with supernumerary parameters, or that return a very big struct by value), and I've got it working for System V x86_64.


I'm trying to get it working with exception handling, but it's segfaulting. I've tried copying the entire stack but it's still segfaulting inside libgcc inside the function "uw_update_context_1" when it tries to copy the context struct. Anybody know what's wrong with the following?

https://godbolt.org/z/vv7hTGWdr

And here it is copy-pasted:

#include <cassert> // assert
#include <cstddef> // size_t
#include <climits> // ULONG_LONG_MAX
#include <cstdlib> // strtoull
#include <cstring> // strstr
#include <cstdint> // UINTPTR_MAX
#include <memory> // unique_ptr
#include <utility> // forward
#include <exception> // exception_ptr, current_exception
#include <type_traits> // is_rvalue_reference, is_trivially_destructible

#include <iostream> // cout, endl ============================= REMOVE THIS
using std::cerr, std::cout, std::endl;

#include <unistd.h> // lseek, read, close
#include <fcntl.h> // open

/* Returns the end address (highest address, exclusive) of the mapping that
   /proc/thread-self/maps labels "[stack]", or nullptr if the file cannot
   be opened, rewound or read, or if no such line is found.

   The descriptor is opened once per thread, rewound on every call, and
   closed when the thread exits. The whole file is scanned line by line,
   so the "[stack]" line is found wherever it sits in the listing.

   NOTE(review): per proc(5) the "[stack]" label marks the main thread's
   stack; confirm this is what is wanted when called from other threads. */
char *GetStackBottom(void) noexcept
{
    // Closes the per-thread descriptor at thread exit.
    struct MapsFile {
        int fd = -1;
        ~MapsFile() { if ( fd >= 0 ) ::close(fd); }
    };

    static thread_local MapsFile maps;

    if ( maps.fd < 0 )
    {
        maps.fd = ::open("/proc/thread-self/maps", O_RDONLY);

        if ( maps.fd < 0 ) return nullptr;
    }

    if ( 0 != ::lseek(maps.fd, 0, SEEK_SET) ) return nullptr;

    static constexpr char tag[] = "[stack]";
    constexpr std::size_t tag_len = sizeof tag - 1u;

    char buf[8192u];               // local, so concurrent callers never share it
    std::size_t used = 0u;         // bytes of 'buf' currently holding unparsed text
    bool truncated_line = false;   // true while inside a line whose head was discarded

    for (;;)
    {
        ssize_t const got = ::read(maps.fd, buf + used, sizeof buf - used);
        if ( got <= 0 ) return nullptr;   // error, or end of file without a match
        used += static_cast<std::size_t>(got);

        char *line = buf;
        char *const end = buf + used;

        for (;;)
        {
            char *const nl = static_cast<char*>( std::memchr(line, '\n', static_cast<std::size_t>(end - line)) );
            if ( nullptr == nl ) break;   // the remainder is an incomplete line

            std::size_t const line_len = static_cast<std::size_t>(nl - line);

            if ( !truncated_line && line_len >= tag_len && 0 == std::memcmp(nl - tag_len, tag, tag_len) )
            {
                *nl = '\0';   // bound the searches below to this line

                // A line starts with "<start>-<end> "; we want <end>.
                char const *const dash = std::strchr(line, '-');
                if ( nullptr == dash ) return nullptr;

                static_assert( ULLONG_MAX >= UINTPTR_MAX );
                unsigned long long const addr = std::strtoull(dash + 1, nullptr, 16);   // stops at the space after <end>

                return reinterpret_cast<char*>(addr);
            }

            truncated_line = false;
            line = nl + 1;
        }

        // Carry the incomplete last line over to the next read.
        used = static_cast<std::size_t>(end - line);

        if ( used == sizeof buf )
        {
            // One line fills the whole buffer: drop it, and remember to
            // ignore its tail when the newline finally arrives.
            used = 0u;
            truncated_line = true;
        }
        else
        {
            std::memmove(buf, line, used);
        }
    }
}

// Per-thread scratch slots shared between the C++ wrappers and the assembler
// routines. The assembler addresses them as fs:<name>@tpoff, so the names
// must stay exactly as they are.
thread_local char *p_original;        // stack pointer the trampoline restores before returning
thread_local char *p_replacement;     // replacement-stack address handed over by Invoker
thread_local void (*f)(void);         // function the trampoline jumps to
thread_local void (*g)(void);         // the user's function, called from inside the exception wrapper
thread_local char *bottom_of_stack;   // old-stack address handed over by Invoker
thread_local std::exception_ptr e;    // exception captured on the replacement stack, if any

// Entry points implemented in file-scope assembler.
extern "C" void Assembler_set_bottom_of_stack(void) noexcept;
extern "C" void Assembler_set_stack_pointer_and_invoke(void) noexcept;

// Stores (stack pointer inside this routine + 16) in the thread-local
// 'bottom_of_stack'. Only r10 is modified.
__asm("Assembler_set_bottom_of_stack:                          \n"
      ".intel_syntax noprefix                                  \n"
      "    lea   r10, [rsp + 16]                               \n"   // +8 skips our own return address, +8 more as a safety margin
      "    mov   QWORD PTR fs:bottom_of_stack@tpoff, r10       \n"
      "    ret                                                 \n"
      ".att_syntax");

// Produces a value of type remove_cvref_t<T> without running any of its
// constructors, for use where a function must return *something* but the
// value is never meant to be inspected. Only accepts trivially
// destructible types, so the result needs no cleanup.
//
// How: an empty lambda is converted to a plain 'void(*)(void)' and that
// pointer is reinterpreted as a function returning the requested type.
// Calling it executes an empty function body; whatever ends up in the
// return location is the "value".
//
// NOTE(review): calling a function through a pointer of a different
// function type is undefined behaviour in ISO C++; this depends on how the
// compiler and calling convention happen to behave - confirm per toolchain.
template<typename T> requires std::is_trivially_destructible_v<std::remove_cvref_t<T> >
std::remove_cvref_t<T> dummy_prvalue(void) noexcept
{
typedef std::remove_cvref_t<T> TT;
// Captureless lambda with an empty body, converted to a function pointer.
void (*const tmp)(void) = [](){};
// Same address, viewed as a function that returns TT.
TT (*const funcptr)(void) = reinterpret_cast<TT(*)(void)>(tmp);
return funcptr(); // guaranteed elision of move/copy operations here
}

template<typename R, typename... Params>
class Invoker {

static R exception_capable(Params... args) noexcept
{
cerr << "Entered exception_capable\n";

R (*const funcptr)(Params...) = reinterpret_cast<R(*)(Params...)>(g);

try
{
cerr << "Entered try-block\n";
return funcptr( std::forward<Params>(args)... );
}
catch (...)
{
cerr << "Exception thrown!\n";
e = std::current_exception();
}

return dummy_prvalue<R>();
}

Invoker(char *const arg_p, R(*const arg_f)(Params...)) noexcept
{
p_replacement = arg_p; // sets a thread_local variable
g = reinterpret_cast<void (*)(void)>(arg_f); // sets a thread_local variable
f = reinterpret_cast<void (*)(void)>(exception_capable); // sets a thread_local variable
}

public:
R operator()(Params... args) noexcept(false) // This could be static function but I like operator()
{
//Assembler_set_bottom_of_stack();
//cout << "\nBottom of stack: " << (void*)bottom_of_stack << " (my own assembler)\n";
bottom_of_stack = GetStackBottom() - 8u;
//cout << "Bottom of stack: " << (void*)bottom_of_stack << " (thread-self/maps)\n";
e = nullptr;
R (*const funcptr)(Params...) = reinterpret_cast<R(*)(Params...)>(Assembler_set_stack_pointer_and_invoke);
R retval = funcptr( std::forward<Params>(args)... );
if ( nullptr != e )
{
cerr << "About to rethrow!\n";
std::rethrow_exception(e);
}
return retval;
}

friend class Stacker;
};

class Stacker {
char *p;
std::unique_ptr<char[]> mystack;

public:

Stacker(std::size_t const len) noexcept(false) // might throw bad_alloc
{
assert( len >= 128u );

mystack.reset( new char[len] );
p = mystack.get() + len - 16u;
}

Stacker(char *const arg, std::size_t const len) noexcept
{
assert( nullptr != arg );
assert( len >= 128u );

p = arg + len - 16u;
}

template<typename R, typename... Params>
Invoker<R,Params...> operator()( R(*const arg)(Params...) ) noexcept
// Step 6: Invoke the function
" jmp QWORD PTR fs:f@tpoff \n" // --- Invoke the function!
"Label_Jump_Back: \n"
// Note: The label has already been popped off the stack by the callee
// Step 7: Restore the original stack pointer
" mov rsp, QWORD PTR fs:p_original@tpoff \n"
// Step 8: Jump back to the original address
" ret \n"
".att_syntax");

// =================== And now the test code ===============================================

#include <iostream> // cout, endl
using std::cout, std::endl;

// Test aggregate: nine doubles and nine ints, i.e. far more than 16 bytes,
// so it exercises the "very big struct returned by value" case.
struct VeryBigStruct {
    double a[3];
    int    b[3];
    double c[3];
    int    d[3];
    double e[3];
    int    f[3];   // f[2] is the element the test code writes and prints
};

// Test function for the exception path: logs its entry, sums its ten
// arguments into f[2] of a local struct, then always throws int 3.
VeryBigStruct Func2(int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8, int a9, int a10)
{
    cerr << "Entered Func2\n";

    VeryBigStruct result;
    int const total = a1+a2+a3+a4+a5+a6+a7+a8+a9+a10;
    result.f[2] = total;

    throw 3;

    return result;   // never reached
}

int main(void)
{
cout << "first line in main\n";

#if 1
try
{
cout << "Retval: " << Func2(1,2,7,4,5,6,7,8,9,10).f[2] << endl;
}
catch (int const n)
{
cout << "Caught an int: " << n << endl;
}
#endif

try
{
cout << "Retval: " << Stacker(1048576000u)(Func2)(1,2,7,4,5,6,7,8,9,10).f[2] << endl;
}
catch (int const n)
{
cout << "Caught an int: " << n << endl;
0 new messages